From 4f88e1a9be89a257fd6ed3045703db6e900027ee Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 8 Jul 2019 22:14:42 +0200 Subject: Adding upstream version 1.16.0. Signed-off-by: Daniel Baumann --- .gitattributes | 2 + .github/CODEOWNERS | 56 +- .github/ISSUE_TEMPLATE.md | 5 + .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/ISSUE_TEMPLATE/feature_request.md | 2 +- .github/ISSUE_TEMPLATE/question.md | 1 + .github/stale.yml | 9 +- .gitignore | 10 + .remarkrc.js | 121 ++ .travis.yml | 409 +++++- .travis/README.md | 46 + .travis/labeler.sh | 15 - .../build_package_in_container.sh | 82 ++ .travis/package_management/common.py | 46 + .../configure_deb_lxc_environment.py | 65 + .../configure_rpm_lxc_environment.py | 159 +++ .travis/package_management/create_lxc_for_build.sh | 101 ++ .travis/package_management/functions.sh | 33 + .../package_management/package_cloud_wrapper.sh | 48 + .travis/package_management/prepare_packages.sh | 56 + .../package_management/trigger_deb_lxc_build.py | 51 + .../package_management/trigger_rpm_lxc_build.py | 55 + .travis/package_management/yank_stale_rpm.sh | 35 + CHANGELOG.md | 253 +++- CMakeLists.txt | 92 +- CONTRIBUTING.md | 12 + CONTRIBUTORS.md | 1 + Makefile.am | 56 +- README.md | 148 ++- SECURITY.md | 43 + backends/README.md | 12 +- backends/aws_kinesis/README.md | 6 +- backends/backends.c | 522 ++++++-- backends/backends.h | 18 + backends/opentsdb/README.md | 26 + backends/opentsdb/opentsdb.c | 115 ++ backends/opentsdb/opentsdb.h | 23 + backends/prometheus/Makefile.am | 4 + backends/prometheus/backend_prometheus.c | 198 ++- backends/prometheus/backend_prometheus.h | 15 + backends/prometheus/remote_write/Makefile.am | 14 + backends/prometheus/remote_write/README.md | 30 + backends/prometheus/remote_write/remote_write.cc | 117 ++ backends/prometheus/remote_write/remote_write.h | 30 + .../prometheus/remote_write/remote_write.proto | 29 + collectors/Makefile.am | 1 + collectors/README.md | 1 + 
collectors/apps.plugin/apps_groups.conf | 7 +- collectors/cgroups.plugin/README.md | 9 + collectors/cgroups.plugin/sys_fs_cgroup.c | 11 +- .../charts.d.plugin/apcupsd/apcupsd.chart.sh | 4 +- collectors/charts.d.plugin/charts.d.conf | 2 + collectors/charts.d.plugin/charts.d.plugin.in | 2 +- collectors/diskspace.plugin/README.md | 3 + collectors/freeipmi.plugin/freeipmi_plugin.c | 52 +- collectors/nfacct.plugin/plugin_nfacct.c | 4 +- collectors/perf.plugin/Makefile.am | 8 + collectors/perf.plugin/README.md | 72 ++ collectors/perf.plugin/perf_plugin.c | 1348 ++++++++++++++++++++ collectors/plugins.d/README.md | 1 + collectors/plugins.d/plugins_d.c | 7 +- collectors/proc.plugin/README.md | 2 +- collectors/proc.plugin/proc_mdstat.c | 8 +- collectors/python.d.plugin/Makefile.am | 1 + collectors/python.d.plugin/README.md | 2 +- .../adaptec_raid/adaptec_raid.chart.py | 4 +- .../dns_query_time/dns_query_time.chart.py | 19 +- .../elasticsearch/elasticsearch.chart.py | 24 +- collectors/python.d.plugin/monit/monit.chart.py | 316 +++-- collectors/python.d.plugin/mysql/README.md | 22 +- collectors/python.d.plugin/mysql/mysql.chart.py | 134 +- collectors/python.d.plugin/python.d.conf | 5 +- .../bases/FrameworkServices/UrlService.py | 23 + collectors/python.d.plugin/riakkv/Makefile.inc | 13 + collectors/python.d.plugin/riakkv/README.md | 110 ++ collectors/python.d.plugin/riakkv/riakkv.chart.py | 315 +++++ collectors/python.d.plugin/riakkv/riakkv.conf | 68 + collectors/python.d.plugin/smartd_log/README.md | 6 +- collectors/python.d.plugin/tomcat/tomcat.chart.py | 41 +- .../python.d.plugin/varnish/varnish.chart.py | 58 +- .../python.d.plugin/web_log/web_log.chart.py | 5 +- collectors/tc.plugin/README.md | 1 + configs.signatures | 1 + configure.ac | 185 ++- daemon/anonymous-statistics.sh.in | 5 +- daemon/common.h | 1 + daemon/config/README.md | 10 +- daemon/global_statistics.c | 76 +- daemon/main.c | 81 +- database/README.md | 10 +- database/engine/README.md | 40 +- 
database/engine/datafile.c | 224 +++- database/engine/datafile.h | 7 +- database/engine/journalfile.c | 117 +- database/engine/journalfile.h | 2 + database/engine/pagecache.c | 317 +++-- database/engine/pagecache.h | 89 +- database/engine/rrdengine.c | 234 ++-- database/engine/rrdengine.h | 22 +- database/engine/rrdengineapi.c | 277 ++-- database/engine/rrdengineapi.h | 12 +- database/engine/rrdenginelib.c | 97 +- database/engine/rrdenginelib.h | 21 +- database/engine/rrdenglocking.c | 233 ++++ database/engine/rrdenglocking.h | 17 + database/rrd.c | 5 +- database/rrd.h | 32 +- database/rrdcalc.c | 108 +- database/rrdcalc.h | 25 +- database/rrdcalctemplate.c | 2 +- database/rrdcalctemplate.h | 6 + database/rrddim.c | 6 +- database/rrdhost.c | 106 +- database/rrdset.c | 33 +- docs/Add-more-charts-to-netdata.md | 181 +-- docs/Charts.md | 4 +- docs/Demo-Sites.md | 6 +- docs/Donations-netdata-has-received.md | 6 +- docs/GettingStarted.md | 38 +- .../Netdata-Security-and-Disclosure-Information.md | 39 - docs/Performance.md | 52 +- docs/Running-behind-apache.md | 54 +- docs/Running-behind-caddy.md | 10 +- docs/Running-behind-lighttpd.md | 12 +- docs/Running-behind-nginx.md | 36 +- docs/Third-Party-Plugins.md | 2 +- docs/a-github-star-is-important.md | 6 +- docs/anonymous-statistics.md | 15 +- docs/configuration-guide.md | 34 +- docs/generator/buildhtml.sh | 8 +- docs/generator/buildyaml.sh | 7 +- docs/generator/checklinks.sh | 6 +- docs/generator/custom/img/favicon.ico | Bin 1150 -> 349 bytes docs/high-performance-netdata.md | 20 +- docs/netdata-for-IoT.md | 14 +- docs/netdata-security.md | 10 +- docs/privacy-policy.md | 27 +- docs/why-netdata/1s-granularity.md | 4 +- docs/why-netdata/immediate-results.md | 2 +- docs/why-netdata/meaningful-presentation.md | 4 +- docs/why-netdata/unlimited-metrics.md | 4 +- health/Makefile.am | 6 + health/README.md | 22 +- health/health.c | 181 ++- health/health.d/dbengine.conf | 26 + health/health.d/disks.conf | 4 +- 
health/health.d/dnsmasq_dhcp.conf | 12 + health/health.d/pihole.conf | 67 + health/health.d/processes.conf | 27 + health/health.d/ram.conf | 2 +- health/health.d/riakkv.conf | 80 ++ health/health.d/wmi.conf | 130 ++ health/health.d/x509check.conf | 14 + health/health.h | 46 +- health/health_config.c | 138 +- health/health_json.c | 4 + health/health_log.c | 57 +- health/notifications/README.md | 3 + health/notifications/alarm-notify.sh.in | 9 +- health/notifications/custom/README.md | 84 +- health/notifications/email/README.md | 2 + libnetdata/Makefile.am | 2 + libnetdata/config/appconfig.c | 83 ++ libnetdata/config/appconfig.h | 3 + libnetdata/health/Makefile.am | 8 + libnetdata/health/health.c | 170 +++ libnetdata/health/health.h | 55 + libnetdata/json/Makefile.am | 9 + libnetdata/json/README.md | 5 + libnetdata/json/jsmn.c | 326 +++++ libnetdata/json/jsmn.h | 75 ++ libnetdata/json/json.c | 546 ++++++++ libnetdata/json/json.h | 72 ++ libnetdata/libnetdata.h | 5 + libnetdata/socket/security.c | 277 ++++ libnetdata/socket/security.h | 47 + libnetdata/socket/socket.c | 59 +- libnetdata/socket/socket.h | 5 + netdata-installer.sh | 90 +- netdata.spec.in | 365 +++++- package.json | 54 +- packaging/docker/README.md | 26 +- packaging/docker/publish.sh | 6 + packaging/docker/run.sh | 51 +- packaging/go.d.checksums | 32 +- packaging/installer/README.md | 37 +- packaging/installer/functions.sh | 43 +- packaging/installer/kickstart-static64.sh | 2 +- packaging/installer/kickstart.sh | 10 +- packaging/installer/netdata-uninstaller.sh | 2 +- packaging/installer/netdata-updater.sh | 4 +- packaging/makeself/install-or-update.sh | 2 +- packaging/version | 2 +- registry/README.md | 37 +- registry/registry_init.c | 1 + streaming/README.md | 65 +- streaming/rrdpush.c | 121 +- streaming/stream.conf | 16 +- system/netdata-lsb.in | 16 +- tests/backends/prometheus-avg-oldunits.txt | 58 +- tests/backends/prometheus-avg.txt | 58 +- tests/backends/prometheus-raw.txt | 66 +- 
tests/backends/prometheus.bats | 2 +- tests/health_mgmtapi/README.md | 5 +- .../expected_list/ALARM_CPU_IOWAIT-list.json | 1 + .../expected_list/ALARM_CPU_USAGE-list.json | 1 + .../expected_list/CONTEXT_SYSTEM_CPU-list.json | 1 + .../health_mgmtapi/expected_list/DISABLE-list.json | 1 + .../expected_list/DISABLE_ALL-list.json | 1 + .../expected_list/DISABLE_ALL_ERROR-list.json | 1 + .../expected_list/DISABLE_SYSTEM_LOAD-list.json | 1 + .../expected_list/FAMILIES_LOAD-list.json | 1 + tests/health_mgmtapi/expected_list/HOSTS-list.json | 1 + tests/health_mgmtapi/expected_list/RESET-list.json | 1 + .../health_mgmtapi/expected_list/SILENCE-list.json | 1 + .../expected_list/SILENCE_2-list.json | 1 + .../expected_list/SILENCE_3-list.json | 1 + .../SILENCE_ALARM_CPU_USAGE-list.json | 1 + .../SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER-list.json | 1 + .../expected_list/SILENCE_ALL-list.json | 1 + tests/health_mgmtapi/health-cmdapi-test.sh.in | 176 +-- tests/health_mgmtapi/python-example.conf | 16 - tests/installer/slack.sh | 7 + tests/updater_checks.bats | 1 - tests/updater_checks.sh | 70 +- web/README.md | 2 +- web/api/health/README.md | 49 + web/api/health/health_cmdapi.c | 172 ++- web/api/health/health_cmdapi.h | 2 +- web/api/netdata-swagger.json | 110 +- web/api/netdata-swagger.yaml | 77 +- web/gui/Makefile.am | 7 +- web/gui/console.html | 72 ++ web/gui/console/index.html | 72 -- web/gui/dashboard_info.js | 37 +- web/gui/demosites.html | 1 + web/gui/favicon.ico | Bin 34494 -> 1150 bytes web/gui/images/android-icon-144x144.png | Bin 5534 -> 2721 bytes web/gui/images/android-icon-192x192.png | Bin 6680 -> 3923 bytes web/gui/images/android-icon-36x36.png | Bin 1668 -> 539 bytes web/gui/images/android-icon-48x48.png | Bin 1932 -> 762 bytes web/gui/images/android-icon-72x72.png | Bin 2716 -> 1153 bytes web/gui/images/android-icon-96x96.png | Bin 3500 -> 1747 bytes web/gui/images/apple-icon-114x114.png | Bin 4248 -> 3651 bytes web/gui/images/apple-icon-120x120.png | Bin 4471 -> 2315 
bytes web/gui/images/apple-icon-144x144.png | Bin 5534 -> 4001 bytes web/gui/images/apple-icon-152x152.png | Bin 5883 -> 5026 bytes web/gui/images/apple-icon-180x180.png | Bin 7297 -> 3645 bytes web/gui/images/apple-icon-57x57.png | Bin 2212 -> 1250 bytes web/gui/images/apple-icon-60x60.png | Bin 2246 -> 1052 bytes web/gui/images/apple-icon-72x72.png | Bin 2716 -> 1427 bytes web/gui/images/apple-icon-76x76.png | Bin 2751 -> 1711 bytes web/gui/images/apple-icon-precomposed.png | Bin 7254 -> 3926 bytes web/gui/images/apple-icon.png | Bin 7254 -> 3926 bytes web/gui/images/banner-icon-144x144.png | Bin 5534 -> 2724 bytes web/gui/images/favicon-128.png | Bin 0 -> 2436 bytes web/gui/images/favicon-16x16.png | Bin 1096 -> 285 bytes web/gui/images/favicon-196x196.png | Bin 0 -> 10025 bytes web/gui/images/favicon-32x32.png | Bin 1575 -> 454 bytes web/gui/images/favicon-96x96.png | Bin 3500 -> 1925 bytes web/gui/images/favicon.ico | Bin 1150 -> 1150 bytes web/gui/images/ms-icon-144x144.png | Bin 5534 -> 4001 bytes web/gui/images/ms-icon-150x150.png | Bin 5772 -> 2867 bytes web/gui/images/ms-icon-310x150.png | Bin 0 -> 3632 bytes web/gui/images/ms-icon-310x310.png | Bin 16102 -> 7215 bytes web/gui/images/ms-icon-36x36.png | Bin 0 -> 536 bytes web/gui/images/ms-icon-70x70.png | Bin 2523 -> 2436 bytes web/gui/images/netdata-logomark.svg | 11 +- web/gui/index.html | 7 +- web/gui/main.css | 1 + web/gui/main.js | 17 +- web/gui/static/static/img/netdata-logomark.svg | 3 - web/gui/tv.html | 14 +- web/server/README.md | 71 +- web/server/static/static-threaded.c | 68 +- web/server/web_client.c | 182 ++- web/server/web_client.h | 4 + web/server/web_client_cache.c | 38 + web/server/web_server.c | 2 - 279 files changed, 12038 insertions(+), 1983 deletions(-) create mode 100644 .gitattributes create mode 100644 .remarkrc.js create mode 100755 .travis/package_management/build_package_in_container.sh create mode 100755 .travis/package_management/common.py create mode 100755 
.travis/package_management/configure_deb_lxc_environment.py create mode 100755 .travis/package_management/configure_rpm_lxc_environment.py create mode 100755 .travis/package_management/create_lxc_for_build.sh create mode 100644 .travis/package_management/functions.sh create mode 100755 .travis/package_management/package_cloud_wrapper.sh create mode 100755 .travis/package_management/prepare_packages.sh create mode 100755 .travis/package_management/trigger_deb_lxc_build.py create mode 100755 .travis/package_management/trigger_rpm_lxc_build.py create mode 100755 .travis/package_management/yank_stale_rpm.sh create mode 100644 SECURITY.md create mode 100644 backends/opentsdb/README.md create mode 100644 backends/prometheus/remote_write/Makefile.am create mode 100644 backends/prometheus/remote_write/README.md create mode 100644 backends/prometheus/remote_write/remote_write.cc create mode 100644 backends/prometheus/remote_write/remote_write.h create mode 100644 backends/prometheus/remote_write/remote_write.proto create mode 100644 collectors/perf.plugin/Makefile.am create mode 100644 collectors/perf.plugin/README.md create mode 100644 collectors/perf.plugin/perf_plugin.c create mode 100644 collectors/python.d.plugin/riakkv/Makefile.inc create mode 100644 collectors/python.d.plugin/riakkv/README.md create mode 100644 collectors/python.d.plugin/riakkv/riakkv.chart.py create mode 100644 collectors/python.d.plugin/riakkv/riakkv.conf create mode 100644 database/engine/rrdenglocking.c create mode 100644 database/engine/rrdenglocking.h delete mode 100644 docs/Netdata-Security-and-Disclosure-Information.md create mode 100644 health/health.d/dbengine.conf create mode 100644 health/health.d/dnsmasq_dhcp.conf create mode 100644 health/health.d/pihole.conf create mode 100644 health/health.d/processes.conf create mode 100644 health/health.d/riakkv.conf create mode 100644 health/health.d/wmi.conf create mode 100644 libnetdata/health/Makefile.am create mode 100644 
libnetdata/health/health.c create mode 100644 libnetdata/health/health.h create mode 100644 libnetdata/json/Makefile.am create mode 100644 libnetdata/json/README.md create mode 100644 libnetdata/json/jsmn.c create mode 100644 libnetdata/json/jsmn.h create mode 100644 libnetdata/json/json.c create mode 100644 libnetdata/json/json.h create mode 100644 libnetdata/socket/security.c create mode 100644 libnetdata/socket/security.h mode change 100644 => 100755 packaging/docker/run.sh create mode 100644 tests/health_mgmtapi/expected_list/ALARM_CPU_IOWAIT-list.json create mode 100644 tests/health_mgmtapi/expected_list/ALARM_CPU_USAGE-list.json create mode 100644 tests/health_mgmtapi/expected_list/CONTEXT_SYSTEM_CPU-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE_ALL-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE_ALL_ERROR-list.json create mode 100644 tests/health_mgmtapi/expected_list/DISABLE_SYSTEM_LOAD-list.json create mode 100644 tests/health_mgmtapi/expected_list/FAMILIES_LOAD-list.json create mode 100644 tests/health_mgmtapi/expected_list/HOSTS-list.json create mode 100644 tests/health_mgmtapi/expected_list/RESET-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_2-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_3-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER-list.json create mode 100644 tests/health_mgmtapi/expected_list/SILENCE_ALL-list.json delete mode 100644 tests/health_mgmtapi/python-example.conf create mode 100644 web/gui/console.html delete mode 100644 web/gui/console/index.html create mode 100644 web/gui/images/favicon-128.png create mode 100644 web/gui/images/favicon-196x196.png create 
mode 100644 web/gui/images/ms-icon-310x150.png create mode 100644 web/gui/images/ms-icon-36x36.png delete mode 100644 web/gui/static/static/img/netdata-logomark.svg diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..45ec5156a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.c diff=cpp +*.h diff=cpp diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 08765d5c1..7d5dfa8b4 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,40 +7,40 @@ # Ownership by directory structure .travis/ @paulkatsoulakis @cakrit .github/ @paulkatsoulakis @cakrit -backends/ @ktsaou @vlvkobal -backends/graphite/ @ktsaou @vlvkobal -backends/json/ @ktsaou @vlvkobal -backends/opentsdb/ @ktsaou @vlvkobal -backends/prometheus/ @ktsaou @vlvkobal @paulkatsoulakis +backends/ @thiagoftsm @vlvkobal +backends/graphite/ @thiagoftsm @vlvkobal +backends/json/ @thiagoftsm @vlvkobal +backends/opentsdb/ @thiagoftsm @vlvkobal +backends/prometheus/ @vlvkobal @paulkatsoulakis @thiagoftsm build/ @paulkatsoulakis @cakrit -collectors/ @ktsaou @vlvkobal @cakrit -collectors/charts.d.plugin/ @ktsaou @paulkatsoulakis @cakrit +collectors/ @vlvkobal @cakrit +collectors/charts.d.plugin/ @paulkatsoulakis @cakrit collectors/freebsd.plugin/ @vlvkobal @cakrit collectors/macos.plugin/ @vlvkobal @cakrit -collectors/node.d.plugin/ @ktsaou @gmosx @cakrit -collectors/node.d.plugin/fronius/ @ktsaou @gmosx @ccremer @cakrit -collectors/node.d.plugin/snmp/ @ktsaou @gmosx @cakrit -collectors/node.d.plugin/stiebeleltron/ @ktsaou @gmosx @ccremer @cakrit +collectors/node.d.plugin/ @gmosx @cakrit +collectors/node.d.plugin/fronius/ @ccremer @cakrit +collectors/node.d.plugin/snmp/ @gmosx @cakrit +collectors/node.d.plugin/stiebeleltron/ @ccremer @cakrit collectors/python.d.plugin/ @ilyam8 -collectors/cups.plugin/ @simonnagl @ktsaou @vlvkobal @cakrit -daemon/ @ktsaou @mfundul @cakrit -database/ @ktsaou @mfundul +collectors/cups.plugin/ @simonnagl @vlvkobal @cakrit +daemon/ @thiagoftsm 
@mfundul @cakrit +database/ @cakrit @mfundul docs/ @cakrit -health/ @ktsaou @cakrit -health/health.d/ @ktsaou @cakrit -health/notifications/ @ktsaou @Ferroin @cakrit -libnetdata/ @ktsaou @cakrit +health/ @thiagoftsm @cakrit +health/health.d/ @thiagoftsm @cakrit +health/notifications/ @Ferroin @cakrit +libnetdata/ @thiagofsm @cakrit packaging/ @paulkatsoulakis @cakrit -packaging/installer/ @ktsaou @paulkatsoulakis @cakrit -packaging/makeself/ @ktsaou @paulkatsoulakis @cakrit -registry/ @ktsaou @gmosx @cakrit -streaming/ @ktsaou @mfundul -web/ @ktsaou @cakrit -web/gui/ @ktsaou @gmosx @cakrit +packaging/installer/ @paulkatsoulakis @cakrit +packaging/makeself/ @paulkatsoulakis @cakrit +registry/ @gmosx @cakrit +streaming/ @cakrit @thiagoftsm +web/ @thiagoftsm @cakrit +web/gui/ @gmosx @cakrit # Ownership by filetype (overwrites ownership by directory) -*.md @ktsaou @cakrit -*.am @paulkatsoulakis @ktsaou +*.md @cakrit +*.am @paulkatsoulakis # Ownership of specific files .gitignore @paulkatsoulakis @cakrit @@ -52,10 +52,10 @@ web/gui/ @ktsaou @gmosx @cakrit .codeclimate.yml @paulkatsoulakis .codacy.yml @paulkatsoulakis netdata.spec.in @paulkatsoulakis -netdata-installer.sh @ktsaou @paulkatsoulakis @cakrit +netdata-installer.sh @paulkatsoulakis @cakrit netlify.toml @cakrit package.json @gmosx packaging/version @netdatabot -LICENSE.md @ktsaou +LICENSE.md @cakrit CHANGELOG.md @netdatabot diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4fe94ad65..bd939baba 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,3 +1,8 @@ +--- +about: General issue template +labels: "needs triage", "no changelog" +--- + len:%"PRIu32" time:%"PRIu64"->%"PRIu64" xt_offset:", - page_cache_descr->page, uuid_str, - page_cache_descr->page_length, - (uint64_t)page_cache_descr->start_time, - (uint64_t)page_cache_descr->end_time); - if (!page_cache_descr->extent) { - pos += snprintfz(str + pos, 512 - pos, "N/A"); + pg_cache_descr->page, uuid_str, + 
descr->page_length, + (uint64_t)descr->start_time, + (uint64_t)descr->end_time); + if (!descr->extent) { + pos += snprintfz(str + pos, BUFSIZE - pos, "N/A"); + } else { + pos += snprintfz(str + pos, BUFSIZE - pos, "%"PRIu64, descr->extent->offset); + } + + snprintfz(str + pos, BUFSIZE - pos, " flags:0x%2.2lX refcnt:%u\n\n", pg_cache_descr->flags, pg_cache_descr->refcnt); + debug(D_RRDENGINE, "%s", str); +} + +void print_page_descr(struct rrdeng_page_descr *descr) +{ + char uuid_str[UUID_STR_LEN]; + char str[BUFSIZE]; + int pos = 0; + + uuid_unparse_lower(*descr->id, uuid_str); + pos += snprintfz(str, BUFSIZE - pos, "id=%s\n" + "--->len:%"PRIu32" time:%"PRIu64"->%"PRIu64" xt_offset:", + uuid_str, + descr->page_length, + (uint64_t)descr->start_time, + (uint64_t)descr->end_time); + if (!descr->extent) { + pos += snprintfz(str + pos, BUFSIZE - pos, "N/A"); } else { - pos += snprintfz(str + pos, 512 - pos, "%"PRIu64, page_cache_descr->extent->offset); + pos += snprintfz(str + pos, BUFSIZE - pos, "%"PRIu64, descr->extent->offset); } - snprintfz(str + pos, 512 - pos, " flags:0x%2.2lX refcnt:%u\n\n", page_cache_descr->flags, page_cache_descr->refcnt); + snprintfz(str + pos, BUFSIZE - pos, "\n\n"); fputs(str, stderr); } @@ -51,6 +78,48 @@ int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size) return 0; } +/* + * Tries to open a file in direct I/O mode, falls back to buffered mode if not possible. + * Returns UV error number that is < 0 on failure. + * On success sets (*file) to be the uv_file that was opened. 
+ */ +int open_file_direct_io(char *path, int flags, uv_file *file) +{ + uv_fs_t req; + int fd, current_flags, direct; + + for (direct = 1 ; direct >= 0 ; --direct) { +#ifdef __APPLE__ + /* Apple OS does not support O_DIRECT */ + direct = 0; +#endif + current_flags = flags; + if (direct) { + current_flags |= O_DIRECT; + } + fd = uv_fs_open(NULL, &req, path, current_flags, S_IRUSR | S_IWUSR, NULL); + if (fd < 0) { + if ((direct) && (UV_EINVAL == fd)) { + error("File \"%s\" does not support direct I/O, falling back to buffered I/O.", path); + } else { + error("Failed to open file \"%s\".", path); + --direct; /* break the loop */ + } + } else { + assert(req.result >= 0); + *file = req.result; +#ifdef __APPLE__ + info("Disabling OS X caching for file \"%s\".", path); + fcntl(fd, F_NOCACHE, 1); +#endif + --direct; /* break the loop */ + } + uv_fs_req_cleanup(&req); + } + + return fd; +} + char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size) { struct page_cache *pg_cache; @@ -60,6 +129,7 @@ char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t si "metric_API_producers: %ld\n" "metric_API_consumers: %ld\n" "page_cache_total_pages: %ld\n" + "page_cache_descriptors: %ld\n" "page_cache_populated_pages: %ld\n" "page_cache_commited_pages: %ld\n" "page_cache_insertions: %ld\n" @@ -87,6 +157,7 @@ char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t si (long)ctx->stats.metric_API_producers, (long)ctx->stats.metric_API_consumers, (long)pg_cache->page_descriptors, + (long)ctx->stats.page_cache_descriptors, (long)pg_cache->populated_pages, (long)pg_cache->commited_page_index.nr_commited_pages, (long)ctx->stats.pg_cache_insertions, diff --git a/database/engine/rrdenginelib.h b/database/engine/rrdenginelib.h index bb6f072bf..36d414e89 100644 --- a/database/engine/rrdenginelib.h +++ b/database/engine/rrdenginelib.h @@ -6,11 +6,17 @@ #include "rrdengine.h" /* Forward declarations */ -struct 
rrdeng_page_cache_descr; +struct rrdeng_page_descr; #define STR_HELPER(x) #x #define STR(x) STR_HELPER(x) +#define BITS_PER_ULONG (sizeof(unsigned long) * 8) + +#ifndef UUID_STR_LEN +#define UUID_STR_LEN (37) +#endif + /* Taken from linux kernel */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -25,6 +31,15 @@ typedef uintptr_t rrdeng_stats_t; #define rrd_stat_atomic_add(p, n) do {(void) __sync_fetch_and_add(p, n);} while(0) #endif +#define RRDENG_PATH_MAX (4096) + +/* returns old *ptr value */ +static inline unsigned long ulong_compare_and_swap(volatile unsigned long *ptr, + unsigned long oldval, unsigned long newval) +{ + return __sync_val_compare_and_swap(ptr, oldval, newval); +} + #ifndef O_DIRECT /* Workaround for OS X */ #define O_DIRECT (0) @@ -77,8 +92,10 @@ static inline void crc32set(void *crcp, uLong crc) *(uint32_t *)crcp = crc; } -extern void print_page_cache_descr(struct rrdeng_page_cache_descr *page_cache_descr); +extern void print_page_cache_descr(struct rrdeng_page_descr *page_cache_descr); +extern void print_page_descr(struct rrdeng_page_descr *descr); extern int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size); +extern int open_file_direct_io(char *path, int flags, uv_file *file); extern char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size); #endif /* NETDATA_RRDENGINELIB_H */ \ No newline at end of file diff --git a/database/engine/rrdenglocking.c b/database/engine/rrdenglocking.c new file mode 100644 index 000000000..0eb9019b4 --- /dev/null +++ b/database/engine/rrdenglocking.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#include "rrdengine.h" + +struct page_cache_descr *rrdeng_create_pg_cache_descr(struct rrdengine_instance *ctx) +{ + struct page_cache_descr *pg_cache_descr; + + pg_cache_descr = mallocz(sizeof(*pg_cache_descr)); + rrd_stat_atomic_add(&ctx->stats.page_cache_descriptors, 1); + pg_cache_descr->page = NULL; + 
pg_cache_descr->flags = 0; + pg_cache_descr->prev = pg_cache_descr->next = NULL; + pg_cache_descr->refcnt = 0; + pg_cache_descr->waiters = 0; + assert(0 == uv_cond_init(&pg_cache_descr->cond)); + assert(0 == uv_mutex_init(&pg_cache_descr->mutex)); + + return pg_cache_descr; +} + +void rrdeng_destroy_pg_cache_descr(struct rrdengine_instance *ctx, struct page_cache_descr *pg_cache_descr) +{ + uv_cond_destroy(&pg_cache_descr->cond); + uv_mutex_destroy(&pg_cache_descr->mutex); + freez(pg_cache_descr); + rrd_stat_atomic_add(&ctx->stats.page_cache_descriptors, -1); +} + +/* also allocates page cache descriptor if missing */ +void rrdeng_page_descr_mutex_lock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + unsigned long old_state, old_users, new_state, ret_state; + struct page_cache_descr *pg_cache_descr = NULL; + uint8_t we_locked; + + we_locked = 0; + while (1) { /* spin */ + old_state = descr->pg_cache_descr_state; + old_users = old_state >> PG_CACHE_DESCR_SHIFT; + + if (unlikely(we_locked)) { + assert(old_state & PG_CACHE_DESCR_LOCKED); + new_state = (1 << PG_CACHE_DESCR_SHIFT) | PG_CACHE_DESCR_ALLOCATED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + break; + } + continue; /* spin */ + } + if (old_state & PG_CACHE_DESCR_LOCKED) { + assert(0 == old_users); + continue; /* spin */ + } + if (0 == old_state) { + /* no page cache descriptor has been allocated */ + + if (NULL == pg_cache_descr) { + pg_cache_descr = rrdeng_create_pg_cache_descr(ctx); + } + new_state = PG_CACHE_DESCR_LOCKED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, 0, new_state); + if (0 == ret_state) { + we_locked = 1; + descr->pg_cache_descr = pg_cache_descr; + pg_cache_descr->descr = descr; + pg_cache_descr = NULL; /* make sure we don't free pg_cache_descr */ + /* retry */ + continue; + } + continue; /* spin */ + } + /* page cache descriptor is already allocated */ + 
assert(old_state & PG_CACHE_DESCR_ALLOCATED); + + new_state = (old_users + 1) << PG_CACHE_DESCR_SHIFT; + new_state |= old_state & PG_CACHE_DESCR_FLAGS_MASK; + + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + break; + } + /* spin */ + } + + if (pg_cache_descr) { + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); + } + pg_cache_descr = descr->pg_cache_descr; + uv_mutex_lock(&pg_cache_descr->mutex); +} + +void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + unsigned long old_state, new_state, ret_state, old_users; + struct page_cache_descr *pg_cache_descr; + uint8_t we_locked; + + uv_mutex_unlock(&descr->pg_cache_descr->mutex); + + we_locked = 0; + while (1) { /* spin */ + old_state = descr->pg_cache_descr_state; + old_users = old_state >> PG_CACHE_DESCR_SHIFT; + + if (unlikely(we_locked)) { + assert(0 == old_users); + + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, 0); + if (old_state == ret_state) { + /* success */ + break; + } + continue; /* spin */ + } + if (old_state & PG_CACHE_DESCR_LOCKED) { + assert(0 == old_users); + continue; /* spin */ + } + assert(old_state & PG_CACHE_DESCR_ALLOCATED); + pg_cache_descr = descr->pg_cache_descr; + /* caller is the only page cache descriptor user and there are no pending references on the page */ + if ((old_state & PG_CACHE_DESCR_DESTROY) && (1 == old_users) && + !pg_cache_descr->flags && !pg_cache_descr->refcnt) { + new_state = PG_CACHE_DESCR_LOCKED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + we_locked = 1; + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); + /* retry */ + continue; + } + continue; /* spin */ + } + assert(old_users > 0); + new_state = (old_users - 1) << PG_CACHE_DESCR_SHIFT; + new_state |= old_state & PG_CACHE_DESCR_FLAGS_MASK; + + ret_state = 
ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + break; + } + /* spin */ + } + +} + +/* + * Tries to deallocate page cache descriptor. If it fails, it postpones deallocation by setting the + * PG_CACHE_DESCR_DESTROY flag which will be eventually cleared by a different context after doing + * the deallocation. + */ +void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) +{ + unsigned long old_state, new_state, ret_state, old_users; + struct page_cache_descr *pg_cache_descr; + uint8_t just_locked, we_freed, must_unlock; + + just_locked = 0; + we_freed = 0; + must_unlock = 0; + while (1) { /* spin */ + old_state = descr->pg_cache_descr_state; + old_users = old_state >> PG_CACHE_DESCR_SHIFT; + + if (unlikely(just_locked)) { + assert(0 == old_users); + + must_unlock = 1; + just_locked = 0; + /* Try deallocate if there are no pending references on the page */ + if (!pg_cache_descr->flags && !pg_cache_descr->refcnt) { + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); + we_freed = 1; + /* success */ + continue; + } + continue; /* spin */ + } + if (unlikely(must_unlock)) { + assert(0 == old_users); + + if (we_freed) { + /* success */ + new_state = 0; + } else { + new_state = old_state | PG_CACHE_DESCR_DESTROY; + new_state &= ~PG_CACHE_DESCR_LOCKED; + } + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* unlocked */ + return; + } + continue; /* spin */ + } + if (!(old_state & PG_CACHE_DESCR_ALLOCATED)) { + /* don't do anything */ + return; + } + if (old_state & PG_CACHE_DESCR_LOCKED) { + assert(0 == old_users); + continue; /* spin */ + } + pg_cache_descr = descr->pg_cache_descr; + /* caller is the only page cache descriptor user */ + if (0 == old_users) { + new_state = old_state | PG_CACHE_DESCR_LOCKED; + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, 
old_state, new_state); + if (old_state == ret_state) { + just_locked = 1; + /* retry */ + continue; + } + continue; /* spin */ + } + if (old_state & PG_CACHE_DESCR_DESTROY) { + /* don't do anything */ + return; + } + /* plant PG_CACHE_DESCR_DESTROY so that other contexts eventually free the page cache descriptor */ + new_state = old_state | PG_CACHE_DESCR_DESTROY; + + ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); + if (old_state == ret_state) { + /* success */ + return; + } + /* spin */ + } +} \ No newline at end of file diff --git a/database/engine/rrdenglocking.h b/database/engine/rrdenglocking.h new file mode 100644 index 000000000..127ddc90c --- /dev/null +++ b/database/engine/rrdenglocking.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDENGLOCKING_H +#define NETDATA_RRDENGLOCKING_H + +#include "rrdengine.h" + +/* Forward declarations */ +struct page_cache_descr; + +extern struct page_cache_descr *rrdeng_create_pg_cache_descr(struct rrdengine_instance *ctx); +extern void rrdeng_destroy_pg_cache_descr(struct rrdengine_instance *ctx, struct page_cache_descr *pg_cache_descr); +extern void rrdeng_page_descr_mutex_lock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +extern void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +extern void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); + +#endif /* NETDATA_RRDENGLOCKING_H */ \ No newline at end of file diff --git a/database/rrd.c b/database/rrd.c index 2457cac01..31ad3f07e 100644 --- a/database/rrd.c +++ b/database/rrd.c @@ -132,7 +132,6 @@ const char *rrdset_type_name(RRDSET_TYPE chart_type) { } } - // ---------------------------------------------------------------------------- // RRD - cache directory @@ -146,8 +145,7 @@ char *rrdset_cache_dir(RRDHOST *host, const char *id, const char *config_section snprintfz(n, 
FILENAME_MAX, "%s/%s", host->cache_dir, b); ret = config_get(config_section, "cache directory", n); - if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || - host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) { int r = mkdir(ret, 0775); if(r != 0 && errno != EEXIST) error("Cannot create directory '%s'", ret); @@ -155,3 +153,4 @@ char *rrdset_cache_dir(RRDHOST *host, const char *id, const char *config_section return ret; } + diff --git a/database/rrd.h b/database/rrd.h index 3f57b9037..5b09c2dda 100644 --- a/database/rrd.h +++ b/database/rrd.h @@ -17,7 +17,7 @@ typedef struct alarm_entry ALARM_ENTRY; // forward declarations struct rrddim_volatile; #ifdef ENABLE_DBENGINE -struct rrdeng_page_cache_descr; +struct rrdeng_page_descr; struct rrdengine_instance; struct pg_cache_page_index; #endif @@ -246,10 +246,12 @@ union rrddim_collect_handle { } slotted; // state the legacy code uses #ifdef ENABLE_DBENGINE struct rrdeng_collect_handle { - struct rrdeng_page_cache_descr *descr, *prev_descr; + struct rrdeng_page_descr *descr, *prev_descr; unsigned long page_correlation_id; struct rrdengine_instance *ctx; struct pg_cache_page_index *page_index; + // set to 1 when this dimension is not page aligned with the other dimensions in the chart + uint8_t unaligned_page; } rrdeng; // state the database engine uses #endif }; @@ -268,7 +270,7 @@ struct rrddim_query_handle { } slotted; // state the legacy code uses #ifdef ENABLE_DBENGINE struct rrdeng_query_handle { - struct rrdeng_page_cache_descr *descr; + struct rrdeng_page_descr *descr; struct rrdengine_instance *ctx; struct pg_cache_page_index *page_index; time_t now; //TODO: remove now to implement next point iteration @@ -351,7 +353,7 @@ typedef enum rrdset_flags { RRDSET_FLAG_UPSTREAM_EXPOSED = 1 << 8, // if set, we have sent this chart definition to netdata master (streaming) 
RRDSET_FLAG_STORE_FIRST = 1 << 9, // if set, do not eliminate the first collection during interpolation RRDSET_FLAG_HETEROGENEOUS = 1 << 10, // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) - RRDSET_FLAG_HOMEGENEOUS_CHECK = 1 << 11, // if set, the chart should be checked to determine if the dimensions as homogeneous + RRDSET_FLAG_HOMOGENEOUS_CHECK = 1 << 11, // if set, the chart should be checked to determine if the dimensions are homogeneous RRDSET_FLAG_HIDDEN = 1 << 12, // if set, do not show this chart on the dashboard, but use it for backends RRDSET_FLAG_SYNC_CLOCK = 1 << 13, // if set, microseconds on next data collection will be ignored (the chart will be synced to now) RRDSET_FLAG_OBSOLETE_DIMENSIONS = 1 << 14 // this is marked by the collector/module when a chart has obsolete dimensions @@ -431,7 +433,9 @@ struct rrdset { char *plugin_name; // the name of the plugin that generated this char *module_name; // the name of the plugin module that generated this - size_t unused[6]; + size_t unused[5]; + + size_t rrddim_page_alignment; // keeps metric pages in alignment when using dbengine uint32_t hash; // a simple hash on the id, to speed up searching // we first compare hashes, and only if the hashes are equal we do string comparisons @@ -568,6 +572,8 @@ struct alarm_entry { uint32_t updated_by_id; uint32_t updates_id; + time_t last_repeat; + struct alarm_entry *next; }; @@ -682,11 +688,16 @@ struct rrdhost { char *health_log_filename; // the alarms event log filename size_t health_log_entries_written; // the number of alarm events writtern to the alarms event log FILE *health_log_fp; // the FILE pointer to the open alarms event log file + uint32_t health_default_warn_repeat_every; // the default value for the interval between repeating warning notifications + uint32_t health_default_crit_repeat_every; // the default value for the interval between repeating critical notifications + // all RRDCALCs are 
primarily allocated and linked here // RRDCALCs may be linked to charts at any point // (charts may or may not exist when these are loaded) RRDCALC *alarms; + avl_tree_lock alarms_idx_health_log; + avl_tree_lock alarms_idx_name; ALARM_LOG health_log; // alarms historical events (event log) uint32_t health_last_processed_id; // the last processed health id from the log @@ -723,6 +734,10 @@ struct rrdhost { struct rrdengine_instance *rrdeng_ctx; // DB engine instance for this host #endif +#ifdef ENABLE_HTTPS + struct netdata_ssl ssl; //Structure used to encrypt the connection +#endif + struct rrdhost *next; }; extern RRDHOST *localhost; @@ -781,7 +796,6 @@ extern RRDHOST *rrdhost_find_or_create( ); extern int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value); -extern struct rrdhost_system_info *rrdhost_system_info_dup(struct rrdhost_system_info *system_info); #if defined(NETDATA_INTERNAL_CHECKS) && defined(NETDATA_VERIFY_LOCKS) extern void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); @@ -1014,6 +1028,12 @@ extern collected_number rrddim_set(RRDSET *st, const char *id, collected_number extern long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries); +// ---------------------------------------------------------------------------- +// Miscellaneous functions + +extern int alarm_compare_id(void *a, void *b); +extern int alarm_compare_name(void *a, void *b); + // ---------------------------------------------------------------------------- // RRD internal functions diff --git a/database/rrdcalc.c b/database/rrdcalc.c index 7f6a896b6..908fc2ebf 100644 --- a/database/rrdcalc.c +++ b/database/rrdcalc.c @@ -81,9 +81,9 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { if(!rc->units) rc->units = strdupz(st->units); - { + if(!rrdcalc_isrepeating(rc)) { time_t now = now_realtime_sec(); - health_alarm_log( + ALARM_ENTRY *ae = health_create_alarm_entry( 
host, rc->id, rc->next_event_id++, @@ -104,6 +104,7 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { 0, 0 ); + health_alarm_log(host, ae); } } @@ -142,9 +143,9 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) { RRDHOST *host = st->rrdhost; - { + if(!rrdcalc_isrepeating(rc)) { time_t now = now_realtime_sec(); - health_alarm_log( + ALARM_ENTRY *ae = health_create_alarm_entry( host, rc->id, rc->next_event_id++, @@ -165,6 +166,7 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) { 0, 0 ); + health_alarm_log(host, ae); } debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); @@ -253,7 +255,7 @@ inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const ch return host->health_log.next_alarm_id++; } -inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { +inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) { rrdhost_check_rdlock(host); if(rc->calculation) { @@ -301,8 +303,7 @@ inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { } } -inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { - +inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name); if(rrdcalc_exists(host, chart, rt->name, 0, 0)) @@ -328,6 +329,10 @@ inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *c rc->delay_max_duration = rt->delay_max_duration; rc->delay_multiplier = rt->delay_multiplier; + rc->last_repeat = 0; + rc->warn_repeat_every = rt->warn_repeat_every; + rc->crit_repeat_every = rt->crit_repeat_every; + rc->group = rt->group; rc->after = rt->after; rc->before = rt->before; @@ -356,7 +361,7 @@ inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *c error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, 
rt->critical->source); } - debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", + debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", (rc->chart)?rc->chart:"NOCHART", rc->name, (rc->exec)?rc->exec:"DEFAULT", @@ -376,16 +381,24 @@ inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *c rc->delay_up_duration, rc->delay_down_duration, rc->delay_max_duration, - rc->delay_multiplier + rc->delay_multiplier, + rc->warn_repeat_every, + rc->crit_repeat_every ); - rrdcalc_create_part2(host, rc); + rrdcalc_add_to_host(host, rc); + RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)rc); + if (rdcmp != rc) { + error("Cannot insert the alarm index ID %s",rc->name); + } + return rc; } void rrdcalc_free(RRDCALC *rc) { if(unlikely(!rc)) return; + expression_free(rc->calculation); expression_free(rc->warning); expression_free(rc->critical); @@ -413,7 +426,6 @@ void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc) { // unlink it from RRDHOST if(unlikely(rc == host->alarms)) host->alarms = rc->next; - else { RRDCALC *t; for(t = host->alarms; t && t->next != rc; t = t->next) ; @@ -425,5 +437,79 @@ void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc) { error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, 
host->hostname); } + if (rc) { + RRDCALC *rdcmp = (RRDCALC *) avl_search_lock(&(host)->alarms_idx_health_log, (avl *)rc); + if (rdcmp) { + rdcmp = (RRDCALC *) avl_remove_lock(&(host)->alarms_idx_health_log, (avl *)rc); + if (!rdcmp) { + error("Cannot remove the health alarm index from health_log"); + } + } + + rdcmp = (RRDCALC *) avl_search_lock(&(host)->alarms_idx_name, (avl *)rc); + if (rdcmp) { + rdcmp = (RRDCALC *) avl_remove_lock(&(host)->alarms_idx_name, (avl *)rc); + if (!rdcmp) { + error("Cannot remove the health alarm index from idx_name"); + } + } + } + rrdcalc_free(rc); } + +// ---------------------------------------------------------------------------- +// Alarm + + +/** + * Alarm is repeating + * + * Is this alarm repeating ? + * + * @param host The structure that has the binary tree + * @param alarm_id the id of the alarm to search + * + * @return It returns 1 case it is repeating and 0 otherwise + */ +int alarm_isrepeating(RRDHOST *host, uint32_t alarm_id) { + RRDCALC findme; + findme.id = alarm_id; + RRDCALC *rc = (RRDCALC *)avl_search_lock(&host->alarms_idx_health_log, (avl *)&findme); + if (!rc) { + return 0; + } + return rrdcalc_isrepeating(rc); +} + +/** + * Entry is repeating + * + * Check whether the id of alarm entry is yet present in the host structure + * + * @param host The structure that has the binary tree + * @param ae the alarm entry + * + * @return It returns 1 case it is repeating and 0 otherwise + */ +int alarm_entry_isrepeating(RRDHOST *host, ALARM_ENTRY *ae) { + return alarm_isrepeating(host, ae->alarm_id); +} + +/** + * Max last repeat + * + * Check the maximum last_repeat for the alarms associated a host + * + * @param host The structure that has the binary tree + * + * @return It returns 1 case it is repeating and 0 otherwise + */ +RRDCALC *alarm_max_last_repeat(RRDHOST *host, char *alarm_name,uint32_t hash) { + RRDCALC findme; + findme.name = alarm_name; + findme.hash = hash; + RRDCALC *rc = (RRDCALC 
*)avl_search_lock(&host->alarms_idx_name, (avl *)&findme); + + return rc; +} diff --git a/database/rrdcalc.h b/database/rrdcalc.h index 4df4381ae..3400f711c 100644 --- a/database/rrdcalc.h +++ b/database/rrdcalc.h @@ -29,7 +29,9 @@ #define RRDCALC_FLAG_SILENCED 0x00000100 #define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 + struct rrdcalc { + avl avl; // the index, with key the id - this has to be first! uint32_t id; // the unique id of this alarm uint32_t next_event_id; // the next event id that will be used for this alarm @@ -77,9 +79,16 @@ struct rrdcalc { float delay_multiplier; // multiplier for all delays when alarms switch status // while now < delay_up_to + // ------------------------------------------------------------------------ + // notification repeat settings + + uint32_t warn_repeat_every; // interval between repeating warning notifications + uint32_t crit_repeat_every; // interval between repeating critical notifications + // ------------------------------------------------------------------------ // runtime information + RRDCALC_STATUS old_status; // the old status of the alarm RRDCALC_STATUS status; // the current status of the alarm calculated_number value; // the current value of the alarm @@ -90,6 +99,7 @@ struct rrdcalc { time_t last_updated; // the last update timestamp of the alarm time_t next_update; // the next update timestamp of the alarm time_t last_status_change; // the timestamp of the last time this alarm changed status + time_t last_repeat; // the last time the alarm got repeated time_t db_after; // the first timestamp evaluated by the db lookup time_t db_before; // the last timestamp evaluated by the db lookup @@ -119,6 +129,10 @@ struct rrdcalc { struct rrdcalc *next; }; +extern int alarm_isrepeating(RRDHOST *host, uint32_t alarm_id); +extern int alarm_entry_isrepeating(RRDHOST *host, ALARM_ENTRY *ae); +extern RRDCALC *alarm_max_last_repeat(RRDHOST *host, char *alarm_name, uint32_t hash); + #define RRDCALC_HAS_DB_LOOKUP(rc) 
((rc)->after) extern void rrdsetcalc_link_matching(RRDSET *st); @@ -132,7 +146,14 @@ extern void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc); extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name); extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id); -extern RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart); -extern void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc); +extern RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart); +extern void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc); + +static inline int rrdcalc_isrepeating(RRDCALC *rc) { + if (unlikely(rc->warn_repeat_every > 0 || rc->crit_repeat_every > 0)) { + return 1; + } + return 0; +} #endif //NETDATA_RRDCALC_H diff --git a/database/rrdcalctemplate.c b/database/rrdcalctemplate.c index ba7e7ec94..f2b9767c6 100644 --- a/database/rrdcalctemplate.c +++ b/database/rrdcalctemplate.c @@ -13,7 +13,7 @@ void rrdcalctemplate_link_matching(RRDSET *st) { for(rt = host->templates; rt ; rt = rt->next) { if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context) && (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) { - RRDCALC *rc = rrdcalc_create(host, rt, st->id); + RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id); if(unlikely(!rc)) info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname); diff --git a/database/rrdcalctemplate.h b/database/rrdcalctemplate.h index b8996bc14..92bb4138e 100644 --- a/database/rrdcalctemplate.h +++ b/database/rrdcalctemplate.h @@ -48,6 +48,12 @@ struct rrdcalctemplate { int delay_max_duration; // the absolute max delay to apply to this alarm float delay_multiplier; // multiplier for all delays when alarms switch status + // 
------------------------------------------------------------------------ + // notification repeat settings + + uint32_t warn_repeat_every; // interval between repeating warning notifications + uint32_t crit_repeat_every; // interval between repeating critical notifications + // ------------------------------------------------------------------------ // expressions related to the alarm diff --git a/database/rrddim.c b/database/rrddim.c index 0cf6734a6..088c80d0b 100644 --- a/database/rrddim.c +++ b/database/rrddim.c @@ -60,7 +60,7 @@ inline int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm) debug(D_RRD_CALLS, "Updating algorithm of dimension '%s/%s' from %s to %s", st->id, rd->name, rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm)); rd->algorithm = algorithm; rd->exposed = 0; - rrdset_flag_set(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); return 1; } @@ -72,7 +72,7 @@ inline int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multip debug(D_RRD_CALLS, "Updating multiplier of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, st->id, rd->name, rd->multiplier, multiplier); rd->multiplier = multiplier; rd->exposed = 0; - rrdset_flag_set(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); return 1; } @@ -84,7 +84,7 @@ inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor) debug(D_RRD_CALLS, "Updating divisor of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, st->id, rd->name, rd->divisor, divisor); rd->divisor = divisor; rd->exposed = 0; - rrdset_flag_set(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); return 1; } diff --git a/database/rrdhost.c 
b/database/rrdhost.c index c552c6c39..d6252d206 100644 --- a/database/rrdhost.c +++ b/database/rrdhost.c @@ -147,6 +147,10 @@ RRDHOST *rrdhost_create(const char *hostname, host->rrdpush_sender_pipe[0] = -1; host->rrdpush_sender_pipe[1] = -1; host->rrdpush_sender_socket = -1; +#ifdef ENABLE_HTTPS + host->ssl.conn = NULL; + host->ssl.flags = NETDATA_SSL_START; +#endif netdata_mutex_init(&host->rrdpush_sender_buffer_mutex); netdata_rwlock_init(&host->rrdhost_rwlock); @@ -162,7 +166,7 @@ RRDHOST *rrdhost_create(const char *hostname, host->program_version = strdupz((program_version && *program_version)?program_version:"unknown"); host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname); - host->system_info = rrdhost_system_info_dup(system_info); + host->system_info = system_info; avl_init_lock(&(host->rrdset_root_index), rrdset_compare); avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name); @@ -175,6 +179,10 @@ RRDHOST *rrdhost_create(const char *hostname, if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost) rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST); + host->health_default_warn_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat warning", "never"); + host->health_default_crit_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat critical", "never"); + avl_init_lock(&(host->alarms_idx_health_log), alarm_compare_id); + avl_init_lock(&(host->alarms_idx_name), alarm_compare_name); // ------------------------------------------------------------------------ // initialize health variables @@ -270,12 +278,12 @@ RRDHOST *rrdhost_create(const char *hostname, // load health configuration if(host->health_enabled) { - health_alarm_log_load(host); - health_alarm_log_open(host); - rrdhost_wrlock(host); health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL); rrdhost_unlock(host); + + 
health_alarm_log_load(host); + health_alarm_log_open(host); } @@ -812,81 +820,103 @@ restart_after_removal: // RRDHOST - set system info from environment variables int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) { + int res = 0; + if(!strcmp(name, "NETDATA_SYSTEM_OS_NAME")){ + freez(system_info->os_name); system_info->os_name = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_OS_ID")){ + freez(system_info->os_id); system_info->os_id = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_OS_ID_LIKE")){ + freez(system_info->os_id_like); system_info->os_id_like = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_OS_VERSION")){ + freez(system_info->os_version); system_info->os_version = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_OS_VERSION_ID")){ + freez(system_info->os_version_id); system_info->os_version_id = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_OS_DETECTION")){ + freez(system_info->os_detection); system_info->os_detection = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_NAME")){ + freez(system_info->kernel_name); system_info->kernel_name = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_VERSION")){ + freez(system_info->kernel_version); system_info->kernel_version = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_ARCHITECTURE")){ + freez(system_info->architecture); system_info->architecture = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_VIRTUALIZATION")){ + freez(system_info->virtualization); system_info->virtualization = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_VIRT_DETECTION")){ + freez(system_info->virt_detection); system_info->virt_detection = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER")){ + freez(system_info->container); system_info->container = strdupz(value); } else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER_DETECTION")){ + freez(system_info->container_detection); 
system_info->container_detection = strdupz(value); } - else return 1; + else { + res = 1; + } - return 0; + return res; } -struct rrdhost_system_info *rrdhost_system_info_dup(struct rrdhost_system_info *system_info) { - struct rrdhost_system_info *ret = callocz(1, sizeof(struct rrdhost_system_info)); +/** + * Alarm Compare ID + * + * Callback function used with the binary trees to compare RRDCALC items by id + * + * @param a a pointer to the RRDCALC item to insert, compare or update in the binary tree + * @param b a pointer to the RRDCALC item that a is compared against. + * + * @return 0 when both ids are equal, 1 when the id of a is greater than the id of b and -1 when it is smaller. + */ +int alarm_compare_id(void *a, void *b) { + register uint32_t hash1 = ((RRDCALC *)a)->id; + register uint32_t hash2 = ((RRDCALC *)b)->id; + + if(hash1 < hash2) return -1; + else if(hash1 > hash2) return 1; - if(likely(system_info)) { - if(system_info->os_name) - ret->os_name = strdupz(system_info->os_name); - if(system_info->os_id) - ret->os_id = strdupz(system_info->os_id); - if(system_info->os_id_like) - ret->os_id_like = strdupz(system_info->os_id_like); - if(system_info->os_version) - ret->os_version = strdupz(system_info->os_version); - if(system_info->os_version_id) - ret->os_version_id = strdupz(system_info->os_version_id); - if(system_info->os_detection) - ret->os_detection = strdupz(system_info->os_detection); - if(system_info->kernel_name) - ret->kernel_name = strdupz(system_info->kernel_name); - if(system_info->kernel_version) - ret->kernel_version = strdupz(system_info->kernel_version); - if(system_info->architecture) - ret->architecture = strdupz(system_info->architecture); - if(system_info->virtualization) - ret->virtualization = strdupz(system_info->virtualization); - if(system_info->virt_detection) - ret->virt_detection = strdupz(system_info->virt_detection); - if(system_info->container) - ret->container = strdupz(system_info->container); - if(system_info->container_detection) - 
ret->container_detection = strdupz(system_info->container_detection); - } - - return ret; + return 0; +} + +/** + * Alarm Compare NAME + * + * Callback function used with the binary trees to compare RRDCALC items by hash and then by name + * + * @param a a pointer to the RRDCALC item to insert, compare or update in the binary tree + * @param b a pointer to the RRDCALC item that a is compared against. + * + * @return 0 when hash and name are both equal, a positive value when a sorts after b and a negative value when a sorts before b. + */ +int alarm_compare_name(void *a, void *b) { + RRDCALC *in1 = (RRDCALC *)a; + RRDCALC *in2 = (RRDCALC *)b; + + if(in1->hash < in2->hash) return -1; + else if(in1->hash > in2->hash) return 1; + + return strcmp(in1->name,in2->name); } diff --git a/database/rrdset.c b/database/rrdset.c index 689591468..f8962b2fb 100644 --- a/database/rrdset.c +++ b/database/rrdset.c @@ -210,7 +210,7 @@ inline void rrdset_update_heterogeneous_flag(RRDSET *st) { RRDDIM *rd; - rrdset_flag_clear(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + rrdset_flag_clear(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); RRD_ALGORITHM algorithm = st->dimensions->algorithm; collected_number multiplier = abs(st->dimensions->multiplier); @@ -251,6 +251,7 @@ void rrdset_reset(RRDSET *st) { st->current_entry = 0; st->counter = 0; st->counter_done = 0; + st->rrddim_page_alignment = 0; RRDDIM *rd; rrddim_foreach_read(rd, st) { @@ -258,6 +259,11 @@ void rrdset_reset(RRDSET *st) { rd->last_collected_time.tv_usec = 0; rd->collections_counter = 0; // memset(rd->values, 0, rd->entries * sizeof(storage_number)); +#ifdef ENABLE_DBENGINE + if (RRD_MEMORY_MODE_DBENGINE == st->rrd_memory_mode) { + rrdeng_store_metric_flush_current_page(rd); + } +#endif } } @@ -505,6 +511,12 @@ RRDSET *rrdset_create_custom( if(st) { rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + if(unlikely(name)) + rrdset_set_name(st, name); + else + rrdset_set_name(st, id); + return st; } @@ -613,7 +625,7 @@ RRDSET *rrdset_create_custom( memset(st, 0, 
size); } else if((now - st->last_updated.tv_sec) > update_every * entries) { - error("File %s is too old. Clearing it.", fullfilename); + info("File %s is too old. Clearing it.", fullfilename); memset(st, 0, size); } else if(st->last_updated.tv_sec > now + update_every) { @@ -702,6 +714,7 @@ RRDSET *rrdset_create_custom( st->last_collected_time.tv_sec = 0; st->last_collected_time.tv_usec = 0; st->counter_done = 0; + st->rrddim_page_alignment = 0; st->gap_when_lost_iterations_above = (int) (gap_when_lost_iterations_above + 2); @@ -1273,6 +1286,22 @@ void rrdset_done(RRDSET *st) { first_entry = 1; } +#ifdef ENABLE_DBENGINE + // check if we will re-write the entire page + if(unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && + dt_usec(&st->last_collected_time, &st->last_updated) > (RRDENG_BLOCK_SIZE / sizeof(storage_number)) * update_every_ut)) { + info("%s: too old data (last updated at %ld.%ld, last collected at %ld.%ld). Resetting it. Will not store the next entry.", st->name, st->last_updated.tv_sec, st->last_updated.tv_usec, st->last_collected_time.tv_sec, st->last_collected_time.tv_usec); + rrdset_reset(st); + rrdset_init_last_updated_time(st); + + st->usec_since_last_update = update_every_ut; + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } +#endif + // these are the 3 variables that will help us in interpolation // last_stored_ut = the last time we added a value to the storage // now_collect_ut = the time the current value has been collected diff --git a/docs/Add-more-charts-to-netdata.md b/docs/Add-more-charts-to-netdata.md index 382cd8d3a..285713b02 100644 --- a/docs/Add-more-charts-to-netdata.md +++ b/docs/Add-more-charts-to-netdata.md @@ -1,8 +1,8 @@ -# Add more charts to netdata +# Add more charts to Netdata -netdata collects system metrics by itself. 
It has many [internal plugins](../collectors) for collecting most of the metrics presented by default when it starts, collecting data from `/proc`, `/sys` and other Linux kernel sources. +Netdata collects system metrics by itself. It has many [internal plugins](../collectors) for collecting most of the metrics presented by default when it starts, collecting data from `/proc`, `/sys` and other Linux kernel sources. -To collect non-system metrics, netdata supports a plugin architecture. The following are the currently available external plugins: +To collect non-system metrics, Netdata supports a plugin architecture. The following are the currently available external plugins: - **[Web Servers](#web-servers)**, such as apache, nginx, nginx_plus, tomcat, litespeed - **[Web Logs](#web-log-parsers)**, such as apache, nginx, lighttpd, gunicorn, squid access logs, apache cache.log @@ -39,16 +39,16 @@ Check also [Third Party Plugins](Third-Party-Plugins.md) for a list of plugins d ## configuring plugins -netdata comes with **internal** and **external** plugins: +Netdata comes with **internal** and **external** plugins: -1. The **internal** ones are written in `C` and run as threads within the netdata daemon. -2. The **external** ones can be written in any computer language. The netdata daemon spawns these as processes (shown with `ps fax`) and reads their metrics using pipes (so the `stdout` of external plugins is connected to netdata for metrics collection and the `stderr` of external plugins is connected to `/var/log/netdata/error.log`). +1. The **internal** ones are written in `C` and run as threads within the Netdata daemon. +2. The **external** ones can be written in any computer language. The Netdata daemon spawns these as processes (shown with `ps fax`) and reads their metrics using pipes (so the `stdout` of external plugins is connected to Netdata for metrics collection and the `stderr` of external plugins is connected to `/var/log/netdata/error.log`). 
-To make it easier to develop plugins, and minimize the number of threads and processes running, netdata supports **plugin orchestrators**, each of them supporting one or more data collection **modules**. Currently we ship plugin orchestrators for 4 languages: `C`, `python`, `node.js` and `bash` and 2 more are under development (`go` and `java`). +To make it easier to develop plugins, and minimize the number of threads and processes running, Netdata supports **plugin orchestrators**, each of them supporting one or more data collection **modules**. Currently we ship plugin orchestrators for 4 languages: `C`, `python`, `node.js` and `bash` and 2 more are under development (`go` and `java`). #### enabling and disabling plugins -To control which plugins netdata run, edit `netdata.conf` and check the `[plugins]` section. It looks like this: +To control which plugins Netdata runs, edit `netdata.conf` and check the `[plugins]` section. It looks like this: ``` [plugins] @@ -69,6 +69,7 @@ To control which plugins netdata run, edit `netdata.conf` and check the `[plugin # charts.d = yes # apps = yes # xenstat = yes + # perf = no ``` The default for all plugins is the option `enable running new plugins`. So, setting this to `no` will disable all the plugins, except the ones specifically enabled. @@ -81,9 +82,9 @@ Each of the **plugins** may support one or more data collection **modules**. To Most **modules** come with **auto-detection**, configured to work out-of-the-box on popular operating systems with the default settings. -However, there are cases that auto-detection fails. Usually the reason is that the applications to be monitored do not allow netdata to connect. In most of the cases, allowing the user `netdata` from `localhost` to connect and collect metrics, will automatically enable data collection for the application in question (it will require a netdata restart). +However, there are cases where auto-detection fails.
Usually the reason is that the applications to be monitored do not allow Netdata to connect. In most of the cases, allowing the user `netdata` from `localhost` to connect and collect metrics, will automatically enable data collection for the application in question (it will require a Netdata restart). -You can verify netdata **external plugins and their modules** are able to collect metrics, following this procedure: +You can verify Netdata **external plugins and their modules** are able to collect metrics, following this procedure: ```sh # become user netdata @@ -95,9 +96,9 @@ sudo su -s /bin/bash netdata ``` Similarly, you can use `charts.d.plugin` for BASH plugins and `node.d.plugin` for node.js plugins. -Other plugins (like `apps.plugin`, `freeipmi.plugin`, `fping.plugin`, `ioping.plugin`) use the native netdata plugin API and can be run directly. +Other plugins (like `apps.plugin`, `freeipmi.plugin`, `fping.plugin`, `ioping.plugin`, `nfacct.plugin`, `xenstat.plugin`, `perf.plugin`) use the native Netdata plugin API and can be run directly. -If you need to configure a netdata plugin or module, all user supplied configuration is kept at `/etc/netdata` while the stock versions of all files is at `/usr/lib/netdata/conf.d`. +If you need to configure a Netdata plugin or module, all user supplied configuration is kept at `/etc/netdata` while the stock versions of all files are at `/usr/lib/netdata/conf.d`. To copy a stock file and edit it, run `/etc/netdata/edit-config`. Running this command without an argument, will list the available stock files. Each file should provide plenty of examples and documentation about each module and plugin. @@ -116,6 +117,9 @@ plugin | language | plugin
configuration | modules
configuration | `fping.plugin`
(external plugin for collecting network latencies)|`C`|`fping.conf`|This plugin is a wrapper for the `fping` command. `ioping.plugin`
(external plugin for collecting disk latencies)|`C`|`ioping.conf`|This plugin is a wrapper for the `ioping` command. `freeipmi.plugin`
(external plugin for collecting IPMI h/w sensors)|`C`|`netdata.conf` section `[plugin:freeipmi]` +`nfacct.plugin`
(external plugin for monitoring netfilter firewall and connection tracker)|`C`|`netdata.conf` section `[plugin:nfacct]`|N/A +`xenstat.plugin`
(external plugin for monitoring XCP-ng and XenServer)|`C`|`netdata.conf` section `[plugin:xenstat]`|N/A +`perf.plugin`
(external plugin for monitoring CPU performance on Linux)|`C`|`netdata.conf` section `[plugin:perf]`|N/A `idlejitter.plugin`
(internal plugin for monitoring CPU jitter)|`C`|N/A|N/A `macos.plugin`
(internal plugin for monitoring MacOS system resources)|`C`|`netdata.conf` section `[plugin:macos]`|one section for each module `[plugin:macos:MODULE]`. Each module may provide additional sections in the form of `[plugin:macos:MODULE:SUBSECTION]`. `node.d.plugin`
(external plugin orchestrator of node.js modules)|`node.js`|`node.d.conf`|a file for each module in `/etc/netdata/node.d/`. @@ -139,17 +143,17 @@ These are all the data collection plugins currently available. application|language|notes| :---------:|:------:|:----| -apache|python
v2 or v3|Connects to multiple apache servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [apache.chart.py](../collectors/python.d.plugin/apache)
configuration file: [python.d/apache.conf](../collectors/python.d.plugin/apache)| -apache|BASH
Shell Script|Connects to an apache server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [apache.chart.sh](../collectors/charts.d.plugin/apache)
configuration file: [charts.d/apache.conf](../collectors/charts.d.plugin/apache)| -ipfs|python
v2 or v3|Connects to multiple ipfs servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [ipfs.chart.py](../collectors/python.d.plugin/ipfs)
configuration file: [python.d/ipfs.conf](../collectors/python.d.plugin/ipfs)| -litespeed|python
v2 or v3|reads the litespeed `rtreport` files to collect metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [litespeed.chart.py](../collectors/python.d.plugin/litespeed)
configuration file: [python.d/litespeed.conf](../collectors/python.d.plugin/litespeed) -nginx|python
v2 or v3|Connects to multiple nginx servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [nginx.chart.py](../collectors/python.d.plugin/nginx)
configuration file: [python.d/nginx.conf](../collectors/python.d.plugin/nginx)| -nginx_plus|python
v2 or v3|Connects to multiple nginx_plus servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [nginx_plus.chart.py](../collectors/python.d.plugin/nginx_plus)
configuration file: [python.d/nginx_plus.conf](../collectors/python.d.plugin/nginx_plus)| -nginx|BASH
Shell Script|Connects to an nginx server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [nginx.chart.sh](../collectors/charts.d.plugin/nginx)
configuration file: [charts.d/nginx.conf](../collectors/charts.d.plugin/nginx)| -phpfpm|python
v2 or v3|Connects to multiple phpfpm servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [phpfpm.chart.py](../collectors/python.d.plugin/phpfpm)
configuration file: [python.d/phpfpm.conf](../collectors/python.d.plugin/phpfpm)| -phpfpm|BASH
Shell Script|Connects to one or more phpfpm servers (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [phpfpm.chart.sh](../collectors/charts.d.plugin/phpfpm)
configuration file: [charts.d/phpfpm.conf](../collectors/charts.d.plugin/phpfpm)| -tomcat|python
v2 or v3|Connects to multiple tomcat servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [tomcat.chart.py](../collectors/python.d.plugin/tomcat)
configuration file: [python.d/tomcat.conf](../collectors/python.d.plugin/tomcat)| -tomcat|BASH
Shell Script|Connects to a tomcat server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [tomcat.chart.sh](../collectors/charts.d.plugin/tomcat)
configuration file: [charts.d/tomcat.conf](../collectors/charts.d.plugin/tomcat)| +apache|python
v2 or v3|Connects to multiple apache servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [apache.chart.py](../collectors/python.d.plugin/apache)
configuration file: [python.d/apache.conf](../collectors/python.d.plugin/apache)| +apache|BASH
Shell Script|Connects to an apache server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [apache.chart.sh](../collectors/charts.d.plugin/apache)
configuration file: [charts.d/apache.conf](../collectors/charts.d.plugin/apache)| +ipfs|python
v2 or v3|Connects to multiple ipfs servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [ipfs.chart.py](../collectors/python.d.plugin/ipfs)
configuration file: [python.d/ipfs.conf](../collectors/python.d.plugin/ipfs)| +litespeed|python
v2 or v3|reads the litespeed `rtreport` files to collect metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [litespeed.chart.py](../collectors/python.d.plugin/litespeed)
configuration file: [python.d/litespeed.conf](../collectors/python.d.plugin/litespeed) +nginx|python
v2 or v3|Connects to multiple nginx servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [nginx.chart.py](../collectors/python.d.plugin/nginx)
configuration file: [python.d/nginx.conf](../collectors/python.d.plugin/nginx)| +nginx_plus|python
v2 or v3|Connects to multiple nginx_plus servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [nginx_plus.chart.py](../collectors/python.d.plugin/nginx_plus)
configuration file: [python.d/nginx_plus.conf](../collectors/python.d.plugin/nginx_plus)| +nginx|BASH
Shell Script|Connects to an nginx server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [nginx.chart.sh](../collectors/charts.d.plugin/nginx)
configuration file: [charts.d/nginx.conf](../collectors/charts.d.plugin/nginx)| +phpfpm|python
v2 or v3|Connects to multiple phpfpm servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [phpfpm.chart.py](../collectors/python.d.plugin/phpfpm)
configuration file: [python.d/phpfpm.conf](../collectors/python.d.plugin/phpfpm)| +phpfpm|BASH
Shell Script|Connects to one or more phpfpm servers (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [phpfpm.chart.sh](../collectors/charts.d.plugin/phpfpm)
configuration file: [charts.d/phpfpm.conf](../collectors/charts.d.plugin/phpfpm)| +tomcat|python
v2 or v3|Connects to multiple tomcat servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [tomcat.chart.py](../collectors/python.d.plugin/tomcat)
configuration file: [python.d/tomcat.conf](../collectors/python.d.plugin/tomcat)| +tomcat|BASH
Shell Script|Connects to a tomcat server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [tomcat.chart.sh](../collectors/charts.d.plugin/tomcat)
configuration file: [charts.d/tomcat.conf](../collectors/charts.d.plugin/tomcat)| --- @@ -158,7 +162,7 @@ tomcat|BASH
Shell Script|Connects to a tomcat server (local or remote) to co application|language|notes| :---------:|:------:|:----| -web_log|python
v2 or v3|powerful plugin, capable of incrementally parsing any number of web server log files
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [web_log.chart.py](../collectors/python.d.plugin/web_log)
configuration file: [python.d/web_log.conf](../collectors/python.d.plugin/web_log)| +web_log|python
v2 or v3|powerful plugin, capable of incrementally parsing any number of web server log files
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [web_log.chart.py](../collectors/python.d.plugin/web_log)
configuration file: [python.d/web_log.conf](../collectors/python.d.plugin/web_log)| --- @@ -167,14 +171,14 @@ web_log|python
v2 or v3|powerful plugin, capable of incrementally parsing an application|language|notes| :---------:|:------:|:----| -couchdb|python
v2 or v3|Connects to multiple couchdb servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [couchdb.chart.py](../collectors/python.d.plugin/couchdb)
configuration file: [python.d/couchdb.conf](../collectors/python.d.plugin/couchdb)| -memcached|python
v2 or v3|Connects to multiple memcached servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [memcached.chart.py](../collectors/python.d.plugin/memcached)
configuration file: [python.d/memcached.conf](../collectors/python.d.plugin/memcached)| -mongodb|python
v2 or v3|Connects to multiple `mongodb` servers (local or remote) to collect real-time performance metrics.
 
Requires package `python-pymongo`.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [mongodb.chart.py](../collectors/python.d.plugin/mongodb)
configuration file: [python.d/mongodb.conf](../collectors/python.d.plugin/mongodb)| -mysql
mariadb|python
v2 or v3|Connects to multiple mysql or mariadb servers (local or remote) to collect real-time performance metrics.
 
Requires package `python-mysqldb` (faster and preferred), or `python-pymysql`.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [mysql.chart.py](../collectors/python.d.plugin/mysql)
configuration file: [python.d/mysql.conf](../collectors/python.d.plugin/mysql)| -mysql
mariadb|BASH
Shell Script|Connects to multiple mysql or mariadb servers (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [mysql.chart.sh](../collectors/charts.d.plugin/mysql)
configuration file: [charts.d/mysql.conf](../collectors/charts.d.plugin/mysql)| -postgres|python
v2 or v3|Connects to multiple postgres servers (local or remote) to collect real-time performance metrics.
 
Requires package `python-psycopg2`.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [postgres.chart.py](../collectors/python.d.plugin/postgres)
configuration file: [python.d/postgres.conf](../collectors/python.d.plugin/postgres)| -redis|python
v2 or v3|Connects to multiple redis servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [redis.chart.py](../collectors/python.d.plugin/redis)
configuration file: [python.d/redis.conf](../collectors/python.d.plugin/redis)| -rethinkdb|python
v2 or v3|Connects to multiple rethinkdb servers (local or remote) to collect real-time metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [rethinkdb.chart.py](../collectors/python.d.plugin/rethinkdbs)
configuration file: [python.d/rethinkdb.conf](../collectors/python.d.plugin/rethinkdbs)| +couchdb|python
v2 or v3|Connects to multiple couchdb servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [couchdb.chart.py](../collectors/python.d.plugin/couchdb)
configuration file: [python.d/couchdb.conf](../collectors/python.d.plugin/couchdb)| +memcached|python
v2 or v3|Connects to multiple memcached servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [memcached.chart.py](../collectors/python.d.plugin/memcached)
configuration file: [python.d/memcached.conf](../collectors/python.d.plugin/memcached)| +mongodb|python
v2 or v3|Connects to multiple `mongodb` servers (local or remote) to collect real-time performance metrics.
 
Requires package `python-pymongo`.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [mongodb.chart.py](../collectors/python.d.plugin/mongodb)
configuration file: [python.d/mongodb.conf](../collectors/python.d.plugin/mongodb)| +mysql
mariadb|python
v2 or v3|Connects to multiple mysql or mariadb servers (local or remote) to collect real-time performance metrics.
 
Requires package `python-mysqldb` (faster and preferred), or `python-pymysql`.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [mysql.chart.py](../collectors/python.d.plugin/mysql)
configuration file: [python.d/mysql.conf](../collectors/python.d.plugin/mysql)| +mysql
mariadb|BASH
Shell Script|Connects to multiple mysql or mariadb servers (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [mysql.chart.sh](../collectors/charts.d.plugin/mysql)
configuration file: [charts.d/mysql.conf](../collectors/charts.d.plugin/mysql)| +postgres|python
v2 or v3|Connects to multiple postgres servers (local or remote) to collect real-time performance metrics.
 
Requires package `python-psycopg2`.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [postgres.chart.py](../collectors/python.d.plugin/postgres)
configuration file: [python.d/postgres.conf](../collectors/python.d.plugin/postgres)| +redis|python
v2 or v3|Connects to multiple redis servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [redis.chart.py](../collectors/python.d.plugin/redis)
configuration file: [python.d/redis.conf](../collectors/python.d.plugin/redis)| +rethinkdb|python
v2 or v3|Connects to multiple rethinkdb servers (local or remote) to collect real-time metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [rethinkdb.chart.py](../collectors/python.d.plugin/rethinkdbs)
configuration file: [python.d/rethinkdb.conf](../collectors/python.d.plugin/rethinkdbs)| --- @@ -193,7 +197,7 @@ retroshare|python
v2 or v3|Connects to multiple retroshare servers (local or application|language|notes| :---------:|:------:|:----| squid|python
v2 or v3|Connects to multiple squid servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [squid.chart.py](../collectors/python.d.plugin/squid)
configuration file: [python.d/squid.conf](../collectors/python.d.plugin/squid)| -squid|BASH
Shell Script|Connects to a squid server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [squid.chart.sh](../collectors/charts.d.plugin/squid)
configuration file: [charts.d/squid.conf](../collectors/charts.d.plugin/squid)| +squid|BASH
Shell Script|Connects to a squid server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [squid.chart.sh](../collectors/charts.d.plugin/squid)
configuration file: [charts.d/squid.conf](../collectors/charts.d.plugin/squid)| --- @@ -202,7 +206,7 @@ squid|BASH
Shell Script|Connects to a squid server (local or remote) to coll application|language|notes| :---------:|:------:|:----| -varnish|python
v2 or v3|Uses the varnishstat command to provide varnish cache statistics (client metrics, cache perfomance, thread-related metrics, backend health, memory usage etc.).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [varnish.chart.py](../collectors/python.d.plugin/varnish)
configuration file: [python.d/varnish.conf](../collectors/python.d.plugin/varnish)| +varnish|python
v2 or v3|Uses the varnishstat command to provide varnish cache statistics (client metrics, cache performance, thread-related metrics, backend health, memory usage etc.).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [varnish.chart.py](../collectors/python.d.plugin/varnish)
configuration file: [python.d/varnish.conf](../collectors/python.d.plugin/varnish)| --- @@ -211,7 +215,7 @@ varnish|python
v2 or v3|Uses the varnishstat command to provide varnish cach application|language|notes| :---------:|:------:|:----| -elasticsearch|python
v2 or v3|Monitor elasticsearch performance and health metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [elasticsearch.chart.py](../collectors/python.d.plugin/elasticsearch)
configuration file: [python.d/elasticsearch.conf](../collectors/python.d.plugin/elasticsearch)| +elasticsearch|python
v2 or v3|Monitor elasticsearch performance and health metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [elasticsearch.chart.py](../collectors/python.d.plugin/elasticsearch)
configuration file: [python.d/elasticsearch.conf](../collectors/python.d.plugin/elasticsearch)| --- @@ -220,12 +224,12 @@ elasticsearch|python
v2 or v3|Monitor elasticsearch performance and health m application|language|notes| :---------:|:------:|:----| -named|node.js|Connects to multiple named (ISC-Bind) servers (local or remote) to collect real-time performance metrics. All versions of bind after 9.9.10 are supported.
 
netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [named.node.js](../collectors/node.d.plugin/named)
configuration file: [node.d/named.conf](../collectors/node.d.plugin/named)| -bind_rndc|python
v2 or v3|Parses named.stats dump file to collect real-time performance metrics. All versions of bind after 9.6 are supported.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [bind_rndc.chart.py](../collectors/python.d.plugin/bind_rndc)
configuration file: [python.d/bind_rndc.conf](../collectors/python.d.plugin/bind_rndc)| -nsd|python
v2 or v3|Charts the nsd received queries and zones.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [nsd.chart.py](../collectors/python.d.plugin/nsd)
configuration file: [python.d/nsd.conf](../collectors/python.d.plugin/nsd) -powerdns|python
v2 or v3|Monitors powerdns performance and health metrics
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [powerdns.chart.py](../collectors/python.d.plugin/powerdns)
configuration file: [python.d/powerdns.conf](../collectors/python.d.plugin/powerdns)| -dnsdist|python
v2 or v3|Monitors dnsdist performance and health metrics
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [dnsdist.chart.py](../collectors/python.d.plugin/dnsdist)
configuration file: [python.d/dnsdist.conf](../collectors/python.d.plugin/dnsdist)| -unbound|python
v2 or v3|Monitors Unbound performance and resource usage metrics
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [unbound.chart.py](../collectors/python.d.plugin/unbound)
configuration file: [python.d/unbound.conf](../collectors/python.d.plugin/unbound)| +named|node.js|Connects to multiple named (ISC-Bind) servers (local or remote) to collect real-time performance metrics. All versions of bind after 9.9.10 are supported.
 
Netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [named.node.js](../collectors/node.d.plugin/named)
configuration file: [node.d/named.conf](../collectors/node.d.plugin/named)| +bind_rndc|python
v2 or v3|Parses named.stats dump file to collect real-time performance metrics. All versions of bind after 9.6 are supported.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [bind_rndc.chart.py](../collectors/python.d.plugin/bind_rndc)
configuration file: [python.d/bind_rndc.conf](../collectors/python.d.plugin/bind_rndc)| +nsd|python
v2 or v3|Charts the nsd received queries and zones.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [nsd.chart.py](../collectors/python.d.plugin/nsd)
configuration file: [python.d/nsd.conf](../collectors/python.d.plugin/nsd) +powerdns|python
v2 or v3|Monitors powerdns performance and health metrics
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [powerdns.chart.py](../collectors/python.d.plugin/powerdns)
configuration file: [python.d/powerdns.conf](../collectors/python.d.plugin/powerdns)| +dnsdist|python
v2 or v3|Monitors dnsdist performance and health metrics
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [dnsdist.chart.py](../collectors/python.d.plugin/dnsdist)
configuration file: [python.d/dnsdist.conf](../collectors/python.d.plugin/dnsdist)| +unbound|python
v2 or v3|Monitors Unbound performance and resource usage metrics
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [unbound.chart.py](../collectors/python.d.plugin/unbound)
configuration file: [python.d/unbound.conf](../collectors/python.d.plugin/unbound)| --- @@ -234,7 +238,7 @@ unbound|python
v2 or v3|Monitors Unbound performance and resource usage metr application|language|notes| :---------:|:------:|:----| -isc dhcp|python
v2 or v3|Monitor lease database to show all active leases.
 
Python v2 requires package `python-ipaddress`.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [isc-dhcpd.chart.py](../collectors/python.d.plugin/isc_dhcpd)
configuration file: [python.d/isc-dhcpd.conf](../collectors/python.d.plugin/isc_dhcpd)| +isc dhcp|python
v2 or v3|Monitor lease database to show all active leases.
 
Python v2 requires package `python-ipaddress`.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [isc-dhcpd.chart.py](../collectors/python.d.plugin/isc_dhcpd)
configuration file: [python.d/isc-dhcpd.conf](../collectors/python.d.plugin/isc_dhcpd)| --- @@ -243,8 +247,8 @@ isc dhcp|python
v2 or v3|Monitor lease database to show all active leases.v2 or v3|Monitor frontend, backend and health metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [haproxy.chart.py](../collectors/python.d.plugin/haproxy)
configuration file: [python.d/haproxy.conf](../collectors/python.d.plugin/haproxy)| -traefik|python
v2 or v3|Connects to multiple traefik instances (local or remote) to collect API metrics (response status code, response time, average response time and server uptime).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [traefik.chart.py](../collectors/python.d.plugin/traefik)
configuration file: [python.d/traefik.conf](../collectors/python.d.plugin/traefik)| +haproxy|python
v2 or v3|Monitor frontend, backend and health metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [haproxy.chart.py](../collectors/python.d.plugin/haproxy)
configuration file: [python.d/haproxy.conf](../collectors/python.d.plugin/haproxy)| +traefik|python
v2 or v3|Connects to multiple traefik instances (local or remote) to collect API metrics (response status code, response time, average response time and server uptime).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [traefik.chart.py](../collectors/python.d.plugin/traefik)
configuration file: [python.d/traefik.conf](../collectors/python.d.plugin/traefik)| --- @@ -252,8 +256,8 @@ traefik|python
v2 or v3|Connects to multiple traefik instances (local or rem application|language|notes| :---------:|:------:|:----| -rabbitmq|python
v2 or v3|Monitor rabbitmq performance and health metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [rabbitmq.chart.py](../collectors/python.d.plugin/rabbitmq)
configuration file: [python.d/rabbitmq.conf](../collectors/python.d.plugin/rabbitmq)| -beanstalkd|python
v2 or v3|Provides server and tube level statistics.
 
Requires beanstalkc python package (`pip install beanstalkc` or install package `python-beanstalkc`, which also installs `python-yaml`).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [beanstalk.chart.py](../collectors/python.d.plugin/beanstalk)
configuration file: [python.d/beanstalk.conf](../collectors/python.d.plugin/beanstalk)| +rabbitmq|python
v2 or v3|Monitor rabbitmq performance and health metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [rabbitmq.chart.py](../collectors/python.d.plugin/rabbitmq)
configuration file: [python.d/rabbitmq.conf](../collectors/python.d.plugin/rabbitmq)| +beanstalkd|python
v2 or v3|Provides server and tube level statistics.
 
Requires beanstalkc python package (`pip install beanstalkc` or install package `python-beanstalkc`, which also installs `python-yaml`).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [beanstalk.chart.py](../collectors/python.d.plugin/beanstalk)
configuration file: [python.d/beanstalk.conf](../collectors/python.d.plugin/beanstalk)| --- @@ -262,8 +266,8 @@ beanstalkd|python
v2 or v3|Provides server and tube level statistics.
&n application|language|notes| :---------:|:------:|:----| -apcupsd|BASH
Shell Script|Connects to an apcupsd server to collect real-time statistics of an APC UPS.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [apcupsd.chart.sh](../collectors/charts.d.plugin/apcupsd)
configuration file: [charts.d/apcupsd.conf](../collectors/charts.d.plugin/apcupsd)| -nut|BASH
Shell Script|Connects to a nut server (upsd) to collect real-time UPS statistics.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [nut.chart.sh](../collectors/charts.d.plugin/nut)
configuration file: [charts.d/nut.conf](../collectors/charts.d.plugin/nut)| +apcupsd|BASH
Shell Script|Connects to an apcupsd server to collect real-time statistics of an APC UPS.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [apcupsd.chart.sh](../collectors/charts.d.plugin/apcupsd)
configuration file: [charts.d/apcupsd.conf](../collectors/charts.d.plugin/apcupsd)| +nut|BASH
Shell Script|Connects to a nut server (upsd) to collect real-time UPS statistics.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [nut.chart.sh](../collectors/charts.d.plugin/nut)
configuration file: [charts.d/nut.conf](../collectors/charts.d.plugin/nut)| --- @@ -272,7 +276,7 @@ nut|BASH
Shell Script|Connects to a nut server (upsd) to collect real-time U application|language|notes| :---------:|:------:|:----| -megacli|python
v2 or v3|Collects adapter, physical drives and battery stats..
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [megacli.chart.py](../collectors/python.d.plugin/megacli)
configuration file: [python.d/megacli.conf](../collectors/python.d.plugin/megacli)| +megacli|python
v2 or v3|Collects adapter, physical drives and battery stats.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [megacli.chart.py](../collectors/python.d.plugin/megacli)
configuration file: [python.d/megacli.conf](../collectors/python.d.plugin/megacli)| --- @@ -280,11 +284,11 @@ megacli|python
v2 or v3|Collects adapter, physical drives and battery stats. application|language|notes| :---------:|:------:|:----| -dovecot|python
v2 or v3|Connects to multiple dovecot servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [dovecot.chart.py](../collectors/python.d.plugin/dovecot)
configuration file: [python.d/dovecot.conf](../collectors/python.d.plugin/dovecot)| -exim|python
v2 or v3|Charts the exim queue size.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [exim.chart.py](../collectors/python.d.plugin/exim)
configuration file: [python.d/exim.conf](../collectors/python.d.plugin/exim)| -exim|BASH
Shell Script|Charts the exim queue size.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [exim.chart.sh](../collectors/charts.d.plugin/exim)
configuration file: [charts.d/exim.conf](../collectors/charts.d.plugin/exim)| -postfix|python
v2 or v3|Charts the postfix queue size (supports multiple queues).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [postfix.chart.py](../collectors/python.d.plugin/postfix)
configuration file: [python.d/postfix.conf](../collectors/python.d.plugin/postfix)| -postfix|BASH
Shell Script|Charts the postfix queue size.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [postfix.chart.sh](../collectors/charts.d.plugin/postfix)
configuration file: [charts.d/postfix.conf](../collectors/charts.d.plugin/postfix)| +dovecot|python
v2 or v3|Connects to multiple dovecot servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [dovecot.chart.py](../collectors/python.d.plugin/dovecot)
configuration file: [python.d/dovecot.conf](../collectors/python.d.plugin/dovecot)| +exim|python
v2 or v3|Charts the exim queue size.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [exim.chart.py](../collectors/python.d.plugin/exim)
configuration file: [python.d/exim.conf](../collectors/python.d.plugin/exim)| +exim|BASH
Shell Script|Charts the exim queue size.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [exim.chart.sh](../collectors/charts.d.plugin/exim)
configuration file: [charts.d/exim.conf](../collectors/charts.d.plugin/exim)| +postfix|python
v2 or v3|Charts the postfix queue size (supports multiple queues).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [postfix.chart.py](../collectors/python.d.plugin/postfix)
configuration file: [python.d/postfix.conf](../collectors/python.d.plugin/postfix)| +postfix|BASH
Shell Script|Charts the postfix queue size.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [postfix.chart.sh](../collectors/charts.d.plugin/postfix)
configuration file: [charts.d/postfix.conf](../collectors/charts.d.plugin/postfix)| --- @@ -293,7 +297,7 @@ postfix|BASH
Shell Script|Charts the postfix queue size.

DEPRECATED application|language|notes| :---------:|:------:|:----| -NFS Client|`C`|This is handled entirely by the netdata daemon.
 
Configuration: `netdata.conf`, section `[plugin:proc:/proc/net/rpc/nfs]`. +NFS Client|`C`|This is handled entirely by the Netdata daemon.
 
Configuration: `netdata.conf`, section `[plugin:proc:/proc/net/rpc/nfs]`. NFS Server|`C`|This is handled entirely by the netdata daemon.
 
Configuration: `netdata.conf`, section `[plugin:proc:/proc/net/rpc/nfsd]`. samba|python
v2 or v3|Performance metrics of Samba SMB2 file sharing.
 
documentation page: [python.d.plugin module samba](../collectors/python.d.plugin/samba)
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [samba.chart.py](../collectors/python.d.plugin/samba)
configuration file: [python.d/samba.conf](../collectors/python.d.plugin/samba)| @@ -319,11 +323,12 @@ xenstat|C|Collects host and domain statistics for XenServer or XCP-ng hypervisor application|language|notes| :---------:|:------:|:----| -apps|C|`apps.plugin` collects resource usage statistics for all processes running in the system. It groups the entire process tree and reports dozens of metrics for CPU utilization, memory footprint, disk I/O, swap memory, network connections, open files and sockets, etc. It reports metrics for application groups, users and user groups.
 
[Documentation of `apps.plugin`](../collectors/apps.plugin/).
 
netdata plugin: [`apps_plugin.c`](../collectors/apps.plugin)
configuration file: [`apps_groups.conf`](../collectors/apps.plugin)| -ioping|C|Charts disk latency statistics for a directory/file/device, using the `ioping` command. A recent (probably unreleased) version of ioping is required. The plugin supplied can install it in `/usr/local`.
 
netdata plugin: [ioping.plugin](../collectors/ioping.plugin) (this is a shell wrapper to start ioping - once ioping is started, netdata and ioping communicate directly - it can also install the right version of ioping)
configuration file: [ioping.conf](../collectors/ioping.plugin)| -cpu_apps|BASH
Shell Script|Collects the CPU utilization of select apps.

DEPRECATED IN FAVOR OF `apps.plugin`. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [cpu_apps.chart.sh](../collectors/charts.d.plugin/cpu_apps)
configuration file: [charts.d/cpu_apps.conf](../collectors/charts.d.plugin/cpu_apps)| -load_average|BASH
Shell Script|Collects the current system load average.

DEPRECATED IN FAVOR OF THE NETDATA INTERNAL ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [load_average.chart.sh](../collectors/charts.d.plugin/load_average)
configuration file: [charts.d/load_average.conf](../collectors/charts.d.plugin/load_average)| -mem_apps|BASH
Shell Script|Collects the memory footprint of select applications.

DEPRECATED IN FAVOR OF `apps.plugin`. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [mem_apps.chart.sh](../collectors/charts.d.plugin/mem_apps)
configuration file: [charts.d/mem_apps.conf](../collectors/charts.d.plugin/mem_apps)| +apps|C|`apps.plugin` collects resource usage statistics for all processes running in the system. It groups the entire process tree and reports dozens of metrics for CPU utilization, memory footprint, disk I/O, swap memory, network connections, open files and sockets, etc. It reports metrics for application groups, users and user groups.
 
[Documentation of `apps.plugin`](../collectors/apps.plugin/).
 
Netdata plugin: [`apps_plugin.c`](../collectors/apps.plugin)
configuration file: [`apps_groups.conf`](../collectors/apps.plugin)| +ioping|C|Charts disk latency statistics for a directory/file/device, using the `ioping` command. A recent (probably unreleased) version of ioping is required. The plugin supplied can install it in `/usr/local`.
 
Netdata plugin: [ioping.plugin](../collectors/ioping.plugin) (this is a shell wrapper to start ioping - once ioping is started, Netdata and ioping communicate directly - it can also install the right version of ioping)
configuration file: [ioping.conf](../collectors/ioping.plugin)| +perf|C|`perf.plugin` collects CPU performance metrics using hardware performance monitoring units (PMU).
 
[Documentation of `perf.plugin`](../collectors/perf.plugin/).
 
Netdata plugin: [`perf_plugin.c`](../collectors/perf.plugin)| +cpu_apps|BASH
Shell Script|Collects the CPU utilization of select apps.

DEPRECATED IN FAVOR OF `apps.plugin`. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [cpu_apps.chart.sh](../collectors/charts.d.plugin/cpu_apps)
configuration file: [charts.d/cpu_apps.conf](../collectors/charts.d.plugin/cpu_apps)| +load_average|BASH
Shell Script|Collects the current system load average.

DEPRECATED IN FAVOR OF THE NETDATA INTERNAL ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [load_average.chart.sh](../collectors/charts.d.plugin/load_average)
configuration file: [charts.d/load_average.conf](../collectors/charts.d.plugin/load_average)| +mem_apps|BASH
Shell Script|Collects the memory footprint of select applications.

DEPRECATED IN FAVOR OF `apps.plugin`. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [mem_apps.chart.sh](../collectors/charts.d.plugin/mem_apps)
configuration file: [charts.d/mem_apps.conf](../collectors/charts.d.plugin/mem_apps)| --- @@ -332,14 +337,14 @@ mem_apps|BASH
Shell Script|Collects the memory footprint of select applicati application|language|notes| :---------:|:------:|:----| -cpufreq|BASH
Shell Script|Collects current CPU frequency from `/sys/devices`.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [cpufreq.chart.sh](../collectors/charts.d.plugin/cpufreq)
configuration file: [charts.d/cpufreq.conf](../collectors/charts.d.plugin/cpufreq)| -IPMI|C|Collects temperatures, voltages, currents, power, fans and `SEL` events from IPMI using `libipmimonitoring`.
Check [Monitoring IPMI](../collectors/freeipmi.plugin/) for more information
 
netdata plugin: [freeipmi.plugin](../collectors/freeipmi.plugin)
configuration file: none required - to enable it, compile/install netdata with `--enable-plugin-freeipmi`| -hddtemp|python
v2 or v3|Connects to multiple hddtemp servers (local or remote) to collect real-time performance metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [hddtemp.chart.py](../collectors/python.d.plugin/hddtemp)
configuration file: [python.d/hddtemp.conf](../collectors/python.d.plugin/hddtemp)| -hddtemp|BASH
Shell Script|Connects to a hddtemp server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [hddtemp.chart.sh](../collectors/charts.d.plugin/hddtemp)
configuration file: [charts.d/hddtemp.conf](../collectors/charts.d.plugin/hddtemp)| -sensors|BASH
Shell Script|Collects sensors values from files in `/sys`.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [sensors.chart.sh](../collectors/charts.d.plugin/sensors)
configuration file: [charts.d/sensors.conf](../collectors/charts.d.plugin/sensors)| -sensors|python
v2 or v3|Uses `lm-sensors` to collect sensor data.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [sensors.chart.py](../collectors/python.d.plugin/sensors)
configuration file: [python.d/sensors.conf](../collectors/python.d.plugin/sensors)| -smartd_log|python
v2 or v3|Collects the S.M.A.R.T attributes from `smartd` log files.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [smartd_log.chart.py](../collectors/python.d.plugin/smartd_log)
configuration file: [python.d/smartd_log.conf](../collectors/python.d.plugin/smartd_log)| -w1sensor|python
v2 or v3|Collects data from connected 1-Wire sensors.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [w1sensor.chart.py](../collectors/python.d.plugin/w1sensor)
configuration file: [python.d/w1sensor.conf](../collectors/python.d.plugin/w1sensor)| +cpufreq|BASH
Shell Script|Collects current CPU frequency from `/sys/devices`.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [cpufreq.chart.sh](../collectors/charts.d.plugin/cpufreq)
configuration file: [charts.d/cpufreq.conf](../collectors/charts.d.plugin/cpufreq)| +IPMI|C|Collects temperatures, voltages, currents, power, fans and `SEL` events from IPMI using `libipmimonitoring`.
Check [Monitoring IPMI](../collectors/freeipmi.plugin/) for more information
 
Netdata plugin: [freeipmi.plugin](../collectors/freeipmi.plugin)
configuration file: none required - to enable it, compile/install Netdata with `--enable-plugin-freeipmi`| +hddtemp|python
v2 or v3|Connects to multiple hddtemp servers (local or remote) to collect real-time performance metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [hddtemp.chart.py](../collectors/python.d.plugin/hddtemp)
configuration file: [python.d/hddtemp.conf](../collectors/python.d.plugin/hddtemp)| +hddtemp|BASH
Shell Script|Connects to a hddtemp server (local or remote) to collect real-time performance metrics.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [hddtemp.chart.sh](../collectors/charts.d.plugin/hddtemp)
configuration file: [charts.d/hddtemp.conf](../collectors/charts.d.plugin/hddtemp)| +sensors|BASH
Shell Script|Collects sensors values from files in `/sys`.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [sensors.chart.sh](../collectors/charts.d.plugin/sensors)
configuration file: [charts.d/sensors.conf](../collectors/charts.d.plugin/sensors)| +sensors|python
v2 or v3|Uses `lm-sensors` to collect sensor data.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [sensors.chart.py](../collectors/python.d.plugin/sensors)
configuration file: [python.d/sensors.conf](../collectors/python.d.plugin/sensors)| +smartd_log|python
v2 or v3|Collects the S.M.A.R.T attributes from `smartd` log files.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [smartd_log.chart.py](../collectors/python.d.plugin/smartd_log)
configuration file: [python.d/smartd_log.conf](../collectors/python.d.plugin/smartd_log)| +w1sensor|python
v2 or v3|Collects data from connected 1-Wire sensors.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [w1sensor.chart.py](../collectors/python.d.plugin/w1sensor)
configuration file: [python.d/w1sensor.conf](../collectors/python.d.plugin/w1sensor)| --- @@ -348,11 +353,11 @@ w1sensor|python
v2 or v3|Collects data from connected 1-Wire sensors.
&n application|language|notes| :---------:|:------:|:----| -ap|BASH
Shell Script|Uses the `iw` command to provide statistics of wireless clients connected to a wireless access point running on this host (works well with `hostapd`).
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [ap.chart.sh](../collectors/charts.d.plugin/ap)
configuration file: [charts.d/ap.conf](../collectors/charts.d.plugin/ap)| -fping|C|Charts network latency statistics for any number of nodes, using the `fping` command. A recent (probably unreleased) version of fping is required. The plugin supplied can install it in `/usr/local`.
 
netdata plugin: [fping.plugin](../collectors/fping.plugin) (this is a shell wrapper to start fping - once fping is started, netdata and fping communicate directly - it can also install the right version of fping)
configuration file: [fping.conf](../collectors/fping.plugin)| -snmp|node.js|Connects to multiple snmp servers to collect real-time performance metrics.
 
netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [snmp.node.js](../collectors/node.d.plugin/snmp)
configuration file: [node.d/snmp.conf](../collectors/node.d.plugin/snmp)| +ap|BASH
Shell Script|Uses the `iw` command to provide statistics of wireless clients connected to a wireless access point running on this host (works well with `hostapd`).
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [ap.chart.sh](../collectors/charts.d.plugin/ap)
configuration file: [charts.d/ap.conf](../collectors/charts.d.plugin/ap)| +fping|C|Charts network latency statistics for any number of nodes, using the `fping` command. A recent (probably unreleased) version of fping is required. The plugin supplied can install it in `/usr/local`.
 
Netdata plugin: [fping.plugin](../collectors/fping.plugin) (this is a shell wrapper to start fping - once fping is started, Netdata and fping communicate directly - it can also install the right version of fping)
configuration file: [fping.conf](../collectors/fping.plugin)| +snmp|node.js|Connects to multiple snmp servers to collect real-time performance metrics.
 
Netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [snmp.node.js](../collectors/node.d.plugin/snmp)
configuration file: [node.d/snmp.conf](../collectors/node.d.plugin/snmp)| nfacct|C|collects netfilter firewall, connection tracker and accounting metrics using `libmnl` and `libnetfilter_acct`| -dns_query_time|python
v2 or v3|Provides DNS query time statistics.
 
Requires package `dnspython` (`pip install dnspython` or install package `python-dnspython`).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [dns_query_time.chart.py](../collectors/python.d.plugin/dns_query_time)
configuration file: [python.d/dns_query_time.conf](../collectors/python.d.plugin/dns_query_time)| +dns_query_time|python
v2 or v3|Provides DNS query time statistics.
 
Requires package `dnspython` (`pip install dnspython` or install package `python-dnspython`).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [dns_query_time.chart.py](../collectors/python.d.plugin/dns_query_time)
configuration file: [python.d/dns_query_time.conf](../collectors/python.d.plugin/dns_query_time)| http|python
v2 or v3|Monitors a generic web page for status code and returned content in HTML port|ptyhon
v2 or v3|Checks if a generic TCP port for its availability and response time @@ -363,8 +368,8 @@ port|ptyhon
v2 or v3|Checks if a generic TCP port for its availability and application|language|notes| :---------:|:------:|:----| -chrony|python
v2 or v3|Uses the chronyc command to provide chrony statistics (Frequency, Last offset, RMS offset, Residual freq, Root delay, Root dispersion, Skew, System time).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [chrony.chart.py](../collectors/python.d.plugin/chrony)
configuration file: [python.d/chrony.conf](../collectors/python.d.plugin/chrony)| -ntpd|python
v2 or v3|Connects to multiple ntpd servers (local or remote) to provide statistics of system variables and optional also peer variables (if enabled in the configuration).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [ntpd.chart.py](../collectors/python.d.plugin/ntpd)
configuration file: [python.d/ntpd.conf](../collectors/python.d.plugin/ntpd)| +chrony|python
v2 or v3|Uses the chronyc command to provide chrony statistics (Frequency, Last offset, RMS offset, Residual freq, Root delay, Root dispersion, Skew, System time).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [chrony.chart.py](../collectors/python.d.plugin/chrony)
configuration file: [python.d/chrony.conf](../collectors/python.d.plugin/chrony)| +ntpd|python
v2 or v3|Connects to multiple ntpd servers (local or remote) to provide statistics of system variables and optionally also peer variables (if enabled in the configuration).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [ntpd.chart.py](../collectors/python.d.plugin/ntpd)
configuration file: [python.d/ntpd.conf](../collectors/python.d.plugin/ntpd)| --- @@ -373,9 +378,9 @@ ntpd|python
v2 or v3|Connects to multiple ntpd servers (local or remote) to application|language|notes| :---------:|:------:|:----| -freeradius|python
v2 or v3|Uses the radclient command to provide freeradius statistics (authentication, accounting, proxy-authentication, proxy-accounting).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [freeradius.chart.py](../collectors/python.d.plugin/freeradius)
configuration file: [python.d/freeradius.conf](../collectors/python.d.plugin/freeradius)| -openvpn|python
v2 or v3|All data from openvpn-status.log in your dashboard!
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [ovpn_status_log.chart.py](../collectors/python.d.plugin/ovpn_status_log)
configuration file: [python.d/ovpn_status_log.conf](../collectors/python.d.plugin/ovpn_status_log)| -fail2ban|python
v2 or v3|Monitor fail2ban log file to show all bans for all active jails
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [fail2ban.chart.py](../collectors/python.d.plugin/fail2ban)
configuration file: [python.d/fail2ban.conf](../collectors/python.d.plugin/fail2ban)| +freeradius|python
v2 or v3|Uses the radclient command to provide freeradius statistics (authentication, accounting, proxy-authentication, proxy-accounting).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [freeradius.chart.py](../collectors/python.d.plugin/freeradius)
configuration file: [python.d/freeradius.conf](../collectors/python.d.plugin/freeradius)| +openvpn|python
v2 or v3|All data from openvpn-status.log in your dashboard!
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [ovpn_status_log.chart.py](../collectors/python.d.plugin/ovpn_status_log)
configuration file: [python.d/ovpn_status_log.conf](../collectors/python.d.plugin/ovpn_status_log)| +fail2ban|python
v2 or v3|Monitor fail2ban log file to show all bans for all active jails
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [fail2ban.chart.py](../collectors/python.d.plugin/fail2ban)
configuration file: [python.d/fail2ban.conf](../collectors/python.d.plugin/fail2ban)| --- @@ -384,7 +389,7 @@ fail2ban|python
v2 or v3|Monitor fail2ban log file to show all bans for all application|language|notes| :---------:|:------:|:----| -opensips|BASH
Shell Script|Connects to an opensips server (local only) to collect real-time performance metrics.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [opensips.chart.sh](../collectors/charts.d.plugin/opensips)
configuration file: [charts.d/opensips.conf](../collectors/charts.d.plugin/opensips)| +opensips|BASH
Shell Script|Connects to an opensips server (local only) to collect real-time performance metrics.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [opensips.chart.sh](../collectors/charts.d.plugin/opensips)
configuration file: [charts.d/opensips.conf](../collectors/charts.d.plugin/opensips)| --- @@ -393,7 +398,7 @@ opensips|BASH
Shell Script|Connects to an opensips server (local only) to co application|language|notes| :---------:|:------:|:----| -go_expvar|python
v2 or v3|Parses metrics exposed by applications written in the Go programming language using the [expvar package](https://golang.org/pkg/expvar/).
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [go_expvar.chart.py](../collectors/python.d.plugin/go_expvar)
configuration file: [python.d/go_expvar.conf](../collectors/python.d.plugin/go_expvar)
documentation: [Monitoring Go Applications](../collectors/python.d.plugin/go_expvar/)| +go_expvar|python
v2 or v3|Parses metrics exposed by applications written in the Go programming language using the [expvar package](https://golang.org/pkg/expvar/).
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [go_expvar.chart.py](../collectors/python.d.plugin/go_expvar)
configuration file: [python.d/go_expvar.conf](../collectors/python.d.plugin/go_expvar)
documentation: [Monitoring Go Applications](../collectors/python.d.plugin/go_expvar/)| --- @@ -402,9 +407,9 @@ go_expvar|python
v2 or v3|Parses metrics exposed by applications written in application|language|notes| :---------:|:------:|:----| -sma_webbox|node.js|Connects to multiple remote SMA webboxes to collect real-time performance metrics of the photovoltaic (solar) power generation.
 
netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [sma_webbox.node.js](../collectors/node.d.plugin/sma_webbox)
configuration file: [node.d/sma_webbox.conf](../collectors/node.d.plugin/sma_webbox)| -fronius|node.js|Connects to multiple remote Fronius Symo servers to collect real-time performance metrics of the photovoltaic (solar) power generation.
 
netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [fronius.node.js](../collectors/node.d.plugin/fronius)
configuration file: [node.d/fronius.conf](../collectors/node.d.plugin/fronius)| -stiebeleltron|node.js|Collects the temperatures and other metrics from your Stiebel Eltron heating system using their Internet Service Gateway (ISG web).
 
netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [stiebeleltron.node.js](../collectors/node.d.plugin/stiebeleltron)
configuration file: [node.d/stiebeleltron.conf](../collectors/node.d.plugin/stiebeleltron)| +sma_webbox|node.js|Connects to multiple remote SMA webboxes to collect real-time performance metrics of the photovoltaic (solar) power generation.
 
Netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [sma_webbox.node.js](../collectors/node.d.plugin/sma_webbox)
configuration file: [node.d/sma_webbox.conf](../collectors/node.d.plugin/sma_webbox)| +fronius|node.js|Connects to multiple remote Fronius Symo servers to collect real-time performance metrics of the photovoltaic (solar) power generation.
 
Netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [fronius.node.js](../collectors/node.d.plugin/fronius)
configuration file: [node.d/fronius.conf](../collectors/node.d.plugin/fronius)| +stiebeleltron|node.js|Collects the temperatures and other metrics from your Stiebel Eltron heating system using their Internet Service Gateway (ISG web).
 
Netdata plugin: [node.d.plugin](../collectors/node.d.plugin#nodedplugin)
plugin module: [stiebeleltron.node.js](../collectors/node.d.plugin/stiebeleltron)
configuration file: [node.d/stiebeleltron.conf](../collectors/node.d.plugin/stiebeleltron)| --- @@ -413,7 +418,7 @@ stiebeleltron|node.js|Collects the temperatures and other metrics from your Stie application|language|notes| :---------:|:------:|:----| -Spring Boot Application|java|Monitors running Java [Spring Boot](https://spring.io/) applications that expose their metrics with the use of the **Spring Boot Actuator** included in Spring Boot library.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [springboot](../collectors/python.d.plugin/springboot)
configuration file: [python.d/springboot.conf](../collectors/python.d.plugin/springboot) +Spring Boot Application|java|Monitors running Java [Spring Boot](https://spring.io/) applications that expose their metrics with the use of the **Spring Boot Actuator** included in Spring Boot library.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [springboot](../collectors/python.d.plugin/springboot)
configuration file: [python.d/springboot.conf](../collectors/python.d.plugin/springboot) --- @@ -422,7 +427,7 @@ Spring Boot Application|java|Monitors running Java [Spring Boot](https://spring. application|language|notes| :---------:|:------:|:----| -puppet|python
v2 or v3|Connects to multiple Puppet Server and Puppet DB instances (local or remote) to collect real-time status metrics.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [puppet.chart.py](../collectors/python.d.plugin/puppet)
configuration file: [python.d/puppet.conf](../collectors/python.d.plugin/puppet)| +puppet|python
v2 or v3|Connects to multiple Puppet Server and Puppet DB instances (local or remote) to collect real-time status metrics.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [puppet.chart.py](../collectors/python.d.plugin/puppet)
configuration file: [python.d/puppet.conf](../collectors/python.d.plugin/puppet)| --- @@ -430,7 +435,7 @@ puppet|python
v2 or v3|Connects to multiple Puppet Server and Puppet DB inst application|language|notes| :---------:|:------:|:----| -SpigotMC|Python
v2 or v3|Monitors Spigot Minecraft server ticks per second and number of online players using the Minecraft remote console.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [spigotmc.chart.py](../collectors/python.d.plugin/spigotmc)
configuration file: [python.d/spigotmc.conf](../collectors/python.d.plugin/spigotmc)| +SpigotMC|Python
v2 or v3|Monitors Spigot Minecraft server ticks per second and number of online players using the Minecraft remote console.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [spigotmc.chart.py](../collectors/python.d.plugin/spigotmc)
configuration file: [python.d/spigotmc.conf](../collectors/python.d.plugin/spigotmc)| --- @@ -438,7 +443,7 @@ SpigotMC|Python
v2 or v3|Monitors Spigot Minecraft server ticks per second a application|language|notes| :---------:|:------:|:----| -BOINC|Python
v2 or v3|Monitors task states for local and remote BOINC client software using the remote GUI RPC interface. Also provides alarms for a handful of error conditions. Requires manual configuration
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [boinc.chart.py](../collectors/python.d.plugin/boinc)
configuration file: [python.d/boinc.conf](../collectors/python.d.plugin/boinc)| +BOINC|Python
v2 or v3|Monitors task states for local and remote BOINC client software using the remote GUI RPC interface. Also provides alarms for a handful of error conditions. Requires manual configuration
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [boinc.chart.py](../collectors/python.d.plugin/boinc)
configuration file: [python.d/boinc.conf](../collectors/python.d.plugin/boinc)| --- @@ -446,7 +451,7 @@ BOINC|Python
v2 or v3|Monitors task states for local and remote BOINC client application|language|notes| :---------:|:------:|:----| -example|BASH
Shell Script|Skeleton plugin in BASH.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module to shell scripting plugins.
 
netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [example.chart.sh](../collectors/charts.d.plugin/example)
configuration file: [charts.d/example.conf](../collectors/charts.d.plugin/example)| -example|python
v2 or v3|Skeleton plugin in Python.
 
netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [example.chart.py](../collectors/python.d.plugin/example)
configuration file: [python.d/example.conf](../collectors/python.d.plugin/example)| +example|BASH
Shell Script|Skeleton plugin in BASH.

DEPRECATED IN FAVOR OF THE PYTHON ONE. It is still supplied only as an example module for shell scripting plugins.
 
Netdata plugin: [charts.d.plugin](../collectors/charts.d.plugin#chartsdplugin)
plugin module: [example.chart.sh](../collectors/charts.d.plugin/example)
configuration file: [charts.d/example.conf](../collectors/charts.d.plugin/example)| +example|python
v2 or v3|Skeleton plugin in Python.
 
Netdata plugin: [python.d.plugin](../collectors/python.d.plugin)
plugin module: [example.chart.py](../collectors/python.d.plugin/example)
configuration file: [python.d/example.conf](../collectors/python.d.plugin/example)| [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2FAdd-more-charts-to-netdata&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/Charts.md b/docs/Charts.md index 64c36302f..42ac4453b 100644 --- a/docs/Charts.md +++ b/docs/Charts.md @@ -4,7 +4,7 @@ Before configuring an alarm or writing a collector, it's important to understand ## Charts -Each chart that you see on the netdata dashboard contains one or more dimensions, one for each collected or calculated metric. +Each chart that you see on the Netdata dashboard contains one or more dimensions, one for each collected or calculated metric. The chart name or chart id is what you see in parentheses at the top left corner of the chart you are interested in. For example, if you go to the system cpu chart: `http://your.netdata.ip:19999/#menu_system_submenu_cpu`, you will see at the top left of the chart the label "Total CPU utilization (system.cpu)". In this case, the chart name is `system.cpu`. @@ -16,7 +16,7 @@ Most charts depict more than one dimensions. The dimensions of a chart are calle When you have several instances of a monitored hardware or software resource (e.g. network interfaces, mysql instances etc.), you need to be able to identify each one separately. Netdata uses "families" to identify such instances. For example, if I have the network interfaces `eth0` and `eth1`, `eth0` will be one family, and `eth1` will be another. -The reasoning behind calling these instances "families" is that different charts for the same instance can and many times are related (relatives, family, you get it). The family of a chart is usually the name of the netdata dashboard submenu that you see selected on the right navigation pane, when you are looking at a chart. 
For the example of the two network interfaces, you would see a submenu `eth0` and a submenu `eth1` under the "Network Interfaces" menu on the right navigation pane. +The reasoning behind calling these instances "families" is that different charts for the same instance can and many times are related (relatives, family, you get it). The family of a chart is usually the name of the Netdata dashboard submenu that you see selected on the right navigation pane, when you are looking at a chart. For the example of the two network interfaces, you would see a submenu `eth0` and a submenu `eth1` under the "Network Interfaces" menu on the right navigation pane. ## Contexts diff --git a/docs/Demo-Sites.md b/docs/Demo-Sites.md index f6aad1398..0d478d73f 100644 --- a/docs/Demo-Sites.md +++ b/docs/Demo-Sites.md @@ -1,10 +1,10 @@ # Demo sites -Live demo installations of netdata are available at **[https://my-netdata.io](https://my-netdata.io)**: +Live demo installations of Netdata are available at **[https://www.netdata.cloud](https://www.netdata.cloud/#live-demo)**: -Location | netdata demo URL | 60 mins reqs | VM Donated by +Location | Netdata demo URL | 60 mins reqs | VM Donated by :-------:|:-----------------:|:----------:|:------------- -London (UK)|**[london.my-netdata.io](https://london.my-netdata.io)**
(this is the global netdata **registry** and has **named** and **mysql** charts)|[![Requests Per Second](https://london.my-netdata.io/api/v1/badge.svg?chart=netdata.requests&dimensions=requests&after=-3600&options=unaligned&group=sum&label=reqs&units=empty&value_color=blue&precision=0&v42)](https://london.my-netdata.io)|[DigitalOcean.com](https://m.do.co/c/83dc9f941745) +London (UK)|**[london.my-netdata.io](https://london.my-netdata.io)**
(this is the global Netdata **registry** and has **named** and **mysql** charts)|[![Requests Per Second](https://london.my-netdata.io/api/v1/badge.svg?chart=netdata.requests&dimensions=requests&after=-3600&options=unaligned&group=sum&label=reqs&units=empty&value_color=blue&precision=0&v42)](https://london.my-netdata.io)|[DigitalOcean.com](https://m.do.co/c/83dc9f941745) Atlanta (USA)|**[cdn77.my-netdata.io](https://cdn77.my-netdata.io)**
(with **named** and **mysql** charts)|[![Requests Per Second](https://cdn77.my-netdata.io/api/v1/badge.svg?chart=netdata.requests&dimensions=requests&after=-3600&options=unaligned&group=sum&label=reqs&units=empty&value_color=blue&precision=0&v42)](https://cdn77.my-netdata.io)|[CDN77.com](https://www.cdn77.com/) Israel|**[octopuscs.my-netdata.io](https://octopuscs.my-netdata.io)**|[![Requests Per Second](https://octopuscs.my-netdata.io/api/v1/badge.svg?chart=netdata.requests&dimensions=requests&after=-3600&options=unaligned&group=sum&label=reqs&units=empty&value_color=blue&precision=0&v42)](https://octopuscs.my-netdata.io)|[OctopusCS.com](https://www.octopuscs.com) Roubaix (France)|**[ventureer.my-netdata.io](https://ventureer.my-netdata.io)**|[![Requests Per Second](https://ventureer.my-netdata.io/api/v1/badge.svg?chart=netdata.requests&dimensions=requests&after=-3600&options=unaligned&group=sum&label=reqs&units=empty&value_color=blue&precision=0&v42)](https://ventureer.my-netdata.io)|[Ventureer.com](https://ventureer.com/) diff --git a/docs/Donations-netdata-has-received.md b/docs/Donations-netdata-has-received.md index 3c737be8a..062cb02b5 100644 --- a/docs/Donations-netdata-has-received.md +++ b/docs/Donations-netdata-has-received.md @@ -1,13 +1,13 @@ # Donations -This is a list of the donations we have received for netdata (sorted alphabetically on their name): +This is a list of the donations we have received for Netdata (sorted alphabetically on their name): what donated|related links|who donated|description of the donation ----:|:-----:|:---:|:----------- Packages Distribution|-|**[PackageCloud.io](https://packagecloud.io/)**|**PackageCloud.io** donated to a free open-source subscription to their awesome Package Distribution services. Cross Browser Testing|-|**[BrowserStack.com](https://www.browserstack.com/)**|**BrowserStack.com** donated a free subscription to their awesome Browser Testing services (all three of them: Live, Screenshots, Responsive). 
Cloud VM|[cdn77.my-netdata.io](http://cdn77.my-netdata.io)|**[CDN77.com](https://www.cdn77.com/)**|**CDN77.com** donated a VM with 2 CPU cores, 4GB RAM and 20GB HD, on their excellent CDN network. -Localization Management|[netdata localization project](https://crowdin.com/project/netdata) (check issue [#279](https://github.com/netdata/netdata/issues/279))|**[Crowdin.com](https://crowdin.com/)**|**Crowdin.com** donated an open source license to their Localization Management Platform. +Localization Management|[Netdata localization project](https://crowdin.com/project/netdata) (check issue [#279](https://github.com/netdata/netdata/issues/279))|**[Crowdin.com](https://crowdin.com/)**|**Crowdin.com** donated an open source license to their Localization Management Platform. Cloud VMs|[london.my-netdata.io](https://london.my-netdata.io) (Several VMs)|**[DigitalOcean.com](https://www.digitalocean.com/)**|**DigitalOcean.com** donated 1000 USD to be used in their excellent Cloud Computing services. Many thanks to [Justin Paine](https://github.com/xxdesmus) for making this happen. Development IDE|-|**[JetBrains.com](https://www.jetbrains.com/)**|**JetBrains.com** donated an open source license for 4 developers for 1 year, to their excellent IDEs. Cloud VM|[octopuscs.my-netdata.io](https://octopuscs.my-netdata.io)|**[OctopusCS.com](https://octopuscs.com/)**|**OctopusCS.com** donated a VM with 4 CPU cores, 16GB RAM and 50GB HD in their excellent Cloud Computing services. @@ -18,7 +18,7 @@ Thank you! --- -**Do you want to donate?** We are thirsty for on-line services that can help us make netdata better. We also try to build a network of demo sites (VMs) that can help us show the full potential of netdata. +**Do you want to donate?** We are thirsty for on-line services that can help us make Netdata better. We also try to build a network of demo sites (VMs) that can help us show the full potential of Netdata. Please contact me at costa@tsaousis.gr. 
diff --git a/docs/GettingStarted.md b/docs/GettingStarted.md index cc58634f1..3ddf4c388 100644 --- a/docs/GettingStarted.md +++ b/docs/GettingStarted.md @@ -1,10 +1,10 @@ # Getting Started -These are your first steps **after** you have installed netdata. If you haven't installed it already, please check the [installation page](../packaging/installer). +These are your first steps **after** you have installed Netdata. If you haven't installed it already, please check the [installation page](../packaging/installer). ## Accessing the dashboard -To access the netdata dashboard, navigate with your browser to: +To access the Netdata dashboard, navigate with your browser to: ``` http://your.server.ip:19999/ @@ -14,7 +14,7 @@ http://your.server.ip:19999/ **Verify Netdata is running.** -Open an ssh session to the server and execute `sudo ps -e | grep netdata`. It should respond with the PID of the netdata daemon. If it prints nothing, Netdata is not running. Check the [installation page](../packaging/installer) to install it. +Open an ssh session to the server and execute `sudo ps -e | grep netdata`. It should respond with the PID of the Netdata daemon. If it prints nothing, Netdata is not running. Check the [installation page](../packaging/installer) to install it. **Verify Netdata responds to HTTP requests.** @@ -32,9 +32,9 @@ If still Netdata does not receive the requests, something is blocking them. A fi  
-When you install multiple Netdata servers, all your servers will appear at the `my-netdata` menu at the top left of the dashboard. For this to work, you have to manually access just once, the dashboard of each of your netdata servers. +When you install multiple Netdata servers, all your servers will appear at the node menu at the top left of the dashboard. For this to work, you have to manually access just once, the dashboard of each of your netdata servers. -The `my-netdata` menu is more than just browser bookmarks. When switching Netdata servers from that menu, any settings of the current view are propagated to the other netdata server: +The node menu is more than just browser bookmarks. When switching Netdata servers from that menu, any settings of the current view are propagated to the other netdata server: - the current charts panning (drag the charts left or right), - the current charts zooming (`SHIFT` + mouse wheel over a chart), @@ -43,7 +43,7 @@ The `my-netdata` menu is more than just browser bookmarks. When switching Netdat - the theme you use, - etc. -are all sent over to other netdata server, to allow you troubleshoot cross-server performance issues easily. +are all sent over to other Netdata server, to allow you troubleshoot cross-server performance issues easily. ## Starting and stopping Netdata @@ -55,15 +55,15 @@ To start/stop Netdata, depending on your environment, you should use: - `service netdata start` and `service netdata stop` - `/etc/init.d/netdata start` and `/etc/init.d/netdata stop` -Once netdata is installed, the installer configures it to start at boot and stop at shutdown. +Once Netdata is installed, the installer configures it to start at boot and stop at shutdown. For more information about using these commands, consult your system documentation. ## Sizing Netdata -The default installation of netdata is configured for a small round-robin database: just 1 hour of data. 
Depending on the memory your system has and the amount you can dedicate to Netdata, you should adapt this. On production systems with limited RAM, we suggest to set this to 3-4 hours. For best results you should set this to 24 or 48 hours. +The default installation of Netdata is configured for a small round-robin database: just 1 hour of data. Depending on the memory your system has and the amount you can dedicate to Netdata, you should adapt this. On production systems with limited RAM, we suggest to set this to 3-4 hours. For best results you should set this to 24 or 48 hours. -For every hour of data, Netdata needs about 25MB of RAM. If you can dedicate about 100MB of RAM to netdata, you should set its database size to 4 hours. +For every hour of data, Netdata needs about 25MB of RAM. If you can dedicate about 100MB of RAM to Netdata, you should set its database size to 4 hours. To do this, edit `/etc/netdata/netdata.conf` (or `/opt/netdata/etc/netdata/netdata.conf`) and set: @@ -77,24 +77,24 @@ Make sure the `history` line is not commented (comment lines start with `#`). 1 hour is 3600 seconds, so the number you need to set is the result of `HOURS * 3600`. !!! danger - Be careful when you set this on production systems. If you set it too high, your system may run out of memory. By default, netdata is configured to be killed first when the system starves for memory, but better be careful to avoid issues. + Be careful when you set this on production systems. If you set it too high, your system may run out of memory. By default, Netdata is configured to be killed first when the system starves for memory, but better be careful to avoid issues. For more information about Netdata memory requirements, [check this page](../database). -If your kernel supports KSM (most do), you can [enable KSM to half netdata memory requirement](../database#ksm). +If your kernel supports KSM (most do), you can [enable KSM to half Netdata memory requirement](../database#ksm). 
## Service discovery and auto-detection Netdata supports auto-detection of data collection sources. It auto-detects almost everything: database servers, web servers, dns server, etc. -This auto-detection process happens **only once**, when netdata starts. To have Netdata re-discover data sources, you need to restart it. There are a few exceptions to this: +This auto-detection process happens **only once**, when Netdata starts. To have Netdata re-discover data sources, you need to restart it. There are a few exceptions to this: - containers and VMs are auto-detected forever (when Netdata is running at the host). - many data sources are collected but are silenced by default, until there is useful information to collect (for example network interface dropped packet, will appear after a packet has been dropped). - services that are not optimal to collect on all systems, are disabled by default. - services we received feedback from users that caused issues when monitored, are also disabled by default (for example, `chrony` is disabled by default, because CentOS ships a version of it that uses 100% CPU when queried for statistics). -Once a data collection source is detected, netdata will never quit trying to collect data from it, until Netdata is restarted. So, if you stop your web server, netdata will pick it up automatically when it is started again. +Once a data collection source is detected, Netdata will never quit trying to collect data from it, until Netdata is restarted. So, if you stop your web server, Netdata will pick it up automatically when it is started again. Since Netdata is installed on all your systems (even inside containers), auto-detection is limited to `localhost`. This simplifies significantly the security model of a Netdata monitored infrastructure, since most applications allow `localhost` access by default. 
@@ -104,10 +104,10 @@ A few well known data collection sources that commonly need to be configured are ## Configuration quick start -In netdata we have: +In Netdata we have: -- **internal** data collection plugins (running inside the netdata daemon) -- **external** data collection plugins (independent processes, sending data to netdata over pipes) +- **internal** data collection plugins (running inside the Netdata daemon) +- **external** data collection plugins (independent processes, sending data to Netdata over pipes) - modular plugin **orchestrators** (external plugins that have multiple data collection modules) You can enable and disable plugins (internal and external) via `netdata.conf` at the section `[plugins]`. @@ -151,7 +151,7 @@ sudo /etc/netdata/edit-config python.d/nginx.conf Netdata ships hundreds of health monitoring alarms for detecting anomalies. These are optimized for production servers. -Many users install netdata on workstations and are frustrated by the default alarms shipped with netdata. On these cases, we suggest to disable health monitoring. +Many users install Netdata on workstations and are frustrated by the default alarms shipped with Netdata. On these cases, we suggest to disable health monitoring. To disable it, edit `/etc/netdata/netdata.conf` (or `/opt/netdata/etc/netdata/netdata.conf` if you installed the static 64bit package) and set: @@ -176,7 +176,7 @@ and set `SEND_EMAIL="NO"`. - Check [Data Collection](../collectors) for configuring data collection plugins. - Check [Health Monitoring](../health) for configuring your own alarms, or setting up alarm notifications. -- Check [Streaming](../streaming) for centralizing netdata metrics. -- Check [Backends](../backends) for long term archiving of netdata metrics to time-series databases. +- Check [Streaming](../streaming) for centralizing Netdata metrics. +- Check [Backends](../backends) for long term archiving of Netdata metrics to time-series databases. 
[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2FGettingStarted&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/Netdata-Security-and-Disclosure-Information.md b/docs/Netdata-Security-and-Disclosure-Information.md deleted file mode 100644 index 8e8a66afc..000000000 --- a/docs/Netdata-Security-and-Disclosure-Information.md +++ /dev/null @@ -1,39 +0,0 @@ -# Netdata Security and Disclosure Information - -This page describes netdata security and disclosure information. - -## Security Announcements - -Every time a security issue is fixed in netdata, we immediately release a new version of it. So, to get notified of all security incidents, please subscribe to our releases on github. - -## Report a Vulnerability - -We’re extremely grateful for security researchers and users that report vulnerabilities to Netdata Open Source Community. All reports are thoroughly investigated by a set of community volunteers. - -To make a report, please email the private [security@netdata.cloud](mailto:security@netdata.cloud) list with the security details and the details expected for [all netdata bug reports](../.github/ISSUE_TEMPLATE/bug_report.md). - -## When Should I Report a Vulnerability? - -- You think you discovered a potential security vulnerability in Netdata -- You are unsure how a vulnerability affects Netdata -- You think you discovered a vulnerability in another project that Netdata depends on (e.g. python, node, etc) - -### When Should I NOT Report a Vulnerability? - -- You need help tuning Netdata for security -- You need help applying security related updates -- Your issue is not security related - -## Security Vulnerability Response - -Each report is acknowledged and analyzed by Netdata Team members within 3 working days. This will set off a Security Release Process. 
- -Any vulnerability information shared with Netdata Team stays within Netdata project and will not be disseminated to other projects unless it is necessary to get the issue fixed. - -As the security issue moves from triage, to identified fix, to release planning we will keep the reporter updated. - -## Public Disclosure Timing - -A public disclosure date is negotiated by the Netdata team and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to a few weeks. As a basic default, we expect report date to disclosure date to be on the order of 7 days. The Netdata team holds the final say when setting a disclosure date. - -[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2FNetdata-Security-and-Disclosure-Information&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/Performance.md b/docs/Performance.md index b08549f11..fbc6d5761 100644 --- a/docs/Performance.md +++ b/docs/Performance.md @@ -1,6 +1,6 @@ # Performance -netdata performance is affected by: +Netdata performance is affected by: **Data collection** - the number of charts for which data are collected @@ -19,11 +19,11 @@ You can control all the above. ## Netdata Daemon -For most server systems, with a few hundred charts and a few thousand dimensions, the netdata daemon, without any web clients accessing it, should not use more than 1% of a single core. +For most server systems, with a few hundred charts and a few thousand dimensions, the Netdata daemon, without any web clients accessing it, should not use more than 1% of a single core. 
-To prove netdata scalability, check issue [#1323](https://github.com/netdata/netdata/issues/1323#issuecomment-265501668) where netdata collects 95.000 metrics per second, with 12% CPU utilization of a single core! +To prove Netdata scalability, check issue [#1323](https://github.com/netdata/netdata/issues/1323#issuecomment-265501668) where Netdata collects 95.000 metrics per second, with 12% CPU utilization of a single core! -In embedded systems, if the netdata daemon is using a lot of CPU without any web clients accessing it, you should lower the data collection frequency. To set the data collection frequency, edit `/etc/netdata/netdata.conf` and set `update_every` to a higher number (this is the frequency in seconds data are collected for all charts: higher number of seconds = lower frequency, the default is 1 for per second data collection). You can also set this frequency per module or chart. Check the [daemon configuration](../daemon/config) for plugins and charts. For specific modules, the configuration needs to be changed in: +In embedded systems, if the Netdata daemon is using a lot of CPU without any web clients accessing it, you should lower the data collection frequency. To set the data collection frequency, edit `/etc/netdata/netdata.conf` and set `update_every` to a higher number (this is the frequency in seconds data are collected for all charts: higher number of seconds = lower frequency, the default is 1 for per second data collection). You can also set this frequency per module or chart. Check the [daemon configuration](../daemon/config) for plugins and charts. 
For specific modules, the configuration needs to be changed in: - `python.d.conf` for [python](../collectors/python.d.plugin/#pythondplugin) - `node.d.conf` for [nodejs](../collectors/node.d.plugin/#nodedplugin) - `charts.d.conf` for [bash](../collectors/charts.d.plugin/#chartsdplugin) @@ -34,24 +34,24 @@ If a plugin is using a lot of CPU, you should lower its update frequency, or if ## CPU consumption when web clients are accessing dashboards -Netdata is very efficient when servicing web clients. On most server platforms, netdata should be able to serve **1800 web client requests per second per core** for auto-refreshing charts. +Netdata is very efficient when servicing web clients. On most server platforms, Netdata should be able to serve **1800 web client requests per second per core** for auto-refreshing charts. Normally, each user connected will request less than 10 chart refreshes per second (the page may have hundreds of charts, but only the visible are refreshed). So you can expect 180 users per CPU core accessing dashboards before having any delays. Netdata runs with the lowest possible process priority, so even if 1000 users are accessing dashboards, it should not influence your applications. CPU utilization will reach 100%, but your applications should get all the CPU they need. -To lower the CPU utilization of netdata when clients are accessing the dashboard, set `web compression level = 1`, or disable web compression completely by setting `enable web responses gzip compression = no`. Both settings are in the `[web]` section. +To lower the CPU utilization of Netdata when clients are accessing the dashboard, set `web compression level = 1`, or disable web compression completely by setting `enable web responses gzip compression = no`. Both settings are in the `[web]` section. ## Monitoring a heavy loaded system -Netdata, while running, does not depend on disk I/O (apart its log files and `access.log` is written with buffering enabled and can be disabled). 
Some plugins that need disk may stop and show gaps during heavy system load, but the netdata daemon itself should be able to work and collect values from `/proc` and `/sys` and serve web clients accessing it. +Netdata, while running, does not depend on disk I/O (apart its log files and `access.log` is written with buffering enabled and can be disabled). Some plugins that need disk may stop and show gaps during heavy system load, but the Netdata daemon itself should be able to work and collect values from `/proc` and `/sys` and serve web clients accessing it. -Keep in mind that netdata saves its database when it exits and loads it back when restarted. While it is running though, its DB is only stored in RAM and no I/O takes place for it. +Keep in mind that Netdata saves its database when it exits and loads it back when restarted. While it is running though, its DB is only stored in RAM and no I/O takes place for it. ## Netdata process priority -By default, netdata runs with the `idle` process scheduler, which assigns CPU resources to netdata, only when the system has such resources to spare. +By default, Netdata runs with the `idle` process scheduler, which assigns CPU resources to Netdata, only when the system has such resources to spare. The following `netdata.conf` settings control this: @@ -62,15 +62,15 @@ The following `netdata.conf` settings control this: process nice level = 19 ``` -The policies supported by netdata are `idle` (the netdata default), `other` (also as `nice`), `batch`, `rr`, `fifo`. netdata also recognizes `keep` and `none` to keep the current settings without changing them. +The policies supported by Netdata are `idle` (the Netdata default), `other` (also as `nice`), `batch`, `rr`, `fifo`. Netdata also recognizes `keep` and `none` to keep the current settings without changing them. -For `other`, `nice` and `batch`, the setting `process nice level = 19` is activated to configure the nice level of netdata. 
Nice gets values -20 (highest) to 19 (lowest). +For `other`, `nice` and `batch`, the setting `process nice level = 19` is activated to configure the nice level of Netdata. Nice gets values -20 (highest) to 19 (lowest). For `rr` and `fifo`, the setting `process scheduling priority = 0` is activated to configure the priority of the relative scheduling policy. Priority gets values 1 (lowest) to 99 (highest). For the details of each scheduler, see `man sched_setscheduler` and `man sched`. -When netdata is running under systemd, it can only lower its priority (the default is `other` with `nice level = 0`). If you want to make netdata to get more CPU than that, you will need to set in `netdata.conf`: +When Netdata is running under systemd, it can only lower its priority (the default is `other` with `nice level = 0`). If you want to make Netdata to get more CPU than that, you will need to set in `netdata.conf`: ``` [global] @@ -85,17 +85,17 @@ CPUSchedulingPriority=99 Nice=-10 ``` -## Running netdata in embedded devices +## Running Netdata in embedded devices Embedded devices usually have very limited CPU resources available, and in most cases, just a single core. -> keep in mind that netdata on RPi 2 and 3 does not require any tuning. The default settings will be good. The following tunables apply only when running netdata on RPi 1 or other very weak IoT devices. +> keep in mind that Netdata on RPi 2 and 3 does not require any tuning. The default settings will be good. The following tunables apply only when running Netdata on RPi 1 or other very weak IoT devices. We suggest to do the following: ### 1. Disable External plugins -External plugins can consume more system resources than the netdata server. Disable the ones you don't need. If you need them, increase their `update every` value (again in `/etc/netdata/netdata.conf`), so that they do not run that frequently. +External plugins can consume more system resources than the Netdata server. 
Disable the ones you don't need. If you need them, increase their `update every` value (again in `/etc/netdata/netdata.conf`), so that they do not run that frequently. Edit `/etc/netdata/netdata.conf`, find the `[plugins]` section: @@ -126,8 +126,8 @@ plugin|description `idlejitter`|internal plugin (written in C) that attempts show if the systems starved for CPU. Disabling it will eliminate a thread. `cgroups`|monitoring linux containers. Most probably you are not going to need it. This will also eliminate another thread. `checks`|a debugging plugin, which is disabled by default. -`apps`|a plugin that monitors system processes. It is very complex and heavy (consumes twice the CPU resources of the netdata daemon), so if you don't need to monitor the process tree, you can disable it. -`charts.d`|BASH plugins (squid, nginx, mysql, etc). This is a heavy plugin, that consumes twice the CPU resources of the netdata daemon. +`apps`|a plugin that monitors system processes. It is very complex and heavy (consumes twice the CPU resources of the Netdata daemon), so if you don't need to monitor the process tree, you can disable it. +`charts.d`|BASH plugins (squid, nginx, mysql, etc). This is a heavy plugin, that consumes twice the CPU resources of the Netdata daemon. `node.d`|node.js plugin, currently used for SNMP data collection and monitoring named (the name server). `python.d`|has many modules and can use over 20MB of memory. @@ -137,7 +137,7 @@ For most IoT devices, you can disable all plugins except `proc`. For `proc` ther ### 2. Disable internal plugins -In this section you can select which modules of the `proc` plugin you need. All these are run in a single thread, one after another. Still, each one needs some RAM and consumes some CPU cycles. With all the modules enabled, the `proc` plugin adds ~9 MiB on top of the 5 MiB required by the netdata daemon. +In this section you can select which modules of the `proc` plugin you need. 
All these are run in a single thread, one after another. Still, each one needs some RAM and consumes some CPU cycles. With all the modules enabled, the `proc` plugin adds ~9 MiB on top of the 5 MiB required by the Netdata daemon. ``` [plugin:proc] @@ -150,7 +150,7 @@ Refer to the [proc.plugins documentation](../collectors/proc.plugin/) for the li ### 3. Lower internal plugin update frequency -If netdata is still using a lot of CPU, lower its update frequency. Going from per second updates, to once every 2 seconds updates, will cut the CPU resources of all netdata programs **in half**, and you will still have very frequent updates. +If Netdata is still using a lot of CPU, lower its update frequency. Going from per second updates, to once every 2 seconds updates, will cut the CPU resources of all Netdata programs **in half**, and you will still have very frequent updates. If the CPU of the embedded device is too weak, try setting even lower update frequency. Experiment with `update every = 5` or `update every = 10` (higher number = lower frequency) in `netdata.conf`, until you get acceptable results. @@ -172,18 +172,14 @@ Normally, you will not need them. To disable them, set: ``` ### 5. Set memory mode to RAM -Setting the memory mode to `ram` will disable loading and saving the round robin database. This will not affect anything while running netdata, but it might be required if you have very limited storage available. +Setting the memory mode to `ram` will disable loading and saving the round robin database. This will not affect anything while running Netdata, but it might be required if you have very limited storage available. ``` [global] memory mode = ram ``` -### 6. Use the single threaded web server - -Normally, netdata spawns a thread for each web client. This allows netdata to utilize all the available cores for servicing chart refreshes. You can however disable this feature and serve all charts one after another, using a single thread / core. 
This will might lower the CPU pressure on the embedded device. To enable the single threaded web server, edit `/etc/netdata/netdata.conf` and set `mode = single-threaded` in the `[web]` section. - -### 7. Lower memory requirements +### 6. Lower memory requirements You can set the default size of the round robin database for all charts, using: @@ -197,9 +193,9 @@ The units for history is `[global].update every` seconds. So if `[global].update Check also [Database](../database) for directions on calculating the size of the round robin database. -### 8. Disable gzip compression of responses +### 7. Disable gzip compression of responses -Gzip compression of the web responses is using more CPU that the rest of netdata. You can lower the compression level or disable gzip compression completely. You can disable it, like this: +Gzip compression of the web responses is using more CPU than the rest of Netdata. You can lower the compression level or disable gzip compression completely. You can disable it, like this: ``` [web] @@ -214,7 +210,7 @@ To lower the compression level, do this: gzip compression level = 1 ``` -Finally, if no web server is installed on your device, you can use port tcp/80 for netdata: +Finally, if no web server is installed on your device, you can use port tcp/80 for Netdata: ``` [web] diff --git a/docs/Running-behind-apache.md b/docs/Running-behind-apache.md index f0ce70c71..a71897f4b 100644 --- a/docs/Running-behind-apache.md +++ b/docs/Running-behind-apache.md @@ -2,10 +2,10 @@ Below you can find instructions for configuring an apache server to: -1. proxy a single netdata via an HTTP and HTTPS virtual host -2. dynamically proxy any number of netdata +1. proxy a single Netdata via an HTTP and HTTPS virtual host +2. dynamically proxy any number of Netdata 3. add user authentication -4. adjust netdata settings to get optimal results +4.
adjust Netdata settings to get optimal results ## Requirements @@ -33,13 +33,13 @@ sudo a2enmod rewrite --- -## netdata on an existing virtual host +## Netdata on an existing virtual host -On any **existing** and already **working** apache virtual host, you can redirect requests for URL `/netdata/` to one or more netdata servers. +On any **existing** and already **working** apache virtual host, you can redirect requests for URL `/netdata/` to one or more Netdata servers. -### proxy one netdata, running on the same server apache runs +### proxy one Netdata, running on the same server apache runs -Add the following on top of any existing virtual host. It will allow you to access netdata as `http://virtual.host/netdata/`. +Add the following on top of any existing virtual host. It will allow you to access Netdata as `http://virtual.host/netdata/`. ``` @@ -52,7 +52,7 @@ Add the following on top of any existing virtual host. It will allow you to acce Require all granted - # Local netdata server accessed with '/netdata/', at localhost:19999 + # Local Netdata server accessed with '/netdata/', at localhost:19999 ProxyPass "/netdata/" "http://localhost:19999/" connectiontimeout=5 timeout=30 keepalive=on ProxyPassReverse "/netdata/" "http://localhost:19999/" @@ -67,9 +67,9 @@ Add the following on top of any existing virtual host. It will allow you to acce ``` -### proxy multiple netdata running on multiple servers +### proxy multiple Netdata running on multiple servers -Add the following on top of any existing virtual host. It will allow you to access multiple netdata as `http://virtual.host/netdata/HOSTNAME/`, where `HOSTNAME` is the hostname of any other netdata server you have (to access the `localhost` netdata, use `http://virtual.host/netdata/localhost/`). +Add the following on top of any existing virtual host. 
It will allow you to access multiple Netdata as `http://virtual.host/netdata/HOSTNAME/`, where `HOSTNAME` is the hostname of any other Netdata server you have (to access the `localhost` Netdata, use `http://virtual.host/netdata/localhost/`). ``` @@ -105,9 +105,9 @@ If you want to control the servers your users can connect to, replace the `Proxy ProxyPassMatch "^/netdata/(server1|server2|server3|server4)/(.*)" "http://$1:19999/$2" connectiontimeout=5 timeout=30 keepalive=on ``` -## netdata on a dedicated virtual host +## Netdata on a dedicated virtual host -You can proxy netdata through apache, using a dedicated apache virtual host. +You can proxy Netdata through apache, using a dedicated apache virtual host. Create a new apache site: @@ -158,7 +158,7 @@ Repeat the operation for as many servers as you need. ## Enable Basic Auth -If you wish to add an authentication (user/password) to access your netdata, do these: +If you wish to add an authentication (user/password) to access your Netdata, do these: Install the package `apache2-utils`. On debian / ubuntu run `sudo apt-get install apache2-utils`. @@ -184,28 +184,28 @@ Modify the virtual host with these: ``` -Specify `Location /` if netdata is running on dedicated virtual host. +Specify `Location /` if Netdata is running on dedicated virtual host. Note: Changes are applied by reloading or restarting Apache. # Netdata configuration -You might edit `/etc/netdata/netdata.conf` to optimize your setup a bit. For applying these changes you need to restart netdata. +You might edit `/etc/netdata/netdata.conf` to optimize your setup a bit. For applying these changes you need to restart Netdata. 
## Response compression -If you plan to use netdata exclusively via apache, you can gain some performance by preventing double compression of its output (netdata compresses its response, apache re-compresses it) by editing `/etc/netdata/netdata.conf` and setting: +If you plan to use Netdata exclusively via apache, you can gain some performance by preventing double compression of its output (Netdata compresses its response, apache re-compresses it) by editing `/etc/netdata/netdata.conf` and setting: ``` [web] enable gzip compression = no ``` -Once you disable compression at netdata (and restart it), please verify you receive compressed responses from apache (it is important to receive compressed responses - the charts will be more snappy). +Once you disable compression at Netdata (and restart it), please verify you receive compressed responses from apache (it is important to receive compressed responses - the charts will be more snappy). -## Limit direct access to netdata +## Limit direct access to Netdata -You would also need to instruct netdata to listen only on `localhost`, `127.0.0.1` or `::1`. +You would also need to instruct Netdata to listen only on `localhost`, `127.0.0.1` or `::1`. ``` [web] @@ -224,13 +224,13 @@ or --- -You can also use a unix domain socket. This will also provide a faster route between apache and netdata: +You can also use a unix domain socket. 
This will also provide a faster route between apache and Netdata: ``` [web] bind to = unix:/tmp/netdata.sock ``` -_note: netdata v1.8+ support unix domain sockets_ +_note: Netdata v1.8+ supports unix domain sockets_ At the apache side, prepend the 2nd argument to `ProxyPass` with `unix:/tmp/netdata.sock|`, like this: @@ -247,13 +247,13 @@ If your apache server is not on localhost, you can set: bind to = * allow connections from = IP_OF_APACHE_SERVER ``` -_note: netdata v1.9+ support `allow connections from`_ +_note: Netdata v1.9+ supports `allow connections from`_ -`allow connections from` accepts [netdata simple patterns](../libnetdata/simple_pattern/) to match against the connection IP address. +`allow connections from` accepts [Netdata simple patterns](../libnetdata/simple_pattern/) to match against the connection IP address. ## prevent the double access.log -apache logs accesses and netdata logs them too. You can prevent netdata from generating its access log, by setting this in `/etc/netdata/netdata.conf`: +apache logs accesses and Netdata logs them too. You can prevent Netdata from generating its access log, by setting this in `/etc/netdata/netdata.conf`: ``` [global] @@ -262,9 +262,9 @@ apache logs accesses and netdata logs them too. You can prevent netdata from gen ## Troubleshooting mod_proxy -Make sure the requests reach netdata, by examing `/var/log/netdata/access.log`. +Make sure the requests reach Netdata, by examining `/var/log/netdata/access.log`. -1. if the requests do not reach netdata, your apache does not forward them. -2. if the requests reach netdata by the URLs are wrong, you have not re-written them properly. +1. if the requests do not reach Netdata, your apache does not forward them. +2. if the requests reach Netdata but the URLs are wrong, you have not re-written them properly.
[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2FRunning-behind-apache&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/Running-behind-caddy.md b/docs/Running-behind-caddy.md index 1b25b0a2e..4e530e948 100644 --- a/docs/Running-behind-caddy.md +++ b/docs/Running-behind-caddy.md @@ -1,6 +1,6 @@ # Netdata via Caddy -To run netdata via [Caddy's proxying,](https://caddyserver.com/docs/proxy) set your Caddyfile up like this: +To run Netdata via [Caddy's proxying,](https://caddyserver.com/docs/proxy) set your Caddyfile up like this: ``` netdata.domain.tld { @@ -10,7 +10,7 @@ netdata.domain.tld { Other directives can be added between the curly brackets as needed. -To run netdata in a subfolder: +To run Netdata in a subfolder: ``` netdata.domain.tld { @@ -20,10 +20,10 @@ netdata.domain.tld { } ``` -## limit direct access to netdata +## limit direct access to Netdata -You would also need to instruct netdata to listen only to `127.0.0.1` or `::1`. +You would also need to instruct Netdata to listen only to `127.0.0.1` or `::1`. -To limit access to netdata only from localhost, set `bind socket to IP = 127.0.0.1` or `bind socket to IP = ::1` in `/etc/netdata/netdata.conf`. +To limit access to Netdata only from localhost, set `bind socket to IP = 127.0.0.1` or `bind socket to IP = ::1` in `/etc/netdata/netdata.conf`. 
[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2FRunning-behind-caddy&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/Running-behind-lighttpd.md b/docs/Running-behind-lighttpd.md index 5c74439ad..8e43a0382 100644 --- a/docs/Running-behind-lighttpd.md +++ b/docs/Running-behind-lighttpd.md @@ -1,6 +1,6 @@ # Netdata via lighttpd v1.4.x -Here is a config for accessing netdata in a suburl via lighttpd 1.4.46 and newer: +Here is a config for accessing Netdata in a suburl via lighttpd 1.4.46 and newer: ```txt $HTTP["url"] =~ "^/netdata/" { @@ -24,7 +24,7 @@ $SERVER["socket"] == ":19998" { --- -If the only thing the server is exposing via the web is netdata (and thus no suburl rewriting required), +If the only thing the server is exposing via the web is Netdata (and thus no suburl rewriting required), then you can get away with just ``` proxy.server = ( "" => ( ( "host" => "127.0.0.1", "port" => 19999 ))) @@ -45,7 +45,7 @@ other auth methods, and more info on htdigest, can be found in lighttpd's [mod_a --- It seems that lighttpd (or some versions of it), fail to proxy compressed web responses. -To solve this issue, disable web response compression in netdata. +To solve this issue, disable web response compression in Netdata. Open /etc/netdata/netdata.conf and set in [global]: @@ -53,10 +53,10 @@ Open /etc/netdata/netdata.conf and set in [global]: enable web responses gzip compression = no ``` -## limit direct access to netdata +## limit direct access to Netdata -You would also need to instruct netdata to listen only to `127.0.0.1` or `::1`. +You would also need to instruct Netdata to listen only to `127.0.0.1` or `::1`. -To limit access to netdata only from localhost, set `bind socket to IP = 127.0.0.1` or `bind socket to IP = ::1` in `/etc/netdata/netdata.conf`. 
+To limit access to Netdata only from localhost, set `bind socket to IP = 127.0.0.1` or `bind socket to IP = ::1` in `/etc/netdata/netdata.conf`. [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2FRunning-behind-lighttpd&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/Running-behind-nginx.md b/docs/Running-behind-nginx.md index 3918af243..b38d27fa9 100644 --- a/docs/Running-behind-nginx.md +++ b/docs/Running-behind-nginx.md @@ -1,12 +1,12 @@ # Netdata via nginx -To pass netdata via a nginx, use this: +To pass Netdata via a nginx, use this: ### As a virtual host ``` upstream backend { - # the netdata server + # the Netdata server server 127.0.0.1:19999; keepalive 64; } @@ -69,7 +69,7 @@ server { } ``` -### As a subfolder for multiple netdata servers, via one nginx +### As a subfolder for multiple Netdata servers, via one nginx ``` upstream backend-server1 { @@ -112,11 +112,24 @@ server { Of course you can add as many backend servers as you like. -Using the above, you access netdata on the backend servers, like this: +Using the above, you access Netdata on the backend servers, like this: - `http://nginx.server/netdata/server1/` to reach `backend-server1` - `http://nginx.server/netdata/server2/` to reach `backend-server2` +### Using TLS communication + +In case the Netdata web server has been [configured to use TLS](../web/server/#enabling-tls-support), +you must also encrypt the communication between Nginx and Netdata. 
+ +To enable encryption, first [enable SSL on nginx](http://nginx.org/en/docs/http/configuring_https_servers.html) and then put the following in the location section of the Nginx configuration: + +``` +proxy_set_header X-Forwarded-Proto https; +proxy_pass https://localhost:19999; +``` + +If nginx is not configured as described here, you will probably receive the error `SSL_ERROR_RX_RECORD_TOO_LONG`. ### Enable authentication @@ -139,9 +152,9 @@ server { } ``` -## limit direct access to netdata +## limit direct access to Netdata -If your nginx is on `localhost`, you can use this to protect your netdata: +If your nginx is on `localhost`, you can use this to protect your Netdata: ``` [web] @@ -150,13 +163,13 @@ If your nginx is on `localhost`, you can use this to protect your netdata: --- -You can also use a unix domain socket. This will also provide a faster route between nginx and netdata: +You can also use a unix domain socket. This will also provide a faster route between nginx and Netdata: ``` [web] bind to = unix:/tmp/netdata.sock ``` -_note: netdata v1.8+ support unix domain sockets_ +_note: Netdata v1.8+ support unix domain sockets_ At the nginx side, use something like this to use the same unix domain socket: @@ -177,13 +190,13 @@ If your nginx server is not on localhost, you can set: allow connections from = IP_OF_NGINX_SERVER ``` -_note: netdata v1.9+ support `allow connections from`_ +_note: Netdata v1.9+ support `allow connections from`_ -`allow connections from` accepts [netdata simple patterns](../libnetdata/simple_pattern/) to match against the connection IP address. +`allow connections from` accepts [Netdata simple patterns](../libnetdata/simple_pattern/) to match against the connection IP address. ## prevent the double access.log -nginx logs accesses and netdata logs them too. You can prevent netdata from generating its access log, by setting this in `/etc/netdata/netdata.conf`: +nginx logs accesses and Netdata logs them too. 
You can prevent Netdata from generating its access log, by setting this in `/etc/netdata/netdata.conf`: ``` [global] @@ -201,4 +214,5 @@ If you get an 502 Bad Gateway error you might check your nginx error log: If you see something like the above, chances are high that SELinux prevents nginx from connecting to the backend server. To fix that, just use this policy: `setsebool -P httpd_can_network_connect true`. + [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2FRunning-behind-nginx&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/Third-Party-Plugins.md b/docs/Third-Party-Plugins.md index 38fa90e4e..8d227203f 100644 --- a/docs/Third-Party-Plugins.md +++ b/docs/Third-Party-Plugins.md @@ -4,7 +4,7 @@ The following is a list of Netdata plugins distributed by third parties: ## Nvidia GPUs -[netdata nv plugin](https://github.com/coraxx/netdata_nv_plugin) monitors nvidia GPUs. +[Netdata nv plugin](https://github.com/coraxx/netdata_nv_plugin) monitors nvidia GPUs. ![image](https://user-images.githubusercontent.com/2662304/29516895-351e905e-867b-11e7-9863-3fb6924490ab.png) diff --git a/docs/a-github-star-is-important.md b/docs/a-github-star-is-important.md index e46d56449..cac01f3e3 100644 --- a/docs/a-github-star-is-important.md +++ b/docs/a-github-star-is-important.md @@ -1,12 +1,12 @@ # A GitHub star is important -**GitHub stars** allow netdata to expand its reach, its community, especially attract people with skills willing to contribute to it. +**GitHub stars** allow Netdata to expand its reach, its community, especially attract people with skills willing to contribute to it. -Compared to its first release, netdata is now **twice as fast**, has all its bugs settled and a lot more functionality. 
This happened because a lot of people find it useful, use it daily at home and work, **rely on it** and **contribute to it**. +Compared to its first release, Netdata is now **twice as fast**, has all its bugs settled and a lot more functionality. This happened because a lot of people find it useful, use it daily at home and work, **rely on it** and **contribute to it**. **GitHub stars** also **motivate** us. They state that you find our work **useful**. They give us strength to continue, to work **harder** to make it even **better**. -So, give netdata a **GitHub star**, at the top right of this page. +So, give Netdata a **GitHub star**, at the top right of this page. Thank you! diff --git a/docs/anonymous-statistics.md b/docs/anonymous-statistics.md index 1e426e2c5..376a2c4aa 100644 --- a/docs/anonymous-statistics.md +++ b/docs/anonymous-statistics.md @@ -1,11 +1,11 @@ # Anonymous Statistics -From Netdata v1.12 and above, anonymous usage information is collected by default and send to Google Analytics. +From Netdata v1.12 and above, anonymous usage information is collected by default and sent to Google Analytics. The statistics calculated from this information will be used for: -1. **Quality assurance**, to help us understand if netdata behaves as expected and help us identify repeating issues for certain distributions or environment. +1. **Quality assurance**, to help us understand if Netdata behaves as expected and help us identify repeating issues for certain distributions or environments. -2. **Usage statistics**, to help us focus on the parts of netdata that are used the most, or help us identify the extend our development decisions influence the community. +2. **Usage statistics**, to help us focus on the parts of Netdata that are used the most, or help us identify the extent our development decisions influence the community. Information is sent to Netdata via two different channels: - Google Tag Manager is used when an agent's dashboard is accessed. 
@@ -42,7 +42,7 @@ The only thing that's impossible for us to prevent from being **sent** is the UR ## Anonymous Statistics Script -Every time the daemon is started or stopped and every time a fatal condition is encountered, netdata uses the anonymous statistics script to collect system information and send it to GA via an http call. The information collected for all events is: +Every time the daemon is started or stopped and every time a fatal condition is encountered, Netdata uses the anonymous statistics script to collect system information and send it to GA via an http call. The information collected for all events is: - Netdata version - OS name, version, id, id_like - Kernel name, version, architecture @@ -56,7 +56,8 @@ To see exactly what and how is collected, you can review the script template `da ## Opt-Out To opt-out from sending anonymous statistics, you can create a file called `.opt-out-from-anonymous-statistics` under the user configuration directory (usually `/etc/netdata`). The effect of creating the file is the following: -- The daemon will never execute the anonymous statistics script -- The anonymous statistics script will exit immediately if called via any other way (e.g. shell) -- The Google Tag Manager Javascript snippet will remain in the page, but the linked tag will not be fired. The effect is that no data will ever be sent to GA. + - The daemon will never execute the anonymous statistics script + - The anonymous statistics script will exit immediately if called via any other way (e.g. shell) + - The Google Tag Manager Javascript snippet will remain in the page, but the linked tag will not be fired. The effect is that no data will ever be sent to GA. +You can also disable telemetry by passing the option `--disable-telemetry` to any of the installers. 
diff --git a/docs/configuration-guide.md b/docs/configuration-guide.md index 811a33ed2..2a9539dca 100644 --- a/docs/configuration-guide.md +++ b/docs/configuration-guide.md @@ -1,6 +1,6 @@ # Configuration guide -No configuration is required to run netdata, but you will find plenty of options to tweak, so that you can adapt it to your particular needs. +No configuration is required to run Netdata, but you will find plenty of options to tweak, so that you can adapt it to your particular needs.
Configuration files are placed in `/etc/netdata`. Depending on your installation method, Netdata will have been installed either directly under `/`, or under `/opt/netdata`. The paths mentioned here and in the documentation in general assume that your installation is under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well. (i.e. `/etc/netdata` will be `/opt/netdata/etc/netdata`).
@@ -20,7 +20,7 @@ Under that directory you will see the following: - `stats.d` is a directory under which you can add .conf files to add [synthetic charts](../collectors/statsd.plugin/#synthetic-statsd-charts). - Individual collector plugin config files, such as `fping.conf` for the [fping plugin](../collectors/fping.plugin/) and `apps_groups.conf` for the [apps plugin](../collectors/apps.plugin/) -So there are many configuration files to control every aspect of Netdata's behavior. It can be overwhelming at first, but you won't have to deal with any of them, unless you have specific things you need to change. The following HOWTO will guide you on how to customize your netdata, based on what you want to do. +So there are many configuration files to control every aspect of Netdata's behavior. It can be overwhelming at first, but you won't have to deal with any of them, unless you have specific things you need to change. The following HOWTO will guide you on how to customize your Netdata, based on what you want to do. ## How to @@ -57,6 +57,10 @@ Entire plugins can be turned off from the [netdata.conf [plugins]](../daemon/con - `node.d.conf` for [nodejs](../collectors/node.d.plugin/#nodedplugin) - `charts.d.conf` for [bash](../collectors/charts.d.plugin/#chartsdplugin) +##### Show charts with zero metrics + +By default, Netdata will enable monitoring metrics for disks, memory, and network only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. 
+ ### Modify alarms and notifications ##### Add a new alarm @@ -69,9 +73,9 @@ Just set `enabled = no` in the [netdata.conf [health]](../daemon/config/#health- ##### Modify or disable a specific alarm -The `health.d` directory that contains the alarm triggers for [health monitoring](../health/#health-monitoring). It has one .conf file per collector. You can easily find the .conf file you will need to modify, by looking for the "source" line on the table that appears on the right side of an alarm on the netdata gui. +The `health.d` directory that contains the alarm triggers for [health monitoring](../health/#health-monitoring). It has one .conf file per collector. You can easily find the .conf file you will need to modify, by looking for the "source" line on the table that appears on the right side of an alarm on the Netdata gui. -For example, if you click on Alarms and go to the tab 'All', the default netdata installation will show you at the top the configured alarm for `10 min cpu usage` (it's the name of the badge). Looking at the table on the right side, you will see a row that says: `source 4@/usr/lib/netdata/conf.d/health.d/cpu.conf`. This way, you know that you will need to run `/etc/netdata/edit-config health.d/cpu.conf` and look for alarm at line 4 of the conf file. +For example, if you click on Alarms and go to the tab 'All', the default Netdata installation will show you at the top the configured alarm for `10 min cpu usage` (it's the name of the badge). Looking at the table on the right side, you will see a row that says: `source 4@/usr/lib/netdata/conf.d/health.d/cpu.conf`. This way, you know that you will need to run `/etc/netdata/edit-config health.d/cpu.conf` and look for alarm at line 4 of the conf file. As stated at the top of the .conf file, **you can disable an alarm notification by setting the 'to' line to: silent**. 
To modify how the alarm gets triggered, we suggest that you go through the guide on [health monitoring](../health/#health-monitoring). @@ -82,7 +86,7 @@ You only need to configure `health_alarm_notify.conf`. To learn how to do it, re ### Make security-related customizations -##### Change the netdata web server access lists +##### Change the Netdata web server access lists You have several options under the [netdata.conf [web]](../web/server/#access-lists) section. @@ -90,38 +94,38 @@ You have several options under the [netdata.conf [web]](../web/server/#access-li You will need to configure the [registry] section in netdata.conf. First read the [registry documentation](../registry/). In it, are instructions on how to [run your own registry](../registry/#run-your-own-registry). -##### Change the IP address/port netdata listens to +##### Change the IP address/port Netdata listens to The settings are under netdata.conf [web]. Look at the [web server documentation](../web/server/#binding-netdata-to-multiple-ports) for more info. ### System resource usage -##### Reduce the resources netdata uses +##### Reduce the resources Netdata uses -The page on [netdata performance](Performance.md) has an excellent guide on how to reduce the netdata cpu/disk/RAM utilization to levels suitable even for the weakest [IoT devices](netdata-for-IoT.md). +The page on [Netdata performance](Performance.md) has an excellent guide on how to reduce the Netdata cpu/disk/RAM utilization to levels suitable even for the weakest [IoT devices](netdata-for-IoT.md). -##### Change when netdata saves metrics to disk +##### Change when Netdata saves metrics to disk [netdata.conf [global]](../daemon/config/#global-section-options) : `memory mode` -##### Prevent netdata from getting immediately killed when my server runs out of memory +##### Prevent Netdata from getting immediately killed when my server runs out of memory -You can change the netdata [OOM score](../daemon/#oom-score) in netdata.conf [global]. 
+You can change the Netdata [OOM score](../daemon/#oom-score) in netdata.conf [global]. ### Other -##### Move netdata directories +##### Move Netdata directories The various directory paths are in [netdata.conf [global]](../daemon/config/#global-section-options). -## How netdata configuration works +## How Netdata configuration works The configuration files are `name = value` dictionaries with `[sections]`. Write whatever you like there as long as it follows this simple format. Netdata loads this dictionary and then when the code needs a value from it, it just looks up the `name` in the dictionary at the proper `section`. In all places, in the code, there are both the `names` and their `default values`, so if something is not found in the configuration file, the default is used. The lookup is made using B-Trees and hashes (no string comparisons), so they are super fast. Also the `names` of the settings can be `my super duper setting that once set to yes, will turn the world upside down = no` - so goodbye to most of the documentation involved. -Next, netdata can generate a valid configuration for the user to edit. No need to remember anything. Just get the configuration from the server (`/netdata.conf` on your netdata server), edit it and save it. +Next, Netdata can generate a valid configuration for the user to edit. No need to remember anything. Just get the configuration from the server (`/netdata.conf` on your Netdata server), edit it and save it. Last, what about options you believe you have set, but you misspelled?When you get the configuration file from the server, there will be a comment above all `name = value` pairs the server does not use. So you know that whatever you wrote there, is not used. @@ -129,6 +133,6 @@ Last, what about options you believe you have set, but you misspelled?When you g Unix prefers regular expressions. But they are just too hard, too cryptic to use, write and understand. 
-So, netdata supports [simple patterns](../libnetdata/simple_pattern/). +So, Netdata supports [simple patterns](../libnetdata/simple_pattern/). [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2Fconfiguration-guide&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/docs/generator/buildhtml.sh b/docs/generator/buildhtml.sh index 043112242..e1c108fb5 100755 --- a/docs/generator/buildhtml.sh +++ b/docs/generator/buildhtml.sh @@ -19,18 +19,18 @@ GO_D_DIR="collectors/go.d.plugin" rm -rf ${GO_D_DIR} git clone https://github.com/netdata/go.d.plugin.git ${GO_D_DIR} -# Copy all netdata .md files to docs/generator/src. Exclude htmldoc itself and also the directory node_modules generatord by Netlify +# Copy all Netdata .md files to docs/generator/src. Exclude htmldoc itself and also the directory node_modules generated by Netlify echo "Copying files" rm -rf ${SRC_DIR} find . 
-type d \( -path ./${GENERATOR_DIR} -o -path ./node_modules \) -prune -o -name "*.md" -print | cpio -pd ${SRC_DIR} -# Copy netdata html resources +# Copy Netdata html resources cp -a ./${GENERATOR_DIR}/custom ./${SRC_DIR}/ # Modify the first line of the main README.md, to enable proper static html generation echo "Modifying README header" -sed -i -e '0,/# netdata /s//# Introduction\n\n/' ${SRC_DIR}/README.md +sed -i -e '0,/# Netdata /s//# Introduction\n\n/' ${SRC_DIR}/README.md # Remove all GA tracking code find ${SRC_DIR} -name "*.md" -print0 | xargs -0 sed -i -e 's/\[!\[analytics.*UA-64295674-3)\]()//g' @@ -81,7 +81,7 @@ prep_html() { # Build html docs mkdocs build --config-file="${MKDOCS_CONFIG_FILE}" - # Fix edit buttons for the markdowns that are not on the main netdata repo + # Fix edit buttons for the markdowns that are not on the main Netdata repo find "${GENERATOR_DIR}/${SITE_DIR}/${GO_D_DIR}" -name "*.html" -print0 | xargs -0 sed -i -e 's/https:\/\/github.com\/netdata\/netdata\/blob\/master\/collectors\/go.d.plugin/https:\/\/github.com\/netdata\/go.d.plugin\/blob\/master/g' if [ "${lang}" != "en" ] ; then find "${GENERATOR_DIR}/${SITE_DIR}" -name "*.html" -print0 | xargs -0 sed -i -e 's/https:\/\/github.com\/netdata\/netdata\/blob\/master\/\S*md/https:\/\/github.com\/netdata\/localization\//g' diff --git a/docs/generator/buildyaml.sh b/docs/generator/buildyaml.sh index e367ab503..e4a5466a4 100755 --- a/docs/generator/buildyaml.sh +++ b/docs/generator/buildyaml.sh @@ -127,6 +127,7 @@ echo -ne " - 'docs/Demo-Sites.md' - REDISTRIBUTED.md - CHANGELOG.md - CONTRIBUTING.md + - SECURITY.md - Why Netdata: - 'docs/why-netdata/README.md' - 'docs/why-netdata/1s-granularity.md' @@ -139,7 +140,7 @@ echo -ne " - 'docs/Demo-Sites.md' - 'packaging/installer/UPDATE.md' - 'packaging/installer/UNINSTALL.md' - 'docs/GettingStarted.md' -- Running netdata: +- Running Netdata: - 'daemon/README.md' - 'docs/configuration-guide.md' - 'daemon/config/README.md' @@ -228,6 +229,7 @@ 
navpart 3 collectors/ioping.plugin navpart 3 collectors/freeipmi.plugin navpart 3 collectors/nfacct.plugin navpart 3 collectors/xenstat.plugin +navpart 3 collectors/perf.plugin echo -ne " - 'docs/Third-Party-Plugins.md' @@ -251,9 +253,8 @@ navpart 2 web/api/badges "" "" 2 navpart 2 web/api/health "" "" 2 navpart 2 web/api/queries "" "Queries" 2 -echo -ne "- Hacking netdata: +echo -ne "- Hacking Netdata: - CODE_OF_CONDUCT.md - - 'docs/Netdata-Security-and-Disclosure-Information.md' - CONTRIBUTORS.md " navpart 2 packaging/makeself "" "" 4 diff --git a/docs/generator/checklinks.sh b/docs/generator/checklinks.sh index 6538d39b7..acc144656 100755 --- a/docs/generator/checklinks.sh +++ b/docs/generator/checklinks.sh @@ -21,8 +21,8 @@ printhelp () { By default, nothing is actually checked. The following options tell it what to check: -a Check all link types -w Check wiki links (and just warn if you see one) - -b Check absolute links to the netdata repo (and change them to relative). Only checks links to https://github.com/netdata/netdata/????/master* - -l Check relative links to the netdata repo (and replace them with links that the html static site can live with, under docs/generator/src only) + -b Check absolute links to the Netdata repo (and change them to relative). Only checks links to https://github.com/netdata/netdata/????/master* + -l Check relative links to the Netdata repo (and replace them with links that the html static site can live with, under docs/generator/src only) -e Check external links, outside the wiki or the repo (useless without adding the -u option, to verify that they're not broken) " } @@ -233,7 +233,7 @@ checklinks () { if [ "$CHKWIKI" -eq 1 ] ; then echo "-- WARNING: $f - $lnk points to the wiki. Please replace it manually" ; fi ;; https://github.com/netdata/netdata/????/master* ) - echo "-- ERROR: $f - $lnk is an absolute link to a netdata file. Please convert to relative." + echo "-- ERROR: $f - $lnk is an absolute link to a Netdata file. 
Please convert to relative." EXITCODE=1 ;; http* ) diff --git a/docs/generator/custom/img/favicon.ico b/docs/generator/custom/img/favicon.ico index 7ed957252..703716cd0 100644 Binary files a/docs/generator/custom/img/favicon.ico and b/docs/generator/custom/img/favicon.ico differ diff --git a/docs/high-performance-netdata.md b/docs/high-performance-netdata.md index a9947d9bc..553ad6da0 100644 --- a/docs/high-performance-netdata.md +++ b/docs/high-performance-netdata.md @@ -1,18 +1,18 @@ -# High performance netdata +# High performance Netdata -If you plan to run a netdata public on the internet, you will get the most performance out of it by following these rules: +If you plan to run a Netdata public on the internet, you will get the most performance out of it by following these rules: ## 1. run behind nginx -The internal web server is optimized to provide the best experience with few clients connected to it. Normally a web browser will make 4-6 concurrent connections to a web server, so that it can send requests in parallel. To best serve a single client, netdata spawns a thread for each connection it receives (so 4-6 threads per connected web browser). +The internal web server is optimized to provide the best experience with few clients connected to it. Normally a web browser will make 4-6 concurrent connections to a web server, so that it can send requests in parallel. To best serve a single client, Netdata spawns a thread for each connection it receives (so 4-6 threads per connected web browser). -If you plan to have your netdata public on the internet, this strategy wastes resources. It provides a lock-free environment so each thread is autonomous to serve the browser, but it does not scale well. Running netdata behind nginx, idle connections to netdata can be reused, thus improving significantly the performance of netdata. +If you plan to have your Netdata public on the internet, this strategy wastes resources. 
It provides a lock-free environment so each thread is autonomous to serve the browser, but it does not scale well. Running Netdata behind nginx, idle connections to Netdata can be reused, thus improving significantly the performance of Netdata. In the following nginx configuration we do the following: -- allow nginx to maintain up to 1024 idle connections to netdata (so netdata will have up to 1024 threads waiting for requests) +- allow nginx to maintain up to 1024 idle connections to Netdata (so Netdata will have up to 1024 threads waiting for requests) -- allow nginx to compress the responses of netdata (later we will disable gzip compression at netdata) +- allow nginx to compress the responses of Netdata (later we will disable gzip compression at Netdata) - we disable wordpress pingback attacks and allow only GET, HEAD and OPTIONS requests. @@ -65,14 +65,14 @@ Then edit `/etc/netdata/netdata.conf` and set these config options: These options: -- `[global].bind socket to IP = 127.0.0.1` makes netdata listen only for requests from localhost (nginx). -- `[global].access log = none` disables the access.log of netdata. It is not needed since netdata only listens for requests on 127.0.0.1 and thus only nginx can access it. nginx has its own access.log for your record. +- `[global].bind socket to IP = 127.0.0.1` makes Netdata listen only for requests from localhost (nginx). +- `[global].access log = none` disables the access.log of Netdata. It is not needed since Netdata only listens for requests on 127.0.0.1 and thus only nginx can access it. nginx has its own access.log for your record. - `[global].disconnect idle web clients after seconds = 3600` will kill inactive web threads after an hour of inactivity. -- `[global].enable web responses gzip compression = no` disables gzip compression at netdata (nginx will compress the responses). +- `[global].enable web responses gzip compression = no` disables gzip compression at Netdata (nginx will compress the responses). 
## 2. increase open files limit (non-systemd) -By default Linux limits open file descriptors per process to 1024. This means that less than half of this number of client connections can be accepted by both nginx and netdata. To increase them, create 2 new files: +By default Linux limits open file descriptors per process to 1024. This means that less than half of this number of client connections can be accepted by both nginx and Netdata. To increase them, create 2 new files: 1. `/etc/security/limits.d/nginx.conf`, with these contents: diff --git a/docs/netdata-for-IoT.md b/docs/netdata-for-IoT.md index 97fba07e6..ca5385438 100644 --- a/docs/netdata-for-IoT.md +++ b/docs/netdata-for-IoT.md @@ -2,7 +2,7 @@ ![image1](https://cloud.githubusercontent.com/assets/2662304/14252446/11ae13c4-fa90-11e5-9d03-d93a3eb3317a.gif) -> New to netdata? Check its demo: **[https://my-netdata.io/](https://my-netdata.io/)** +> New to Netdata? Check its demo: **[https://my-netdata.io/](https://my-netdata.io/)** > > [![User Base](https://registry.my-netdata.io/api/v1/badge.svg?chart=netdata.registry_entries&dimensions=persons&label=user%20base&units=null&value_color=blue&precision=0&v41)](https://registry.my-netdata.io/#netdata_registry) [![Monitored Servers](https://registry.my-netdata.io/api/v1/badge.svg?chart=netdata.registry_entries&dimensions=machines&label=servers%20monitored&units=null&value_color=orange&precision=0&v41)](https://registry.my-netdata.io/#netdata_registry) [![Sessions Served](https://registry.my-netdata.io/api/v1/badge.svg?chart=netdata.registry_sessions&label=sessions%20served&units=null&value_color=yellowgreen&precision=0&v41)](https://registry.my-netdata.io/#netdata_registry) > @@ -10,23 +10,23 @@ --- -netdata is a **very efficient** server performance monitoring solution. When running in server hardware, it can collect thousands of system and application metrics **per second** with just 1% CPU utilization of a single core. 
Its web server responds to most data requests in about **half a millisecond** making its web dashboards spontaneous, amazingly fast! +Netdata is a **very efficient** server performance monitoring solution. When running in server hardware, it can collect thousands of system and application metrics **per second** with just 1% CPU utilization of a single core. Its web server responds to most data requests in about **half a millisecond** making its web dashboards spontaneous, amazingly fast! -netdata can also be a very efficient real-time monitoring solution for **IoT devices** (RPIs, routers, media players, wifi access points, industrial controllers and sensors of all kinds). Netdata will generally run everywhere a Linux kernel runs (and it is glibc and [musl-libc](https://www.musl-libc.org/) friendly). +Netdata can also be a very efficient real-time monitoring solution for **IoT devices** (RPIs, routers, media players, wifi access points, industrial controllers and sensors of all kinds). Netdata will generally run everywhere a Linux kernel runs (and it is glibc and [musl-libc](https://www.musl-libc.org/) friendly). You can use it as both a data collection agent (where you pull data using its API), for embedding its charts on other web pages / consoles, but also for accessing it directly with your browser to view its dashboard. -The netdata web API already provides **reduce** functions allowing it to report **average** and **max** for any timeframe. It can also respond in many formats including JSON, JSONP, CSV, HTML. Its API is also a **google charts** provider so it can directly be used by google sheets, google charts, google widgets. +The Netdata web API already provides **reduce** functions allowing it to report **average** and **max** for any timeframe. It can also respond in many formats including JSON, JSONP, CSV, HTML. Its API is also a **google charts** provider so it can directly be used by google sheets, google charts, google widgets. 
![sensors](https://cloud.githubusercontent.com/assets/2662304/15339745/8be84540-1c8e-11e6-9e9a-106dea7539b6.gif) -Although netdata has been significantly optimized to lower the CPU and RAM resources it consumes, the plethora of data collection plugins may be inappropriate for weak IoT devices. Please follow the guide on [running netdata in embedded devices](Performance.md) +Although Netdata has been significantly optimized to lower the CPU and RAM resources it consumes, the plethora of data collection plugins may be inappropriate for weak IoT devices. Please follow the guide on [running Netdata in embedded devices](Performance.md) ## Monitoring RPi temperature The python version of the sensors plugin uses `lm-sensors`. Unfortunately the temperature reading of RPi are not supported by `lm-sensors`. -netdata also has a bash version of the sensors plugin that can read RPi temperatures. It is disabled by default to avoid the conflicts with the python version. +Netdata also has a bash version of the sensors plugin that can read RPi temperatures. It is disabled by default to avoid the conflicts with the python version. To enable it, run `sudo edit-config charts.d.conf` and uncomment this line: @@ -34,7 +34,7 @@ To enable it, run `sudo edit-config charts.d.conf` and uncomment this line: sensors=force ``` -Then restart netdata. You will get this: +Then restart Netdata. You will get this: ![image](https://user-images.githubusercontent.com/2662304/29658868-23aa65ae-88c5-11e7-9dad-c159600db5cc.png) diff --git a/docs/netdata-security.md b/docs/netdata-security.md index 642881067..955abebd8 100644 --- a/docs/netdata-security.md +++ b/docs/netdata-security.md @@ -163,7 +163,7 @@ If sending this information to the central Netdata registry violates your securi Starting with v1.12 Netdata also collects [anonymous statistics](anonymous-statistics.md) on certain events for: -1. 
**Quality assurance**, to help us understand if netdata behaves as expected and help us identify repeating issues for certain distributions or environments. +1. **Quality assurance**, to help us understand if Netdata behaves as expected and help us identify repeating issues for certain distributions or environments. 2. **Usage statistics**, to help us focus on the parts of Netdata that are used the most, or help us identify the extent our development decisions influence the community. @@ -171,11 +171,11 @@ To opt-out from sending anonymous statistics, you can create a file called `.opt ## Netdata directories -path|owner|permissions| netdata |comments| +path|owner|permissions| Netdata |comments| :---|:----|:----------|:--------|:-------| -`/etc/netdata`|user `root`
group `netdata`|dirs `0755`
files `0640`|reads|**netdata config files**
may contain sensitive information, so group `netdata` is allowed to read them. -`/usr/libexec/netdata`|user `root`
group `root`|executable by anyone
dirs `0755`
files `0644` or `0755`|executes|**netdata plugins**
permissions depend on the file - not all of them should have the executable flag.
there are a few plugins that run with escalated privileges (Linux capabilities or `setuid`) - these plugins should be executable only by group `netdata`. -`/usr/share/netdata`|user `root`
group `netdata`|readable by anyone
dirs `0755`
files `0644`|reads and sends over the network|**Netdata web static files**
these files are sent over the network to anyone that has access to the netdata web server. Netdata checks the ownership of these files (using settings at the `[web]` section of `netdata.conf`) and refuses to serve them if they are not properly owned. Symbolic links are not supported. Netdata also refuses to serve URLs with `..` in their name. +`/etc/netdata`|user `root`
group `netdata`|dirs `0755`
files `0640`|reads|**Netdata config files**
may contain sensitive information, so group `netdata` is allowed to read them. +`/usr/libexec/netdata`|user `root`
group `root`|executable by anyone
dirs `0755`
files `0644` or `0755`|executes|**Netdata plugins**
permissions depend on the file - not all of them should have the executable flag.
there are a few plugins that run with escalated privileges (Linux capabilities or `setuid`) - these plugins should be executable only by group `netdata`. +`/usr/share/netdata`|user `root`
group `netdata`|readable by anyone
dirs `0755`
files `0644`|reads and sends over the network|**Netdata web static files**
these files are sent over the network to anyone that has access to the Netdata web server. Netdata checks the ownership of these files (using settings at the `[web]` section of `netdata.conf`) and refuses to serve them if they are not properly owned. Symbolic links are not supported. Netdata also refuses to serve URLs with `..` in their name. `/var/cache/netdata`|user `netdata`
group `netdata`|dirs `0750`
files `0660`|reads, writes, creates, deletes|**Netdata ephemeral database files**
Netdata stores its ephemeral real-time database here. `/var/lib/netdata`|user `netdata`
group `netdata`|dirs `0750`
files `0660`|reads, writes, creates, deletes|**Netdata permanent database files**
Netdata stores here the registry data, health alarm log db, etc. `/var/log/netdata`|user `netdata`
group `root`|dirs `0755`
files `0644`|writes, creates|**Netdata log files**
all the Netdata applications, logs their errors or other informational messages to files in this directory. These files should be log rotated. diff --git a/docs/privacy-policy.md b/docs/privacy-policy.md index af50b8851..e46d783ed 100644 --- a/docs/privacy-policy.md +++ b/docs/privacy-policy.md @@ -32,21 +32,34 @@ Note that you can learn about Google’s practices in connection with its analyt Information from Cookies: We and our service providers (for example, Google Analytics as described above) may collect information using cookies or similar technologies for the purposes described above and below. Cookies are pieces of information that are stored by your browser on the hard drive or memory of your computer or other Internet access device. Cookies may enable us to personalize your experience on the Services, maintain a persistent session, passively collect demographic information about your computer, and monitor advertisements and other activities. The Websites may use different kinds of cookies and other types of local storage (such as browser-based or plugin-based local storage). +ND Registry: The global registry, together with certain browser features, allows Netdata to provide unified cross-server dashboards, via the node menu. +The menu lists the Netdata servers you have visited. For example, when you jump from server to server using the node menu, several session settings +(like the currently viewed charts, the current zoom and pan operations on the charts, etc.) are propagated to the new server, so that the new dashboard will come with exactly the +same view. The global registry keeps track of 4 entities: -ND Registry: The global registry, together with certain browser features, allow netdata to provide unified cross-server dashboards, via the `my-netdata` menu. The menu lists the netdata servers you have visited. 
For example, when you jump from server to server using the `my-netdata` menu, several session settings (like the currently viewed charts, the current zoom and pan operations on the charts, etc.) are propagated to the new server, so that the new dashboard will come with exactly the same view. The global registry keeps track of 3 entities: +1. **machines**: i.e. the netdata installations (a random GUID generated by each netdata the first time it starts; we call this **machine_guid**) -1. **machines**: i.e. the netdata installations (a random GUID generated by each netdata the first time it starts; we call this **machine_guid**). For each netdata installation (each `machine_guid`) the registry keeps track of the different URLs it is accessed. + For each netdata installation (each `machine_guid`) the registry keeps track of the different URLs it is accessed. -2. **persons**: i.e. the web browsers accessing the netdata installations (a random GUID generated by the registry the first time it sees a new web browser; we call this **person_guid**). For each person, the registry keeps track of the netdata installations it has accessed and their URLs. +2. **persons**: i.e. the web browsers accessing the netdata installations (a random GUID generated by the registry the first time it sees a new web browser; we call this **person_guid**) -3. **URLs** of netdata installations (as seen by the web browsers). For each URL, the registry keeps the URL and nothing more. Each URL is linked to *persons* and *machines*. The only way to find a URL is to know its **machine_guid** or have a **person_guid** it is linked to it. + For each person, the registry keeps track of the netdata installations it has accessed and their URLs. + +3. **URLs** of netdata installations (as seen by the web browsers) + + For each URL, the registry keeps the URL and nothing more. Each URL is linked to *persons* and *machines*. 
The only way to find a URL is to know its **machine_guid** or have a **person_guid** that is linked to it. + +4. **accounts**: i.e. the information used to sign in via one of the available sign-in methods. Depending on the method, this may include an email, or an email and a profile picture. + +For *persons*/*accounts* and *machines*, the registry keeps links to *URLs*, each link with 2 timestamps (first time seen, last time seen) and a counter (number of times it has been seen). +*machines*, *persons*, and timestamps are stored in the netdata registry regardless of whether you sign in or not. If sending this information is against your policies, you can [run your own registry](../registry/#run-your-own-registry). Note that ND versions with the 'Sign in' feature of the ND Cloud do not use the global registry. ND Cloud: When you sign up to obtain a user account via the 'Sign in' link on the ND agent user interface, ND is granted access to personal information in the user profile of the authentication provider you choose (e.g. GitHub or Google). ND collects and uses this personal information pursuant to its legitimate interest in establishing and maintaining your account providing you with the features we provide Registered Users. We may use your email address to contact you regarding changes to this policy or other applicable policies. The login name or email address of your profile may be used to attribute you in connection with any content you submit to any Service. -Anonymous Usage Statistics: From Netdata v1.12 and above, anonymous usage information is collected by default on certain events of the ND daemon and send to Google Analytics. Every time the daemon is started or stopped and every time a fatal condition is encountered, netdata collects system information and sends it to GA via an http call. 
The information collected for all events is: +Anonymous Usage Statistics: From Netdata v1.12 and above, anonymous usage information is collected by default on certain events of the ND daemon and sent to Google Analytics. Every time the daemon is started or stopped and every time a fatal condition is encountered, Netdata collects system information and sends it to GA via an http call. The information collected for all events is: - Netdata version - OS name, version, id, id_like - Kernel name, version, architecture @@ -56,9 +69,9 @@ Furthermore, the FATAL event sends the Netdata process & thread info, along with The statistics calculated from this information are used for: -1. **Quality assurance**, to help us understand if netdata behaves as expected and help us identify repeating issues for certain distributions or environment. +1. **Quality assurance**, to help us understand if Netdata behaves as expected and help us identify repeating issues for certain distributions or environments. -2. **Usage statistics**, to help us focus on the parts of netdata that are used the most, or help us identify the extend our development decisions influence the community. +2. **Usage statistics**, to help us focus on the parts of Netdata that are used the most, or help us identify the extent to which our development decisions influence the community. To opt-out from sending anonymous statistics, you can create reate a file called `.opt-out-from-anonymous-statistics` under the user configuration directory (usually `/etc/netdata`). diff --git a/docs/why-netdata/1s-granularity.md b/docs/why-netdata/1s-granularity.md index 089854543..0d12a2d41 100644 --- a/docs/why-netdata/1s-granularity.md +++ b/docs/why-netdata/1s-granularity.md @@ -34,13 +34,13 @@ So, the monitoring industry fails to massively provide high resolution metrics, 2. Data collection needs optimization, otherwise it will significantly affect the monitored systems. 3. 
Data collection is a lot harder, especially on busy virtual environments. -## What does netdata do differently? +## What does Netdata do differently? Netdata decentralizes monitoring completely. Each Netdata node is autonomous. It collects metrics locally, it stores them locally, it runs checks against them to trigger alarms locally, and provides an API for the dashboards to visualize them. This allows Netdata to scale to infinity. Of course, Netdata can centralize metrics when needed. For example, it is not practical to keep metrics locally on ephemeral nodes. For these cases, Netdata streams the metrics in real-time, from the ephemeral nodes to one or more non-ephemeral nodes nearby. This centralization is again distributed. On a large infrastructure, there may be many centralization points. -To eliminate the error introduced by data collection latencies on busy virtual environments, Netdata interpolates collected metrics. It does this using microsecond timings, per data source, offering measurements with an error rate of 0.0001%. When running [in debug mode, netdata calculates this error rate](https://github.com/netdata/netdata/blob/36199f449852f8077ea915a3a14a33fa2aff6d85/database/rrdset.c#L1070-L1099) for every point collected, ensuring that the database works with acceptable accuracy. +To eliminate the error introduced by data collection latencies on busy virtual environments, Netdata interpolates collected metrics. It does this using microsecond timings, per data source, offering measurements with an error rate of 0.0001%. When running [in debug mode, Netdata calculates this error rate](https://github.com/netdata/netdata/blob/36199f449852f8077ea915a3a14a33fa2aff6d85/database/rrdset.c#L1070-L1099) for every point collected, ensuring that the database works with acceptable accuracy. Finally, Netdata is really fast. Optimization is a core product feature. 
On modern hardware, Netdata can collect metrics with a rate of above 1M metrics per second per core (this includes everything, parsing data sources, interpolating data, storing data in the time series database, etc). So, for a few thousands metrics per second per node, Netdata needs negligible CPU resources (just 1-2% of a single core). diff --git a/docs/why-netdata/immediate-results.md b/docs/why-netdata/immediate-results.md index 9afe4afdc..123336711 100644 --- a/docs/why-netdata/immediate-results.md +++ b/docs/why-netdata/immediate-results.md @@ -20,7 +20,7 @@ Open-source solutions rely almost entirely on configuration. So, you have to go Monitoring SaaS providers offer a very basic set of pre-configured metrics, dashboards and alarms. They assume that you will configure the rest you may need. So, once more, the result will reflect your skills, your experience, your understanding. -## What does netdata do? +## What does Netdata do? 1. Metrics are auto-detected, so for 99% of the cases data collection works out of the box. 2. Metrics are converted to human readable units, right after data collection, before storing them into the database. diff --git a/docs/why-netdata/meaningful-presentation.md b/docs/why-netdata/meaningful-presentation.md index 6414d023f..f6fd07560 100644 --- a/docs/why-netdata/meaningful-presentation.md +++ b/docs/why-netdata/meaningful-presentation.md @@ -42,9 +42,9 @@ Of course, it is just not practical to work that way when the database has 10,00 So, they collect very limited metrics. Basic dashboards can be created with these metrics, but for any issue that needs to be troubleshooted, the monitoring system is just not adequate. It cannot help. So, engineers are using the console to access the rest of the metrics and find the root cause. -## What does netdata do? +## What does Netdata do? -In netdata, the meaning of metrics is incorporated into the database: +In Netdata, the meaning of metrics is incorporated into the database: 1. 
all metrics are converted and stored to human-friendly units. This is a data-collection process, not a visualization process. For example, cpu utilization in Netdata is stored as percentage, not as kernel ticks. diff --git a/docs/why-netdata/unlimited-metrics.md b/docs/why-netdata/unlimited-metrics.md index e35034a2b..a4ecaf3f2 100644 --- a/docs/why-netdata/unlimited-metrics.md +++ b/docs/why-netdata/unlimited-metrics.md @@ -33,12 +33,12 @@ They can't do otherwise! 2. It is a lot easier to provide an illusion of monitoring by using a few basic metrics. 3. Troubleshooting slowdowns is the hardest IT problem to solve, so most solutions just avoid it. -## What does netdata do? +## What does Netdata do? Netdata collects, stores and visualizes everything, every single metric exposed by systems and applications. Due to Netdata's distributed nature, the number of metrics collected does not have any noticeable effect on the performance or the cost of the monitoring infrastructure. -Of course, since netdata is also about [meaningful presentation](meaningful-presentation.md), the number of metrics makes Netdata development slower. We, the Netdata developers, need to have a good understanding of the metrics before adding them into Netdata. We need to organize the metrics, add information related to them, configure alarms for them, so that you, the Netdata users, will have the best out-of-the-box experience and all the information required to kill the console for troubleshooting slowdowns. +Of course, since Netdata is also about [meaningful presentation](meaningful-presentation.md), the number of metrics makes Netdata development slower. We, the Netdata developers, need to have a good understanding of the metrics before adding them into Netdata. 
We need to organize the metrics, add information related to them, configure alarms for them, so that you, the Netdata users, will have the best out-of-the-box experience and all the information required to kill the console for troubleshooting slowdowns. [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdocs%2Fwhy-netdata%2Funlimited-metrics&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/health/Makefile.am b/health/Makefile.am index 62a4c6d33..5310bd8aa 100644 --- a/health/Makefile.am +++ b/health/Makefile.am @@ -35,6 +35,7 @@ dist_healthconfig_DATA = \ health.d/cpu.conf \ health.d/couchdb.conf \ health.d/disks.conf \ + health.d/dnsmasq_dhcp.conf \ health.d/dockerd.conf \ health.d/elasticsearch.conf \ health.d/entropy.conf \ @@ -62,13 +63,16 @@ dist_healthconfig_DATA = \ health.d/netfilter.conf \ health.d/nginx.conf \ health.d/nginx_plus.conf \ + health.d/pihole.conf \ health.d/phpfpm.conf \ health.d/portcheck.conf \ health.d/postgres.conf \ + health.d/processes.conf \ health.d/qos.conf \ health.d/ram.conf \ health.d/redis.conf \ health.d/retroshare.conf \ + health.d/riakkv.conf \ health.d/softnet.conf \ health.d/squid.conf \ health.d/stiebeleltron.conf \ @@ -81,6 +85,8 @@ dist_healthconfig_DATA = \ health.d/udp_errors.conf \ health.d/varnish.conf \ health.d/web_log.conf \ + health.d/wmi.conf \ health.d/x509check.conf \ health.d/zfs.conf \ + health.d/dbengine.conf \ $(NULL) diff --git a/health/README.md b/health/README.md index 54f6a3e1f..81cc043d0 100644 --- a/health/README.md +++ b/health/README.md @@ -11,7 +11,6 @@ packet dropped). Netdata also supports alarm **templates**, so that an alarm can be attached to all the charts of the same context (i.e. all network interfaces, or all disks, or all mysql servers, etc.). 
- Each alarm can execute a single query to the database using statistical algorithms against past data, but alarms can be combined. So, if you need 2 queries in the database, you can combine 2 alarms together (both will run a query to the database, and the results can be combined). @@ -342,6 +341,24 @@ delay: [[[up U] [down D] multiplier M] max X] their matching one) and a delay is in place. - All are reset to their defaults when the alarm switches state without a delay in place. +--- + +#### Alarm line `repeat` + +Defines the interval between repeating notifications for the alarms in CRITICAL or WARNING mode. This will override the default interval settings inherited from health settings in `netdata.conf`. The default settings for repeating notifications are `default repeat warning = DURATION` and `default repeat critical = DURATION` which can be found in health stock configuration. + +Format: + +``` +repeat: [off] [warning DURATION] [critical DURATION] +``` + +* `off`: Turns off the repeating feature for the current alarm. This is effective when the default repeat settings have been enabled in health configuration. +* `warning DURATION`: Defines the interval when the alarm is in WARNING state. Use `0s` to turn off the repeating notification for WARNING mode. +* `critical DURATION`: Defines the interval when the alarm is in CRITICAL state. Use `0s` to turn off the repeating notification for CRITICAL mode. + +--- + #### Alarm line `option` The only possible value for the `option` line is @@ -567,12 +584,15 @@ template: disk_full_percent every: 1m warn: $this > 80 crit: $this > 95 + repeat: warning 120s critical 10s ``` `$used` and `$avail` are the `used` and `avail` chart dimensions as shown on the dashboard. So, the `calc` line finds the percentage of used space. `$this` resolves to this percentage. +This is a repeating alarm and if the alarm becomes CRITICAL it repeats the notifications every 10 seconds. 
It also repeats notifications every 2 minutes if the alarm goes into WARNING mode. + ### Example 3 Predict if any disk will run out of space in the near future. diff --git a/health/health.c b/health/health.c index f92a1ba6b..55bd72843 100644 --- a/health/health.c +++ b/health/health.c @@ -13,18 +13,74 @@ unsigned int default_health_enabled = 1; // ---------------------------------------------------------------------------- // health initialization +/** + * User Config directory + * + * Get the config directory for health and return it. + * + * @return a pointer to the user config directory + */ inline char *health_user_config_dir(void) { char buffer[FILENAME_MAX + 1]; snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_user_config_dir); return config_get(CONFIG_SECTION_HEALTH, "health configuration directory", buffer); } +/** + * Stock Config Directory + * + * Get the Stock config directory and return it. + * + * @return a pointer to the stock config directory. + */ inline char *health_stock_config_dir(void) { char buffer[FILENAME_MAX + 1]; snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_stock_config_dir); return config_get(CONFIG_SECTION_HEALTH, "stock health configuration directory", buffer); } +/** + * Silencers init + * + * Function used to initialize the silencer structure. 
+ */ +void health_silencers_init(void) { + struct stat statbuf; + if (!stat(silencers_filename,&statbuf)) { + off_t length = statbuf.st_size; + if (length && length < HEALTH_SILENCERS_MAX_FILE_LEN) { + FILE *fd = fopen(silencers_filename, "r"); + if (fd) { + char *str = mallocz((length+1)* sizeof(char)); + if(str) { + size_t copied; + copied = fread(str, sizeof(char), length, fd); + if (copied == (length* sizeof(char))) { + str[length] = 0x00; + json_parse(str, NULL, health_silencers_json_read_callback); + info("Parsed health silencers file %s", silencers_filename); + } else { + error("Cannot read the data from health silencers file %s", silencers_filename); + } + freez(str); + } + fclose(fd); + } else { + error("Cannot open the file %s",silencers_filename); + } + } else { + error("Health silencers file %s has the size %ld that is out of range[ 1 , %d ]. Aborting read.", silencers_filename, length, HEALTH_SILENCERS_MAX_FILE_LEN); + } + } else { + error("Cannot open the file %s",silencers_filename); + } +} + +/** + * Health Init + * + * Initialize the health thread. + */ void health_init(void) { debug(D_HEALTH, "Health configuration initializing"); @@ -32,11 +88,20 @@ void health_init(void) { debug(D_HEALTH, "Health is disabled."); return; } + + health_silencers_init(); } // ---------------------------------------------------------------------------- // re-load health configuration +/** + * Reload host + * + * Reload configuration for a specific host. + * + * @param host the structure of the host that the function will reload the configuration. + */ void health_reload_host(RRDHOST *host) { if(unlikely(!host->health_enabled)) return; @@ -84,6 +149,11 @@ void health_reload_host(RRDHOST *host) { rrdhost_unlock(host); } +/** + * Reload + * + * Reload the host configuration for all hosts. 
+ */ void health_reload(void) { rrd_rdlock(); @@ -255,17 +325,18 @@ static inline void health_alarm_log_process(RRDHOST *host) { netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock); ALARM_ENTRY *ae; - for(ae = host->health_log.alarms; ae && ae->unique_id >= host->health_last_processed_id ; ae = ae->next) { - if(unlikely( - !(ae->flags & HEALTH_ENTRY_FLAG_PROCESSED) && - !(ae->flags & HEALTH_ENTRY_FLAG_UPDATED) + for(ae = host->health_log.alarms; ae && ae->unique_id >= host->health_last_processed_id; ae = ae->next) { + if(likely(!alarm_entry_isrepeating(host, ae))) { + if(unlikely( + !(ae->flags & HEALTH_ENTRY_FLAG_PROCESSED) && + !(ae->flags & HEALTH_ENTRY_FLAG_UPDATED) )) { + if(unlikely(ae->unique_id < first_waiting)) + first_waiting = ae->unique_id; - if(unlikely(ae->unique_id < first_waiting)) - first_waiting = ae->unique_id; - - if(likely(now >= ae->delay_up_to_timestamp)) - health_process_notifications(host, ae); + if(likely(now >= ae->delay_up_to_timestamp)) + health_process_notifications(host, ae); + } } } @@ -294,10 +365,12 @@ static inline void health_alarm_log_process(RRDHOST *host) { ALARM_ENTRY *t = ae->next; - health_alarm_log_free_one_nochecks_nounlink(ae); + if(likely(!alarm_entry_isrepeating(host, ae))) { + health_alarm_log_free_one_nochecks_nounlink(ae); + host->health_log.count--; + } ae = t; - host->health_log.count--; } netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock); @@ -411,7 +484,7 @@ SILENCE_TYPE check_silenced(RRDCALC *rc, char* host, SILENCERS *silencers) { debug(D_HEALTH, "Alarm %s matched a silence entry, but no SILENCE or DISABLE command was issued via the command API. 
The match has no effect.", rc->name); } else { debug(D_HEALTH, "Alarm %s via the command API - name:%s context:%s chart:%s host:%s family:%s" - , (silencers->stype==STYPE_DISABLE_ALARMS)?"Disabled":"Silenced" + , (silencers->stype == STYPE_DISABLE_ALARMS)?"Disabled":"Silenced" , rc->name , (rc->rrdset)?rc->rrdset->context:"" , rc->chart @@ -425,6 +498,16 @@ SILENCE_TYPE check_silenced(RRDCALC *rc, char* host, SILENCERS *silencers) { return STYPE_NONE; } +/** + * Update Disabled Silenced + * + * Update the variable rrdcalc_flags of the structure RRDCALC according to the values of the host structure + * + * @param host structure that contains information about the host monitored. + * @param rc structure with information about the alarm + * + * @return It returns 1 in case rrdcalc_flags is DISABLED or 0 otherwise + */ int update_disabled_silenced(RRDHOST *host, RRDCALC *rc) { uint32_t rrdcalc_flags_old = rc->rrdcalc_flags; // Clear the flags @@ -454,6 +537,15 @@ int update_disabled_silenced(RRDHOST *host, RRDCALC *rc) { return 0; } +/** + * Health Main + * + * The main thread of the health system. In this function all the alarms will be processed. + * + * @param ptr is a pointer to the netdata_static_thread structure. 
+ * + * @return It always returns NULL + */ void *health_main(void *ptr) { netdata_thread_cleanup_push(health_main_cleanup, ptr); @@ -464,12 +556,6 @@ void *health_main(void *ptr) { time_t hibernation_delay = config_get_number(CONFIG_SECTION_HEALTH, "postpone alarms during hibernation for seconds", 60); unsigned int loop = 0; - - silencers = mallocz(sizeof(SILENCERS)); - silencers->all_alarms=0; - silencers->stype=STYPE_NONE; - silencers->silencers=NULL; - while(!netdata_exit) { loop++; debug(D_HEALTH, "Health monitoring iteration no %u started", loop); @@ -756,20 +842,22 @@ void *health_main(void *ptr) { rc->delay_last = delay; rc->delay_up_to_timestamp = now + delay; - health_alarm_log( - host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, - rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, - rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info, - rc->delay_last, - ( - ((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) | - ((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0) - ) - - ); - - rc->last_status_change = now; - rc->status = status; + if(likely(!rrdcalc_isrepeating(rc))) { + ALARM_ENTRY *ae = health_create_alarm_entry( + host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, + rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, + rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info, + rc->delay_last, + ( + ((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) | + ((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? 
HEALTH_ENTRY_FLAG_SILENCED : 0) + ) + ); + health_alarm_log(host, ae); + } + rc->last_status_change = now; + rc->old_status = rc->status; + rc->status = status; } rc->last_updated = now; @@ -779,6 +867,35 @@ void *health_main(void *ptr) { next_run = rc->next_update; } + // process repeating alarms + RRDCALC *rc; + for(rc = host->alarms; rc ; rc = rc->next) { + int repeat_every = 0; + if(unlikely(rrdcalc_isrepeating(rc))) { + if(unlikely(rc->status == RRDCALC_STATUS_WARNING)) + repeat_every = rc->warn_repeat_every; + else if(unlikely(rc->status == RRDCALC_STATUS_CRITICAL)) + repeat_every = rc->crit_repeat_every; + } + if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) { + rc->last_repeat = now; + ALARM_ENTRY *ae = health_create_alarm_entry( + host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, + rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, + rc->old_value, rc->value, rc->old_status, rc->status, rc->source, rc->units, rc->info, + rc->delay_last, + ( + ((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) | + ((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? 
HEALTH_ENTRY_FLAG_SILENCED : 0) + ) + ); + ae->last_repeat = rc->last_repeat; + health_process_notifications(host, ae); + debug(D_HEALTH, "Notification sent for the repeating alarm %u.", ae->alarm_id); + health_alarm_log_free_one_nochecks_nounlink(ae); + } + } + rrdhost_unlock(host); } diff --git a/health/health.d/dbengine.conf b/health/health.d/dbengine.conf new file mode 100644 index 000000000..7a623ba2b --- /dev/null +++ b/health/health.d/dbengine.conf @@ -0,0 +1,26 @@ + +# you can disable an alarm notification by setting the 'to' line to: silent + + alarm: 10min_dbengine_global_fs_errors + on: netdata.dbengine_global_errors + os: linux freebsd macos + hosts: * + lookup: sum -10m unaligned of FS errors + units: errors + every: 10s + crit: $this > 0 + delay: down 15m multiplier 1.5 max 1h + info: number of File-System errors dbengine came across the last 10 minutes (too many open files, wrong permissions etc) + to: sysadmin + + alarm: 10min_dbengine_global_io_errors + on: netdata.dbengine_global_errors + os: linux freebsd macos + hosts: * + lookup: sum -10m unaligned of I/O errors + units: errors + every: 10s + crit: $this > 0 + delay: down 1h multiplier 1.5 max 3h + info: number of IO errors dbengine came across the last 10 minutes (out of space, bad disk etc) + to: sysadmin diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf index 26f85848a..9c194ced2 100644 --- a/health/health.d/disks.conf +++ b/health/health.d/disks.conf @@ -13,7 +13,7 @@ template: disk_space_usage on: disk.space os: linux freebsd hosts: * -families: * +families: !/dev !/dev/* !/run !/run/* * calc: $used * 100 / ($avail + $used) units: % every: 1m @@ -27,7 +27,7 @@ template: disk_inode_usage on: disk.inodes os: linux freebsd hosts: * -families: * +families: !/dev !/dev/* !/run !/run/* * calc: $used * 100 / ($avail + $used) units: % every: 1m diff --git a/health/health.d/dnsmasq_dhcp.conf b/health/health.d/dnsmasq_dhcp.conf new file mode 100644 index 000000000..b7eb4e0a3 --- 
/dev/null +++ b/health/health.d/dnsmasq_dhcp.conf @@ -0,0 +1,12 @@ + # dhcp-range utilization + + template: dnsmasq_dhcp_dhcp_range_utilization + on: dnsmasq_dhcp.dhcp_range_utilization + every: 10s + units: % + calc: $used + warn: $this > ( ($status >= $WARNING ) ? ( 80 ) : ( 90 ) ) + crit: $this > ( ($status >= $CRITICAL) ? ( 90 ) : ( 95 ) ) + delay: down 5m + info: dhcp-range utilization above threshold! + to: sysadmin diff --git a/health/health.d/pihole.conf b/health/health.d/pihole.conf new file mode 100644 index 000000000..4a1217239 --- /dev/null +++ b/health/health.d/pihole.conf @@ -0,0 +1,67 @@ + + # Make sure Pi-hole is responding. + +template: pihole_last_collected_secs + on: pihole.dns_queries_total + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: webmaster + + # Blocked DNS queries. + + template: pihole_blocked_queries + on: pihole.dns_queries_percentage + every: 10s + units: % + calc: $blocked + warn: $this > ( ($status >= $WARNING ) ? ( 45 ) : ( 55 ) ) + crit: $this > ( ($status >= $CRITICAL) ? ( 55 ) : ( 75 ) ) + delay: up 2m down 5m + info: percentage of blocked dns queries for the last 24 hour + to: sysadmin + + + # Blocklist last update time. + # Default update interval is a week. + + template: pihole_blocklist_last_update + on: pihole.blocklist_last_update + every: 10s + units: seconds + calc: $ago + warn: $this > 60 * 60 * 24 * 8 + crit: $this > 60 * 60 * 24 * 8 * 2 + info: blocklist last update time + to: sysadmin + + + # Gravity file check (gravity.list). 
+ + template: pihole_blocklist_gravity_file + on: pihole.blocklist_last_update + every: 10s + units: boolean + calc: $file_exists + crit: $this != 1 + delay: up 2m down 5m + info: gravity file existence + to: sysadmin + + + # Pi-hole's ability to block unwanted domains. + # Should be enabled. The whole point of Pi-hole! + + template: pihole_status + on: pihole.unwanted_domains_blocking_status + every: 10s + units: boolean + calc: $enabled + warn: $this != 1 + delay: up 2m down 5m + info: unwanted domains blocking status + to: sysadmin diff --git a/health/health.d/processes.conf b/health/health.d/processes.conf new file mode 100644 index 000000000..d96998fdf --- /dev/null +++ b/health/health.d/processes.conf @@ -0,0 +1,27 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + + alarm: active_processes_limit_freebsd + on: system.active_processes + os: freebsd + hosts: * + calc: $active + units: processes + every: 5s + warn: $this > (($status >= $WARNING) ? (75000) : (80000)) + crit: $this > (($status == $CRITICAL) ? (85000) : (90000)) + delay: down 5m multiplier 1.5 max 1h + info: the number of active processes + to: sysadmin + + alarm: active_processes_limit + on: system.active_processes + os: linux + hosts: * + calc: $active + units: processes + every: 5s + warn: $this > (($status >= $WARNING) ? (25000) : (26000)) + crit: $this > (($status == $CRITICAL) ? 
(28000) : (30000)) + delay: down 5m multiplier 1.5 max 1h + info: number of active processes + to: sysadmin diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf index 93883f73b..4e41bb496 100644 --- a/health/health.d/ram.conf +++ b/health/health.d/ram.conf @@ -27,7 +27,7 @@ on: mem.available os: linux hosts: * - calc: ($avail + $used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers) + calc: ($avail + $system.ram.used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers) units: % every: 10s warn: $this < (($status >= $WARNING) ? (15) : (10)) diff --git a/health/health.d/riakkv.conf b/health/health.d/riakkv.conf new file mode 100644 index 000000000..745302778 --- /dev/null +++ b/health/health.d/riakkv.conf @@ -0,0 +1,80 @@ +# Ensure that Riak is running. template: riak_last_collected_secs +template: riak_last_collected_secs + on: riak.kv.throughput + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: dba + +# Warn if a list keys operation is running. 
+template: riak_list_keys_active + on: riak.core.fsm_active + calc: $list_fsm_active + units: state machines + every: 10s + warn: $list_fsm_active > 0 + info: number of currently running list keys finite state machines + to: dba + + +## Timing healthchecks +# KV GET +template: 1h_kv_get_mean_latency + on: riak.kv.latency.get + calc: $node_get_fsm_time_mean + lookup: average -1h unaligned of time + every: 30s + units: ms + info: mean average KV GET latency over the last hour + +template: riak_kv_get_slow + on: riak.kv.latency.get + calc: $mean + lookup: average -3m unaligned of time + units: ms + every: 10s + warn: ($this > ($1h_kv_get_mean_latency * 2) ) + crit: ($this > ($1h_kv_get_mean_latency * 3) ) + info: average KV GET time over the last 3 minutes, compared to the average over the last hour + delay: down 5m multiplier 1.5 max 1h + to: dba + +# KV PUT +template: 1h_kv_put_mean_latency + on: riak.kv.latency.put + calc: $node_put_fsm_time_mean + lookup: average -1h unaligned of time + every: 30s + units: ms + info: mean average KV PUT latency over the last hour + +template: riak_kv_put_slow + on: riak.kv.latency.put + calc: $mean + lookup: average -3m unaligned of time + units: ms + every: 10s + warn: ($this > ($1h_kv_put_mean_latency * 2) ) + crit: ($this > ($1h_kv_put_mean_latency * 3) ) + info: average KV PUT time over the last 3 minutes, compared to the average over the last hour + delay: down 5m multiplier 1.5 max 1h + to: dba + + +## VM healthchecks + +# Default Erlang VM process limit: 262144 +# On systems observed, this is < 2000, but may grow depending on load. 
+template: riak_vm_high_process_count + on: riak.vm + calc: $sys_process_count + units: processes + every: 10s + warn: $this > 10000 + crit: $this > 100000 + info: number of processes running in the Erlang VM (the default limit on ERTS 10.2.4 is 262144) + to: dba diff --git a/health/health.d/wmi.conf b/health/health.d/wmi.conf new file mode 100644 index 000000000..0441fc1f3 --- /dev/null +++ b/health/health.d/wmi.conf @@ -0,0 +1,130 @@ + +# you can disable an alarm notification by setting the 'to' line to: silent + +## Availability + +template: wmi_last_collected_secs + on: cpu.collector_duration + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sysadmin + +## CPU + +template: wmi_10min_cpu_usage + on: wmi.cpu_utilization_total + os: linux + hosts: * + lookup: average -10m unaligned match-names of dpc,user,privileged,interrupt + units: % + every: 1m + warn: $this > (($status >= $WARNING) ? (75) : (85)) + crit: $this > (($status == $CRITICAL) ? (85) : (95)) + delay: down 15m multiplier 1.5 max 1h + info: cpu utilization for the last 10 minutes + to: sysadmin + + +## Memory + +template: wmi_ram_in_use + on: wmi.memory_utilization + os: linux + hosts: * + calc: ($used) * 100 / ($used + $available) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? (90) : (98)) + delay: down 15m multiplier 1.5 max 1h + info: used RAM + to: sysadmin + +template: wmi_swap_in_use + on: wmi.memory_swap_utilization + os: linux + hosts: * + calc: ($used) * 100 / ($used + $available) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? 
(90) : (98)) + delay: down 15m multiplier 1.5 max 1h + info: used Swap + to: sysadmin + + +## Network + +template: inbound_packets_discarded + on: wmi.net_discarded + os: linux + hosts: * +families: * + lookup: sum -10m unaligned absolute match-names of inbound + units: packets + every: 1m + warn: $this >= 5 + delay: down 1h multiplier 1.5 max 2h + info: interface inbound discarded packets in the last 10 minutes + to: sysadmin + +template: outbound_packets_discarded + on: wmi.net_discarded + os: linux + hosts: * +families: * + lookup: sum -10m unaligned absolute match-names of outbound + units: packets + every: 1m + warn: $this >= 5 + delay: down 1h multiplier 1.5 max 2h + info: interface outbound discarded packets in the last 10 minutes + to: sysadmin + +template: inbound_packets_errors + on: wmi.net_errors + os: linux + hosts: * +families: * + lookup: sum -10m unaligned absolute match-names of inbound + units: packets + every: 1m + warn: $this >= 5 + delay: down 1h multiplier 1.5 max 2h + info: interface inbound errors in the last 10 minutes + to: sysadmin + +template: outbound_packets_errors + on: wmi.net_errors + os: linux + hosts: * +families: * + lookup: sum -10m unaligned absolute match-names of outbound + units: packets + every: 1m + warn: $this >= 5 + delay: down 1h multiplier 1.5 max 2h + info: interface outbound errors in the last 10 minutes + to: sysadmin + + +## Disk + +template: wmi_disk_in_use + on: wmi.logical_disk_utilization + os: linux + hosts: * + calc: ($used) * 100 / ($used + $free) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? 
(90) : (98)) + delay: down 15m multiplier 1.5 max 1h + info: used disk space + to: sysadmin diff --git a/health/health.d/x509check.conf b/health/health.d/x509check.conf index dc0e6c695..a56f48fc3 100644 --- a/health/health.d/x509check.conf +++ b/health/health.d/x509check.conf @@ -1,4 +1,18 @@ +# make sure x509check is running + +template: x509check_last_collected_secs + on: x509check.time_until_expiration + calc: $now - $last_collected_t + units: seconds ago + every: 60s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: webmaster + + template: x509check_days_until_expiration on: x509check.time_until_expiration calc: $expiry diff --git a/health/health.h b/health/health.h index 1511f3648..6920d12de 100644 --- a/health/health.h +++ b/health/health.h @@ -35,16 +35,7 @@ extern unsigned int default_health_enabled; #define HEALTH_LISTEN_BACKLOG 4096 #endif -#define HEALTH_ALARM_KEY "alarm" -#define HEALTH_TEMPLATE_KEY "template" #define HEALTH_ON_KEY "on" -#define HEALTH_CONTEXT_KEY "context" -#define HEALTH_CHART_KEY "chart" -#define HEALTH_HOST_KEY "hosts" -#define HEALTH_OS_KEY "os" -#define HEALTH_FAMILIES_KEY "families" -#define HEALTH_LOOKUP_KEY "lookup" -#define HEALTH_CALC_KEY "calc" #define HEALTH_EVERY_KEY "every" #define HEALTH_GREEN_KEY "green" #define HEALTH_RED_KEY "red" @@ -57,38 +48,9 @@ extern unsigned int default_health_enabled; #define HEALTH_DELAY_KEY "delay" #define HEALTH_OPTIONS_KEY "options" -typedef struct silencer { - char *alarms; - SIMPLE_PATTERN *alarms_pattern; +#define HEALTH_SILENCERS_MAX_FILE_LEN 10000 - char *hosts; - SIMPLE_PATTERN *hosts_pattern; - - char *contexts; - SIMPLE_PATTERN *contexts_pattern; - - char *charts; - SIMPLE_PATTERN *charts_pattern; - - char *families; - SIMPLE_PATTERN *families_pattern; - - 
struct silencer *next; -} SILENCER; - -typedef enum silence_type { - STYPE_NONE, - STYPE_DISABLE_ALARMS, - STYPE_SILENCE_NOTIFICATIONS -} SILENCE_TYPE; - -typedef struct silencers { - int all_alarms; - SILENCE_TYPE stype; - SILENCER *silencers; -} SILENCERS; - -SILENCERS *silencers; +char *silencers_filename; extern void health_init(void); extern void *health_main(void *ptr); @@ -108,7 +70,7 @@ extern void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae); extern ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename); extern void health_alarm_log_load(RRDHOST *host); -extern void health_alarm_log( +extern ALARM_ENTRY* health_create_alarm_entry( RRDHOST *host, uint32_t alarm_id, uint32_t alarm_event_id, @@ -129,6 +91,8 @@ extern void health_alarm_log( int delay, uint32_t flags); +extern void health_alarm_log(RRDHOST *host, ALARM_ENTRY *ae); + extern void health_readdir(RRDHOST *host, const char *user_path, const char *stock_path, const char *subpath); extern char *health_user_config_dir(void); extern char *health_stock_config_dir(void); diff --git a/health/health_config.c b/health/health_config.c index 35fde90bc..0d6e77a9e 100644 --- a/health/health_config.c +++ b/health/health_config.c @@ -23,6 +23,7 @@ #define HEALTH_INFO_KEY "info" #define HEALTH_DELAY_KEY "delay" #define HEALTH_OPTIONS_KEY "options" +#define HEALTH_REPEAT_KEY "repeat" static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { if(!rc->chart) { @@ -45,7 +46,7 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); - debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down 
%d, delay max %d, delay_multiplier %f", + debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", rc->chart?rc->chart:"NOCHART", rc->name, rc->id, @@ -66,10 +67,12 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { rc->delay_up_duration, rc->delay_down_duration, rc->delay_max_duration, - rc->delay_multiplier + rc->delay_multiplier, + rc->warn_repeat_every, + rc->crit_repeat_every ); - rrdcalc_create_part2(host, rc); + rrdcalc_add_to_host(host, rc); return 1; } @@ -100,7 +103,7 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL } } - debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", + debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", rt->name, (rt->context)?rt->context:"NONE", (rt->exec)?rt->exec:"DEFAULT", @@ -120,7 +123,9 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL rt->delay_up_duration, 
rt->delay_down_duration, rt->delay_max_duration, - rt->delay_multiplier + rt->delay_multiplier, + rt->warn_repeat_every, + rt->crit_repeat_every ); if(likely(last)) { @@ -134,48 +139,6 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL return 1; } -static inline int health_parse_duration(char *string, int *result) { - // make sure it is a number - if(!*string || !(isdigit(*string) || *string == '+' || *string == '-')) { - *result = 0; - return 0; - } - - char *e = NULL; - calculated_number n = str2ld(string, &e); - if(e && *e) { - switch (*e) { - case 'Y': - *result = (int) (n * 86400 * 365); - break; - case 'M': - *result = (int) (n * 86400 * 30); - break; - case 'w': - *result = (int) (n * 86400 * 7); - break; - case 'd': - *result = (int) (n * 86400); - break; - case 'h': - *result = (int) (n * 3600); - break; - case 'm': - *result = (int) (n * 60); - break; - - default: - case 's': - *result = (int) (n); - break; - } - } - else - *result = (int)(n); - - return 1; -} - static inline int health_parse_delay( size_t line, const char *filename, char *string, int *delay_up_duration, @@ -202,14 +165,14 @@ static inline int health_parse_delay( while(*s && isspace(*s)) *s++ = '\0'; if(!strcasecmp(key, "up")) { - if (!health_parse_duration(value, delay_up_duration)) { + if (!config_parse_duration(value, delay_up_duration)) { error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, filename, value, key); } else given_up = 1; } else if(!strcasecmp(key, "down")) { - if (!health_parse_duration(value, delay_down_duration)) { + if (!config_parse_duration(value, delay_down_duration)) { error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, filename, value, key); } @@ -224,7 +187,7 @@ static inline int health_parse_delay( else given_multiplier = 1; } else if(!strcasecmp(key, "max")) { - if (!health_parse_duration(value, delay_max_duration)) { + if 
(!config_parse_duration(value, delay_max_duration)) { error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, filename, value, key); } @@ -285,6 +248,50 @@ static inline uint32_t health_parse_options(const char *s) { return options; } +static inline int health_parse_repeat( + size_t line, + const char *file, + char *string, + uint32_t *warn_repeat_every, + uint32_t *crit_repeat_every +) { + + char *s = string; + while(*s) { + char *key = s; + + while(*s && !isspace(*s)) s++; + while(*s && isspace(*s)) *s++ = '\0'; + + if(!*key) break; + + char *value = s; + while(*s && !isspace(*s)) s++; + while(*s && isspace(*s)) *s++ = '\0'; + + if(!strcasecmp(key, "off")) { + *warn_repeat_every = 0; + *crit_repeat_every = 0; + return 1; + } + if(!strcasecmp(key, "warning")) { + if (!config_parse_duration(value, (int*)warn_repeat_every)) { + error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", + line, file, value, key); + } + } + else if(!strcasecmp(key, "critical")) { + if (!config_parse_duration(value, (int*)crit_repeat_every)) { + error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", + line, file, value, key); + } + } + } + + return 1; +} + + static inline int health_parse_db_lookup( size_t line, const char *filename, char *string, RRDR_GROUPING *group_method, int *after, int *before, int *every, @@ -322,7 +329,7 @@ static inline int health_parse_db_lookup( while(*s && !isspace(*s)) s++; while(*s && isspace(*s)) *s++ = '\0'; - if(!health_parse_duration(key, after)) { + if(!config_parse_duration(key, after)) { error("Health configuration at line %zu of file '%s': invalid duration '%s' after group method", line, filename, key); return 0; @@ -343,7 +350,7 @@ static inline int health_parse_db_lookup( while(*s && !isspace(*s)) s++; while(*s && isspace(*s)) *s++ = '\0'; - if (!health_parse_duration(value, before)) { + if (!config_parse_duration(value, before)) { 
error("Health configuration at line %zu of file '%s': invalid duration '%s' for '%s' keyword", line, filename, value, key); } @@ -353,7 +360,7 @@ static inline int health_parse_db_lookup( while(*s && !isspace(*s)) s++; while(*s && isspace(*s)) *s++ = '\0'; - if (!health_parse_duration(value, every)) { + if (!config_parse_duration(value, every)) { error("Health configuration at line %zu of file '%s': invalid duration '%s' for '%s' keyword", line, filename, value, key); } @@ -430,7 +437,8 @@ static int health_readfile(const char *filename, void *data) { hash_info = 0, hash_recipient = 0, hash_delay = 0, - hash_options = 0; + hash_options = 0, + hash_repeat = 0; char buffer[HEALTH_CONF_MAX_LINE + 1]; @@ -454,6 +462,7 @@ static int health_readfile(const char *filename, void *data) { hash_recipient = simple_hash(HEALTH_RECIPIENT_KEY); hash_delay = simple_uhash(HEALTH_DELAY_KEY); hash_options = simple_uhash(HEALTH_OPTIONS_KEY); + hash_repeat = simple_uhash(HEALTH_REPEAT_KEY); } FILE *fp = fopen(filename, "r"); @@ -481,7 +490,7 @@ static int health_readfile(const char *filename, void *data) { if(append < HEALTH_CONF_MAX_LINE) continue; else { - error("Health configuration has too long muli-line at line %zu of file '%s'.", line, filename); + error("Health configuration has too long multi-line at line %zu of file '%s'.", line, filename); } } append = 0; @@ -532,6 +541,9 @@ static int health_readfile(const char *filename, void *data) { rc->value = NAN; rc->old_value = NAN; rc->delay_multiplier = 1.0; + rc->old_status = RRDCALC_STATUS_UNINITIALIZED; + rc->warn_repeat_every = host->health_default_warn_repeat_every; + rc->crit_repeat_every = host->health_default_crit_repeat_every; if(rrdvar_fix_name(rc->name)) error("Health configuration renamed alarm '%s' to '%s'", value, rc->name); @@ -556,6 +568,8 @@ static int health_readfile(const char *filename, void *data) { rt->green = NAN; rt->red = NAN; rt->delay_multiplier = 1.0; + rt->warn_repeat_every = 
host->health_default_warn_repeat_every; + rt->crit_repeat_every = host->health_default_crit_repeat_every; if(rrdvar_fix_name(rt->name)) error("Health configuration renamed template '%s' to '%s'", value, rt->name); @@ -612,7 +626,7 @@ static int health_readfile(const char *filename, void *data) { &rc->options, &rc->dimensions); } else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) { - if(!health_parse_duration(value, &rc->update_every)) + if(!config_parse_duration(value, &rc->update_every)) error("Health configuration at line %zu of file '%s' for alarm '%s' at key '%s' cannot parse duration: '%s'.", line, filename, rc->name, key, value); } @@ -707,6 +721,11 @@ static int health_readfile(const char *filename, void *data) { else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) { rc->options |= health_parse_options(value); } + else if(hash == hash_repeat && !strcasecmp(key, HEALTH_REPEAT_KEY)){ + health_parse_repeat(line, filename, value, + &rc->warn_repeat_every, + &rc->crit_repeat_every); + } else { error("Health configuration at line %zu of file '%s' for alarm '%s' has unknown key '%s'.", line, filename, rc->name, key); @@ -736,7 +755,7 @@ static int health_readfile(const char *filename, void *data) { &rt->update_every, &rt->options, &rt->dimensions); } else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) { - if(!health_parse_duration(value, &rt->update_every)) + if(!config_parse_duration(value, &rt->update_every)) error("Health configuration at line %zu of file '%s' for template '%s' at key '%s' cannot parse duration: '%s'.", line, filename, rt->name, key, value); } @@ -831,6 +850,11 @@ static int health_readfile(const char *filename, void *data) { else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) { rt->options |= health_parse_options(value); } + else if(hash == hash_repeat && !strcasecmp(key, HEALTH_REPEAT_KEY)){ + health_parse_repeat(line, filename, value, + &rt->warn_repeat_every, + 
&rt->crit_repeat_every); + } else { error("Health configuration at line %zu of file '%s' for template '%s' has unknown key '%s'.", line, filename, rt->name, key); diff --git a/health/health_json.c b/health/health_json.c index 781132447..e923b05c6 100644 --- a/health/health_json.c +++ b/health/health_json.c @@ -140,6 +140,8 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC "\t\t\t\"delay_multiplier\": %f,\n" "\t\t\t\"delay\": %d,\n" "\t\t\t\"delay_up_to_timestamp\": %lu,\n" + "\t\t\t\"warn_repeat_every\": \"%u\",\n" + "\t\t\t\"crit_repeat_every\": \"%u\",\n" "\t\t\t\"value_string\": \"%s\",\n" , rc->chart, rc->name , (unsigned long)rc->id @@ -165,6 +167,8 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC , rc->delay_multiplier , rc->delay_last , (unsigned long)rc->delay_up_to_timestamp + , rc->warn_repeat_every + , rc->crit_repeat_every , value_string ); diff --git a/health/health_log.c b/health/health_log.c index 009e42673..c91cde6cb 100644 --- a/health/health_log.c +++ b/health/health_log.c @@ -79,6 +79,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { "\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" "\t%d\t%d\t%d\t%d" "\t" CALCULATED_NUMBER_FORMAT_AUTO "\t" CALCULATED_NUMBER_FORMAT_AUTO + "\t%016lx" "\n" , (ae->flags & HEALTH_ENTRY_FLAG_SAVED)?'U':'A' , host->hostname @@ -112,6 +113,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { , ae->new_value , ae->old_value + , (uint64_t)ae->last_repeat ) < 0)) error("HEALTH [%s]: failed to save alarm log entry to '%s'. 
Health data may be lost in case of abnormal restart.", host->hostname, host->health_log_filename); else { @@ -174,10 +176,40 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena continue; } + // Check if we got last_repeat field + time_t last_repeat = 0; + if(entries > 27) { + char* alarm_name = pointers[13]; + last_repeat = (time_t)strtoul(pointers[27], NULL, 16); + + RRDCALC *rc = alarm_max_last_repeat(host, alarm_name,simple_hash(alarm_name)); + if (!rc) { + for(rc = host->alarms; rc ; rc = rc->next) { + RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_name, (avl *)rc); + if(rdcmp != rc) { + error("Cannot insert the alarm index ID using log %s", rc->name); + } + } + + rc = alarm_max_last_repeat(host, alarm_name,simple_hash(alarm_name)); + } + + if(unlikely(rc)) { + if (rrdcalc_isrepeating(rc)) { + rc->last_repeat = last_repeat; + // We iterate through repeating alarm entries only to + // find the latest last_repeat timestamp. Otherwise, + // there is no need to keep them in memory. + continue; + } + } + } + if(unlikely(*pointers[0] == 'A')) { // make sure it is properly numbered if(unlikely(host->health_log.alarms && unique_id < host->health_log.alarms->unique_id)) { - error("HEALTH [%s]: line %zu of file '%s' has alarm log entry %u in wrong order. Ignoring it.", host->hostname, line, filename, unique_id); + error( "HEALTH [%s]: line %zu of file '%s' has alarm log entry %u in wrong order. Ignoring it." + , host->hostname, line, filename, unique_id); errored++; continue; } @@ -186,11 +218,11 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena } else if(unlikely(*pointers[0] == 'U')) { // find the original - for(ae = host->health_log.alarms; ae; ae = ae->next) { + for(ae = host->health_log.alarms; ae ; ae = ae->next) { if(unlikely(unique_id == ae->unique_id)) { if(unlikely(*pointers[0] == 'A')) { error("HEALTH [%s]: line %zu of file '%s' adds duplicate alarm log entry %u. 
Using the later." - , host->hostname, line, filename, unique_id); + , host->hostname, line, filename, unique_id); *pointers[0] = 'U'; duplicate++; } @@ -270,6 +302,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena ae->new_value = str2l(pointers[25]); ae->old_value = str2l(pointers[26]); + ae->last_repeat = last_repeat; + char value_string[100 + 1]; freez(ae->old_value_string); freez(ae->new_value_string); @@ -339,7 +373,7 @@ inline void health_alarm_log_load(RRDHOST *host) { // ---------------------------------------------------------------------------- // health alarm log management -inline void health_alarm_log( +inline ALARM_ENTRY* health_create_alarm_entry( RRDHOST *host, uint32_t alarm_id, uint32_t alarm_event_id, @@ -398,9 +432,24 @@ inline void health_alarm_log( ae->delay_up_to_timestamp = when + delay; ae->flags |= flags; + ae->last_repeat = 0; + if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL) ae->non_clear_duration += ae->duration; + return ae; +} + +inline void health_alarm_log( + RRDHOST *host, + ALARM_ENTRY *ae +) { + debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id); + + if(unlikely(alarm_entry_isrepeating(host, ae))) { + error("Repeating alarms cannot be added to host's alarm log entries. It seems somewhere in the logic, API is being misused. Alarm id: %u", ae->alarm_id); + return; + } // link it netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock); ae->next = host->health_log.alarms; diff --git a/health/notifications/README.md b/health/notifications/README.md index 5b7b43406..8c7ab66f7 100644 --- a/health/notifications/README.md +++ b/health/notifications/README.md @@ -58,6 +58,9 @@ export NETDATA_ALARM_NOTIFY_DEBUG=1 # send test alarms to any role /usr/libexec/netdata/plugins.d/alarm-notify.sh test "ROLE" ``` + +Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. 
under `/usr/lib/netdata`). You can always find the location of the alarm-notify.sh script in `netdata.conf`. + If you need to dig even deeper, you can trace the execution with `bash -x`. Note that in test mode, alarm-notify.sh calls itself with many more arguments. So first do ```sh bash -x /usr/libexec/netdata/plugins.d/alarm-notify.sh test diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in index ff4b3f3dc..852718bc9 100755 --- a/health/notifications/alarm-notify.sh.in +++ b/health/notifications/alarm-notify.sh.in @@ -189,6 +189,7 @@ fi [ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" [ -z "${NETDATA_CACHE_DIR}" ] && NETDATA_CACHE_DIR="@cachedir_POST@" [ -z "${NETDATA_REGISTRY_URL}" ] && NETDATA_REGISTRY_URL="https://registry.my-netdata.io" +[ -z "${NETDATA_REGISTRY_CLOUD_BASE_URL}" ] && NETDATA_REGISTRY_CLOUD_BASE_URL="https://netdata.cloud" # ----------------------------------------------------------------------------- # parse command line parameters @@ -681,7 +682,7 @@ date=$(date --date=@${when} "${date_format}" 2>/dev/null) # ---------------------------------------------------------------------------- # prepare some extra headers if we've been asked to thread e-mails if [ "${SEND_EMAIL}" == "YES" ] && [ "${EMAIL_THREADING}" != "NO" ]; then - email_thread_headers="In-Reply-To: <${chart}-${name}@${host}>\\nReferences: <${chart}-${name}@${host}>" + email_thread_headers="In-Reply-To: <${chart}-${name}@${host}>\\r\\nReferences: <${chart}-${name}@${host}>" else email_thread_headers= fi @@ -1790,7 +1791,7 @@ if [ "${NETDATA_REGISTRY_URL}" == "https://registry.my-netdata.io" ]; then NETDATA_REGISTRY_UNIQUE_ID="$(cat "@registrydir_POST@/netdata.public.unique.id")" fi fi - if [ ! 
-z "${NETDATA_REGISTRY_UNIQUE_ID}" ]; then + if [ -n "${NETDATA_REGISTRY_UNIQUE_ID}" ]; then GOTOCLOUD=1 fi fi @@ -1798,7 +1799,7 @@ fi if [ ${GOTOCLOUD} -eq 0 ]; then goto_url="${NETDATA_REGISTRY_URL}/goto-host-from-alarm.html?${redirect_params}" else - goto_url="https://netdata.cloud/alarms/redirect?agentID=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}" + goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentID=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}" fi # the severity of the alarm @@ -1953,7 +1954,7 @@ send_pushbullet "${PUSHBULLET_ACCESS_TOKEN}" "${PUSHBULLET_SOURCE_DEVICE}" "${to Severity: ${severity}\\n Chart: ${chart}\\n Family: ${family}\\n -$(date -d @${when})\\n +${date}\\n The source of this alarm is line ${src}" SENT_PUSHBULLET=$? diff --git a/health/notifications/custom/README.md b/health/notifications/custom/README.md index 627dd9d48..eeaad8a60 100644 --- a/health/notifications/custom/README.md +++ b/health/notifications/custom/README.md @@ -1,11 +1,13 @@ # Custom -Netdata allows you to send custom notifications, to any endpoint you choose. -To configure custom notifications, you will need to define the `custom_sender()` function in `health_alarm_notify.conf` -You can look at the other senders in `/usr/libexec/netdata/plugins.d/alarm-notify.sh` for examples. +Netdata allows you to send custom notifications to any endpoint you choose. + +To configure custom notifications, you will need to customize `health_alarm_notify.conf`. You can look at the other senders in `/usr/libexec/netdata/plugins.d/alarm-notify.sh` for examples of how to modify the `custom_sender()` function in `health_alarm_notify.conf`. Ensure you follow the instructions of changing any configuration file to [persist your configuration](../../../docs/configuration-guide.md#persist-my-configuration). + As with other notifications, you will also need to define the recipient list in `DEFAULT_RECIPIENT_CUSTOM` and/or the `role_recipients_custom` array. 
-The following is a sample `custom_sender` function to send an SMS via an imaginary HTTPS endpoint to the SMS gateway: +The following is a sample `custom_sender` function in `health_alarm_notify.conf`, to send an SMS via an imaginary HTTPS endpoint to the SMS gateway: + ``` custom_sender() { # example human readable SMS @@ -37,45 +39,45 @@ The following is a sample `custom_sender` function to send an SMS via an imagina Variables available to the custom_sender: - - ${to_custom} the list of recipients for the alarm - - ${host} the host generated this event - - ${url_host} same as ${host} but URL encoded - - ${unique_id} the unique id of this event - - ${alarm_id} the unique id of the alarm that generated this event - - ${event_id} the incremental id of the event, for this alarm id - - ${when} the timestamp this event occurred - - ${name} the name of the alarm, as given in netdata health.d entries - - ${url_name} same as ${name} but URL encoded - - ${chart} the name of the chart (type.id) - - ${url_chart} same as ${chart} but URL encoded - - ${family} the family of the chart - - ${url_family} same as ${family} but URL encoded - - ${status} the current status : REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL - - ${old_status} the previous status: REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL - - ${value} the current value of the alarm - - ${old_value} the previous value of the alarm - - ${src} the line number and file the alarm has been configured - - ${duration} the duration in seconds of the previous alarm state - - ${duration_txt} same as ${duration} for humans - - ${non_clear_duration} the total duration in seconds this is/was non-clear - - ${non_clear_duration_txt} same as ${non_clear_duration} for humans - - ${units} the units of the value - - ${info} a short description of the alarm - - ${value_string} friendly value (with units) - - ${old_value_string} friendly old value (with units) - - ${image} the URL of an image to represent the 
status of the alarm - - ${color} a color in #AABBCC format for the alarm - - ${goto_url} the URL the user can click to see the netdata dashboard - - ${calc_expression} the expression evaluated to provide the value for the alarm - - ${calc_param_values} the value of the variables in the evaluated expression - - ${total_warnings} the total number of alarms in WARNING state on the host - - ${total_critical} the total number of alarms in CRITICAL state on the host + - `${to_custom}` the list of recipients for the alarm + - `${host}` the host generated this event + - `${url_host}` same as `${host}` but URL encoded + - `${unique_id}` the unique id of this event + - `${alarm_id}` the unique id of the alarm that generated this event + - `${event_id}` the incremental id of the event, for this alarm id + - `${when}` the timestamp this event occurred + - `${name}` the name of the alarm, as given in netdata health.d entries + - `${url_name}` same as `${name}` but URL encoded + - `${chart}` the name of the chart (type.id) + - `${url_chart}` same as `${chart}` but URL encoded + - `${family}` the family of the chart + - `${url_family}` same as `${family}` but URL encoded + - `${status}` the current status : REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL + - `${old_status}` the previous status: REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL + - `${value}` the current value of the alarm + - `${old_value}` the previous value of the alarm + - `${src}` the line number and file the alarm has been configured + - `${duration}` the duration in seconds of the previous alarm state + - `${duration_txt}` same as `${duration}` for humans + - `${non_clear_duration}` the total duration in seconds this is/was non-clear + - `${non_clear_duration_txt}` same as `${non_clear_duration}` for humans + - `${units}` the units of the value + - `${info}` a short description of the alarm + - `${value_string}` friendly value (with units) + - `${old_value_string}` friendly old value 
(with units) + - `${image}` the URL of an image to represent the status of the alarm + - `${color}` a color in #AABBCC format for the alarm + - `${goto_url}` the URL the user can click to see the netdata dashboard + - `${calc_expression}` the expression evaluated to provide the value for the alarm + - `${calc_param_values}` the value of the variables in the evaluated expression + - `${total_warnings}` the total number of alarms in WARNING state on the host + - `${total_critical}` the total number of alarms in CRITICAL state on the host The following are more human friendly: - - ${alarm} like "name = value units" - - ${status_message} like "needs attention", "recovered", "is critical" - - ${severity} like "Escalated to CRITICAL", "Recovered from WARNING" - - ${raised_for} like "(alarm was raised for 10 minutes)" + - `${alarm}` like "name = value units" + - `${status_message}` like "needs attention", "recovered", "is critical" + - `${severity}` like "Escalated to CRITICAL", "Recovered from WARNING" + - `${raised_for}` like "(alarm was raised for 10 minutes)" [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fhealth%2Fnotifications%2Fcustom%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/health/notifications/email/README.md b/health/notifications/email/README.md index 163839b6b..84a9e0ce7 100644 --- a/health/notifications/email/README.md +++ b/health/notifications/email/README.md @@ -30,4 +30,6 @@ sudo su -s /bin/bash netdata Where `[ROLE]` is the role you want to test. The default (if you don't give a `[ROLE]`) is `sysadmin`. +Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). You can always find the location of the alarm-notify.sh script in `netdata.conf`. 
+ [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fhealth%2Fnotifications%2Femail%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/libnetdata/Makefile.am b/libnetdata/Makefile.am index d2710f0a3..87f12b32c 100644 --- a/libnetdata/Makefile.am +++ b/libnetdata/Makefile.am @@ -11,6 +11,8 @@ SUBDIRS = \ config \ dictionary \ eval \ + json \ + health \ locks \ log \ popen \ diff --git a/libnetdata/config/appconfig.c b/libnetdata/config/appconfig.c index 9e6a0c02c..65c36c281 100644 --- a/libnetdata/config/appconfig.c +++ b/libnetdata/config/appconfig.c @@ -411,6 +411,27 @@ int appconfig_set_boolean(struct config *root, const char *section, const char * return value; } +int appconfig_get_duration(struct config *root, const char *section, const char *name, const char *value) +{ + int result = 0; + const char *s; + + s = appconfig_get(root, section, name, value); + if(!s) goto fallback; + + if(!config_parse_duration(s, &result)) { + error("config option '[%s].%s = %s' is configured with an valid duration", section, name, s); + goto fallback; + } + + return result; + + fallback: + if(!config_parse_duration(value, &result)) + error("INTERNAL ERROR: default duration supplied for option '[%s].%s = %s' is not a valid duration", section, name, value); + + return result; +} // ---------------------------------------------------------------------------- // config load/save @@ -586,3 +607,65 @@ void appconfig_generate(struct config *root, BUFFER *wb, int only_changed) appconfig_unlock(root); } } + +/** + * Parse Duration + * + * Parse the string setting the result + * + * @param string the timestamp string + * @param result the output variable + * + * @return It returns 1 on success and 0 otherwise + */ +int config_parse_duration(const char* string, int* result) { + while(*string && isspace(*string)) string++; 
+ + if(unlikely(!*string)) goto fallback; + + if(*string == 'n' && !strcmp(string, "never")) { + // this is a valid option + *result = 0; + return 1; + } + + // make sure it is a number + if(!(isdigit(*string) || *string == '+' || *string == '-')) goto fallback; + + char *e = NULL; + calculated_number n = str2ld(string, &e); + if(e && *e) { + switch (*e) { + case 'Y': + *result = (int) (n * 31536000); + break; + case 'M': + *result = (int) (n * 2592000); + break; + case 'w': + *result = (int) (n * 604800); + break; + case 'd': + *result = (int) (n * 86400); + break; + case 'h': + *result = (int) (n * 3600); + break; + case 'm': + *result = (int) (n * 60); + break; + case 's': + default: + *result = (int) (n); + break; + } + } + else + *result = (int)(n); + + return 1; + + fallback: + *result = 0; + return 0; +} diff --git a/libnetdata/config/appconfig.h b/libnetdata/config/appconfig.h index 78099aad4..32e289f9c 100644 --- a/libnetdata/config/appconfig.h +++ b/libnetdata/config/appconfig.h @@ -119,6 +119,7 @@ extern long long appconfig_get_number(struct config *root, const char *section, extern LONG_DOUBLE appconfig_get_float(struct config *root, const char *section, const char *name, LONG_DOUBLE value); extern int appconfig_get_boolean(struct config *root, const char *section, const char *name, int value); extern int appconfig_get_boolean_ondemand(struct config *root, const char *section, const char *name, int value); +extern int appconfig_get_duration(struct config *root, const char *section, const char *name, const char *value); extern const char *appconfig_set(struct config *root, const char *section, const char *name, const char *value); extern const char *appconfig_set_default(struct config *root, const char *section, const char *name, const char *value); @@ -133,4 +134,6 @@ extern void appconfig_generate(struct config *root, BUFFER *wb, int only_changed extern int appconfig_section_compare(void *a, void *b); +extern int config_parse_duration(const char* 
string, int* result); + #endif /* NETDATA_CONFIG_H */ diff --git a/libnetdata/health/Makefile.am b/libnetdata/health/Makefile.am new file mode 100644 index 000000000..9b7995f17 --- /dev/null +++ b/libnetdata/health/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + + +dist_noinst_DATA = \ + $(NULL) diff --git a/libnetdata/health/health.c b/libnetdata/health/health.c new file mode 100644 index 000000000..b93de8b93 --- /dev/null +++ b/libnetdata/health/health.c @@ -0,0 +1,170 @@ +#include "health.h" + +/** + * Create Silencer + * + * Allocate a new silencer to Netdata. + * + * @return It returns the address of the silencer on success and NULL otherwise + */ +SILENCER *create_silencer(void) { + SILENCER *t = callocz(1, sizeof(SILENCER)); + debug(D_HEALTH, "HEALTH command API: Created empty silencer"); + + return t; +} + +/** + * Health Silencers add + * + * Add one more silencer to the list of silencers.
+ * + * @param silencer + */ +void health_silencers_add(SILENCER *silencer) { + // Add the created instance to the linked list in silencers + silencer->next = silencers->silencers; + silencers->silencers = silencer; + debug(D_HEALTH, "HEALTH command API: Added silencer %s:%s:%s:%s:%s", silencer->alarms, + silencer->charts, silencer->contexts, silencer->hosts, silencer->families + ); +} + +/** + * Silencers Add Parameter + * + * Create a new silencer and adjust the variables + * + * @param silencer a pointer to the silencer that will be adjusted + * @param key the key value sent by client + * @param value the value sent to the key + * + * @return It returns the silencer configured on success and NULL otherwise + */ +SILENCER *health_silencers_addparam(SILENCER *silencer, char *key, char *value) { + static uint32_t + hash_alarm = 0, + hash_template = 0, + hash_chart = 0, + hash_context = 0, + hash_host = 0, + hash_families = 0; + + if (unlikely(!hash_alarm)) { + hash_alarm = simple_uhash(HEALTH_ALARM_KEY); + hash_template = simple_uhash(HEALTH_TEMPLATE_KEY); + hash_chart = simple_uhash(HEALTH_CHART_KEY); + hash_context = simple_uhash(HEALTH_CONTEXT_KEY); + hash_host = simple_uhash(HEALTH_HOST_KEY); + hash_families = simple_uhash(HEALTH_FAMILIES_KEY); + } + + uint32_t hash = simple_uhash(key); + if (unlikely(silencer == NULL)) { + if ( + (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) || + (hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) || + (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) || + (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) || + (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) || + (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) + ) { + silencer = create_silencer(); + if(!silencer) { + error("Cannot add a new silencer to Netdata"); + return NULL; + } + } + } + + if (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) { + silencer->alarms = strdupz(value); + 
silencer->alarms_pattern = simple_pattern_create(silencer->alarms, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) { + silencer->charts = strdupz(value); + silencer->charts_pattern = simple_pattern_create(silencer->charts, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) { + silencer->contexts = strdupz(value); + silencer->contexts_pattern = simple_pattern_create(silencer->contexts, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) { + silencer->hosts = strdupz(value); + silencer->hosts_pattern = simple_pattern_create(silencer->hosts, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) { + silencer->families = strdupz(value); + silencer->families_pattern = simple_pattern_create(silencer->families, NULL, SIMPLE_PATTERN_EXACT); + } + + return silencer; +} + +/** + * JSON Read Callback + * + * Callback called by netdata to create the silencer. + * + * @param e the main json structure + * + * @return It always return 0. 
+ */ +int health_silencers_json_read_callback(JSON_ENTRY *e) +{ + switch(e->type) { + case JSON_OBJECT: +#ifndef ENABLE_JSONC + e->callback_function = health_silencers_json_read_callback; + if(e->name && strcmp(e->name,"")) { + // init silencer + debug(D_HEALTH, "JSON: Got object with a name, initializing new silencer for %s",e->name); +#endif + e->callback_data = create_silencer(); + if(e->callback_data) { + health_silencers_add(e->callback_data); + } +#ifndef ENABLE_JSONC + } +#endif + break; + + case JSON_ARRAY: + e->callback_function = health_silencers_json_read_callback; + break; + + case JSON_STRING: + if(!strcmp(e->name,"type")) { + debug(D_HEALTH, "JSON: Processing type=%s",e->data.string); + if (!strcmp(e->data.string,"SILENCE")) silencers->stype = STYPE_SILENCE_NOTIFICATIONS; + else if (!strcmp(e->data.string,"DISABLE")) silencers->stype = STYPE_DISABLE_ALARMS; + } else { + debug(D_HEALTH, "JSON: Adding %s=%s", e->name, e->data.string); + health_silencers_addparam(e->callback_data, e->name, e->data.string); + } + break; + + case JSON_BOOLEAN: + debug(D_HEALTH, "JSON: Processing all_alarms"); + silencers->all_alarms=e->data.boolean?1:0; + break; + + case JSON_NUMBER: + case JSON_NULL: + break; + } + + return 0; +} + +/** + * Initialize Global Silencers + * + * Initialize the silencer for the whole netdata system. 
+ * + * @return It returns 0 on success and -1 otherwise + */ +int health_initialize_global_silencers() { + silencers = mallocz(sizeof(SILENCERS)); + silencers->all_alarms=0; + silencers->stype=STYPE_NONE; + silencers->silencers=NULL; + + return 0; +} \ No newline at end of file diff --git a/libnetdata/health/health.h b/libnetdata/health/health.h new file mode 100644 index 000000000..a3dc0775f --- /dev/null +++ b/libnetdata/health/health.h @@ -0,0 +1,55 @@ +#ifndef NETDATA_HEALTH_LIB +# define NETDATA_HEALTH_LIB 1 + +# include "../libnetdata.h" + +#define HEALTH_ALARM_KEY "alarm" +#define HEALTH_TEMPLATE_KEY "template" +#define HEALTH_CONTEXT_KEY "context" +#define HEALTH_CHART_KEY "chart" +#define HEALTH_HOST_KEY "hosts" +#define HEALTH_OS_KEY "os" +#define HEALTH_FAMILIES_KEY "families" +#define HEALTH_LOOKUP_KEY "lookup" +#define HEALTH_CALC_KEY "calc" + +typedef struct silencer { + char *alarms; + SIMPLE_PATTERN *alarms_pattern; + + char *hosts; + SIMPLE_PATTERN *hosts_pattern; + + char *contexts; + SIMPLE_PATTERN *contexts_pattern; + + char *charts; + SIMPLE_PATTERN *charts_pattern; + + char *families; + SIMPLE_PATTERN *families_pattern; + + struct silencer *next; +} SILENCER; + +typedef enum silence_type { + STYPE_NONE, + STYPE_DISABLE_ALARMS, + STYPE_SILENCE_NOTIFICATIONS +} SILENCE_TYPE; + +typedef struct silencers { + int all_alarms; + SILENCE_TYPE stype; + SILENCER *silencers; +} SILENCERS; + +SILENCERS *silencers; + +extern SILENCER *create_silencer(void); +extern int health_silencers_json_read_callback(JSON_ENTRY *e); +extern void health_silencers_add(SILENCER *silencer); +extern SILENCER * health_silencers_addparam(SILENCER *silencer, char *key, char *value); +extern int health_initialize_global_silencers(); + +#endif diff --git a/libnetdata/json/Makefile.am b/libnetdata/json/Makefile.am new file mode 100644 index 000000000..1cb69ed99 --- /dev/null +++ b/libnetdata/json/Makefile.am @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + 
+AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/json/README.md b/libnetdata/json/README.md new file mode 100644 index 000000000..fd6cb0f31 --- /dev/null +++ b/libnetdata/json/README.md @@ -0,0 +1,5 @@ +# json + +`json` contains a parser for json strings, based on `jsmn` (https://github.com/zserge/jsmn), but case you have installed the JSON-C library, the installation script will prefer it, you can also force its use with `--enable-jsonc` in the compilation time. + +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Flibnetdata%2Fjson%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/libnetdata/json/jsmn.c b/libnetdata/json/jsmn.c new file mode 100644 index 000000000..c8d9e73db --- /dev/null +++ b/libnetdata/json/jsmn.c @@ -0,0 +1,326 @@ +#include + +#include "jsmn.h" + +/** + * Alloc token + * + * Allocates a fresh unused token from the token pull. + * + * @param parser the controller + * @param tokens the tokens I am working + * @param num_tokens the number total of tokens. + * + * @return it returns the next token to work. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *tok; + if (parser->toknext >= num_tokens) { + return NULL; + } + tok = &tokens[parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; +#ifdef JSMN_PARENT_LINKS + tok->parent = -1; +#endif + return tok; +} + +/** + * Fill Token + * + * Fills token type and boundaries. 
+ * + * @param token the structure to set the values + * @param type is the token type + * @param start is the first position of the value + * @param end is the end of the value + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) { + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +/** + * Parse primitive + * + * Fills next available token with JSON primitive. + * + * @param parser is the control structure + * @param js is the json string + * @param type is the token type + */ +static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + int start; + + start = parser->pos; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + switch (js[parser->pos]) { +#ifndef JSMN_STRICT + /* In strict mode primitive must be followed by "," or "}" or "]" */ + case ':': +#endif + case '\t' : case '\r' : case '\n' : case ' ' : + case ',' : case ']' : case '}' : + goto found; + } + if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } +#ifdef JSMN_STRICT + /* In strict mode primitive must be followed by a comma/object/array */ + parser->pos = start; + return JSMN_ERROR_PART; +#endif + + found: + if (tokens == NULL) { + parser->pos--; + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + parser->pos--; + return 0; +} + +/** + * Parse string + * + * Fills next token with JSON string. + * + * @param parser is the control structure + * @param js is the json string + * @param len is the js length + * @param tokens is structure with the tokens mapped. 
+ * @param num_tokens is the total number of tokens + * + * @return It returns 0 on success and another integer otherwise + */ +static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + + int start = parser->pos; + + parser->pos++; + + /* Skip starting quote */ + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c = js[parser->pos]; + + /* Quote: end of string */ + if (c == '\"') { + if (tokens == NULL) { + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_STRING, start+1, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + return 0; + } + + /* Backslash: Quoted symbol expected */ + if (c == '\\') { + parser->pos++; + switch (js[parser->pos]) { + /* Allowed escaped symbols */ + case '\"': case '/' : case '\\' : case 'b' : + case 'f' : case 'r' : case 'n' : case 't' : + break; + /* Allows escaped symbol \uXXXX */ + case 'u': + parser->pos++; + int i = 0; + for(; i < 4 && js[parser->pos] != '\0'; i++) { + /* If it isn't a hex character we have an error */ + if(!((js[parser->pos] >= 48 && js[parser->pos] <= 57) || /* 0-9 */ + (js[parser->pos] >= 65 && js[parser->pos] <= 70) || /* A-F */ + (js[parser->pos] >= 97 && js[parser->pos] <= 102))) { /* a-f */ + parser->pos = start; + return JSMN_ERROR_INVAL; + } + parser->pos++; + } + parser->pos--; + break; + /* Unexpected symbol */ + default: + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +/** + * JSMN Parse + * + * Parse JSON string and fill tokens. 
+ * + * @param parser the auxiliar vector used to parser + * @param js the string to parse + * @param len the string length + * @param tokens the place to map the tokens + * @param num_tokens the number of tokens present in the tokens structure. + * + * @return It returns the number of tokens present in the string on success or a negative number otherwise + */ +jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, unsigned int num_tokens) { + jsmnerr_t r; + int i; + jsmntok_t *token; + int count = 0; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c; + jsmntype_t type; + + c = js[parser->pos]; + switch (c) { + case '{': case '[': + count++; + if (tokens == NULL) { + break; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) + return JSMN_ERROR_NOMEM; + if (parser->toksuper != -1) { + tokens[parser->toksuper].size++; +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + } + token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); + token->start = parser->pos; + parser->toksuper = parser->toknext - 1; + break; + case '}': case ']': + if (tokens == NULL) + break; + type = (c == '}' ? 
JSMN_OBJECT : JSMN_ARRAY); +#ifdef JSMN_PARENT_LINKS + if (parser->toknext < 1) { + return JSMN_ERROR_INVAL; + } + token = &tokens[parser->toknext - 1]; + for (;;) { + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + token->end = parser->pos + 1; + parser->toksuper = token->parent; + break; + } + if (token->parent == -1) { + break; + } + token = &tokens[token->parent]; + } +#else + for (i = parser->toknext - 1; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + parser->toksuper = -1; + token->end = parser->pos + 1; + break; + } + } + /* Error if unmatched closing bracket */ + if (i == -1) return JSMN_ERROR_INVAL; + for (; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + parser->toksuper = i; + break; + } + } +#endif + break; + case '\"': + r = jsmn_parse_string(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if (parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + case '\t' : case '\r' : case '\n' : case ':' : case ',': case ' ': + break; +#ifdef JSMN_STRICT + /* In strict mode primitives are: numbers and booleans */ + case '-': case '0': case '1' : case '2': case '3' : case '4': + case '5': case '6': case '7' : case '8': case '9': + case 't': case 'f': case 'n' : +#else + /* In non-strict mode every unquoted value is a primitive */ + default: +#endif + r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if (parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + +#ifdef JSMN_STRICT + /* Unexpected char in strict mode */ + default: + return JSMN_ERROR_INVAL; +#endif + } + } + + for (i = parser->toknext - 1; i >= 0; i--) { + /* Unmatched opened object or array */ + if (tokens[i].start != -1 && tokens[i].end == -1) { + return 
JSMN_ERROR_PART; + } + } + + return count; +} + +/** + * JSMN Init + * + * Creates a new parser based over a given buffer with an array of tokens + * available. + * + * @param parser is the structure with values to reset + */ +void jsmn_init(jsmn_parser *parser) { + parser->pos = 0; + parser->toknext = 0; + parser->toksuper = -1; +} \ No newline at end of file diff --git a/libnetdata/json/jsmn.h b/libnetdata/json/jsmn.h new file mode 100644 index 000000000..beff586c6 --- /dev/null +++ b/libnetdata/json/jsmn.h @@ -0,0 +1,75 @@ +#ifndef __JSMN_H_ +#define __JSMN_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +/** + * JSON type identifier. Basic types are: + * o Object + * o Array + * o String + * o Other primitive: number, boolean (true/false) or null + */ +typedef enum { + JSMN_PRIMITIVE = 0, + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, + JSMN_STRING = 3 +} jsmntype_t; + +typedef enum { + /* Not enough tokens were provided */ + JSMN_ERROR_NOMEM = -1, + /* Invalid character inside JSON string */ + JSMN_ERROR_INVAL = -2, + /* The string is not a full JSON packet, more bytes expected */ + JSMN_ERROR_PART = -3, +} jsmnerr_t; + +/** + * JSON token description. + * + * @param type type (object, array, string etc.) + * @param start start position in JSON data string + * @param end end position in JSON data string + */ +typedef struct { + jsmntype_t type; + int start; + int end; + int size; +#ifdef JSMN_PARENT_LINKS + int parent; +#endif +} jsmntok_t; + +/** + * JSON parser. Contains an array of token blocks available. Also stores + * the string being parsed now and current position in that string + */ +typedef struct { + unsigned int pos; /* offset in the JSON string */ + unsigned int toknext; /* next token to allocate */ + int toksuper; /* superior token node, e.g parent object or array */ +} jsmn_parser; + +/** + * Create JSON parser over an array of tokens + */ +void jsmn_init(jsmn_parser *parser); + +/** + * Run JSON parser. 
It parses a JSON data string into and array of tokens, each describing + * a single JSON object. + */ +jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, unsigned int num_tokens); + +#ifdef __cplusplus +} +#endif + +#endif /* __JSMN_H_ */ \ No newline at end of file diff --git a/libnetdata/json/json.c b/libnetdata/json/json.c new file mode 100644 index 000000000..c9ff39b05 --- /dev/null +++ b/libnetdata/json/json.c @@ -0,0 +1,546 @@ +#include "jsmn.h" +#include "../libnetdata.h" +#include "json.h" +#include "libnetdata/libnetdata.h" +#include "../../health/health.h" + +#define JSON_TOKENS 1024 + +int json_tokens = JSON_TOKENS; + +/** + * Json Tokenise + * + * Map the string given inside tokens. + * + * @param js is the string used to create the tokens + * @param len is the string length + * @param count the number of tokens present in the string + * + * @return it returns the json parsed in tokens + */ +#ifdef ENABLE_JSONC +json_object *json_tokenise(char *js) { + if(!js) { + error("JSON: json string is empty."); + return NULL; + } + + json_object *token = json_tokener_parse(js); + if(!token) { + error("JSON: Invalid json string."); + return NULL; + } + + return token; +} +#else +jsmntok_t *json_tokenise(char *js, size_t len, size_t *count) +{ + int n = json_tokens; + if(!js || !len) { + error("JSON: json string is empty."); + return NULL; + } + + jsmn_parser parser; + jsmn_init(&parser); + + jsmntok_t *tokens = mallocz(sizeof(jsmntok_t) * n); + if(!tokens) return NULL; + + int ret = jsmn_parse(&parser, js, len, tokens, n); + while (ret == JSMN_ERROR_NOMEM) { + n *= 2; + jsmntok_t *new = reallocz(tokens, sizeof(jsmntok_t) * n); + if(!new) { + freez(tokens); + return NULL; + } + tokens = new; + ret = jsmn_parse(&parser, js, len, tokens, n); + } + + if (ret == JSMN_ERROR_INVAL) { + error("JSON: Invalid json string."); + freez(tokens); + return NULL; + } + else if (ret == JSMN_ERROR_PART) { + error("JSON: Truncated JSON 
string."); + freez(tokens); + return NULL; + } + + if(count) *count = (size_t)ret; + + if(json_tokens < n) json_tokens = n; + return tokens; +} +#endif + +/** + * Callback Print + * + * Set callback print case necesary and wrinte an information inside a buffer to write in the log. + * + * @param e a pointer for a structure that has the complete information about json structure. + * + * @return It always return 0 + */ +int json_callback_print(JSON_ENTRY *e) +{ + BUFFER *wb=buffer_create(300); + + buffer_sprintf(wb,"%s = ", e->name); + char txt[50]; + switch(e->type) { + case JSON_OBJECT: + e->callback_function = json_callback_print; + buffer_strcat(wb,"OBJECT"); + break; + + case JSON_ARRAY: + e->callback_function = json_callback_print; + sprintf(txt,"ARRAY[%lu]", e->data.items); + buffer_strcat(wb, txt); + break; + + case JSON_STRING: + buffer_strcat(wb, e->data.string); + break; + + case JSON_NUMBER: + sprintf(txt,"%Lf", e->data.number); + buffer_strcat(wb,txt); + + break; + + case JSON_BOOLEAN: + buffer_strcat(wb, e->data.boolean?"TRUE":"FALSE"); + break; + + case JSON_NULL: + buffer_strcat(wb,"NULL"); + break; + } + info("JSON: %s", buffer_tostring(wb)); + buffer_free(wb); + return 0; +} + +/** + * JSONC Set String + * + * Set the string value of the structure JSON_ENTRY. + * + * @param e the output structure + */ +static inline void json_jsonc_set_string(JSON_ENTRY *e,char *key,const char *value) { + size_t length = strlen(key); + e->type = JSON_STRING; + memcpy(e->name,key,length); + e->name[length] = 0x00; + e->data.string = (char *) value; +} + + +#ifdef ENABLE_JSONC +/** + * JSONC set Boolean + * + * Set the boolean value of the structure JSON_ENTRY + * + * @param e the output structure + * @param value the input value + */ +static inline void json_jsonc_set_boolean(JSON_ENTRY *e,int value) { + e->type = JSON_BOOLEAN; + e->data.boolean = value; +} + +/** + * Parse Array + * + * Parse the array object. 
+ * + * @param ptr the pointer for the object that we will parse. + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + */ +static inline void json_jsonc_parse_array(json_object *ptr, void *callback_data,int (*callback_function)(struct json_entry *)) { + int end = json_object_array_length(ptr); + JSON_ENTRY e; + + if(end) { + int i; + i = 0; + + enum json_type type; + do { + json_object *jvalue = json_object_array_get_idx(ptr, i); + if(jvalue) { + e.callback_data = callback_data; + e.type = JSON_OBJECT; + callback_function(&e); + json_object_object_foreach(jvalue, key, val) { + type = json_object_get_type(val); + if (type == json_type_array) { + e.type = JSON_ARRAY; + json_jsonc_parse_array(val, callback_data, callback_function); + } else if (type == json_type_object) { + json_walk(val,callback_data,callback_function); + } else if (type == json_type_string) { + json_jsonc_set_string(&e,key,json_object_get_string(val)); + callback_function(&e); + } else if (type == json_type_boolean) { + json_jsonc_set_boolean(&e,json_object_get_boolean(val)); + callback_function(&e); + } + } + } + + } while (++i < end); + } +} +#else + +/** + * Walk string + * + * Set JSON_ENTRY to string and map the values from jsmntok_t. + * + * @param js the original string + * @param t the tokens + * @param start the first position + * @param e the output structure. 
+ * + * @return It always return 1 + */ +size_t json_walk_string(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e) +{ + char old = js[t[start].end]; + js[t[start].end] = '\0'; + e->original_string = &js[t[start].start]; + + e->type = JSON_STRING; + e->data.string = e->original_string; + if(e->callback_function) e->callback_function(e); + js[t[start].end] = old; + return 1; +} + +/** + * Walk Primitive + * + * Define the data type of the string + * + * @param js the original string + * @param t the tokens + * @param start the first position + * @param e the output structure. + * + * @return It always return 1 + */ +size_t json_walk_primitive(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e) +{ + char old = js[t[start].end]; + js[t[start].end] = '\0'; + e->original_string = &js[t[start].start]; + + switch(e->original_string[0]) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': + case '8': case '9': case '-': case '.': + e->type = JSON_NUMBER; + e->data.number = strtold(e->original_string, NULL); + break; + + case 't': case 'T': + e->type = JSON_BOOLEAN; + e->data.boolean = 1; + break; + + case 'f': case 'F': + e->type = JSON_BOOLEAN; + e->data.boolean = 0; + break; + + case 'n': case 'N': + default: + e->type = JSON_NULL; + break; + } + if(e->callback_function) e->callback_function(e); + js[t[start].end] = old; + return 1; +} + +/** + * Array + * + * Measure the array length + * + * @param js the original string + * @param t the tokens + * @param nest the length of structure t + * @param start the first position + * @param e the output structure. 
+ * + * @return It returns the array length + */ +size_t json_walk_array(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e) +{ + JSON_ENTRY ne = { + .name = "", + .fullname = "", + .callback_data = NULL, + .callback_function = NULL + }; + + char old = js[t[start].end]; + js[t[start].end] = '\0'; + ne.original_string = &js[t[start].start]; + + memcpy(&ne, e, sizeof(JSON_ENTRY)); + ne.type = JSON_ARRAY; + ne.data.items = t[start].size; + ne.callback_function = NULL; + ne.name[0]='\0'; + ne.fullname[0]='\0'; + if(e->callback_function) e->callback_function(&ne); + js[t[start].end] = old; + + size_t i, init = start, size = t[start].size; + + start++; + for(i = 0; i < size ; i++) { + ne.pos = i; + if (!e->name || !e->fullname || strlen(e->name) > JSON_NAME_LEN - 24 || strlen(e->fullname) > JSON_FULLNAME_LEN -24) { + info("JSON: JSON walk_array ignoring element with name:%s fullname:%s",e->name, e->fullname); + continue; + } + sprintf(ne.name, "%s[%lu]", e->name, i); + sprintf(ne.fullname, "%s[%lu]", e->fullname, i); + + switch(t[start].type) { + case JSMN_PRIMITIVE: + start += json_walk_primitive(js, t, start, &ne); + break; + + case JSMN_OBJECT: + start += json_walk_object(js, t, nest + 1, start, &ne); + break; + + case JSMN_ARRAY: + start += json_walk_array(js, t, nest + 1, start, &ne); + break; + + case JSMN_STRING: + start += json_walk_string(js, t, start, &ne); + break; + } + } + return start - init; +} + +/** + * Object + * + * Measure the Object length + * + * @param js the original string + * @param t the tokens + * @param nest the length of structure t + * @param start the first position + * @param e the output structure. 
+ * + * @return It returns the Object length + */ +size_t json_walk_object(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e) +{ + JSON_ENTRY ne = { + .name = "", + .fullname = "", + .callback_data = NULL, + .callback_function = NULL + }; + + char old = js[t[start].end]; + js[t[start].end] = '\0'; + ne.original_string = &js[t[start].start]; + memcpy(&ne, e, sizeof(JSON_ENTRY)); + ne.type = JSON_OBJECT; + ne.callback_function = NULL; + if(e->callback_function) e->callback_function(&ne); + js[t[start].end] = old; + + int key = 1; + size_t i, init = start, size = t[start].size; + + start++; + for(i = 0; i < size ; i++) { + switch(t[start].type) { + case JSMN_PRIMITIVE: + start += json_walk_primitive(js, t, start, &ne); + key = 1; + break; + + case JSMN_OBJECT: + start += json_walk_object(js, t, nest + 1, start, &ne); + key = 1; + break; + + case JSMN_ARRAY: + start += json_walk_array(js, t, nest + 1, start, &ne); + key = 1; + break; + + case JSMN_STRING: + default: + if(key) { + int len = t[start].end - t[start].start; + if (unlikely(len>JSON_NAME_LEN)) len=JSON_NAME_LEN; + strncpy(ne.name, &js[t[start].start], len); + ne.name[len] = '\0'; + len=strlen(e->fullname) + strlen(e->fullname[0]?".":"") + strlen(ne.name); + char *c = mallocz((len+1)*sizeof(char)); + sprintf(c,"%s%s%s", e->fullname, e->fullname[0]?".":"", ne.name); + if (unlikely(len>JSON_FULLNAME_LEN)) len=JSON_FULLNAME_LEN; + strncpy(ne.fullname, c, len); + freez(c); + start++; + key = 0; + } + else { + start += json_walk_string(js, t, start, &ne); + key = 1; + } + break; + } + } + return start - init; +} +#endif + +/** + * Tree + * + * Call the correct walk function according its type. + * + * @param t the json object to work + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. 
+ * + * @return It always return 1 + */ +#ifdef ENABLE_JSONC +size_t json_walk(json_object *t, void *callback_data, int (*callback_function)(struct json_entry *)) { + JSON_ENTRY e; + + e.callback_data = callback_data; + enum json_type type; + json_object_object_foreach(t, key, val) { + type = json_object_get_type(val); + if (type == json_type_array) { + e.type = JSON_ARRAY; + json_jsonc_parse_array(val,NULL,health_silencers_json_read_callback); + } else if (type == json_type_object) { + e.type = JSON_OBJECT; + } else if (type == json_type_string) { + json_jsonc_set_string(&e,key,json_object_get_string(val)); + callback_function(&e); + } else if (type == json_type_boolean) { + json_jsonc_set_boolean(&e,json_object_get_boolean(val)); + callback_function(&e); + } + } + + return 1; +} +#else +/** + * Tree + * + * Call the correct walk function according its type. + * + * @param js the original string + * @param t the tokens + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + * + * @return It always return 1 + */ +size_t json_walk_tree(char *js, jsmntok_t *t, void *callback_data, int (*callback_function)(struct json_entry *)) +{ + JSON_ENTRY e = { + .name = "", + .fullname = "", + .callback_data = callback_data, + .callback_function = callback_function + }; + + switch (t[0].type) { + case JSMN_OBJECT: + e.type = JSON_OBJECT; + json_walk_object(js, t, 0, 0, &e); + break; + + case JSMN_ARRAY: + e.type = JSON_ARRAY; + json_walk_array(js, t, 0, 0, &e); + break; + + case JSMN_PRIMITIVE: + case JSMN_STRING: + break; + } + + return 1; +} +#endif + +/** + * JSON Parse + * + * Parse the json message with the callback function + * + * @param js the string that the callback function will parse + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. 
+ * + * @return JSON_OK case everything happend as expected, JSON_CANNOT_PARSE case there were errors in the + * parsing procces and JSON_CANNOT_DOWNLOAD case the string given(js) is NULL. + */ +int json_parse(char *js, void *callback_data, int (*callback_function)(JSON_ENTRY *)) +{ + if(js) { +#ifdef ENABLE_JSONC + json_object *tokens = json_tokenise(js); +#else + size_t count; + jsmntok_t *tokens = json_tokenise(js, strlen(js), &count); +#endif + + if(tokens) { +#ifdef ENABLE_JSONC + json_walk(tokens, callback_data, callback_function); + json_object_put(tokens); +#else + json_walk_tree(js, tokens, callback_data, callback_function); + freez(tokens); +#endif + return JSON_OK; + } + + return JSON_CANNOT_PARSE; + } + + return JSON_CANNOT_DOWNLOAD; +} + +/* +int json_test(char *str) +{ + return json_parse(str, NULL, json_callback_print); +} + */ \ No newline at end of file diff --git a/libnetdata/json/json.h b/libnetdata/json/json.h new file mode 100644 index 000000000..79b58b170 --- /dev/null +++ b/libnetdata/json/json.h @@ -0,0 +1,72 @@ +#ifndef CHECKIN_JSON_H +#define CHECKIN_JSON_H 1 + + +#if ENABLE_JSONC +# include +#endif + +#include "jsmn.h" + +//https://www.ibm.com/support/knowledgecenter/en/SS9H2Y_7.6.0/com.ibm.dp.doc/json_parserlimits.html +#define JSON_NAME_LEN 256 +#define JSON_FULLNAME_LEN 1024 + +typedef enum { + JSON_OBJECT = 0, + JSON_ARRAY = 1, + JSON_STRING = 2, + JSON_NUMBER = 3, + JSON_BOOLEAN = 4, + JSON_NULL = 5, +} JSON_ENTRY_TYPE; + +typedef struct json_entry { + JSON_ENTRY_TYPE type; + char name[JSON_NAME_LEN + 1]; + char fullname[JSON_FULLNAME_LEN + 1]; + union { + char *string; // type == JSON_STRING + long double number; // type == JSON_NUMBER + int boolean; // type == JSON_BOOLEAN + size_t items; // type == JSON_ARRAY + } data; + size_t pos; // the position of this item in its parent + + char *original_string; + + void *callback_data; + int (*callback_function)(struct json_entry *); +} JSON_ENTRY; + +// 
---------------------------------------------------------------------------- +// public functions + +#define JSON_OK 0 +#define JSON_CANNOT_DOWNLOAD 1 +#define JSON_CANNOT_PARSE 2 + +int json_parse(char *js, void *callback_data, int (*callback_function)(JSON_ENTRY *)); + + +// ---------------------------------------------------------------------------- +// private functions + +#ifdef ENABLE_JSONC +json_object *json_tokenise(char *js); +size_t json_walk(json_object *t, void *callback_data, int (*callback_function)(struct json_entry *)); +#else +jsmntok_t *json_tokenise(char *js, size_t len, size_t *count); +size_t json_walk_tree(char *js, jsmntok_t *t, void *callback_data, int (*callback_function)(struct json_entry *)); +#endif + +size_t json_walk_object(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e); +size_t json_walk_array(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e); +size_t json_walk_string(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e); +size_t json_walk_primitive(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e); + +int json_callback_print(JSON_ENTRY *e); + + + +#endif \ No newline at end of file diff --git a/libnetdata/libnetdata.h b/libnetdata/libnetdata.h index 230dc2442..43dc1e04d 100644 --- a/libnetdata/libnetdata.h +++ b/libnetdata/libnetdata.h @@ -298,6 +298,9 @@ extern char *netdata_configured_host_prefix; #include "clocks/clocks.h" #include "popen/popen.h" #include "simple_pattern/simple_pattern.h" +#ifdef ENABLE_HTTPS +# include "socket/security.h" +#endif #include "socket/socket.h" #include "config/appconfig.h" #include "log/log.h" @@ -307,5 +310,7 @@ extern char *netdata_configured_host_prefix; #include "statistical/statistical.h" #include "adaptive_resortable_list/adaptive_resortable_list.h" #include "url/url.h" +#include "json/json.h" +#include "health/health.h" #endif // NETDATA_LIB_H diff --git a/libnetdata/socket/security.c b/libnetdata/socket/security.c new file mode 100644 index 
000000000..dcbd3f650
--- /dev/null
+++ b/libnetdata/socket/security.c
@@ -0,0 +1,277 @@
+#include "../libnetdata.h"
+
+#ifdef ENABLE_HTTPS
+
+// Process-wide SSL contexts, one per role; NULL until the matching context is started.
+SSL_CTX *netdata_opentsdb_ctx=NULL;
+SSL_CTX *netdata_client_ctx=NULL;
+SSL_CTX *netdata_srv_ctx=NULL;
+// File paths of the PEM private key and certificate loaded into the server context.
+const char *security_key=NULL;
+const char *security_cert=NULL;
+int netdata_use_ssl_on_stream = NETDATA_SSL_OPTIONAL;
+int netdata_use_ssl_on_http = NETDATA_SSL_FORCE; //We force SSL due safety reasons
+int netdata_validate_server = NETDATA_SSL_VALID_CERTIFICATE;
+
+/**
+ * Info Callback
+ *
+ * Function used as callback for the OpenSSL Library
+ *
+ * @param ssl a pointer to the SSL structure of the client
+ * @param where the variable with the flags set.
+ * @param ret the return of the caller
+ */
+static void security_info_callback(const SSL *ssl, int where, int ret) {
+    (void)ssl;
+    if (where & SSL_CB_ALERT) {
+        debug(D_WEB_CLIENT,"SSL INFO CALLBACK %s %s", SSL_alert_type_string(ret), SSL_alert_desc_string_long(ret));
+    }
+}
+
+/**
+ * OpenSSL Library
+ *
+ * Starts the openssl library for the Netdata.
+ */
+void security_openssl_library()
+{
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+# if (SSLEAY_VERSION_NUMBER >= 0x0907000L)
+    OPENSSL_config(NULL);
+# endif
+
+# if OPENSSL_API_COMPAT < 0x10100000L
+    SSL_load_error_strings();
+# endif
+
+    SSL_library_init();
+#else
+    if (OPENSSL_init_ssl(OPENSSL_INIT_LOAD_CONFIG, NULL) != 1) {
+        error("SSL library cannot be initialized.");
+    }
+#endif
+}
+
+/**
+ * OpenSSL common options
+ *
+ * Clients and SERVER have common options, this function is responsible to set them in the context.
+ *
+ * @param ctx the context that receives the protocol, mode and cipher settings
+ */
+void security_openssl_common_options(SSL_CTX *ctx) {
+#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+    // never modified, so keep it const
+    static const char *ciphers = "ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!aECDH:!EDH-DSS-DES-CBC3-SHA:!EDH-RSA-DES-CBC3-SHA:!KRB5-DES-CBC3-SHA";
+#endif
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+    SSL_CTX_set_options (ctx,SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|SSL_OP_NO_COMPRESSION);
+#else
+    SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION);
+    //We are avoiding the TLS v1.3 for while, because Google Chrome
+    //is giving the message net::ERR_SSL_VERSION_INTERFERENCE with it.
+    SSL_CTX_set_max_proto_version(ctx, TLS1_2_VERSION);
+#endif
+    SSL_CTX_set_mode(ctx, SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
+
+#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+    if (!SSL_CTX_set_cipher_list(ctx, ciphers)) {
+        error("SSL error. cannot set the cipher list");
+    }
+#endif
+}
+
+/**
+ * Initialize Openssl Client
+ *
+ * Starts the client context with TLS 1.2.
+ *
+ * @return It returns the context on success or NULL otherwise
+ */
+static SSL_CTX * security_initialize_openssl_client() {
+    SSL_CTX *ctx;
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+    ctx = SSL_CTX_new(SSLv23_client_method());
+#else
+    ctx = SSL_CTX_new(TLS_client_method());
+#endif
+    if(ctx) {
+        security_openssl_common_options(ctx);
+    }
+
+    return ctx;
+}
+
+/**
+ * Initialize OpenSSL server
+ *
+ * Starts the server context with TLS 1.2 and load the certificate.
+ *
+ * The certificate (security_cert) and the private key (security_key) are loaded from
+ * their PEM files; a failure in either step is logged with the OpenSSL reason and the
+ * context is released.
+ *
+ * @return It returns the context on success or NULL otherwise
+ */
+static SSL_CTX * security_initialize_openssl_server() {
+    SSL_CTX *ctx;
+    char lerror[512];
+    int loaded;
+    static int netdata_id_context = 1;
+
+    //TO DO: Confirm the necessity to check return for other OPENSSL function
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+    ctx = SSL_CTX_new(SSLv23_server_method());
+    if (!ctx) {
+        error("Cannot create a new SSL context, netdata won't encrypt communication");
+        return NULL;
+    }
+
+    loaded = SSL_CTX_use_certificate_file(ctx, security_cert, SSL_FILETYPE_PEM);
+#else
+    ctx = SSL_CTX_new(TLS_server_method());
+    if (!ctx) {
+        error("Cannot create a new SSL context, netdata won't encrypt communication");
+        return NULL;
+    }
+
+    loaded = SSL_CTX_use_certificate_chain_file(ctx, security_cert);
+#endif
+    // Report a broken certificate here instead of letting it surface later
+    // as a confusing private key mismatch.
+    if (loaded != 1) {
+        ERR_error_string_n(ERR_get_error(),lerror,sizeof(lerror));
+        error("SSL cannot load the certificate: %s",lerror);
+        SSL_CTX_free(ctx);
+        return NULL;
+    }
+
+    security_openssl_common_options(ctx);
+
+    if (SSL_CTX_use_PrivateKey_file(ctx,security_key,SSL_FILETYPE_PEM) != 1) {
+        ERR_error_string_n(ERR_get_error(),lerror,sizeof(lerror));
+        error("SSL cannot load the private key: %s",lerror);
+        SSL_CTX_free(ctx);
+        return NULL;
+    }
+
+    if (!SSL_CTX_check_private_key(ctx)) {
+        ERR_error_string_n(ERR_get_error(),lerror,sizeof(lerror));
+        error("SSL cannot check the private key: %s",lerror);
+        SSL_CTX_free(ctx);
+        return NULL;
+    }
+
+    SSL_CTX_set_session_id_context(ctx,(void*)&netdata_id_context,(unsigned int)sizeof(netdata_id_context));
+    SSL_CTX_set_info_callback(ctx,security_info_callback);
+
+#if (OPENSSL_VERSION_NUMBER < 0x00905100L)
+    SSL_CTX_set_verify_depth(ctx,1);
+#endif
+    debug(D_WEB_CLIENT,"SSL GLOBAL CONTEXT STARTED\n");
+
+    return ctx;
+}
+
+/**
+ * Start SSL
+ *
+ * Call the correct function to start the SSL context.
+ *
+ * @param selector informs the context that must be initialized, the following list has the valid values:
+ *      NETDATA_SSL_CONTEXT_SERVER - the server context
+ *      NETDATA_SSL_CONTEXT_STREAMING - Starts the streaming context.
+ *      NETDATA_SSL_CONTEXT_OPENTSDB - Starts the OpenTSDB context
+ */
+void security_start_ssl(int selector) {
+    switch (selector) {
+        case NETDATA_SSL_CONTEXT_SERVER: {
+            struct stat statbuf;
+            // Both paths start out as NULL; never hand a NULL pointer to stat().
+            if (!security_key || !security_cert || stat(security_key,&statbuf) || stat(security_cert,&statbuf)) {
+                info("To use encryption it is necessary to set \"ssl certificate\" and \"ssl key\" in [web] !\n");
+                return;
+            }
+
+            netdata_srv_ctx = security_initialize_openssl_server();
+            break;
+        }
+        case NETDATA_SSL_CONTEXT_STREAMING: {
+            netdata_client_ctx = security_initialize_openssl_client();
+            break;
+        }
+        case NETDATA_SSL_CONTEXT_OPENTSDB: {
+            netdata_opentsdb_ctx = security_initialize_openssl_client();
+            break;
+        }
+    }
+}
+
+/**
+ * Clean OpenSSL
+ *
+ * Releases every SSL context that was started and, on OpenSSL older than 1.1.0,
+ * the error strings. The global pointers are reset so they cannot be reused after the free.
+ */
+void security_clean_openssl() {
+    if (netdata_srv_ctx)
+    {
+        SSL_CTX_free(netdata_srv_ctx);
+        netdata_srv_ctx = NULL;
+    }
+
+    if (netdata_client_ctx)
+    {
+        SSL_CTX_free(netdata_client_ctx);
+        netdata_client_ctx = NULL;
+    }
+
+    if ( netdata_opentsdb_ctx )
+    {
+        SSL_CTX_free(netdata_opentsdb_ctx);
+        netdata_opentsdb_ctx = NULL;
+    }
+
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+    ERR_free_strings();
+#endif
+}
+
+/**
+ * Process accept
+ *
+ * Runs the server side of the SSL handshake on an accepted connection.
+ *
+ * @param ssl the SSL connection bound to the client socket
+ * @param msg value compared against 0x17 before the handshake is attempted
+ *            (NOTE(review): presumably the first byte received from the client; 0x17 is the
+ *            highest TLS record content type - confirm against the caller)
+ *
+ * @return NETDATA_SSL_HANDSHAKE_COMPLETE when SSL_accept() succeeded, NETDATA_SSL_WANT_READ or
+ *         NETDATA_SSL_WANT_WRITE when the handshake must be retried, and NETDATA_SSL_NO_HANDSHAKE otherwise.
+ */
+int security_process_accept(SSL *ssl,int msg) {
+    int sock = SSL_get_fd(ssl);
+    int test;
+    if (msg > 0x17)
+    {
+        return NETDATA_SSL_NO_HANDSHAKE;
+    }
+
+    ERR_clear_error();
+    if ((test = SSL_accept(ssl)) <= 0) {
+        int sslerrno = SSL_get_error(ssl, test);
+        switch(sslerrno) {
+            case SSL_ERROR_WANT_READ:
+            {
+                error("SSL handshake did not finish and it wanna read on socket %d!", sock);
+                return NETDATA_SSL_WANT_READ;
+            }
+            case SSL_ERROR_WANT_WRITE:
+            {
+                error("SSL handshake did not finish and it wanna write on socket %d!", sock);
+                return NETDATA_SSL_WANT_WRITE;
+            }
+            case SSL_ERROR_NONE:
+            case SSL_ERROR_SSL:
+            case SSL_ERROR_SYSCALL:
+            default:
+            {
+                unsigned long err;
+                char buf[256];
+                int counter = 0;
+                // Drain the error queue and log the text of each queued error;
+                // SSL_get_error() codes are not error queue codes and cannot be decoded here.
+                while ((err = ERR_get_error()) != 0) {
+                    ERR_error_string_n(err, buf, sizeof(buf));
+                    info("%d SSL Handshake error (%s) on socket %d ", counter++, buf, sock);
+                }
+                return NETDATA_SSL_NO_HANDSHAKE;
+            }
+        }
+    }
+
+    if (SSL_is_init_finished(ssl))
+    {
+        debug(D_WEB_CLIENT_ACCESS,"SSL Handshake finished %s errno %d on socket fd %d", ERR_error_string((long)SSL_get_error(ssl, test), NULL), errno, sock);
+    }
+
+    return NETDATA_SSL_HANDSHAKE_COMPLETE;
+}
+
+/**
+ * Test certificate
+ *
+ * Checks that the peer presented a certificate and that its verification succeeded.
+ *
+ * @param ssl the SSL connection to inspect
+ *
+ * @return 0 when a certificate is present and the verify result is X509_V_OK, -1 otherwise.
+ */
+int security_test_certificate(SSL *ssl) {
+    X509* cert = SSL_get_peer_certificate(ssl);
+    int ret;
+    long status;
+    if (!cert) {
+        return -1;
+    }
+    // Only the presence of the certificate matters here; drop the reference
+    // SSL_get_peer_certificate() took so it is not leaked on every call.
+    X509_free(cert);
+
+    status = SSL_get_verify_result(ssl);
+    if((X509_V_OK != status))
+    {
+        char lerror[512];
+        ERR_error_string_n(ERR_get_error(), lerror, sizeof(lerror));
+        error("SSL RFC4158 check: We have a invalid certificate, the tests result with %ld and message %s", status, lerror);
+        ret = -1;
+    } else {
+        ret = 0;
+    }
+    return ret;
+}
+
+#endif
diff --git a/libnetdata/socket/security.h b/libnetdata/socket/security.h
new file mode 100644
index 000000000..8beb9672f
--- /dev/null
+++ b/libnetdata/socket/security.h
@@ -0,0 +1,47 @@
+#ifndef NETDATA_SECURITY_H
+# define NETDATA_SECURITY_H
+
+# define NETDATA_SSL_HANDSHAKE_COMPLETE 0      //All the steps were successful
+# define NETDATA_SSL_START 1                   //Starting handshake, conn variable is NULL
+# define NETDATA_SSL_WANT_READ 2               //The connection wanna read from socket
+# define NETDATA_SSL_WANT_WRITE 4              //The connection wanna write on socket
+# define NETDATA_SSL_NO_HANDSHAKE 8            //Continue without encrypt connection.
+# define NETDATA_SSL_OPTIONAL 16               //Flag to define the HTTP request
+# define NETDATA_SSL_FORCE 32                  //We only accept HTTPS requests
+# define NETDATA_SSL_INVALID_CERTIFICATE 64    //Accepts invalid certificate
+# define NETDATA_SSL_VALID_CERTIFICATE 128     //Accepts only a valid certificate
+
+#define NETDATA_SSL_CONTEXT_SERVER 0
+#define NETDATA_SSL_CONTEXT_STREAMING 1
+#define NETDATA_SSL_CONTEXT_OPENTSDB 2
+
+# ifdef ENABLE_HTTPS
+
+#  include <openssl/ssl.h>
+#  include <openssl/err.h>
+#  if (SSLEAY_VERSION_NUMBER >= 0x0907000L) && (OPENSSL_VERSION_NUMBER < 0x10100000L)
+#   include <openssl/conf.h>
+#  endif
+
+struct netdata_ssl{
+    SSL *conn; //SSL connection
+    int flags;
+};
+
+extern SSL_CTX *netdata_opentsdb_ctx;
+extern SSL_CTX *netdata_client_ctx;
+extern SSL_CTX *netdata_srv_ctx;
+extern const char *security_key;
+extern const char *security_cert;
+extern int netdata_use_ssl_on_stream;
+extern int netdata_use_ssl_on_http;
+extern int netdata_validate_server;
+
+void security_openssl_library();
+void security_clean_openssl();
+void security_start_ssl(int selector);
+int security_process_accept(SSL *ssl,int msg);
+int security_test_certificate(SSL *ssl);
+
+# endif //ENABLE_HTTPS
+#endif //NETDATA_SECURITY_H
diff --git a/libnetdata/socket/socket.c b/libnetdata/socket/socket.c
index bf9c60ea1..282710081 100644
--- a/libnetdata/socket/socket.c
+++ b/libnetdata/socket/socket.c
@@ -301,14 +301,39 @@ void listen_sockets_close(LISTEN_SOCKETS *sockets) {
     sockets->failed = 0;
 }
 
+WEB_CLIENT_ACL socket_ssl_acl(char *ssl) { // maps an SSL mode name to an ACL mask and updates the global SSL policy
+#ifdef ENABLE_HTTPS
+    if (!strcmp(ssl,"optional")) {
+        netdata_use_ssl_on_http = NETDATA_SSL_OPTIONAL;
+        return WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_REGISTRY | WEB_CLIENT_ACL_BADGE | WEB_CLIENT_ACL_MGMT | WEB_CLIENT_ACL_NETDATACONF | WEB_CLIENT_ACL_STREAMING;
+    }
+    else if (!strcmp(ssl,"force")) {
+        netdata_use_ssl_on_stream = NETDATA_SSL_FORCE; // NOTE(review): "optional" changes the http flag, "force" the stream flag - confirm this asymmetry is intended
+        return WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_REGISTRY | WEB_CLIENT_ACL_BADGE | WEB_CLIENT_ACL_MGMT | WEB_CLIENT_ACL_NETDATACONF | WEB_CLIENT_ACL_STREAMING;
+    }
+#endif
+
+    return WEB_CLIENT_ACL_NONE; // unknown mode name, or built without ENABLE_HTTPS
+}
+
 WEB_CLIENT_ACL read_acl(char *st) {
+    char *ssl = strchr(st,'^'); // text after '^' selects the SSL mode; a leading "SSL=" is skipped
+    if (ssl) {
+        ssl++;
+        if (!strncmp("SSL=",ssl,4)) {
+            ssl += 4;
+        }
+        socket_ssl_acl(ssl); // called only for its effect on the global SSL policy, the returned mask is ignored
+    }
+
     if (!strcmp(st,"dashboard")) return WEB_CLIENT_ACL_DASHBOARD;
     if (!strcmp(st,"registry")) return WEB_CLIENT_ACL_REGISTRY;
     if (!strcmp(st,"badges")) return WEB_CLIENT_ACL_BADGE;
     if (!strcmp(st,"management")) return WEB_CLIENT_ACL_MGMT;
     if (!strcmp(st,"streaming")) return WEB_CLIENT_ACL_STREAMING;
     if (!strcmp(st,"netdata.conf")) return WEB_CLIENT_ACL_NETDATACONF;
-    return WEB_CLIENT_ACL_NONE;
+
+    return socket_ssl_acl(st);
 }
 
 static inline int bind_to_this(LISTEN_SOCKETS *sockets, const char *definition, uint16_t default_port, int listen_backlog) {
@@ -794,11 +819,15 @@ int connect_to_one_of(const char *destination, int default_port, struct timeval
     while(*s) {
         const char *e = s;
 
+        // skip path, moving both s(tart) and e(nd); stop at the terminating NUL so a trailing path cannot run past the string
+        if(*e == '/')
+            while(*e && !isspace(*e) && *e != ',') s = ++e;
+
         // skip separators, moving both s(tart) and e(nd)
         while(isspace(*e) || *e == ',') s = ++e;
 
         // move e(nd) to the first separator
-        while(*e && !isspace(*e) && *e != ',') e++;
+        while(*e && !isspace(*e) && *e != ',' && *e != '/') e++;
 
         // is there anything?
if(!*s || s == e) break; @@ -824,7 +853,12 @@ int connect_to_one_of(const char *destination, int default_port, struct timeval // -------------------------------------------------------------------------------------------------------------------- // helpers to send/receive data in one call, in blocking mode, with a timeout +#ifdef ENABLE_HTTPS +ssize_t recv_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout) { +#else ssize_t recv_timeout(int sockfd, void *buf, size_t len, int flags, int timeout) { +#endif + for(;;) { struct pollfd fd = { .fd = sockfd, @@ -852,10 +886,22 @@ ssize_t recv_timeout(int sockfd, void *buf, size_t len, int flags, int timeout) if(fd.events & POLLIN) break; } +#ifdef ENABLE_HTTPS + if (ssl->conn) { + if (!ssl->flags) { + return SSL_read(ssl->conn,buf,len); + } + } +#endif return recv(sockfd, buf, len, flags); } +#ifdef ENABLE_HTTPS +ssize_t send_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout) { +#else ssize_t send_timeout(int sockfd, void *buf, size_t len, int flags, int timeout) { +#endif + for(;;) { struct pollfd fd = { .fd = sockfd, @@ -883,6 +929,13 @@ ssize_t send_timeout(int sockfd, void *buf, size_t len, int flags, int timeout) if(fd.events & POLLOUT) break; } +#ifdef ENABLE_HTTPS + if(ssl->conn) { + if (!ssl->flags) { + return SSL_write(ssl->conn, buf, len); + } + } +#endif return send(sockfd, buf, len, flags); } @@ -1291,6 +1344,8 @@ static void poll_events_process(POLLJOB *p, POLLINFO *pi, struct pollfd *pf, sho do { char client_ip[NI_MAXHOST + 1]; char client_port[NI_MAXSERV + 1]; + client_ip[0] = 0x00; + client_port[0] = 0x00; debug(D_POLLFD, "POLLFD: LISTENER: calling accept4() slot %zu (fd %d)", i, fd); nfd = accept_socket(fd, SOCK_NONBLOCK, client_ip, NI_MAXHOST + 1, client_port, NI_MAXSERV + 1, p->access_list); diff --git a/libnetdata/socket/socket.h b/libnetdata/socket/socket.h index c69d4897f..9ea83bcc0 100644 --- a/libnetdata/socket/socket.h 
+++ b/libnetdata/socket/socket.h @@ -51,8 +51,13 @@ extern void listen_sockets_close(LISTEN_SOCKETS *sockets); extern int connect_to_this(const char *definition, int default_port, struct timeval *timeout); extern int connect_to_one_of(const char *destination, int default_port, struct timeval *timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size); +#ifdef ENABLE_HTTPS +extern ssize_t recv_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout); +extern ssize_t send_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout); +#else extern ssize_t recv_timeout(int sockfd, void *buf, size_t len, int flags, int timeout); extern ssize_t send_timeout(int sockfd, void *buf, size_t len, int flags, int timeout); +#endif extern int sock_setnonblock(int fd); extern int sock_delnonblock(int fd); diff --git a/netdata-installer.sh b/netdata-installer.sh index 4ed4050fd..a0c3f828a 100755 --- a/netdata-installer.sh +++ b/netdata-installer.sh @@ -44,15 +44,21 @@ else source "${NETDATA_SOURCE_DIR}/packaging/installer/functions.sh" || exit 1 fi -download() { +download_go() { url="${1}" dest="${2}" + if command -v curl >/dev/null 2>&1; then - run curl -sSL --connect-timeout 10 --retry 3 "${url}" >"${dest}" || fatal "Cannot download ${url}" + run curl -sSL --connect-timeout 10 --retry 3 "${url}" > "${dest}" elif command -v wget >/dev/null 2>&1; then - run wget -T 15 -O - "${url}" >"${dest}" || fatal "Cannot download ${url}" + run wget -T 15 -O - "${url}" > "${dest}" else - fatal "I need curl or wget to proceed, but neither is available on this system." + echo >&2 + echo >&2 "Downloading go.d plugin from '${url}' failed because of missing mandatory packages." + echo >&2 "Either add packages or disable it by issuing '--disable-go' in the installer" + echo >&2 + + run_failed "I need curl or wget to proceed, but neither is available on this system." 
fi } @@ -159,6 +165,9 @@ USAGE: ${PROGRAM} [options] --enable-backend-kinesis Enable AWS Kinesis backend. Default: enable it when libaws_cpp_sdk_kinesis and libraries it depends on are available. --disable-backend-kinesis + --enable-backend-prometheus-remote-write Enable Prometheus remote write backend. Default: enable it when libprotobuf and + libsnappy are available. + --disable-backend-prometheus-remote-write --enable-lto Enable Link-Time-Optimization. Default: enabled --disable-lto --disable-x86-sse Disable SSE instructions. By default SSE optimizations are enabled. @@ -204,8 +213,10 @@ while [ -n "${1}" ]; do "--disable-plugin-nfacct") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--disable-plugin-nfacct/} --disable-plugin-nfacct";; "--enable-plugin-xenstat") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--enable-plugin-xenstat/} --enable-plugin-xenstat";; "--disable-plugin-xenstat") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--disable-plugin-xenstat/} --disable-plugin-xenstat";; - "--enable-backend-kinesis") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--enable-backend-kinesis/} --enable-backend-kinesis";; - "--disable-backend-kinesis") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--disable-backend-kinesis/} --disable-backend-kinesis";; + "--enable-backend-kinesis") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--enable-backend-kinesis/} --enable-backend-kinesis";; + "--disable-backend-kinesis") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--disable-backend-kinesis/} --disable-backend-kinesis";; + "--enable-backend-prometheus-remote-write") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--enable-backend-prometheus-remote-write/} --enable-backend-prometheus-remote-write";; + "--disable-backend-prometheus-remote-write") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--disable-backend-prometheus-remote-write/} --disable-backend-prometheus-remote-write";; "--enable-lto") 
NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--enable-lto/} --enable-lto";; "--disable-lto") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--disable-lto/} --disable-lto";; "--disable-x86-sse") NETDATA_CONFIGURE_OPTIONS="${NETDATA_CONFIGURE_OPTIONS//--disable-x86-sse/} --disable-x86-sse";; @@ -383,6 +394,7 @@ run ./configure \ --prefix="${NETDATA_PREFIX}/usr" \ --sysconfdir="${NETDATA_PREFIX}/etc" \ --localstatedir="${NETDATA_PREFIX}/var" \ + --libexecdir="${NETDATA_PREFIX}/usr/libexec" \ --with-zlib \ --with-math \ --with-user=netdata \ @@ -540,6 +552,7 @@ progress "Install logrotate configuration for netdata" install_netdata_logrotate + # ----------------------------------------------------------------------------- progress "Read installation options from netdata.conf" @@ -628,7 +641,7 @@ fi # --- conf dir ---- -for x in "python.d" "charts.d" "node.d" "health.d" "statsd.d" "go.d"; do +for x in "python.d" "charts.d" "node.d" "health.d" "statsd.d" "go.d" "custom-plugins.d" "ssl"; do if [ ! 
-d "${NETDATA_USER_CONFIG_DIR}/${x}" ]; then echo >&2 "Creating directory '${NETDATA_USER_CONFIG_DIR}/${x}'" run mkdir -p "${NETDATA_USER_CONFIG_DIR}/${x}" || exit 1 @@ -723,15 +736,20 @@ if [ "${UID}" -eq 0 ]; then run chmod 4750 "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/nfacct.plugin" fi - if [ -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/xenstat.plugin" ]; then - run chown root:${NETDATA_GROUP} "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/xenstat.plugin" - run chmod 4750 "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/xenstat.plugin" - fi + if [ -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/xenstat.plugin" ]; then + run chown root:${NETDATA_GROUP} "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/xenstat.plugin" + run chmod 4750 "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/xenstat.plugin" + fi - if [ -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ioping" ]; then - run chown root:${NETDATA_GROUP} "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ioping" - run chmod 4750 "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ioping" - fi + if [ -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/perf.plugin" ]; then + run chown root:${NETDATA_GROUP} "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/perf.plugin" + run chmod 4750 "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/perf.plugin" + fi + + if [ -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ioping" ]; then + run chown root:${NETDATA_GROUP} "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ioping" + run chmod 4750 "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ioping" + fi if [ -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/cgroup-network" ]; then run chown "root:${NETDATA_GROUP}" "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/cgroup-network" @@ -755,7 +773,7 @@ fi install_go() { # When updating this value, ensure correct checksums in packaging/go.d.checksums - GO_PACKAGE_VERSION="v0.5.0" + GO_PACKAGE_VERSION="v0.7.0" ARCH_MAP=( 'i386::386' 'i686::386' @@ -775,24 
+793,37 @@ install_go() { for index in "${ARCH_MAP[@]}" ; do KEY="${index%%::*}" VALUE="${index##*::}" - if [ "$KEY" == "$ARCH" ]; then + if [ "$KEY" = "$ARCH" ]; then ARCH="${VALUE}" break fi done tmp=$(mktemp -d /tmp/netdata-go-XXXXXX) - GO_PACKAGE_BASENAME="go.d.plugin-$GO_PACKAGE_VERSION.$OS-$ARCH" + GO_PACKAGE_BASENAME="go.d.plugin-${GO_PACKAGE_VERSION}.${OS}-${ARCH}" + + download_go "https://github.com/netdata/go.d.plugin/releases/download/${GO_PACKAGE_VERSION}/${GO_PACKAGE_BASENAME}" "${tmp}/${GO_PACKAGE_BASENAME}" - download "https://github.com/netdata/go.d.plugin/releases/download/$GO_PACKAGE_VERSION/$GO_PACKAGE_BASENAME" "${tmp}/$GO_PACKAGE_BASENAME" + download_go "https://github.com/netdata/go.d.plugin/releases/download/${GO_PACKAGE_VERSION}/config.tar.gz" "${tmp}/config.tar.gz" + + if [ ! -f "${tmp}/${GO_PACKAGE_BASENAME}" ] || [ ! -f "${tmp}/config.tar.gz" ] || [ ! -s "${tmp}/config.tar.gz" ] || [ ! -s "${tmp}/${GO_PACKAGE_BASENAME}" ]; then + run_failed "go.d plugin download failed, go.d plugin will not be available" + echo >&2 "Either check the error or consider disabling it by issuing '--disable-go' in the installer" + echo >&2 + return 0 + fi - download "https://github.com/netdata/go.d.plugin/releases/download/$GO_PACKAGE_VERSION/config.tar.gz" "${tmp}/config.tar.gz" grep "${GO_PACKAGE_BASENAME}\$" "${INSTALLER_DIR}/packaging/go.d.checksums" > "${tmp}/sha256sums.txt" 2>/dev/null grep "config.tar.gz" "${INSTALLER_DIR}/packaging/go.d.checksums" >> "${tmp}/sha256sums.txt" 2>/dev/null # Checksum validation if ! (cd "${tmp}" && safe_sha256sum -c "sha256sums.txt"); then + + echo >&2 "go.d plugin checksum validation failure." + echo >&2 "Either check the error or consider disabling it by issuing '--disable-go' in the installer" + echo >&2 + run_failed "go.d.plugin package files checksum validation failed." 
- return 1 + return 0 fi # Install new files @@ -811,6 +842,16 @@ install_go() { } install_go +# ----------------------------------------------------------------------------- +progress "Telemetry configuration" + +# Opt-out from telemetry program +if [ -n "${NETDATA_DISABLE_TELEMETRY+x}" ]; then + run touch "${NETDATA_USER_CONFIG_DIR}/.opt-out-from-anonymous-statistics" +else + printf "You can opt out from anonymous statistics via the --disable-telemetry option, or by creating an empty file ${NETDATA_USER_CONFIG_DIR}/.opt-out-from-anonymous-statistics \n\n" +fi + # ----------------------------------------------------------------------------- progress "Install netdata at system init" @@ -818,6 +859,7 @@ NETDATA_START_CMD="${NETDATA_PREFIX}/usr/sbin/netdata" if grep -q docker /proc/1/cgroup >/dev/null 2>&1; then echo >&2 "We are running within a docker container, will not be installing netdata service" + echo >&2 else install_netdata_service || run_failed "Cannot install netdata init service." fi @@ -839,7 +881,7 @@ else create_netdata_conf "${NETDATA_PREFIX}/etc/netdata/netdata.conf" "http://localhost:${NETDATA_PORT}/netdata.conf" fi if [ "${UID}" -eq 0 ]; then - run chown "${NETDATA_USER}" "${NETDATA_PREFIX}/etc/netdata/netdata.conf" + run chown "${NETDATA_USER}" "${NETDATA_PREFIX}/etc/netdata/netdata.conf" fi run chmod 0664 "${NETDATA_PREFIX}/etc/netdata/netdata.conf" @@ -1035,10 +1077,6 @@ RELEASE_CHANNEL="${RELEASE_CHANNEL}" NETDATA_TARBALL_CHECKSUM="new_installation" EOF -# Opt-out from telemetry program -if [ -n "${NETDATA_DISABLE_TELEMETRY+x}" ]; then - touch "${NETDATA_USER_CONFIG_DIR}/.opt-out-from-anonymous-statistics" -fi # ----------------------------------------------------------------------------- echo >&2 diff --git a/netdata.spec.in b/netdata.spec.in index e201f4a19..25b5f9a46 100644 --- a/netdata.spec.in +++ b/netdata.spec.in @@ -6,11 +6,15 @@ # error. 
%global __os_install_post %{nil} +# Mitigate the cross-distro mayhem by strictly defining the libexec destination +%define _prefix /usr +%define _sysconfdir /etc +%define _localstatedir /var +%define _libexecdir /usr/libexec + # # Conditional build: %bcond_without systemd # systemd -%bcond_with nfacct # build with nfacct plugin -%bcond_with freeipmi # build with freeipmi plugin %bcond_with netns # build with netns support (cgroup-network) %if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1140 @@ -27,8 +31,12 @@ BuildRequires: systemd-rpm-macros \ %global netdata_initd_requires \ %{?systemd_requires} \ %{nil} -%global netdata_init_post %service_add_post netdata.service -%global netdata_init_preun %service_del_preun netdata.service +%global netdata_init_post %service_add_post netdata.service \ +/sbin/service netdata restart > /dev/null 2>&1 \ +%{nil} +%global netdata_init_preun %service_del_preun netdata.service \ +/sbin/service netdata stop > /dev/null 2>&1 \ +%{nil} %global netdata_init_postun %service_del_postun netdata.service %else %global netdata_initd_buildrequires \ @@ -38,7 +46,11 @@ Requires(preun): systemd-units \ Requires(postun): systemd-units \ Requires(post): systemd-units \ %{nil} -%global netdata_init_post %systemd_post netdata.service +%global netdata_init_post %systemd_post netdata.service \ +/usr/bin/systemctl enable netdata.service \ +/usr/bin/systemctl daemon-reload \ +/usr/bin/systemctl restart netdata.service \ +%{nil} %global netdata_init_preun %systemd_preun netdata.service %global netdata_init_postun %systemd_postun_with_restart netdata.service %endif @@ -49,6 +61,7 @@ Requires(post): chkconfig \ %{nil} %global netdata_init_post \ /sbin/chkconfig --add netdata \ +/sbin/service netdata restart > /dev/null 2>&1 \ %{nil} %global netdata_init_preun %{nil} \ if [ $1 = 0 ]; then \ @@ -63,23 +76,7 @@ fi \ %{nil} %endif -%if 0%{?_fedora} -%global netdata_recommends \ -Recommends: curl \ -Recommends: iproute-tc \ -Recommends: lm_sensors \ 
-Recommends: nmap-ncat \ -Recommends: nodejs \ -Recommends: python \ -Recommends: PyYAML \ -Recommends: python2-PyMySQL \ -Recommends: python2-psycopg2 \ -%{nil} -%else -%global netdata_recommends %{nil} -%endif - -Summary: Real-time performance monitoring, done right +Summary: Real-time performance monitoring, done right! Name: netdata Version: @PACKAGE_VERSION@ Release: 1%{?dist} @@ -87,43 +84,122 @@ License: GPLv3+ Group: Applications/System Source0: https://github.com/netdata/%{name}/releases/download/%{version}/%{name}-%{version}.tar.gz URL: http://my-netdata.io -BuildRequires: pkgconfig -BuildRequires: xz -BuildRequires: zlib-devel -BuildRequires: libuuid-devel -BuildRequires: autoconf -BuildRequires: automake -Requires: zlib -Requires: libuuid - -# Packages can be found in the EPEL repo -%if %{with nfacct} -BuildRequires: libmnl-devel -BuildRequires: libnetfilter_acct-devel -Requires: libmnl -Requires: libnetfilter_acct + +# ##################################################################### +# Core build/install/runtime dependencies +# ##################################################################### + +# Build dependencies +# +BuildRequires: gcc +BuildRequires: make +BuildRequires: git +BuildRequires: autoconf +%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1140 +BuildRequires: autoconf-archive +BuildRequires: autogen +%endif +BuildRequires: automake +BuildRequires: pkgconfig +BuildRequires: curl +BuildRequires: findutils +BuildRequires: zlib-devel +BuildRequires: libuuid-devel +BuildRequires: libuv-devel >= 1 +BuildRequires: openssl-devel +%if 0%{?suse_version} +BuildRequires: judy-devel +BuildRequires: liblz4-devel +BuildRequires: netcat-openbsd +BuildRequires: json-glib-devel +%else +BuildRequires: Judy-devel +BuildRequires: lz4-devel +BuildRequires: nc +BuildRequires: json-c-devel %endif -%if %{with freeipmi} -BuildRequires: freeipmi-devel -Requires: freeipmi +# Core build requirements for service install 
+%{netdata_initd_buildrequires} + +# Runtime dependencies +# +Requires: python +Requires: zlib +%if 0%{?suse_version} +# for libuv, Requires version >= 1 +Requires: libuv1 +Requires: libJudy1 +Requires: json-glib +Requires: libuuid1 +%else +# for libuv, Requires version >= 1 +Requires: libuv >= 1 +Requires: Judy +Requires: json-c +Requires: libuuid %endif +Requires: openssl +Requires: lz4 +# Core requirements for the install to succeed Requires(pre): /usr/sbin/groupadd Requires(pre): /usr/sbin/useradd +%if 0%{?suse_version} >= 1140 +Requires(post): libcap1 +%else Requires(post): libcap +%endif -%{netdata_initd_buildrequires} -%{netdata_recommends} %{netdata_initd_requires} +# ##################################################################### +# Functionality-dependent package dependencies +# ##################################################################### +# Note: Some or all of the Packages may be found in the EPEL repo, +# rather than the standard ones + +# nfacct plugin dependencies +BuildRequires: libmnl-devel +%if 0%{?fedora} || 0%{?suse_version} >= 1140 +BuildRequires: libnetfilter_acct-devel +%endif + +%if 0%{?suse_version} +Requires: libmnl0 +%else +Requires: libmnl +%endif + +%if 0%{?fedora} +Requires: libnetfilter_acct +%else +%if 0%{?suse_version} >= 1140 +Requires: libnetfilter_acct1 +%endif +%endif +# end nfacct plugin dependencies + +# freeipmi plugin dependencies +BuildRequires: freeipmi-devel +Requires: freeipmi +# end - freeipmi plugin dependencies + +# CUPS plugin dependencies +BuildRequires: cups-devel +Requires: cups +# end - cups plugin dependencies + +# ##################################################################### +# End of dependency management configuration +# ##################################################################### + %description -netdata is the fastest way to visualize metrics. It is a resource + netdata is the fastest way to visualize metrics. 
It is a resource efficient, highly optimized system for collecting and visualizing any type of realtime timeseries data, from CPU usage, disk activity, SQL queries, API calls, web site visitors, etc. - -netdata tries to visualize the truth of now, in its greatest detail, + netdata tries to visualize the truth of now, in its greatest detail, so that you can get insights of what is happening now and what just happened, on your systems and applications. @@ -131,25 +207,57 @@ happened, on your systems and applications. %setup -q -n %{name}-%{version} %build -autoreconf -i +# Conf step +autoreconf -ivf %configure \ + --prefix="%{_prefix}" \ + --sysconfdir="%{_sysconfdir}" \ + --localstatedir="%{_localstatedir}" \ + --libexecdir="%{_libexecdir}" \ --with-zlib \ --with-math \ - %{?with_nfacct:--enable-plugin-nfacct} \ - %{?with_freeipmi:--enable-plugin-freeipmi} \ - --with-user=netdata + --with-user=netdata \ + +# Build step %{__make} %{?_smp_mflags} %install + +# ########################################################### +# Clear the directory, if already exists and install rm -rf "${RPM_BUILD_ROOT}" %{__make} %{?_smp_mflags} DESTDIR="${RPM_BUILD_ROOT}" install find "${RPM_BUILD_ROOT}" -name .keep -delete install -m 644 -p system/netdata.conf "${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}" + +# ########################################################### +# logrotate settings install -m 755 -d "${RPM_BUILD_ROOT}%{_sysconfdir}/logrotate.d" install -m 644 -p system/netdata.logrotate "${RPM_BUILD_ROOT}%{_sysconfdir}/logrotate.d/%{name}" +# ########################################################### +# Install freeipmi +install -m 4750 -p freeipmi.plugin "${RPM_BUILD_ROOT}%{_libexecdir}/%{name}/plugins.d/freeipmi.plugin" + +# ########################################################### +# Install apps.plugin +install -m 4750 -p apps.plugin "${RPM_BUILD_ROOT}%{_libexecdir}/%{name}/plugins.d/apps.plugin" + +# ########################################################### +# Install 
perf.plugin +install -m 4750 -p perf.plugin "${RPM_BUILD_ROOT}%{_libexecdir}/%{name}/plugins.d/perf.plugin" + +# ########################################################### +# Install registry directory +install -m 755 -d "${RPM_BUILD_ROOT}%{_localstatedir}/lib/%{name}/registry" +install -m 755 -d "${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/custom-plugins.d" +install -m 755 -d "${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/go.d" +install -m 755 -d "${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/ssl" + +# ########################################################### +# Install netdata service %if %{with systemd} install -m 755 -d "${RPM_BUILD_ROOT}%{_unitdir}" install -m 644 -p system/netdata.service "${RPM_BUILD_ROOT}%{_unitdir}/netdata.service" @@ -160,7 +268,100 @@ install -m 755 system/netdata-init-d \ "${RPM_BUILD_ROOT}/etc/rc.d/init.d/netdata" %endif +# ############################################################ +# Package Go within netdata (TBD: Package it separately) +safe_sha256sum() { + # Within the context of the installer, we only use the -c option, which is common between the two commands + # We will have to reconsider if we start using non-common options + if command -v sha256sum >/dev/null 2>&1; then + sha256sum $@ + elif command -v shasum >/dev/null 2>&1; then + shasum -a 256 $@ + else + fatal "I could not find a suitable checksum binary to use" + fi +} + +download_go() { + url="${1}" + dest="${2}" + + if command -v curl >/dev/null 2>&1; then + curl -sSL --connect-timeout 10 --retry 3 "${url}" > "${dest}" + elif command -v wget >/dev/null 2>&1; then + wget -T 15 -O - "${url}" > "${dest}" + else + echo >&2 + echo >&2 "Downloading go.d plugin from '${url}' failed because of missing mandatory packages."
+ echo >&2 "Either add packages or disable it by issuing '--disable-go' in the installer" + echo >&2 + exit 1 + fi +} + +install_go() { + # When updating this value, ensure correct checksums in packaging/go.d.checksums + GO_PACKAGE_VERSION="v0.7.0" + ARCH_MAP=( + 'i386::386' + 'i686::386' + 'x86_64::amd64' + 'aarch64::arm64' + 'armv64::arm64' + 'armv6l::arm' + 'armv7l::arm' + 'armv5tel::arm' + ) + + if [ -z "${NETDATA_DISABLE_GO+x}" ]; then + echo >&2 "Install go.d.plugin" + ARCH=$(uname -m) + OS=$(uname -s | tr '[:upper:]' '[:lower:]') + + for index in "${ARCH_MAP[@]}" ; do + KEY="${index%%::*}" + VALUE="${index##*::}" + if [ "$KEY" = "$ARCH" ]; then + ARCH="${VALUE}" + break + fi + done + tmp=$(mktemp -d /tmp/netdata-go-XXXXXX) + GO_PACKAGE_BASENAME="go.d.plugin-${GO_PACKAGE_VERSION}.${OS}-${ARCH}" + download_go "https://github.com/netdata/go.d.plugin/releases/download/${GO_PACKAGE_VERSION}/${GO_PACKAGE_BASENAME}" "${tmp}/${GO_PACKAGE_BASENAME}" + download_go "https://github.com/netdata/go.d.plugin/releases/download/${GO_PACKAGE_VERSION}/config.tar.gz" "${tmp}/config.tar.gz" + + if [ ! -f "${tmp}/${GO_PACKAGE_BASENAME}" ] || [ ! -f "${tmp}/config.tar.gz" ] || [ ! -s "${tmp}/config.tar.gz" ] || [ ! -s "${tmp}/${GO_PACKAGE_BASENAME}" ]; then + echo >&2 "Either check the error or consider disabling it by issuing '--disable-go' in the installer" + echo >&2 + return 1 + fi + + grep "${GO_PACKAGE_BASENAME}\$" "packaging/go.d.checksums" > "${tmp}/sha256sums.txt" 2>/dev/null + grep "config.tar.gz" "packaging/go.d.checksums" >> "${tmp}/sha256sums.txt" 2>/dev/null + + # Checksum validation + if ! (cd "${tmp}" && safe_sha256sum -c "sha256sums.txt"); then + + echo >&2 "go.d plugin checksum validation failure." + echo >&2 "Either check the error or consider disabling it by issuing '--disable-go' in the installer" + echo >&2 + + echo "go.d.plugin package files checksum validation failed." 
+ exit 1 + fi + + # Install files + tar -xf "${tmp}/config.tar.gz" -C "${RPM_BUILD_ROOT}%{_libdir}/%{name}/conf.d/" + mv "${tmp}/$GO_PACKAGE_BASENAME" "${RPM_BUILD_ROOT}%{_libexecdir}/%{name}/plugins.d/go.d.plugin" + fi + return 0 +} +install_go + %pre + +# User/Group creations, as needed getent group netdata >/dev/null || groupadd -r netdata getent group docker >/dev/null || groupadd -r docker getent passwd netdata >/dev/null || \ @@ -181,24 +382,28 @@ rm -rf "${RPM_BUILD_ROOT}" %files %doc README.md -%defattr(-,root,root) +%defattr(-,root,netdata) %dir %{_sysconfdir}/%{name} %dir %{_libdir}/%{name} -%config %{_sysconfdir}/%{name}/*.conf -#%config %{_sysconfdir}/%{name}/charts.d/*.conf -#%config %{_sysconfdir}/%{name}/health.d/*.conf -#%config %{_sysconfdir}/%{name}/node.d/*.conf -#%config %{_sysconfdir}/%{name}/python.d/*.conf -#%config %{_sysconfdir}/%{name}/statsd.d/*.conf -%config %{_sysconfdir}/logrotate.d/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/*.conf +%config(noreplace) %{_sysconfdir}/logrotate.d/%{name} %{_libdir}/%{name} + +%defattr(0755,netdata,netdata,0755) %{_libexecdir}/%{name} %{_sbindir}/%{name} %{_sysconfdir}/%{name}/edit-config +%defattr(4750,root,netdata,0750) + +%dir %{_libexecdir}/%{name}/python.d +%dir %{_libexecdir}/%{name}/charts.d +%dir %{_libexecdir}/%{name}/plugins.d +%dir %{_libexecdir}/%{name}/node.d + %caps(cap_dac_read_search,cap_sys_ptrace=ep) %attr(0550,root,netdata) %{_libexecdir}/%{name}/plugins.d/apps.plugin %if %{with netns} @@ -209,27 +414,25 @@ rm -rf "${RPM_BUILD_ROOT}" %attr(0550,root,root) %{_libexecdir}/%{name}/plugins.d/cgroup-network-helper.sh %endif -%if %{with freeipmi} -%caps(cap_setuid=ep) %attr(4550,root,netdata) %{_libexecdir}/%{name}/plugins.d/freeipmi.plugin -%endif +# perf plugin +%caps(cap_setuid=ep) %attr(4750,root,netdata) %{_libexecdir}/%{name}/plugins.d/perf.plugin -%attr(0770,netdata,netdata) %dir %{_localstatedir}/cache/%{name} -%attr(0755,netdata,root) %dir %{_localstatedir}/log/%{name} 
-%attr(0770,netdata,netdata) %dir %{_localstatedir}/lib/%{name} +# freeipmi files +%caps(cap_setuid=ep) %attr(4550,root,netdata) %{_libexecdir}/%{name}/plugins.d/freeipmi.plugin %dir %{_datadir}/%{name} +%defattr(0750,netdata,netdata,0755) + %dir %{_sysconfdir}/%{name}/health.d %dir %{_sysconfdir}/%{name}/python.d %dir %{_sysconfdir}/%{name}/charts.d +%dir %{_sysconfdir}/%{name}/custom-plugins.d +%dir %{_sysconfdir}/%{name}/go.d +%dir %{_sysconfdir}/%{name}/ssl %dir %{_sysconfdir}/%{name}/node.d %dir %{_sysconfdir}/%{name}/statsd.d - -%dir %{_libdir}/%{name}/conf.d/health.d -%dir %{_libdir}/%{name}/conf.d/python.d -%dir %{_libdir}/%{name}/conf.d/charts.d -#%dir %{_libdir}/%{name}/conf.d/node.d -%dir %{_libdir}/%{name}/conf.d/statsd.d +%{_libdir}/%{name}/conf.d/ %if %{with systemd} %{_unitdir}/netdata.service @@ -242,7 +445,31 @@ rm -rf "${RPM_BUILD_ROOT}" %defattr(0644,root,netdata,0755) %{_datadir}/%{name}/web +# Enforce 0660 for files and 0770 for directories +# for the netdata lib, cache and log dirs +%defattr(0660,root,netdata,0770) +%attr(0770,netdata,netdata) %dir %{_localstatedir}/cache/%{name} +%attr(0755,netdata,root) %dir %{_localstatedir}/log/%{name} +%attr(0770,netdata,netdata) %dir %{_localstatedir}/lib/%{name} +%attr(0770,netdata,netdata) %dir %{_localstatedir}/lib/%{name}/registry + + %changelog +* Fri Jun 28 2019 Pavlos Emm. Katsoulakis - 0.0.0-7 +- Raise the path overrides to the spec file level, not just the configure. +- Adjust tighter permissions on some folders, based on what we did on our installer +- Introduce go.d plugin download and install, to include it on the package (Temporarily, to become separate package on next iteration) +* Tue Jun 25 2019 Pavlos Emm. Katsoulakis - 0.0.0-6 +- Adjust dependency list: Some packages are missing on some distros, adopt to build successfully +* Mon Jun 24 2019 Pavlos Emm. 
Katsoulakis - 0.0.0-5 +Another pass on cleaning up pre/post installation steps +- Sync permission and ownership on files and directories +* Sun Jun 16 2019 Pavlos Emm. Katsoulakis - 0.0.0-4 +First draft refactor on package dependencies section +- Remove freeipmi/nfacct plugin flags. We auto-detect all plugins by decision +- Start refactor of package dependencies +- Add missing dependencies, with respect to distro peculiarities +- Adjust existing dependencies, so that distro-specific package names is applied * Wed Jan 02 2019 Pawel Krupa - 0.0.0-3 - Temporary set version statically - Fix changelog ordering diff --git a/package.json b/package.json index 69f74bcfd..2bd614ca7 100644 --- a/package.json +++ b/package.json @@ -1,23 +1,57 @@ { "devDependencies": { "coffee-script": "^1.12.7", + "dictionary-en-us": "^2.0.0", + "gaze": "^1.1.2", + "grunt": "^1.0.1", + "grunt-exec": "^2.0.0", "jasmine": "^2.6.0", "jasmine-core": "^2.6.4", + "jasmine-growl-reporter": "^1.0.1", + "jasmine-node": "BrainDoctor/jasmine-node", + "jasmine-reporters": "^2.2.1", "karma": "^1.7.0", "karma-chrome-launcher": "^2.2.0", "karma-coverage": "^1.1.1", "karma-firefox-launcher": "^1.0.1", "karma-jasmine": "^1.1.0", - "walkdir": "^0.0.11", - "underscore": "^1.8.3", - "gaze": "^1.1.2", - "mkdirp": "^0.5.1", "minimist": "^1.2.0", - "jasmine-growl-reporter": "^1.0.1", - "xml2js": "^0.4.17", - "grunt": "^1.0.1", - "grunt-exec": "^2.0.0", - "jasmine-reporters": "^2.2.1", - "jasmine-node": "BrainDoctor/jasmine-node" + "mkdirp": "^0.5.1", + "remark-cli": "^6.0.1", + "remark-frontmatter": "^1.3.1", + "remark-lint-heading-whitespace": "^1.0.0", + "remark-lint-no-dead-urls": "^0.4.1", + "remark-lint-unordered-list-marker-style": "^1.0.2", + "remark-lint-write-good": "^1.1.0", + "remark-preset-lint-consistent": "^2.0.2", + "remark-preset-lint-markdown-style-guide": "^2.1.2", + "remark-preset-lint-recommended": "^3.0.2", + "remark-retext": "^3.1.2", + "remark-stringify": "^6.0.4", + "remark-validate-links": 
"^8.0.2", + "retext-contractions": "^2.1.3", + "retext-diacritics": "^1.2.2", + "retext-english": "^3.0.2", + "retext-equality": "^3.9.1", + "retext-indefinite-article": "^1.1.6", + "retext-overuse": "^1.1.1", + "retext-passive": "^1.3.2", + "retext-profanities": "^4.6.0", + "retext-quotes": "^2.0.3", + "retext-readability": "^4.2.1", + "retext-redundant-acronyms": "^1.2.2", + "retext-repeated-words": "^1.2.2", + "retext-sentence-spacing": "^2.1.0", + "retext-spell": "^2.4.0", + "retext-syntax-urls": "^1.0.1", + "retext-usage": "^0.5.0", + "underscore": "^1.8.3", + "walkdir": "^0.0.11", + "xml2js": "^0.4.17" + }, + "scripts": { + "lint-md": "remark .", + "lint-md-path": "remark", + "fix-md": "remark collectors --output" } } diff --git a/packaging/docker/README.md b/packaging/docker/README.md index 6ae299f1d..0bf416cd4 100644 --- a/packaging/docker/README.md +++ b/packaging/docker/README.md @@ -54,10 +54,28 @@ services: ### Docker container names resolution -If you want to have your container names resolved by netdata it needs to have access to docker group. To achive that just add environment variable `PGID=999` to netdata container, where `999` is a docker group id from your host. This number can be found by running: -```bash -grep docker /etc/group | cut -d ':' -f 3 -``` +If you want to have your container names resolved by netdata, you need to do two things: +1) Make netdata user be part of the group that owns the socket. + To achieve that just add environment variable `PGID=[GROUP NUMBER]` to the netdata container, + where `[GROUP NUMBER]` is practically the group id of the group assigned to the docker socket, on your host. 
+ This group number can be found by running the following (if socket group ownership is docker): + ```bash + grep docker /etc/group | cut -d ':' -f 3 + ``` + +2) Change docker socket access level to read/write like so: + from + ``` + /var/run/docker.sock:/var/run/docker.sock:ro + ``` + + change to + ``` + /var/run/docker.sock:/var/run/docker.sock:rw + ``` + +**Important Note**: You should seriously consider the necessity of activating this option, +as it grants to the netdata user access to the privileged socket connection of docker service ### Pass command line options to Netdata diff --git a/packaging/docker/publish.sh b/packaging/docker/publish.sh index 948787b0b..fd1883afb 100755 --- a/packaging/docker/publish.sh +++ b/packaging/docker/publish.sh @@ -21,6 +21,8 @@ ARCH_MAP=(["i386"]="386" ["amd64"]="amd64" ["armhf"]="arm" ["aarch64"]="arm64") DEVEL_ARCHS=(amd64) ARCHS="${!ARCH_MAP[@]}" DOCKER_CMD="docker --config ${WORKDIR}" +GIT_MAIL=${GIT_MAIL:-"bot@netdata.cloud"} +GIT_USER=${GIT_USER:-"netdatabot"} if [ -z ${REPOSITORY} ]; then REPOSITORY="${TRAVIS_REPO_SLUG}" @@ -37,6 +39,10 @@ if [ ! -z ${DEVEL+x} ]; then declare -a ARCHS=(${DEVEL_ARCHS[@]}) fi +echo "Syncing repository with latest changes (We may have updated with package versions)" +git checkout master +git pull + # Ensure there is a version, the most appropriate one if [ "${VERSION}" == "" ]; then VERSION=$(git tag --points-at) diff --git a/packaging/docker/run.sh b/packaging/docker/run.sh old mode 100644 new mode 100755 index 243cae8a2..2b5047cd0 --- a/packaging/docker/run.sh +++ b/packaging/docker/run.sh @@ -1,16 +1,51 @@ -#!/bin/sh - -#set -e +#!/usr/bin/env bash +# +# Entry point script for netdata +# +# Copyright: SPDX-License-Identifier: GPL-3.0-or-later +# +# Author : Pavlos Emm. 
Katsoulakis +set -e +echo "Netdata entrypoint script starting" if [ ${RESCRAMBLE+x} ]; then echo "Reinstalling all packages to get the latest Polymorphic Linux scramble" apk upgrade --update-cache --available fi -if [ ${PGID+x} ]; then - echo "Adding user netdata to group with id ${PGID}" - addgroup -g "${PGID}" -S hostgroup 2>/dev/null - sed -i "s/${PGID}:$/${PGID}:netdata/g" /etc/group +create_group_and_assign_to_user() { + local local_DOCKER_GROUP="$1" + local local_DOCKER_GID="$2" + local local_DOCKER_USR="$3" + + echo >&2 "Adding group with ID ${local_DOCKER_GID} and name '${local_DOCKER_GROUP}'" + addgroup -g "${local_DOCKER_GID}" "${local_DOCKER_GROUP}" || echo >&2 "Could not add group ${local_DOCKER_GROUP} with ID ${local_DOCKER_GID}, its already there probably" + + echo >&2 "Adding user '${local_DOCKER_USR}' to group '${local_DOCKER_GROUP}/${local_DOCKER_GID}'" + sed -i "s/:${local_DOCKER_GID}:$/:${local_DOCKER_GID}:${local_DOCKER_USR}/g" /etc/group + + # Make sure we use the right docker group + GRP_TO_ASSIGN="$(grep ":x:${local_DOCKER_GID}:" /etc/group | cut -d':' -f1)" + if [ -z "${GRP_TO_ASSIGN}" ]; then + echo >&2 "Could not find group ID ${local_DOCKER_GID} in /etc/group. 
Check your logs and report it if this is an unrecovereable error" + else + echo >&2 "Group creation and assignment completed, netdata was assigned to group ${GRP_TO_ASSIGN}/${local_DOCKER_GID}" + echo "${GRP_TO_ASSIGN}" + fi +} + +DOCKER_USR="netdata" +DOCKER_SOCKET="/var/run/docker.sock" +DOCKER_GROUP="docker" + +if [ -S "${DOCKER_SOCKET}" ] && [ -n "${PGID}" ]; then + GRP=$(create_group_and_assign_to_user "${DOCKER_GROUP}" "${PGID}" "${DOCKER_USR}") + if [ -n "${GRP}" ]; then + echo "Adjusting ownership of mapped docker socket '${DOCKER_SOCKET}' to root:${GRP}" + chown "root:${GRP}" "${DOCKER_SOCKET}" || echo "Failed to change ownership on docker socket, container name resolution might not work" + fi fi -exec /usr/sbin/netdata -u netdata -D -s /host -p "${NETDATA_PORT}" "$@" +exec /usr/sbin/netdata -u "${DOCKER_USR}" -D -s /host -p "${NETDATA_PORT}" "$@" + +echo "Netdata entrypoint script, completed!" diff --git a/packaging/go.d.checksums b/packaging/go.d.checksums index ae57b3c78..700bad0af 100644 --- a/packaging/go.d.checksums +++ b/packaging/go.d.checksums @@ -1,16 +1,16 @@ -f851c86df8248e52602e39c3198c9b0d858a70c24c5e5c3fb63d691ede5ae9c6 *config.tar.gz -a27dddfc9a783980375aa1f5c54dcfbaf38044311bd16e0371cffd94a2ebe46e *go.d.plugin-v0.5.0.darwin-386 -1d4815d92860089728944f6b893fea16dc51dd6e47a81e5a7599abfdc73ff2de *go.d.plugin-v0.5.0.darwin-amd64 -a3c76f4b806bf930d344a83b0dc2b3fabe16f747ba89b96eac7fcbdb88c4b058 *go.d.plugin-v0.5.0.freebsd-386 -673f61317b8e6f2b226f30d106cff3532d8a3ee3453997d11f984d76c55831ce *go.d.plugin-v0.5.0.freebsd-amd64 -a352b24578d497b505031b8a84e541532d8f4f2543e3ea454b674dece426982c *go.d.plugin-v0.5.0.freebsd-arm -0a3a4249dd94c2cd4bc0f9ac3e49d5f19ff3a52d91fc4540a17688a4c1b71ce8 *go.d.plugin-v0.5.0.linux-386 -40e034ec19952467b85aebda3c57b823c9e75d799318669c4a811b4296382396 *go.d.plugin-v0.5.0.linux-amd64 -74b955b838939a73455403181cf4be67c8f5d0d313f3da0504a6b47605b22ae0 *go.d.plugin-v0.5.0.linux-arm 
-8d564d5bc689fdf46b63fa9b4d152f8ce84bfad102d358f1d3acd390aebf1c2d *go.d.plugin-v0.5.0.linux-arm64 -dd2c9c4e842248f8d7d0588057507e4b683cc9ebef406886c3a839afbcbdee3f *go.d.plugin-v0.5.0.linux-mips -046e315f82b0dd9fa792a0cd07d25564e768d7d44c7c388f3f432e0d3a98da50 *go.d.plugin-v0.5.0.linux-mips64 -6a05c782d5b8200a51eb5334b9c0750a6d511d442078614729592582ab40da05 *go.d.plugin-v0.5.0.linux-mips64le -0f5427fb451aa34cdc71b2c3d0a2d638f63e8bc60f7cffdf62258fc88048d39e *go.d.plugin-v0.5.0.linux-mipsle -a5d21ed9c9858d9fe24ade24825e5449151e5dd114f9715c26d6c03ad6d70919 *go.d.plugin-v0.5.0.linux-ppc64 -c7ec8b4ae2b94f7689f4a6722a5fac7a8302574e9a906e4b76af70bff624557c *go.d.plugin-v0.5.0.linux-ppc64le +133e138307a52a1c3af5abeec4d368c7bcb27f3398f0f380cfacc23db57b9911 *config.tar.gz +7795ff9058852e9e03ceecd432e5c462ef141b3dd2e1f8e7c3cb13a6c4b685ce *go.d.plugin-v0.7.0.darwin-386 +a8db5312e803376bd96ab3c4cfd6f2d8288795fde97a2aefca7916cd8743f2a4 *go.d.plugin-v0.7.0.darwin-amd64 +a130a6aa7a98d37b648d41f8c3f0b939bfb8f343d1a3a6c8267a7fe604aae96f *go.d.plugin-v0.7.0.freebsd-386 +078c8a9607aea92ee8346cb2567a73b2a2ac317ea72c6975de07b47fdba2de80 *go.d.plugin-v0.7.0.freebsd-amd64 +bf7bff1f6fa32055242b627534ec5936fa1b8eb2f42edc736bbe041bee11129e *go.d.plugin-v0.7.0.freebsd-arm +2e15dc67736b29cf736ad7a05271f462467f84e80073fd1a7084dd5e2ac83115 *go.d.plugin-v0.7.0.linux-386 +3b0b5b0faa319201ecac554cb300789546b7f51847d202ff913e29339acca48b *go.d.plugin-v0.7.0.linux-amd64 +1be3860bea67e2ac789a37bf4dae24f8925f93bebe72a57cc2218c9e9a702f19 *go.d.plugin-v0.7.0.linux-arm +cba7cbfeda2e5146c8229d455aaf61f29f196d24291a509f4bf36ae12a2729e7 *go.d.plugin-v0.7.0.linux-arm64 +5f263cd5a032149618483a50486ce69c6e1a32b7e568c498d42b4d94691167f5 *go.d.plugin-v0.7.0.linux-mips +9558e7aa633331afea78c682a15fc9e6cf10ed39fb4c26f03034a7b0cbdfcc1a *go.d.plugin-v0.7.0.linux-mips64 +0f93f4cac9b21cdb28ef88b9f1ba42afcc1e913c0227deb266440c205ff9a224 *go.d.plugin-v0.7.0.linux-mips64le 
+51c0763f07de48e9f9dd9625a647aacecdd4a1bd39f13298b4f7c123436f4327 *go.d.plugin-v0.7.0.linux-mipsle +7e7e53fff1852c9756d6117d35d1f061a8bd97135b231b010ad1461e789b1f66 *go.d.plugin-v0.7.0.linux-ppc64 +6d4203f9c4d5778add09ef2679dc025a72914b68dce5fb816e7cc38f4f36945f *go.d.plugin-v0.7.0.linux-ppc64le diff --git a/packaging/installer/README.md b/packaging/installer/README.md index 6dc084e83..b10ffa05a 100644 --- a/packaging/installer/README.md +++ b/packaging/installer/README.md @@ -20,6 +20,8 @@ The best way to install Netdata is directly from source. Our **automatic install See also the list of Netdata [package maintainers](../maintainers) for ASUSTOR NAS, OpenWRT, ReadyNAS, etc. +Note: From Netdata v1.12 and above, anonymous usage information is collected by default and sent to Google Analytics. To read more about the information collected and how to opt-out, check the [anonymous statistics page](../../docs/anonymous-statistics.md). + --- ## One line installation @@ -42,7 +44,7 @@ bash <(curl -Ss https://my-netdata.io/kickstart.sh) Verify the integrity of the script with this: ```bash -[ "fe451cd039c8f99b2ba4ca0feab88033" = "$(curl -Ss https://my-netdata.io/kickstart.sh | md5sum | cut -d ' ' -f 1)" ] && echo "OK, VALID" || echo "FAILED, INVALID" +[ "8a2b054081a108dff915994ce77f2f2d" = "$(curl -Ss https://my-netdata.io/kickstart.sh | md5sum | cut -d ' ' -f 1)" ] && echo "OK, VALID" || echo "FAILED, INVALID" ``` *It should print `OK, VALID` if the script is the one we ship.* @@ -99,7 +101,7 @@ To install Netdata with a binary package on any Linux distro, any kernel version Verify the integrity of the script with this: ```bash -[ "9ff4f5f37d23dff431f80d5349e0a25c" = "$(curl -Ss https://my-netdata.io/kickstart-static64.sh | md5sum | cut -d ' ' -f 1)" ] && echo "OK, VALID" || echo "FAILED, INVALID" +[ "8779d8717ccaa8dac18d599502eef591" = "$(curl -Ss https://my-netdata.io/kickstart-static64.sh | md5sum | cut -d ' ' -f 1)" ] && echo "OK, VALID" || echo "FAILED, INVALID" ``` 
*It should print `OK, VALID` if the script is the one we ship.* @@ -163,13 +165,25 @@ To install the latest git version of Netdata, please follow these 2 steps: Try our experimental automatic requirements installer (no need to be root). This will try to find the packages that should be installed on your system to build and run Netdata. It supports most major Linux distributions released after 2010: -- **Alpine** Linux and its derivatives (you have to install `bash` yourself, before using the installer) -- **Arch** Linux and its derivatives -- **Gentoo** Linux and its derivatives -- **Debian** Linux and its derivatives (including **Ubuntu**, **Mint**) -- **Fedora** and its derivatives (including **Red Hat Enterprise Linux**, **CentOS**, **Amazon Machine Image**) -- **SuSe** Linux and its derivatives (including **openSuSe**) -- **SLE12** Must have your system registered with Suse Customer Center or have the DVD. See [#1162](https://github.com/netdata/netdata/issues/1162) +* **Alpine** Linux and its derivatives + * You have to install `bash` yourself, before using the installer. + +* **Arch** Linux and its derivatives + * You need arch/aur for package Judy. + +* **Gentoo** Linux and its derivatives + +* **Debian** Linux and its derivatives (including **Ubuntu**, **Mint**) + +* **Red Hat Enterprise Linux** and its derivatives (including **Fedora**, **CentOS**, **Amazon Machine Image**) + * Please note that for RHEL/CentOS you need + [EPEL](http://www.tecmint.com/how-to-enable-epel-repository-for-rhel-centos-6-5/). + In addition, RHEL/CentOS version 6 also needs + [OKay](https://okay.com.mx/blog-news/rpm-repositories-for-centos-6-and-7.html) for package libuv version 1. + +* **SuSe** Linux and its derivatives (including **openSuSe**) + +* **SLE12** Must have your system registered with Suse Customer Center or have the DVD.
See [#1162](https://github.com/netdata/netdata/issues/1162) Install the packages for having a **basic Netdata installation** (system monitoring and many applications, without `mysql` / `mariadb`, `postgres`, `named`, hardware sensors and `SNMP`): @@ -199,9 +213,10 @@ dnf install zlib-devel libuuid-devel libuv-devel lz4-devel Judy-devel openssl-de # CentOS / Red Hat Enterprise Linux yum install autoconf automake curl gcc git libmnl-devel libuuid-devel openssl-devel libuv-devel lz4-devel Judy-devel lm_sensors make MySQL-python nc pkgconfig python python-psycopg2 PyYAML zlib-devel -``` +# openSUSE +zypper install zlib-devel libuuid-devel libuv-devel liblz4-devel judy-devel libopenssl-devel libmnl-devel gcc make git autoconf autoconf-archive autogen automake pkgconfig curl findutils -Please note that for RHEL/CentOS you might need [EPEL](http://www.tecmint.com/how-to-enable-epel-repository-for-rhel-centos-6-5/). +``` Once Netdata is compiled, to run it the following packages are required (already installed using the above commands): diff --git a/packaging/installer/functions.sh b/packaging/installer/functions.sh index d1e944878..6f9996906 100644 --- a/packaging/installer/functions.sh +++ b/packaging/installer/functions.sh @@ -303,7 +303,7 @@ install_non_systemd_init() { run rc-update add netdata default && return 0 - elif [ "${key}" = "debian-7" ] || [ "${key}" = "ubuntu-12.04" ] || [ "${key}" = "ubuntu-14.04" ]; then + elif [ "${key}" =~ ^devuan* ] || [ "${key}" = "debian-7" ] || [ "${key}" = "ubuntu-12.04" ] || [ "${key}" = "ubuntu-14.04" ]; then echo >&2 "Installing LSB init file..." 
run cp system/netdata-lsb /etc/init.d/netdata && run chmod 755 /etc/init.d/netdata && @@ -332,6 +332,8 @@ install_non_systemd_init() { NETDATA_START_CMD="netdata" NETDATA_STOP_CMD="killall netdata" +NETDATA_INSTALLER_START_CMD="${NETDATA_START_CMD}" +NETDATA_INSTALLER_STOP_CMD="${NETDATA_STOP_CMD}" install_netdata_service() { local uname="$(uname 2>/dev/null)" @@ -351,15 +353,23 @@ install_netdata_service() { elif [ "${uname}" = "FreeBSD" ]; then - run cp system/netdata-freebsd /etc/rc.d/netdata && - NETDATA_START_CMD="service netdata start" && - NETDATA_STOP_CMD="service netdata stop" && - return 0 + run cp system/netdata-freebsd /etc/rc.d/netdata && NETDATA_START_CMD="service netdata start" && + NETDATA_STOP_CMD="service netdata stop" && + NETDATA_INSTALLER_START_CMD="service netdata onestart" && + NETDATA_INSTALLER_STOP_CMD="${NETDATA_STOP_CMD}" + myret=$? + + echo >&2 "Note: To explicitly enable netdata automatic start, set 'netdata_enable' to 'YES' in /etc/rc.conf" + echo >&2 "" + + return ${myret} elif issystemd; then # systemd is running on this system NETDATA_START_CMD="systemctl start netdata" NETDATA_STOP_CMD="systemctl stop netdata" + NETDATA_INSTALLER_START_CMD="${NETDATA_START_CMD}" + NETDATA_INSTALLER_STOP_CMD="${NETDATA_STOP_CMD}" SYSTEMD_DIRECTORY="" @@ -390,6 +400,8 @@ install_netdata_service() { NETDATA_START_CMD="rc-service netdata start" NETDATA_STOP_CMD="rc-service netdata stop" fi + NETDATA_INSTALLER_START_CMD="${NETDATA_START_CMD}" + NETDATA_INSTALLER_STOP_CMD="${NETDATA_STOP_CMD}" fi return ${ret} @@ -429,6 +441,7 @@ stop_netdata_on_pid() { ret=$? test ${ret} -eq 0 && printf >&2 "." && sleep 2 + done echo >&2 @@ -446,8 +459,6 @@ netdata_pids() { myns="$(readlink /proc/self/ns/pid 2>/dev/null)" - # echo >&2 "Stopping a (possibly) running netdata (namespace '${myns}')..." 
- for p in \ $(cat /var/run/netdata.pid 2>/dev/null) \ $(cat /var/run/netdata/netdata.pid 2>/dev/null) \ @@ -477,12 +488,15 @@ restart_netdata() { local started=0 - progress "Start netdata" + progress "Restarting netdata instance" if [ "${UID}" -eq 0 ]; then - service netdata stop - stop_all_netdata - service netdata restart && started=1 + echo >&2 + echo >&2 "Stopping all netdata threads" + run stop_all_netdata + + echo >&2 "Starting netdata using command '${NETDATA_INSTALLER_START_CMD}'" + run ${NETDATA_INSTALLER_START_CMD} && started=1 if [ ${started} -eq 1 ] && [ -z "$(netdata_pids)" ]; then echo >&2 "Ooops! it seems netdata is not started." @@ -490,7 +504,8 @@ restart_netdata() { fi if [ ${started} -eq 0 ]; then - service netdata start && started=1 + echo >&2 "Attempting another netdata start using command '${NETDATA_INSTALLER_START_CMD}'" + run ${NETDATA_INSTALLER_START_CMD} && started=1 fi fi @@ -500,8 +515,8 @@ restart_netdata() { fi if [ ${started} -eq 0 ]; then - # still not started... - + # still not started... another forced attempt, just run the binary + echo >&2 "Netdata service still not started, attempting another forced restart by running '${netdata} ${@}'" run stop_all_netdata run "${netdata}" "${@}" return $? 
diff --git a/packaging/installer/kickstart-static64.sh b/packaging/installer/kickstart-static64.sh index 505179051..a9f11238c 100755 --- a/packaging/installer/kickstart-static64.sh +++ b/packaging/installer/kickstart-static64.sh @@ -127,7 +127,7 @@ download() { } set_tarball_urls() { - if [ "$1" == "stable" ]; then + if [ "$1" = "stable" ]; then local latest # Simple version # latest="$(curl -sSL https://api.github.com/repos/netdata/netdata/releases/latest | grep tag_name | cut -d'"' -f4)" diff --git a/packaging/installer/kickstart.sh b/packaging/installer/kickstart.sh index 2db95f21d..d396f139e 100755 --- a/packaging/installer/kickstart.sh +++ b/packaging/installer/kickstart.sh @@ -141,7 +141,7 @@ warning() { create_tmp_directory() { # Check if tmp is mounted as noexec - if grep -Eq '^[^ ]+ /tmp [^ ]+ ([^ ]*,)?noexec[, ]' /proc/mounts; then + if grep -Eq '^[^ ]+ /tmp [^ ]+ ([^ ]*,)?noexec[, ]' /proc/mounts > /dev/null 2>&1; then pattern="$(pwd)/netdata-kickstart-XXXXXX" else pattern="/tmp/netdata-kickstart-XXXXXX" @@ -163,7 +163,7 @@ download() { } set_tarball_urls() { - if [ "$1" == "stable" ]; then + if [ "$1" = "stable" ]; then local latest # Simple version # latest="$(curl -sSL https://api.github.com/repos/netdata/netdata/releases/latest | grep tag_name | cut -d'"' -f4)" @@ -200,9 +200,9 @@ detect_bash4() { } dependencies() { - SYSTEM="$(uname -s)" - OS="$(uname -o)" - MACHINE="$(uname -m)" + SYSTEM="$(uname -s 2> /dev/null || uname -v)" + OS="$(uname -o 2> /dev/null || uname -rs)" + MACHINE="$(uname -m 2> /dev/null)" echo "System : ${SYSTEM}" echo "Operating System : ${OS}" diff --git a/packaging/installer/netdata-uninstaller.sh b/packaging/installer/netdata-uninstaller.sh index cfd858c02..0bbdaac2c 100755 --- a/packaging/installer/netdata-uninstaller.sh +++ b/packaging/installer/netdata-uninstaller.sh @@ -232,7 +232,7 @@ quit_msg() { user_input() { TEXT="$1" - if [ "${INTERACTIVITY}" == "-i" ]; then + if [ "${INTERACTIVITY}" = "-i" ]; then read -r -p "$TEXT" 
>&2 fi } diff --git a/packaging/installer/netdata-updater.sh b/packaging/installer/netdata-updater.sh index 21a769ba5..83031f3aa 100755 --- a/packaging/installer/netdata-updater.sh +++ b/packaging/installer/netdata-updater.sh @@ -73,7 +73,7 @@ set_tarball_urls() { return fi - if [ "$1" == "stable" ]; then + if [ "$1" = "stable" ]; then local latest # Simple version # latest="$(curl -sSL https://api.github.com/repos/netdata/netdata/releases/latest | grep tag_name | cut -d'"' -f4)" @@ -95,7 +95,7 @@ update() { if [ -z "${NETDATA_LOCAL_TARBAL_OVERRIDE}" ]; then download "${NETDATA_TARBALL_CHECKSUM_URL}" "${tmpdir}/sha256sum.txt" >&3 2>&3 - if grep "${NETDATA_TARBALL_CHECKSUM}" sha256sum.txt >&3 2>&3; then + if [[ -n "${NETDATA_TARBALL_CHECKSUM}" ]] && grep "${NETDATA_TARBALL_CHECKSUM}" sha256sum.txt >&3 2>&3; then info "Newest version is already installed" else download "${NETDATA_TARBALL_URL}" "${tmpdir}/netdata-latest.tar.gz" diff --git a/packaging/makeself/install-or-update.sh b/packaging/makeself/install-or-update.sh index fc4e6d077..165e7920b 100755 --- a/packaging/makeself/install-or-update.sh +++ b/packaging/makeself/install-or-update.sh @@ -175,7 +175,7 @@ fi progress "create user config directories" -for x in "python.d" "charts.d" "node.d" "health.d" "statsd.d" +for x in "python.d" "charts.d" "node.d" "health.d" "statsd.d" "custom-plugins.d" "ssl" do if [ ! -d "etc/netdata/${x}" ] then diff --git a/packaging/version b/packaging/version index 440ddd8f1..a406138ee 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.15.0 +v1.16.0 diff --git a/registry/README.md b/registry/README.md index 5a9a2b3bb..738908071 100644 --- a/registry/README.md +++ b/registry/README.md @@ -1,7 +1,11 @@ # Registry -Netdata registry implements the `my-netdata` menu on netdata dashboards. -The `my-netdata` menu lists the netdata servers you have visited. 
+The Netdata registry implements the node menu on the top left corner of the netdata dashboards and enables the Netdata cloud features, such as the node view. +The node menu lists the netdata servers you have visited. The node view offers a lot of additional features on top of the menu, +[with many more to come](https://blog.netdata.cloud/posts/netdata-cloud-announcement/). +To enable the global Netdata registry and the cloud features, you need to Sign In to Netdata cloud. By signing in, you opt in to let the registry receive and store +the information described [here](#what-data-does-the-registry-store). +You can still get the node menu, but not the cloud features, if you [run your own registry](#run-your-own-registry). ## Why? @@ -26,11 +30,13 @@ Using netdata, your monitoring infrastructure is embedded on each server, limiti However, the netdata approach introduces a few new issues that need to be addressed, one being **the list of netdata we have installed**, i.e. the URLs our netdata servers are listening. -To solve this, netdata utilizes a **central registry**. This registry, together with certain browser features, allow netdata to provide unified cross-server dashboards. For example, when you jump from server to server using the `my-netdata` menu, several session settings (like the currently viewed charts, the current zoom and pan operations on the charts, etc.) are propagated to the new server, so that the new dashboard will come with exactly the same view. +To solve this, netdata utilizes a **central registry**. This registry, together with certain browser features, allows netdata to provide unified cross-server dashboards. +For example, when you jump from server to server using the node menu, several session settings (like the currently viewed charts, the current zoom and pan operations on the charts, etc.) are propagated to the new server, so that the new dashboard will come with exactly the same view.
+Netdata cloud has a roadmap to [offer many more features](https://blog.netdata.cloud/posts/netdata-cloud-announcement/) over and above the simple node menu. -## What is the registry? +## What data does the registry store? -The registry keeps track of 3 entities: +The registry keeps track of 4 entities: 1. **machines**: i.e. the netdata installations (a random GUID generated by each netdata the first time it starts; we call this **machine_guid**) @@ -38,12 +44,17 @@ The registry keeps track of 3 entities: 2. **persons**: i.e. the web browsers accessing the netdata installations (a random GUID generated by the registry the first time it sees a new web browser; we call this **person_guid**) - For each person, the registry keeps track of the netdata installations it has accessed and their URLs. + For each person, the registry keeps track of the netdata installations it has accessed and their URLs. 3. **URLs** of netdata installations (as seen by the web browsers) For each URL, the registry keeps the URL and nothing more. Each URL is linked to *persons* and *machines*. The only way to find a URL is to know its **machine_guid** or have a **person_guid** it is linked to it. +4. **accounts**: i.e. the information used to sign-in via one of the available sign-in methods. Depending on the method, this may include an email, an email and a profile picture. + +For *persons*/*accounts* and *machines*, the registry keeps links to *URLs*, each link with 2 timestamps (first time seen, last time seen) and a counter (number of times it has been seen). +*machines*, *persons* and timestamps are stored in the netdata registry regardless of whether you sign in or not. + ## Who talks to the registry? Your web browser **only**! If sending this information is against your policies, you can [run your own registry](#run-your-own-registry) @@ -52,19 +63,11 @@ Your netdata servers do not talk to the registry. 
This is a UML diagram of its o ![registry](https://cloud.githubusercontent.com/assets/2662304/19448565/11a70632-94ab-11e6-9d80-f410b4acb797.png) -## What data does the registry store? - -Its database contains: - -- **random person GUIDs** (generated by the registry as a browser cookie) -- **random machine GUIDs** (generated by each netdata server on its first run), including the hostname of the server netdata is running (without the domain) -- **URLs** (the base URL for accessing a netdata server, as seen by the web browser) - -For *persons* and *machines*, the registry keeps links to *URLs*, each link with 2 timestamps (first time seen, last time seen) and a counter (number of times it has been seen). ## Which is the default registry? `https://registry.my-netdata.io`, which is currently served by `https://london.my-netdata.io`. This registry listens to both HTTP and HTTPS requests but the default is HTTPS. +`https://netdata.cloud` is the additional registry endpoint, that enables [the cloud features](https://blog.netdata.cloud/posts/netdata-cloud-announcement/). It only accepts HTTPS. ### Can this registry handle the global load of netdata installations? @@ -98,14 +101,14 @@ Note that we have not enabled the registry on the other servers. Only one netdat This is it. You have your registry now. -You may also want to give your server different names under the **my-netdata** menu (i.e. to have them sorted / grouped). You can change its registry name, by setting on each netdata server: +You may also want to give your server different names under the node menu (i.e. to have them sorted / grouped). You can change its registry name, by setting on each netdata server: ``` [registry] registry hostname = Group1 - Master DB ``` -So this server will appear in **my-netdata** as `Group1 - Master DB`. The max name length is 50 characters. +So this server will appear in the node menu as `Group1 - Master DB`. The max name length is 50 characters. 
### Limiting access to the registry diff --git a/registry/registry_init.c b/registry/registry_init.c index 3cf140dee..e5e666820 100644 --- a/registry/registry_init.c +++ b/registry/registry_init.c @@ -43,6 +43,7 @@ int registry_init(void) { // netdata.cloud configuration, if cloud_base_url == "", cloud functionality is disabled. registry.cloud_base_url = config_get(CONFIG_SECTION_CLOUD, "cloud base url", "https://netdata.cloud"); + setenv("NETDATA_REGISTRY_CLOUD_BASE_URL", registry.cloud_base_url, 1); setenv("NETDATA_REGISTRY_HOSTNAME", registry.hostname, 1); setenv("NETDATA_REGISTRY_URL", registry.registry_to_announce, 1); diff --git a/streaming/README.md b/streaming/README.md index 0ad9d7e2e..3e58f1f06 100644 --- a/streaming/README.md +++ b/streaming/README.md @@ -18,7 +18,7 @@ a netdata performs: Local netdata (`slave`), **without any database or alarms**, collects metrics and sends them to another netdata (`master`). -The `my-netdata` menu shows a list of all "databases streamed to" the master. Clicking one of those links allows the user to view the full dashboard of the `slave` netdata. The URL has the form http://master-host:master-port/host/slave-host/. +The node menu shows a list of all "databases streamed to" the master. Clicking one of those links allows the user to view the full dashboard of the `slave` netdata. The URL has the form http://master-host:master-port/host/slave-host/. Alarms for the `slave` are served by the `master`. @@ -41,6 +41,8 @@ The `slave` and the `master` may have different data retention policies for the Alarms for the `slave` are triggered by **both** the `slave` and the `master` (and actually each can have different alarms configurations or have alarms disabled). +Note that custom chart names configured on the `slave` should be in the form `type.name` to work correctly. The `master` will truncate the `type` part and substitute the original chart `type` to store the name in the database.
+ ### netdata proxies Local netdata (`slave`), with or without a database, collects metrics and sends them to another @@ -81,14 +83,14 @@ monitoring (there cannot be health monitoring without a database). ``` [web] - mode = none | static-threaded - accept a streaming request every seconds = 0 + mode = none | static-threaded + accept a streaming request every seconds = 0 ``` `[web].mode = none` disables the API (netdata will not listen to any ports). This also disables the registry (there cannot be a registry without an API). -`accept a streaming request every seconds` can be used to set a limit on how often a master Netdata server will accept streaming requests from the slaves. 0 sets no limit, 1 means maximum once every second. If this is set, you may see error log entries "... too busy to accept new streaming request. Will be allowed in X secs". +`accept a streaming request every seconds` can be used to set a limit on how often a master Netdata server will accept streaming requests from the slaves. 0 sets no limit, 1 means maximum once every second. If this is set, you may see error log entries "... too busy to accept new streaming request. Will be allowed in X secs". ``` [backend] @@ -123,7 +125,7 @@ a `proxy`). ``` [stream] enabled = yes | no - destination = IP:PORT ... + destination = IP:PORT[:SSL] ... api key = XXXXXXXXXXX ``` @@ -136,6 +138,8 @@ headless proxy|`none`|not `none`|`yes`|only for `data source = as collected`|not proxy with db|not `none`|not `none`|`yes`|possible|possible|yes central netdata|not `none`|not `none`|`no`|possible|possible|yes +For the options to encrypt the data stream between the slave and the master, refer to [securing the communication](#securing-the-communication) + ##### options for the receiving node `stream.conf` looks like this: @@ -209,11 +213,46 @@ The receiving end (`proxy` or `master`) logs entries like these: For netdata v1.9+, streaming can also be monitored via `access.log`. 
+### Securing the communication + +Netdata does not activate TLS encryption by default. To encrypt the connection, you first need to [enable TLS support](../web/server/#enabling-tls-support) on the master. With encryption enabled on the receiving side, we need to instruct the slave to use SSL as well. On the slave's `stream.conf`, configure the destination as follows: + +``` +[stream] + destination = host:port:SSL +``` + +The word SSL appended to the end of the destination tells the slave that the connection must be encrypted. + +#### Certificate verification + +When SSL is enabled on the slave, the default behavior is to not connect to the master unless the server's certificate can be verified via the default chain. In case you want to avoid this check, add the following to the slave's `stream.conf`: + +``` +[stream] + ssl skip certificate verification = yes +``` + +#### Expected behaviors + +With the introduction of SSL, the master-slave communication behaves as shown in the table below, depending on the following configurations: +- Master TLS (Yes/No): Whether the `[web]` section in `netdata.conf` has `ssl key` and `ssl certificate`. +- Master port SSL (-/force/optional): Depends on whether the `[web]` section `bind to` contains a `^SSL=force` or `^SSL=optional` directive on the port(s) used for streaming. +- Slave TLS (Yes/No): Whether the destination in the slave's `stream.conf` has `:SSL` at the end. +- Slave SSL Verification (yes/no): Value of the slave's `stream.conf` `ssl skip certificate verification` parameter (default is no). + + Master TLS enabled | Master port SSL | Slave TLS | Slave SSL Ver. | Behavior +:------:|:-----:|:-----:|:-----:|:-------- +No | - | No | no | Legacy behavior. The master-slave stream is unencrypted. +Yes | force | No | no | The master rejects the slave connection.
+Yes | -/optional | No | no | The master-slave stream is unencrypted (expected situation for legacy slaves and newer masters) +Yes | -/force/optional | Yes | no | The master-slave stream is encrypted, provided that the master has a valid SSL certificate. Otherwise, the slave refuses to connect. +Yes | -/force/optional | Yes | yes | The master-slave stream is encrypted. ## Viewing remote host dashboards, using mirrored databases On any receiving netdata, that maintains remote databases and has its web server enabled, -`my-netdata` menu will include a list of the mirrored databases. +The node menu will include a list of the mirrored databases. ![image](https://cloud.githubusercontent.com/assets/2662304/24080824/24cd2d3c-0caf-11e7-909d-a8dd1dbb95d7.png) @@ -289,13 +328,13 @@ On the master, edit `/etc/netdata/stream.conf` (to edit it on your system run `/ [11111111-2222-3333-4444-555555555555] # enable/disable this API key enabled = yes - + # one hour of data for each of the slaves default history = 3600 - + # do not save slave metrics on disk default memory = ram - + # alarms checks, only while the slave is connected health enabled by default = auto ``` @@ -305,6 +344,10 @@ If you used many API keys, you can add one such section for each API key. When done, restart netdata on the `master` node. It is now ready to receive metrics. +Note that `health enabled by default = auto` will still trigger `last_collected` alarms, if a connected slave does not exit gracefully. If the netdata running on the slave is +stopped, it will close the connection to the master, ensuring that no `last_collected` alarms are triggered. For example, a proper container restart would first terminate +the netdata process, but a system power issue would leave the connection open on the master side. In the second case, you will still receive alarms. 
+ #### Configuring the `slaves` On each of the slaves, edit `/etc/netdata/stream.conf` (to edit it on your system run `/etc/netdata/edit-config stream.conf`) and set these: @@ -313,10 +356,10 @@ On each of the slaves, edit `/etc/netdata/stream.conf` (to edit it on your syste [stream] # stream metrics to another netdata enabled = yes - + # the IP and PORT of the master destination = 10.11.12.13:19999 - + # the API key to use api key = 11111111-2222-3333-4444-555555555555 ``` diff --git a/streaming/rrdpush.c b/streaming/rrdpush.c index 2e9050ff2..954b1d7d1 100644 --- a/streaming/rrdpush.c +++ b/streaming/rrdpush.c @@ -79,6 +79,25 @@ int rrdpush_init() { default_rrdpush_enabled = 0; } +#ifdef ENABLE_HTTPS + if (netdata_use_ssl_on_stream == NETDATA_SSL_OPTIONAL) { + if (default_rrdpush_destination){ + char *test = strstr(default_rrdpush_destination,":SSL"); + if(test){ + *test = 0X00; + netdata_use_ssl_on_stream = NETDATA_SSL_FORCE; + } + } + } + char *invalid_certificate = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "ssl skip certificate verification", "no"); + if ( !strcmp(invalid_certificate,"yes")){ + if (netdata_validate_server == NETDATA_SSL_VALID_CERTIFICATE){ + info("The Netdata is configured to accept invalid certificate."); + netdata_validate_server = NETDATA_SSL_INVALID_CERTIFICATE; + } + } +#endif + return default_rrdpush_enabled; } @@ -414,6 +433,7 @@ static inline void rrdpush_sender_thread_close_socket(RRDHOST *host) { } } +//called from client side static int rrdpush_sender_thread_connect_to_master(RRDHOST *host, int default_port, int timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size) { struct timeval tv = { .tv_sec = timeout, @@ -442,9 +462,38 @@ static int rrdpush_sender_thread_connect_to_master(RRDHOST *host, int default_po info("STREAM %s [send to %s]: initializing communication...", host->hostname, connected_to); +#ifdef ENABLE_HTTPS + if( netdata_client_ctx ){ + host->ssl.flags = NETDATA_SSL_START; + if 
(!host->ssl.conn){ + host->ssl.conn = SSL_new(netdata_client_ctx); + if(!host->ssl.conn){ + error("Failed to allocate SSL structure."); + host->ssl.flags = NETDATA_SSL_NO_HANDSHAKE; + } + } + else{ + SSL_clear(host->ssl.conn); + } + + if (host->ssl.conn) + { + if (SSL_set_fd(host->ssl.conn, host->rrdpush_sender_socket) != 1) { + error("Failed to set the socket to the SSL on socket fd %d.", host->rrdpush_sender_socket); + host->ssl.flags = NETDATA_SSL_NO_HANDSHAKE; + } else{ + host->ssl.flags = NETDATA_SSL_HANDSHAKE_COMPLETE; + } + } + } + else { + host->ssl.flags = NETDATA_SSL_NO_HANDSHAKE; + } +#endif + #define HTTP_HEADER_SIZE 8192 char http[HTTP_HEADER_SIZE + 1]; - snprintfz(http, HTTP_HEADER_SIZE, + int eol = snprintfz(http, HTTP_HEADER_SIZE, "STREAM key=%s&hostname=%s&registry_hostname=%s&machine_guid=%s&update_every=%d&os=%s&timezone=%s&tags=%s" "&NETDATA_SYSTEM_OS_NAME=%s" "&NETDATA_SYSTEM_OS_ID=%s" @@ -486,8 +535,39 @@ static int rrdpush_sender_thread_connect_to_master(RRDHOST *host, int default_po , host->program_name , host->program_version ); - + http[eol] = 0x00; + +#ifdef ENABLE_HTTPS + if (!host->ssl.flags) { + ERR_clear_error(); + SSL_set_connect_state(host->ssl.conn); + int err = SSL_connect(host->ssl.conn); + if (err != 1){ + err = SSL_get_error(host->ssl.conn, err); + error("SSL cannot connect with the server: %s ",ERR_error_string((long)SSL_get_error(host->ssl.conn,err),NULL)); + if (netdata_use_ssl_on_stream == NETDATA_SSL_FORCE) { + rrdpush_sender_thread_close_socket(host); + return 0; + }else { + host->ssl.flags = NETDATA_SSL_NO_HANDSHAKE; + } + } + else { + if (netdata_use_ssl_on_stream == NETDATA_SSL_FORCE) { + if (netdata_validate_server == NETDATA_SSL_VALID_CERTIFICATE) { + if ( security_test_certificate(host->ssl.conn)) { + error("Closing the stream connection, because the server SSL certificate is not valid."); + rrdpush_sender_thread_close_socket(host); + return 0; + } + } + } + } + }
if(send_timeout(&host->ssl,host->rrdpush_sender_socket, http, strlen(http), 0, timeout) == -1) { +#else if(send_timeout(host->rrdpush_sender_socket, http, strlen(http), 0, timeout) == -1) { +#endif error("STREAM %s [send to %s]: failed to send HTTP header to remote netdata.", host->hostname, connected_to); rrdpush_sender_thread_close_socket(host); return 0; @@ -495,7 +575,11 @@ static int rrdpush_sender_thread_connect_to_master(RRDHOST *host, int default_po info("STREAM %s [send to %s]: waiting response from remote netdata...", host->hostname, connected_to); +#ifdef ENABLE_HTTPS + if(recv_timeout(&host->ssl,host->rrdpush_sender_socket, http, HTTP_HEADER_SIZE, 0, timeout) == -1) { +#else if(recv_timeout(host->rrdpush_sender_socket, http, HTTP_HEADER_SIZE, 0, timeout) == -1) { +#endif error("STREAM %s [send to %s]: remote netdata does not respond.", host->hostname, connected_to); rrdpush_sender_thread_close_socket(host); return 0; @@ -565,6 +649,12 @@ void *rrdpush_sender_thread(void *ptr) { return NULL; } +#ifdef ENABLE_HTTPS + if (netdata_use_ssl_on_stream & NETDATA_SSL_FORCE ){ + security_start_ssl(NETDATA_SSL_CONTEXT_STREAMING); + } +#endif + info("STREAM %s [send]: thread created (task id %d)", host->hostname, gettid()); int timeout = (int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "timeout seconds", 60); @@ -852,6 +942,9 @@ static int rrdpush_receive(int fd , int update_every , char *client_ip , char *client_port +#ifdef ENABLE_HTTPS + , struct netdata_ssl *ssl +#endif ) { RRDHOST *host; int history = default_rrd_history_entries; @@ -965,7 +1058,11 @@ static int rrdpush_receive(int fd snprintfz(cd.cmd, PLUGINSD_CMD_MAX, "%s:%s", client_ip, client_port); info("STREAM %s [receive from [%s]:%s]: initializing communication...", host->hostname, client_ip, client_port); +#ifdef ENABLE_HTTPS + if(send_timeout(ssl,fd, START_STREAMING_PROMPT, strlen(START_STREAMING_PROMPT), 0, 60) != strlen(START_STREAMING_PROMPT)) { +#else if(send_timeout(fd, 
START_STREAMING_PROMPT, strlen(START_STREAMING_PROMPT), 0, 60) != strlen(START_STREAMING_PROMPT)) { +#endif log_stream_connection(client_ip, client_port, key, host->machine_guid, host->hostname, "FAILED - CANNOT REPLY"); error("STREAM %s [receive from [%s]:%s]: cannot send ready command.", host->hostname, client_ip, client_port); close(fd); @@ -1058,6 +1155,9 @@ struct rrdpush_thread { char *program_version; struct rrdhost_system_info *system_info; int update_every; +#ifdef ENABLE_HTTPS + struct netdata_ssl ssl; +#endif }; static void rrdpush_receiver_thread_cleanup(void *ptr) { @@ -1079,8 +1179,13 @@ static void rrdpush_receiver_thread_cleanup(void *ptr) { freez(rpt->client_port); freez(rpt->program_name); freez(rpt->program_version); - rrdhost_system_info_free(rpt->system_info); +#ifdef ENABLE_HTTPS + if(rpt->ssl.conn){ + SSL_free(rpt->ssl.conn); + } +#endif freez(rpt); + } } @@ -1105,6 +1210,9 @@ static void *rrdpush_receiver_thread(void *ptr) { , rpt->update_every , rpt->client_ip , rpt->client_port +#ifdef ENABLE_HTTPS + , &rpt->ssl +#endif ); netdata_thread_cleanup_pop(1); @@ -1295,6 +1403,13 @@ int rrdpush_receiver_thread_spawn(RRDHOST *host, struct web_client *w, char *url rpt->client_port = strdupz(w->client_port); rpt->update_every = update_every; rpt->system_info = system_info; +#ifdef ENABLE_HTTPS + rpt->ssl.conn = w->ssl.conn; + rpt->ssl.flags = w->ssl.flags; + + w->ssl.conn = NULL; + w->ssl.flags = NETDATA_SSL_START; +#endif if(w->user_agent && w->user_agent[0]) { char *t = strchr(w->user_agent, '/'); diff --git a/streaming/stream.conf b/streaming/stream.conf index d0d02a7c8..0d360cc24 100644 --- a/streaming/stream.conf +++ b/streaming/stream.conf @@ -17,7 +17,7 @@ # Where is the receiving netdata? # A space separated list of: # - # [PROTOCOL:]HOST[%INTERFACE][:PORT] + # [PROTOCOL:]HOST[%INTERFACE][:PORT][:SSL] # # If many are given, the first available will get the metrics. 
# @@ -26,10 +26,21 @@ # IPv6 IPs should be given with brackets [ip:address] # INTERFACE = the network interface to use (only for IPv6) # PORT = the port number or service name (/etc/services) + # SSL = when this word appears at the end of the destination string + # Netdata will encrypt the connection with the master. # # This communication is not HTTP (it cannot be proxied by web proxies). destination = + # Skip Certificate verification? + # + # The netdata slave is configured to reject invalid SSL/TLS certificates, + # so certificates that are self-signed or expired will stop the streaming. + # If the server certificate is not valid, you can allow the use of + # 'bad' certificates by setting the next option to 'yes'. + # + #ssl skip certificate verification = yes + # The API_KEY to use (as the sender) api key = @@ -114,7 +125,8 @@ # 3 possible values: # yes enable alarms # no do not enable alarms - # auto enable alarms, only when the sending netdata is connected + # auto enable alarms, only when the sending netdata is connected. For ephemeral slaves or slave system restarts, + # ensure that the netdata process on the slave is gracefully stopped, to prevent invalid last_collected alarms # You can also set it per host, below. # The default is taken from [health].enabled of netdata.conf health enabled by default = auto diff --git a/system/netdata-lsb.in b/system/netdata-lsb.in index e623f1e0c..ca197a520 100644 --- a/system/netdata-lsb.in +++ b/system/netdata-lsb.in @@ -1,10 +1,18 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-3.0-or-later +#!/usr/bin/env bash # +# Netdata LSB start script +# +# Copyright: +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Author: +# Costa Tsaousis +# Pavlos Emm.
Katsoulakis + ### BEGIN INIT INFO # Provides: netdata -# Required-Start: $local_fs $remote_fs $network $named $time apache2 httpd squid nginx mysql named opensips upsd hostapd postfix lm_sensors -# Required-Stop: $local_fs $remote_fs $network $named $time apache2 httpd squid nginx mysql named opensips upsd hostapd postfix lm_sensors +# Required-Start: $local_fs $remote_fs $network $named $time +# Required-Stop: $local_fs $remote_fs $network $named $time # Should-Start: $local_fs $network $named $remote_fs $time $all # Should-Stop: $local_fs $network $named $remote_fs $time $all # Default-Start: 2 3 4 5 diff --git a/tests/backends/prometheus-avg-oldunits.txt b/tests/backends/prometheus-avg-oldunits.txt index b89c924d4..53ee8ffa9 100644 --- a/tests/backends/prometheus-avg-oldunits.txt +++ b/tests/backends/prometheus-avg-oldunits.txt @@ -1,3 +1,20 @@ +nd_apps_cpu_percent_average +nd_apps_cpu_system_percent_average +nd_apps_cpu_user_percent_average +nd_apps_files_open_files_average +nd_apps_lreads_kilobytes_persec_average +nd_apps_lwrites_kilobytes_persec_average +nd_apps_major_faults_page_faults_persec_average +nd_apps_mem_MB_average +nd_apps_minor_faults_page_faults_persec_average +nd_apps_pipes_open_pipes_average +nd_apps_preads_kilobytes_persec_average +nd_apps_processes_processes_average +nd_apps_pwrites_kilobytes_persec_average +nd_apps_sockets_open_sockets_average +nd_apps_swap_MB_average +nd_apps_threads_threads_average +nd_apps_vmem_MB_average nd_cpu_core_throttling_events_persec_average nd_cpu_cpu_percent_average nd_cpu_interrupts_interrupts_persec_average @@ -20,6 +37,7 @@ nd_ip_ecnpkts_packets_persec_average nd_ip_inerrors_packets_persec_average nd_ip_mcast_kilobits_persec_average nd_ip_mcastpkts_packets_persec_average +nd_ip_tcp_accept_queue_packets_persec_average nd_ip_tcpconnaborts_connections_persec_average nd_ip_tcpofo_packets_persec_average nd_ip_tcpreorders_packets_persec_average @@ -42,7 +60,6 @@ nd_ipv4_udperrors_events_persec_average 
nd_ipv4_udppackets_packets_persec_average nd_ipv6_ect_packets_persec_average nd_ipv6_errors_packets_persec_average -nd_ipv6_icmpechos_messages_persec_average nd_ipv6_icmperrors_errors_persec_average nd_ipv6_icmp_messages_persec_average nd_ipv6_icmpmldv2_reports_persec_average @@ -64,9 +81,46 @@ nd_mem_pgfaults_page_faults_persec_average nd_mem_slab_MB_average nd_mem_transparent_hugepages_MB_average nd_mem_writeback_MB_average +nd_netdata_apps_children_fix_percent_average +nd_netdata_apps_cpu_milliseconds_persec_average +nd_netdata_apps_fix_percent_average +nd_netdata_apps_sizes_files_persec_average +nd_netdata_clients_connected_clients_average +nd_netdata_compression_ratio_percent_average +nd_netdata_go_plugin_execution_time_ms_average +nd_netdata_net_kilobits_persec_average +nd_netdata_plugin_cgroups_cpu_milliseconds_persec_average +nd_netdata_plugin_diskspace_dt_milliseconds_run_average +nd_netdata_plugin_diskspace_milliseconds_persec_average +nd_netdata_plugin_proc_cpu_milliseconds_persec_average +nd_netdata_plugin_proc_modules_milliseconds_run_average +nd_netdata_plugin_tc_cpu_milliseconds_persec_average +nd_netdata_plugin_tc_time_milliseconds_run_average +nd_netdata_private_charts_charts_average +nd_netdata_pythond_runtime_ms_average +nd_netdata_requests_requests_persec_average +nd_netdata_response_time_milliseconds_request_average +nd_netdata_server_cpu_milliseconds_persec_average +nd_netdata_statsd_bytes_kilobits_persec_average +nd_netdata_statsd_cpu_milliseconds_persec_average +nd_netdata_statsd_events_events_persec_average +nd_netdata_statsd_metrics_metrics_average +nd_netdata_statsd_packets_packets_persec_average +nd_netdata_statsd_reads_reads_persec_average +nd_netdata_statsd_useful_metrics_metrics_average +nd_netdata_tcp_connected_sockets_average +nd_netdata_tcp_connects_events_average +nd_netdata_web_cpu_milliseconds_persec_average nd_net_drops_drops_persec_average nd_net_net_kilobits_persec_average nd_net_packets_packets_persec_average 
+nd_services_cpu_percent_average +nd_services_mem_usage_MB_average +nd_services_swap_usage_MB_average +nd_services_throttle_io_ops_read_operations_persec_average +nd_services_throttle_io_ops_write_operations_persec_average +nd_services_throttle_io_read_kilobytes_persec_average +nd_services_throttle_io_write_kilobytes_persec_average nd_system_active_processes_processes_average nd_system_cpu_percent_average nd_system_ctxt_context_switches_persec_average @@ -85,6 +139,8 @@ nd_system_net_kilobits_persec_average nd_system_pgpgio_kilobytes_persec_average nd_system_processes_processes_average nd_system_ram_MB_average +nd_system_shared_memory_bytes_bytes_average +nd_system_shared_memory_segments_segments_average nd_system_softirqs_softirqs_persec_average nd_system_softnet_stat_events_persec_average nd_system_swapio_kilobytes_persec_average diff --git a/tests/backends/prometheus-avg.txt b/tests/backends/prometheus-avg.txt index eaed4fb7a..1aedff2b5 100644 --- a/tests/backends/prometheus-avg.txt +++ b/tests/backends/prometheus-avg.txt @@ -1,3 +1,20 @@ +nd_apps_cpu_percentage_average +nd_apps_cpu_system_percentage_average +nd_apps_cpu_user_percentage_average +nd_apps_files_open_files_average +nd_apps_lreads_KiB_persec_average +nd_apps_lwrites_KiB_persec_average +nd_apps_major_faults_page_faults_persec_average +nd_apps_mem_MiB_average +nd_apps_minor_faults_page_faults_persec_average +nd_apps_pipes_open_pipes_average +nd_apps_preads_KiB_persec_average +nd_apps_processes_processes_average +nd_apps_pwrites_KiB_persec_average +nd_apps_sockets_open_sockets_average +nd_apps_swap_MiB_average +nd_apps_threads_threads_average +nd_apps_vmem_MiB_average nd_cpu_core_throttling_events_persec_average nd_cpu_cpu_percentage_average nd_cpu_interrupts_interrupts_persec_average @@ -20,6 +37,7 @@ nd_ip_ecnpkts_packets_persec_average nd_ip_inerrors_packets_persec_average nd_ip_mcast_kilobits_persec_average nd_ip_mcastpkts_packets_persec_average +nd_ip_tcp_accept_queue_packets_persec_average 
nd_ip_tcpconnaborts_connections_persec_average nd_ip_tcpofo_packets_persec_average nd_ip_tcpreorders_packets_persec_average @@ -42,7 +60,6 @@ nd_ipv4_udperrors_events_persec_average nd_ipv4_udppackets_packets_persec_average nd_ipv6_ect_packets_persec_average nd_ipv6_errors_packets_persec_average -nd_ipv6_icmpechos_messages_persec_average nd_ipv6_icmperrors_errors_persec_average nd_ipv6_icmp_messages_persec_average nd_ipv6_icmpmldv2_reports_persec_average @@ -64,9 +81,46 @@ nd_mem_pgfaults_faults_persec_average nd_mem_slab_MiB_average nd_mem_transparent_hugepages_MiB_average nd_mem_writeback_MiB_average +nd_netdata_apps_children_fix_percentage_average +nd_netdata_apps_cpu_milliseconds_persec_average +nd_netdata_apps_fix_percentage_average +nd_netdata_apps_sizes_files_persec_average +nd_netdata_clients_connected_clients_average +nd_netdata_compression_ratio_percentage_average +nd_netdata_go_plugin_execution_time_ms_average +nd_netdata_net_kilobits_persec_average +nd_netdata_plugin_cgroups_cpu_milliseconds_persec_average +nd_netdata_plugin_diskspace_dt_milliseconds_run_average +nd_netdata_plugin_diskspace_milliseconds_persec_average +nd_netdata_plugin_proc_cpu_milliseconds_persec_average +nd_netdata_plugin_proc_modules_milliseconds_run_average +nd_netdata_plugin_tc_cpu_milliseconds_persec_average +nd_netdata_plugin_tc_time_milliseconds_run_average +nd_netdata_private_charts_charts_average +nd_netdata_pythond_runtime_ms_average +nd_netdata_requests_requests_persec_average +nd_netdata_response_time_milliseconds_request_average +nd_netdata_server_cpu_milliseconds_persec_average +nd_netdata_statsd_bytes_kilobits_persec_average +nd_netdata_statsd_cpu_milliseconds_persec_average +nd_netdata_statsd_events_events_persec_average +nd_netdata_statsd_metrics_metrics_average +nd_netdata_statsd_packets_packets_persec_average +nd_netdata_statsd_reads_reads_persec_average +nd_netdata_statsd_useful_metrics_metrics_average +nd_netdata_tcp_connected_sockets_average 
+nd_netdata_tcp_connects_events_average +nd_netdata_web_cpu_milliseconds_persec_average nd_net_drops_drops_persec_average nd_net_net_kilobits_persec_average nd_net_packets_packets_persec_average +nd_services_cpu_percentage_average +nd_services_mem_usage_MiB_average +nd_services_swap_usage_MiB_average +nd_services_throttle_io_ops_read_operations_persec_average +nd_services_throttle_io_ops_write_operations_persec_average +nd_services_throttle_io_read_KiB_persec_average +nd_services_throttle_io_write_KiB_persec_average nd_system_active_processes_processes_average nd_system_cpu_percentage_average nd_system_ctxt_context_switches_persec_average @@ -85,6 +139,8 @@ nd_system_net_kilobits_persec_average nd_system_pgpgio_KiB_persec_average nd_system_processes_processes_average nd_system_ram_MiB_average +nd_system_shared_memory_bytes_bytes_average +nd_system_shared_memory_segments_segments_average nd_system_softirqs_softirqs_persec_average nd_system_softnet_stat_events_persec_average nd_system_swapio_KiB_persec_average diff --git a/tests/backends/prometheus-raw.txt b/tests/backends/prometheus-raw.txt index 7caffc873..2ac4c2c7a 100644 --- a/tests/backends/prometheus-raw.txt +++ b/tests/backends/prometheus-raw.txt @@ -1,3 +1,20 @@ +nd_apps_cpu +nd_apps_cpu_system +nd_apps_cpu_user +nd_apps_files +nd_apps_lreads +nd_apps_lwrites +nd_apps_major_faults +nd_apps_mem +nd_apps_minor_faults +nd_apps_pipes +nd_apps_preads +nd_apps_processes +nd_apps_pwrites +nd_apps_sockets +nd_apps_swap +nd_apps_threads +nd_apps_vmem nd_cpu_core_throttling_total nd_cpu_cpu_total nd_cpu_interrupts_total @@ -20,6 +37,7 @@ nd_ip_ecnpkts_total nd_ip_inerrors_total nd_ip_mcastpkts_total nd_ip_mcast_total +nd_ip_tcp_accept_queue_total nd_ip_tcpconnaborts_total nd_ip_tcpofo_total nd_ip_tcpreorders_total @@ -42,7 +60,6 @@ nd_ipv4_udperrors_total nd_ipv4_udppackets_total nd_ipv6_ect_total nd_ipv6_errors_total -nd_ipv6_icmpechos_total nd_ipv6_icmperrors_total nd_ipv6_icmpmldv2_total nd_ipv6_icmpneighbor_total 
@@ -64,9 +81,54 @@ nd_mem_pgfaults_total nd_mem_slab nd_mem_transparent_hugepages nd_mem_writeback +nd_netdata_apps_children_fix +nd_netdata_apps_cpu_total +nd_netdata_apps_fix +nd_netdata_apps_sizes_calls_total +nd_netdata_apps_sizes_fds +nd_netdata_apps_sizes_filenames_total +nd_netdata_apps_sizes_files_total +nd_netdata_apps_sizes_inode_changes_total +nd_netdata_apps_sizes_link_changes_total +nd_netdata_apps_sizes_new_pids_total +nd_netdata_apps_sizes_pids +nd_netdata_apps_sizes_targets +nd_netdata_clients +nd_netdata_compression_ratio +nd_netdata_go_plugin_execution_time +nd_netdata_net_total +nd_netdata_plugin_cgroups_cpu_total +nd_netdata_plugin_diskspace_dt +nd_netdata_plugin_diskspace_total +nd_netdata_plugin_proc_cpu_total +nd_netdata_plugin_proc_modules +nd_netdata_plugin_tc_cpu_total +nd_netdata_plugin_tc_time +nd_netdata_private_charts +nd_netdata_pythond_runtime +nd_netdata_requests_total +nd_netdata_response_time +nd_netdata_server_cpu_total +nd_netdata_statsd_bytes_total +nd_netdata_statsd_cpu_total +nd_netdata_statsd_events_total +nd_netdata_statsd_metrics +nd_netdata_statsd_packets_total +nd_netdata_statsd_reads_total +nd_netdata_statsd_useful_metrics +nd_netdata_tcp_connected +nd_netdata_tcp_connects_total +nd_netdata_web_cpu_total nd_net_drops_total nd_net_net_total nd_net_packets_total +nd_services_cpu_total +nd_services_mem_usage +nd_services_swap_usage +nd_services_throttle_io_ops_read_total +nd_services_throttle_io_ops_write_total +nd_services_throttle_io_read_total +nd_services_throttle_io_write_total nd_system_active_processes nd_system_cpu_total nd_system_ctxt_total @@ -85,6 +147,8 @@ nd_system_net_total nd_system_pgpgio_total nd_system_processes nd_system_ram +nd_system_shared_memory_bytes +nd_system_shared_memory_segments nd_system_softirqs_total nd_system_softnet_stat_total nd_system_swap diff --git a/tests/backends/prometheus.bats b/tests/backends/prometheus.bats index d6ffa8d78..d52f39d54 100755 --- a/tests/backends/prometheus.bats 
+++ b/tests/backends/prometheus.bats @@ -5,7 +5,7 @@ validate_metrics() { params="${2}" curl -sS "http://localhost:19999/api/v1/allmetrics?format=prometheus&prefix=nd&timestamps=no${params}" | - grep -E 'nd_system_|nd_cpu_|nd_system_|nd_net_|nd_disk_|nd_ip_|nd_ipv4_|nd_ipv6_|nd_mem_' | + grep -E 'nd_system_|nd_cpu_|nd_system_|nd_net_|nd_disk_|nd_ip_|nd_ipv4_|nd_ipv6_|nd_mem_|nd_netdata_|nd_apps_|nd_services_' | sed -ne 's/{.*//p' | sort | uniq > tests/backends/new-${fname} diff tests/backends/${fname} tests/backends/new-${fname} rm tests/backends/new-${fname} diff --git a/tests/health_mgmtapi/README.md b/tests/health_mgmtapi/README.md index 278c72dc1..8473b35ea 100644 --- a/tests/health_mgmtapi/README.md +++ b/tests/health_mgmtapi/README.md @@ -4,9 +4,8 @@ The directory `tests/health_cmdapi` contains the test script `health-cmdapi-test The script can be executed with options to prepare the system for the tests, run them and restore the system to its previous state. -It depends on the management API being accessible and on the responses to the api/v1/alarms?all requests being functional. - -Run it with `tests/health_mgmtapi/health-cmdapi-test.sh -h` to see the options. +It depends on the management API being accessible on localhost:19999 and on the responses to the api/v1/alarms?all requests being functional. +It also requires read access to the management API key that is usually under `/var/lib/netdata/netdata.api.key` (`@varlibdir_POST@/netdata.api.key`).
[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Ftests%2Fhealth_mgmtapi%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/tests/health_mgmtapi/expected_list/ALARM_CPU_IOWAIT-list.json b/tests/health_mgmtapi/expected_list/ALARM_CPU_IOWAIT-list.json new file mode 100644 index 000000000..9f05efe70 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/ALARM_CPU_IOWAIT-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [ { "alarm": "*10min_cpu_iowait" }, { "alarm": "*10min_cpu_usage *load_trigger" } ] } diff --git a/tests/health_mgmtapi/expected_list/ALARM_CPU_USAGE-list.json b/tests/health_mgmtapi/expected_list/ALARM_CPU_USAGE-list.json new file mode 100644 index 000000000..dbf879925 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/ALARM_CPU_USAGE-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [ { "alarm": "*10min_cpu_usage *load_trigger", "context": "system.cpu" }, { "alarm": "*10min_cpu_usage *load_trigger", "chart": "system.load" } ] } diff --git a/tests/health_mgmtapi/expected_list/CONTEXT_SYSTEM_CPU-list.json b/tests/health_mgmtapi/expected_list/CONTEXT_SYSTEM_CPU-list.json new file mode 100644 index 000000000..a267cfd6f --- /dev/null +++ b/tests/health_mgmtapi/expected_list/CONTEXT_SYSTEM_CPU-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "DISABLE", "silencers": [ { "context": "system.cpu" }, { "chart": "system.load" } ] } diff --git a/tests/health_mgmtapi/expected_list/DISABLE-list.json b/tests/health_mgmtapi/expected_list/DISABLE-list.json new file mode 100644 index 000000000..c2c778104 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/DISABLE-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "DISABLE", "silencers": [ { "alarm": "*10min_cpu_usage *load_trigger" } ] } diff --git 
a/tests/health_mgmtapi/expected_list/DISABLE_ALL-list.json b/tests/health_mgmtapi/expected_list/DISABLE_ALL-list.json new file mode 100644 index 000000000..bbc3f4f0c --- /dev/null +++ b/tests/health_mgmtapi/expected_list/DISABLE_ALL-list.json @@ -0,0 +1 @@ +{ "all": true, "type": "DISABLE", "silencers": [] } diff --git a/tests/health_mgmtapi/expected_list/DISABLE_ALL_ERROR-list.json b/tests/health_mgmtapi/expected_list/DISABLE_ALL_ERROR-list.json new file mode 100644 index 000000000..e8aee1795 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/DISABLE_ALL_ERROR-list.json @@ -0,0 +1 @@ +Auth Error diff --git a/tests/health_mgmtapi/expected_list/DISABLE_SYSTEM_LOAD-list.json b/tests/health_mgmtapi/expected_list/DISABLE_SYSTEM_LOAD-list.json new file mode 100644 index 000000000..a7fc1cb8a --- /dev/null +++ b/tests/health_mgmtapi/expected_list/DISABLE_SYSTEM_LOAD-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "DISABLE", "silencers": [ { "chart": "system.load" } ] } diff --git a/tests/health_mgmtapi/expected_list/FAMILIES_LOAD-list.json b/tests/health_mgmtapi/expected_list/FAMILIES_LOAD-list.json new file mode 100644 index 000000000..50119f79c --- /dev/null +++ b/tests/health_mgmtapi/expected_list/FAMILIES_LOAD-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "None", "silencers": [ { "families": "load" } ] } diff --git a/tests/health_mgmtapi/expected_list/HOSTS-list.json b/tests/health_mgmtapi/expected_list/HOSTS-list.json new file mode 100644 index 000000000..9db21b6c3 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/HOSTS-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [ { "hosts": "*" } ] } diff --git a/tests/health_mgmtapi/expected_list/RESET-list.json b/tests/health_mgmtapi/expected_list/RESET-list.json new file mode 100644 index 000000000..2d3f09d68 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/RESET-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "None", "silencers": [] } diff --git 
a/tests/health_mgmtapi/expected_list/SILENCE-list.json b/tests/health_mgmtapi/expected_list/SILENCE-list.json new file mode 100644 index 000000000..d157f2d3a --- /dev/null +++ b/tests/health_mgmtapi/expected_list/SILENCE-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [ { "alarm": "*10min_cpu_usage *load_trigger" } ] } diff --git a/tests/health_mgmtapi/expected_list/SILENCE_2-list.json b/tests/health_mgmtapi/expected_list/SILENCE_2-list.json new file mode 100644 index 000000000..d5e6fa2d1 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/SILENCE_2-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [ { "families": "load" } ] } diff --git a/tests/health_mgmtapi/expected_list/SILENCE_3-list.json b/tests/health_mgmtapi/expected_list/SILENCE_3-list.json new file mode 100644 index 000000000..69e98cc19 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/SILENCE_3-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [] } WARNING: SILENCE or DISABLE command is ineffective without defining any alarm selectors. 
diff --git a/tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE-list.json b/tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE-list.json new file mode 100644 index 000000000..dd789cd33 --- /dev/null +++ b/tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [ { "alarm": "*10min_cpu_usage *load_trigger", "chart": "system.load" } ] } diff --git a/tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER-list.json b/tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER-list.json new file mode 100644 index 000000000..d157f2d3a --- /dev/null +++ b/tests/health_mgmtapi/expected_list/SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER-list.json @@ -0,0 +1 @@ +{ "all": false, "type": "SILENCE", "silencers": [ { "alarm": "*10min_cpu_usage *load_trigger" } ] } diff --git a/tests/health_mgmtapi/expected_list/SILENCE_ALL-list.json b/tests/health_mgmtapi/expected_list/SILENCE_ALL-list.json new file mode 100644 index 000000000..c88ef9fde --- /dev/null +++ b/tests/health_mgmtapi/expected_list/SILENCE_ALL-list.json @@ -0,0 +1 @@ +{ "all": true, "type": "SILENCE", "silencers": [] } diff --git a/tests/health_mgmtapi/health-cmdapi-test.sh.in b/tests/health_mgmtapi/health-cmdapi-test.sh.in index 1bbe269d5..5abf2b170 100755 --- a/tests/health_mgmtapi/health-cmdapi-test.sh.in +++ b/tests/health_mgmtapi/health-cmdapi-test.sh.in @@ -1,56 +1,72 @@ #!/usr/bin/env bash # shellcheck disable=SC1117,SC2034,SC2059,SC2086,SC2181 -NETDATA_USER_CONFIG_DIR="@configdir_POST@" -NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" NETDATA_VARLIB_DIR="@varlibdir_POST@" -printhelp () { - echo "Usage: health-cmdapi-test.sh [OPTIONS] - -s SETUP config files for python example tests - -c CLEANUP config files from python example tests - -r RESTART netdata after SETUP and CLEANUP, using systemctl restart netdata. 
- -t TEST scenarios execution - -u changes the host:port from localhost:19999 to - " -} - check () { - echo -e "${GRAY}Check: '${1}' in 2 sec" - sleep 2 - resp=$(curl -s "http://$URL/api/v1/alarms?all") + sec=1 + echo -e " ${GRAY}Check: '${1}' in $sec sec" + sleep $sec + number=$RANDOM + resp=$(curl -s "http://$URL/api/v1/alarms?all&$number") r=$(echo "${resp}" | \ python3 -c "import sys, json; d=json.load(sys.stdin); \ print(\ - d['alarms']['example.random.example_alarm1']['disabled'], \ - d['alarms']['example.random.example_alarm1']['silenced'] , \ - d['alarms']['example.random.example_alarm2']['disabled'], \ - d['alarms']['example.random.example_alarm2']['silenced'], \ + d['alarms']['system.cpu.10min_cpu_usage']['disabled'], \ + d['alarms']['system.cpu.10min_cpu_usage']['silenced'] , \ + d['alarms']['system.cpu.10min_cpu_iowait']['disabled'], \ + d['alarms']['system.cpu.10min_cpu_iowait']['silenced'], \ d['alarms']['system.load.load_trigger']['disabled'], \ d['alarms']['system.load.load_trigger']['silenced'], \ );" 2>&1) if [ $? -ne 0 ] ; then - echo -e "${RED}ERROR: Unexpected response '$resp'" + echo -e " ${RED}ERROR: Unexpected response stored in /tmp/resp-$number.json" + echo "$resp" > /tmp/resp-$number.json err=$((err+1)) + iter=0 elif [ "${r}" != "${2}" ] ; then - echo -e "${RED}ERROR: 'Got ${r}'. Expected '${2}'" - err=$((err+1)) + echo -e " ${GRAY}WARNING: 'Got ${r}'. Expected '${2}'" + iter=$((iter+1)) + if [ $iter -lt 10 ] ; then + echo -e " ${GRAY}Repeating test " + check "$1" "$2" + else + echo -e " ${RED}ERROR: 'Got ${r}'. 
Expected '${2}'" + iter=0 + err=$((err+1)) + fi else - echo -e "${GREEN}Success" + echo -e " ${GREEN}Success" + iter=0 fi } cmd () { - echo -e "${WHITE}Cmd '${1}', expecting '${2}'" + echo -e "${WHITE}Cmd '${1}'" + echo -en " ${GRAY}Expecting '${2}' : " RESPONSE=$(curl -s "http://$URL/api/v1/manage/health?${1}" -H "X-Auth-Token: $TOKEN" 2>&1) if [ "${RESPONSE}" != "${2}" ] ; then - echo -e "${RED}ERROR: Response '${RESPONSE}' != '${2}'" + echo -e "${RED}ERROR: Response '${RESPONSE}'" err=$((err+1)) else echo -e "${GREEN}Success" fi } +check_list() { + RESPONSE=$(curl -s "http://$URL/api/v1/manage/health?cmd=LIST" -H "X-Auth-Token: $TOKEN" 2>&1) + + NAME="$1-list.json" + echo $RESPONSE > $NAME + diff $NAME expected_list/$NAME 1>/dev/null 2>&1 + if [ $? -eq 0 ]; then + echo -e "${GREEN}Success: The list command got the correct answer for $NAME!" + else + echo -e "${RED}ERROR: the files $NAME and expected_list/$NAME does not match." + exit 1 + fi +} + WHITE='\033[0;37m' RED='\033[0;31m' GREEN='\033[0;32m' @@ -62,63 +78,8 @@ CLEANUP=0 TEST=0 URL="localhost:19999" -while getopts :srctu: option -do - case "$option" in - s) - SETUP=1 - ;; - r) - RESTART=1 - ;; - c) - CLEANUP=1 - ;; - t) - TEST=1 - ;; - u) - URL=$OPTARG - ;; - *) - printhelp - exit 1 - ;; - esac -done - -if [ $SETUP -eq 1 ] ; then - echo "Preparing netdata configuration for testing" - # Prep netdata for tests - if [ -f "${NETDATA_USER_CONFIG_DIR}/python.d.conf" ] ; then - cp -f "${NETDATA_USER_CONFIG_DIR}/python.d.conf" /tmp/python.d.conf - else - cp "${NETDATA_STOCK_CONFIG_DIR}/python.d.conf" "${NETDATA_USER_CONFIG_DIR}/" - fi - sed -i -e "s/example: no/example: yes/g" "${NETDATA_USER_CONFIG_DIR}/python.d.conf" - - mypath=$(cd ${0%/*} && echo $PWD) - - cp -f "${mypath}/python-example.conf" "${NETDATA_USER_CONFIG_DIR}/health.d/" - - # netdata.conf - if [ -f "${NETDATA_USER_CONFIG_DIR}/netdata.conf" ] ; then - cp -f "${NETDATA_USER_CONFIG_DIR}/netdata.conf" /tmp/netdata.conf - fi - printf "[health]\nrun at 
least every seconds = 1\n" > "${NETDATA_USER_CONFIG_DIR}/netdata.conf" - - chmod +r "${NETDATA_USER_CONFIG_DIR}/python.d.conf" "${NETDATA_USER_CONFIG_DIR}/netdata.conf" "${NETDATA_USER_CONFIG_DIR}/health.d/python-example.conf" "${NETDATA_STOCK_CONFIG_DIR}/health.d/load.conf" - # Restart netdata - if [ $RESTART -eq 1 ] ; then - echo "Restarting netdata" - systemctl restart netdata - fi -fi - err=0 -# Execute tests -if [ $TEST -eq 1 ] ; then HEALTH_CMDAPI_MSG_AUTHERROR="Auth Error" HEALTH_CMDAPI_MSG_SILENCEALL="All alarm notifications are silenced" @@ -143,11 +104,13 @@ if [ $TEST -eq 1 ] ; then # Test default state cmd "cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" check "Default State" "False False False False False False" + check_list "RESET" # Test auth failure TOKEN="Wrong token" cmd "cmd=DISABLE ALL" "$HEALTH_CMDAPI_MSG_AUTHERROR" check "Default State" "False False False False False False" + check_list "DISABLE_ALL_ERROR" # Set correct token TOKEN="${CORRECT_TOKEN}" @@ -155,108 +118,107 @@ if [ $TEST -eq 1 ] ; then # Test disable cmd "cmd=DISABLE ALL" "$HEALTH_CMDAPI_MSG_DISABLEALL" check "All disabled" "True False True False True False" + check_list "DISABLE_ALL" # Reset cmd "cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" check "Default State" "False False False False False False" + check_list "RESET" # Test silence cmd "cmd=SILENCE ALL" "$HEALTH_CMDAPI_MSG_SILENCEALL" check "All silenced" "False True False True False True" + check_list "SILENCE_ALL" # Reset cmd "cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" check "Default State" "False False False False False False" + check_list "RESET" # Add silencer by name printf -v resp "$HEALTH_CMDAPI_MSG_SILENCE\n$HEALTH_CMDAPI_MSG_ADDED" - cmd "cmd=SILENCE&alarm=*example_alarm1 *load_trigger" "${resp}" + cmd "cmd=SILENCE&alarm=*10min_cpu_usage *load_trigger" "${resp}" check "Silence notifications for alarm1 and load_trigger" "False True False False False True" + check_list "SILENCE_ALARM_CPU_USAGE_LOAD_TRIGGER" # Convert to disable health 
checks cmd "cmd=DISABLE" "$HEALTH_CMDAPI_MSG_DISABLE" check "Disable notifications for alarm1 and load_trigger" "True False False False True False" + check_list "DISABLE" # Convert back to silence notifications cmd "cmd=SILENCE" "$HEALTH_CMDAPI_MSG_SILENCE" check "Silence notifications for alarm1 and load_trigger" "False True False False False True" + check_list "SILENCE" # Add second silencer by name - cmd "alarm=*example_alarm2" "$HEALTH_CMDAPI_MSG_ADDED" + cmd "alarm=*10min_cpu_iowait" "$HEALTH_CMDAPI_MSG_ADDED" check "Silence notifications for alarm1,alarm2 and load_trigger" "False True False True False True" + check_list "ALARM_CPU_IOWAIT" # Reset cmd "cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" + check_list "RESET" # Add silencer by chart printf -v resp "$HEALTH_CMDAPI_MSG_DISABLE\n$HEALTH_CMDAPI_MSG_ADDED" cmd "cmd=DISABLE&chart=system.load" "${resp}" check "Default State" "False False False False True False" + check_list "DISABLE_SYSTEM_LOAD" # Add silencer by context - cmd "context=random" "$HEALTH_CMDAPI_MSG_ADDED" + cmd "context=system.cpu" "$HEALTH_CMDAPI_MSG_ADDED" check "Default State" "True False True False True False" + check_list "CONTEXT_SYSTEM_CPU" # Reset cmd "cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" + check_list "RESET" # Add second condition to a selector (AND) printf -v resp "$HEALTH_CMDAPI_MSG_SILENCE\n$HEALTH_CMDAPI_MSG_ADDED" - cmd "cmd=SILENCE&alarm=*example_alarm1 *load_trigger&chart=system.load" "${resp}" + cmd "cmd=SILENCE&alarm=*10min_cpu_usage *load_trigger&chart=system.load" "${resp}" check "Silence notifications load_trigger" "False False False False False True" + check_list "SILENCE_ALARM_CPU_USAGE" # Add second selector with two conditions - cmd "alarm=*example_alarm1 *load_trigger&context=random" "$HEALTH_CMDAPI_MSG_ADDED" + cmd "alarm=*10min_cpu_usage *load_trigger&context=system.cpu" "$HEALTH_CMDAPI_MSG_ADDED" check "Silence notifications load_trigger" "False True False False False True" + check_list "ALARM_CPU_USAGE" # Reset cmd 
"cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" + check_list "RESET" # Add silencer without a command to disable or silence alarms printf -v resp "$HEALTH_CMDAPI_MSG_ADDED\n$HEALTH_CMDAPI_MSG_STYPEWARNING" cmd "families=load" "${resp}" check "Family selector with no command" "False False False False False False" + check_list "FAMILIES_LOAD" # Add silence command cmd "cmd=SILENCE" "$HEALTH_CMDAPI_MSG_SILENCE" check "Silence family load" "False False False False False True" + check_list "SILENCE_2" # Reset cmd "cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" + check_list "RESET" # Add command without silencers printf -v resp "$HEALTH_CMDAPI_MSG_SILENCE\n$HEALTH_CMDAPI_MSG_NOSELECTORWARNING" cmd "cmd=SILENCE" "${resp}" check "Command with no selector" "False False False False False False" + check_list "SILENCE_3" # Add hosts silencer cmd "hosts=*" "$HEALTH_CMDAPI_MSG_ADDED" check "Silence all hosts" "False True False True False True" + check_list "HOSTS" # Reset cmd "cmd=RESET" "$HEALTH_CMDAPI_MSG_RESET" - -fi - -# Cleanup -if [ $CLEANUP -eq 1 ] ; then - echo -e "${WHITE}Restoring netdata configuration" - for f in "python.d.conf" "netdata.conf" ; do - if [ -f "/tmp/$f" ] ; then - mv -f "/tmp/$f" "${NETDATA_USER_CONFIG_DIR}/" - else - rm -f "${NETDATA_USER_CONFIG_DIR}/$f" - fi - done - - rm -f "${NETDATA_USER_CONFIG_DIR}/health.d/python-example.conf" - - # Restart netdata - if [ $RESTART -eq 1 ] ; then - echo "Restarting netdata" - systemctl restart netdata - fi -fi + check_list "RESET" if [ $err -gt 0 ] ; then echo "$err error(s) found" diff --git a/tests/health_mgmtapi/python-example.conf b/tests/health_mgmtapi/python-example.conf deleted file mode 100644 index 66713208c..000000000 --- a/tests/health_mgmtapi/python-example.conf +++ /dev/null @@ -1,16 +0,0 @@ -alarm: example_alarm1 - on: example.random - every: 2s - warn: $random1 > (($status >= $WARNING) ? (55) : (75)) - crit: $random1 > (($status == $CRITICAL) ? 
(75) : (95)) - info: random - to: sysadmin - -alarm: example_alarm2 - on: example.random - every: 2s - warn: $random2 > (($status >= $WARNING) ? (55) : (75)) - crit: $random2 > (($status == $CRITICAL) ? (75) : (95)) - info: random - to: sysadmin - diff --git a/tests/installer/slack.sh b/tests/installer/slack.sh index 3c5f94a45..83cb5fa7c 100755 --- a/tests/installer/slack.sh +++ b/tests/installer/slack.sh @@ -13,6 +13,7 @@ post_message() { TYPE="$1" MESSAGE="$2" + CUSTOM_CHANNEL="$3" case "$TYPE" in "PLAIN_MESSAGE") @@ -24,7 +25,13 @@ post_message() { EVENT_LINE="${TRAVIS_JOB_NUMBER}: Event type '${TRAVIS_EVENT_TYPE}' #${TRAVIS_PULL_REQUEST}, on '${TRAVIS_OS_NAME}' " fi + if [ -n "${CUSTOM_CHANNEL}" ]; then + echo "Sending travis message to custom channel ${CUSTOM_CHANNEL}" + OPTIONAL_CHANNEL_INFO="\"channel\": \"${CUSTOM_CHANNEL}\"," + fi + POST_MESSAGE="{ + ${OPTIONAL_CHANNEL_INFO} \"text\": \"${TRAVIS_REPO_SLUG}, ${MESSAGE}\", \"attachments\": [{ \"text\": \"${TRAVIS_JOB_NUMBER}: Event type '${TRAVIS_EVENT_TYPE}', on '${TRAVIS_OS_NAME}' \", diff --git a/tests/updater_checks.bats b/tests/updater_checks.bats index e177fe4e5..1a7eeb704 100755 --- a/tests/updater_checks.bats +++ b/tests/updater_checks.bats @@ -25,7 +25,6 @@ DIRS="usr/sbin/netdata var/log/netdata" setup() { - # If we are not in netdata git repo, at the top level directory, fail TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)") CWD=$(git rev-parse --show-cdup || echo "") diff --git a/tests/updater_checks.sh b/tests/updater_checks.sh index dce136853..9c8b6fa48 100755 --- a/tests/updater_checks.sh +++ b/tests/updater_checks.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/usr/bin/env sh # # Wrapper script that installs the required dependencies # for the BATS script to run successfully @@ -8,9 +8,71 @@ # Author : Pavlos Emm. 
Katsoulakis /dev/null 2>&1; then + echo "Executing grep installation" + pacman -Sy + pacman --noconfirm --needed -S grep + fi +} +blind_arch_grep_install || echo "Workaround failed, proceed as usual" + +running_os="$(cat /etc/os-release |grep '^ID=' | cut -d'=' -f2 | sed -e 's/"//g')" + +case "${running_os}" in +"centos"|"fedora") + echo "Running on CentOS, updating YUM repository.." + yum clean all + yum update -y + + echo "Installing extra dependencies.." + yum install -y epel-release + yum install -y bats curl + ;; +"debian"|"ubuntu") + echo "Running ${running_os}, updating APT repository" + apt-get update -y + apt-get install -y bats curl + ;; +"opensuse-leap"|"opensuse-tumbleweed") + zypper update -y + zypper install -y bats curl + ;; +"arch") + pacman -Sy + pacman --noconfirm --needed -S bash-bats curl + ;; +"alpine") + apk update + apk add bash curl bats + ;; +*) + echo "Running on ${running_os}, no repository preparation done" + ;; +esac + +# Download and run dependency scriptlet, before anything else +# +deps_tool="/tmp/deps_tool.$$.sh" +curl -Ss -o ${deps_tool} https://raw.githubusercontent.com/netdata/netdata-demo-site/master/install-required-packages.sh +if [ -f "${deps_tool}" ]; then + echo "Running dependency handling script.." + chmod +x "${deps_tool}" + ${deps_tool} --non-interactive netdata + rm -f "${deps_tool}" + echo "Done!" +else + echo "Failed to fetch dependency script, aborting the test" + exit 1 +fi echo "Running BATS file.." bats --tap tests/updater_checks.bats diff --git a/web/README.md b/web/README.md index c110ef651..5c1a06f59 100644 --- a/web/README.md +++ b/web/README.md @@ -14,7 +14,7 @@ For our convenience, Netdata provides 2 more layers: Charts information is stored at /usr/share/netdata/web/[dashboard_info.js](gui/dashboard_info.js). This file includes information that is rendered on the dashboard, controls chart colors, section and subsection heading, titles, etc.
-If you change that file, your changes will be overwritten when Netdata is updated. You can preserve your settings by creating a new such file (there is /usr/share/netdata/web/[dashboard_info_custom.example.js](gui/dashboard_info_custom_example.js) you can use to start with). +If you change that file, your changes will be overwritten when Netdata is updated. You can preserve your settings by creating a new such file (there is /usr/share/netdata/web/[dashboard_info_custom_example.js](gui/dashboard_info_custom_example.js) you can use to start with). You have to copy the example file under a new name, so that it will not be overwritten with Netdata updates. diff --git a/web/api/health/README.md b/web/api/health/README.md index 2003a61e0..66a80d5f6 100644 --- a/web/api/health/README.md +++ b/web/api/health/README.md @@ -45,6 +45,7 @@ The following will return an SVG badge of the alarm named `NAME`, attached to th ## Health Management API Netdata v1.12 and beyond provides a command API to control health checks and notifications at runtime. The feature is especially useful for maintenance periods, during which you receive meaningless alarms. +From Netdata v1.16.0 and beyond, the configuration controlled via the API commands is [persisted across netdata restarts](#persistence). Specifically, the API allows you to: - Disable health checks completely. Alarm conditions will not be evaluated at all and no entries will be added to the alarm log. @@ -142,6 +143,43 @@ Example 2.2: Add one more selector, to also silence alarms for cpu1 and cpu2 http://localhost/api/v1/manage/health?families=cpu1 cpu2 ``` +### List silencers + +The command `LIST` was added in netdata v1.16.0 and returns a JSON with the current status of the silencers. 
+ +``` + curl "http://myserver/api/v1/manage/health?cmd=LIST" -H "X-Auth-Token: Mytoken" +``` + +As an example, the following response shows that we have two silencers configured, one for an alarm called `samplealarm` and one for alarms with context `random` on host `myhost` +``` +json +{ + "all": false, + "type": "SILENCE", + "silencers": [ + { + "alarm": "samplealarm" + }, + { + "context": "random", + "hosts": "myhost" + } + ] +} +``` + +The response below shows that we have disabled all health checks. + +``` +json +{ + "all": true, + "type": "DISABLE", + "silencers": [] +} + + ### Responses - "Auth Error" : Token authentication failed @@ -155,6 +193,17 @@ http://localhost/api/v1/manage/health?families=cpu1 cpu2 - "WARNING: Added alarm selector to silence/disable alarms without a SILENCE or DISABLE command." : Added to the response if a selector is added without a selector-specific command. - "WARNING: SILENCE or DISABLE command is ineffective without defining any alarm selectors." : Added to the response if a selector-specific command is issued without a selector. +### Persistence + +From netdata v1.16.0 and beyond, the silencers configuration is persisted to disk and loaded when netdata starts. +The JSON string returned by the [LIST command](#list-silencers) is automatically saved to the `silencers file`, every time a command alters the silencers configuration. +The file's location is configurable in `netdata.conf`. The default is shown below: + +``` +[health] + # silencers file = /var/lib/netdata/health.silencers.json +``` + ### Further reading The test script under [tests/health_mgmtapi](../../../tests/health_mgmtapi) contains a series of tests that you can either run or read through to understand the various calls and responses better. 
diff --git a/web/api/health/health_cmdapi.c b/web/api/health/health_cmdapi.c index ec177751b..468054c67 100644 --- a/web/api/health/health_cmdapi.c +++ b/web/api/health/health_cmdapi.c @@ -1,17 +1,16 @@ // -// Created by christopher on 11/12/18. +// Created by Christopher on 11/12/18. // #include "health_cmdapi.h" - -static SILENCER *create_silencer(void) { - SILENCER *t = callocz(1, sizeof(SILENCER)); - debug(D_HEALTH, "HEALTH command API: Created empty silencer"); - - return t; -} - +/** + * Free Silencers + * + * Clean the silencer structure + * + * @param t is the structure that will be cleaned. + */ void free_silencers(SILENCER *t) { if (!t) return; if (t->next) free_silencers(t->next); @@ -31,38 +30,104 @@ void free_silencers(SILENCER *t) { return; } +/** + * Silencers to JSON Entry + * + * Fill the buffer with the other values given. + * + * @param wb a pointer to the output buffer + * @param var the json variable + * @param val the json value + * @param hasprev has it a previous value? + * + * @return + */ +int health_silencers2json_entry(BUFFER *wb, char* var, char* val, int hasprev) { + if (val) { + buffer_sprintf(wb, "%s\n\t\t\t\"%s\": \"%s\"", (hasprev)?",":"", var, val); + return 1; + } else { + return hasprev; + } +} +/** + * Silencer to JSON + * + * Write the silencer values using JSON format inside a buffer. + * + * @param wb is the buffer to write the silencers. 
+ */ +void health_silencers2json(BUFFER *wb) { + buffer_sprintf(wb, "{\n\t\"all\": %s," + "\n\t\"type\": \"%s\"," + "\n\t\"silencers\": [", + (silencers->all_alarms)?"true":"false", + (silencers->stype == STYPE_NONE)?"None":((silencers->stype == STYPE_DISABLE_ALARMS)?"DISABLE":"SILENCE")); + + SILENCER *silencer; + int i = 0, j = 0; + for(silencer = silencers->silencers; silencer ; silencer = silencer->next) { + if(likely(i)) buffer_strcat(wb, ","); + buffer_strcat(wb, "\n\t\t{"); + j=health_silencers2json_entry(wb, HEALTH_ALARM_KEY, silencer->alarms, j); + j=health_silencers2json_entry(wb, HEALTH_CHART_KEY, silencer->charts, j); + j=health_silencers2json_entry(wb, HEALTH_CONTEXT_KEY, silencer->contexts, j); + j=health_silencers2json_entry(wb, HEALTH_HOST_KEY, silencer->hosts, j); + health_silencers2json_entry(wb, HEALTH_FAMILIES_KEY, silencer->families, j); + j=0; + buffer_strcat(wb, "\n\t\t}"); + i++; + } + if(likely(i)) buffer_strcat(wb, "\n\t"); + buffer_strcat(wb, "]\n}\n"); +} +/** + * Silencer to FILE + * + * Write the silencer buffer to a file. + * @param wb + */ +void health_silencers2file(BUFFER *wb) { + if (wb->len == 0) return; + + FILE *fd = fopen(silencers_filename, "wb"); + if(fd) { + size_t written = (size_t)fprintf(fd, "%s", wb->buffer) ; + if (written == wb->len ) { + info("Silencer changes written to %s", silencers_filename); + } + fclose(fd); + return; + } + error("Silencer changes could not be written to %s. Error %s", silencers_filename, strerror(errno)); +} + +/** + * Request V1 MGMT Health + * + * Function called by api to manage the health. + * + * @param host main structure with client information! + * @param w is the structure with all information of the client request. + * @param url is the url that netdata is working + * + * @return It returns 200 on success and another code otherwise.
+ */ int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, char *url) { int ret = 400; (void) host; - - BUFFER *wb = w->response.data; buffer_flush(wb); wb->contenttype = CT_TEXT_PLAIN; buffer_flush(w->response.data); - static uint32_t - hash_alarm = 0, - hash_template = 0, - hash_chart = 0, - hash_context = 0, - hash_host = 0, - hash_families = 0; - - if (unlikely(!hash_alarm)) { - hash_alarm = simple_uhash(HEALTH_ALARM_KEY); - hash_template = simple_uhash(HEALTH_TEMPLATE_KEY); - hash_chart = simple_uhash(HEALTH_CHART_KEY); - hash_context = simple_uhash(HEALTH_CONTEXT_KEY); - hash_host = simple_uhash(HEALTH_HOST_KEY); - hash_families = simple_uhash(HEALTH_FAMILIES_KEY); - } - + //Local instance of the silencer SILENCER *silencer = NULL; + int config_changed = 1; if (!w->auth_bearer_token) { buffer_strcat(wb, HEALTH_CMDAPI_MSG_AUTHERROR); @@ -105,50 +170,17 @@ int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, c free_silencers(silencers->silencers); silencers->silencers = NULL; buffer_strcat(wb, HEALTH_CMDAPI_MSG_RESET); + } else if (!strcmp(value, HEALTH_CMDAPI_CMD_LIST)) { + w->response.data->contenttype = CT_APPLICATION_JSON; + health_silencers2json(wb); + config_changed=0; } } else { - uint32_t hash = simple_uhash(key); - if (unlikely(silencer == NULL)) { - if ( - (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) || - (hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) || - (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) || - (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) || - (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) || - (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) - ) { - silencer = create_silencer(); - } - } - - if (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) { - silencer->alarms = strdupz(value); - silencer->alarms_pattern = simple_pattern_create(silencer->alarms, NULL, SIMPLE_PATTERN_EXACT); - } else if 
(hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) { - silencer->charts = strdupz(value); - silencer->charts_pattern = simple_pattern_create(silencer->charts, NULL, SIMPLE_PATTERN_EXACT); - } else if (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) { - silencer->contexts = strdupz(value); - silencer->contexts_pattern = simple_pattern_create(silencer->contexts, NULL, SIMPLE_PATTERN_EXACT); - } else if (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) { - silencer->hosts = strdupz(value); - silencer->hosts_pattern = simple_pattern_create(silencer->hosts, NULL, SIMPLE_PATTERN_EXACT); - } else if (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) { - silencer->families = strdupz(value); - silencer->families_pattern = simple_pattern_create(silencer->families, NULL, SIMPLE_PATTERN_EXACT); - } else { - buffer_strcat(wb, HEALTH_CMDAPI_MSG_INVALID_KEY); - } + silencer = health_silencers_addparam(silencer, key, value); } - } if (likely(silencer)) { - // Add the created instance to the linked list in silencers - silencer->next = silencers->silencers; - silencers->silencers = silencer; - debug(D_HEALTH, "HEALTH command API: Added silencer %s:%s:%s:%s:%s", silencer->alarms, - silencer->charts, silencer->contexts, silencer->hosts, silencer->families - ); + health_silencers_add(silencer); buffer_strcat(wb, HEALTH_CMDAPI_MSG_ADDED); if (silencers->stype == STYPE_NONE) { buffer_strcat(wb, HEALTH_CMDAPI_MSG_STYPEWARNING); @@ -162,5 +194,11 @@ int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, c } w->response.data = wb; buffer_no_cacheable(w->response.data); + if (ret == 200 && config_changed) { + BUFFER *jsonb = buffer_create(200); + health_silencers2json(jsonb); + health_silencers2file(jsonb); + } + return ret; } diff --git a/web/api/health/health_cmdapi.h b/web/api/health/health_cmdapi.h index d0f30401c..d8ec6aaa0 100644 --- a/web/api/health/health_cmdapi.h +++ b/web/api/health/health_cmdapi.h @@ -12,6 +12,7 @@ 
#define HEALTH_CMDAPI_CMD_SILENCE "SILENCE" #define HEALTH_CMDAPI_CMD_DISABLE "DISABLE" #define HEALTH_CMDAPI_CMD_RESET "RESET" +#define HEALTH_CMDAPI_CMD_LIST "LIST" #define HEALTH_CMDAPI_MSG_AUTHERROR "Auth Error\n" #define HEALTH_CMDAPI_MSG_SILENCEALL "All alarm notifications are silenced\n" @@ -20,7 +21,6 @@ #define HEALTH_CMDAPI_MSG_DISABLE "Health checks disabled for alarms matching the selectors\n" #define HEALTH_CMDAPI_MSG_SILENCE "Alarm notifications silenced for alarms matching the selectors\n" #define HEALTH_CMDAPI_MSG_ADDED "Alarm selector added\n" -#define HEALTH_CMDAPI_MSG_INVALID_KEY "Invalid key. Ignoring it.\n" #define HEALTH_CMDAPI_MSG_STYPEWARNING "WARNING: Added alarm selector to silence/disable alarms without a SILENCE or DISABLE command.\n" #define HEALTH_CMDAPI_MSG_NOSELECTORWARNING "WARNING: SILENCE or DISABLE command is ineffective without defining any alarm selectors.\n" diff --git a/web/api/netdata-swagger.json b/web/api/netdata-swagger.json index 2fa55c4fa..63bc5638d 100644 --- a/web/api/netdata-swagger.json +++ b/web/api/netdata-swagger.json @@ -77,6 +77,39 @@ } } }, + "/alarm_variables": { + "get": { + "summary": "List variables available to configure alarms for a chart", + "description": "Returns the basic information of a chart and all the variables that can be used in alarm and template health configurations for the particular chart or family", + "parameters": [ + { + "name": "chart", + "in": "query", + "description": "The id of the chart as returned by the /charts call.", + "required": true, + "type": "string", + "format": "as returned by /charts" + } + ], + "responses": { + "200": { + "description": "A javascript object with information about the chart and the available variables", + "schema": { + "$ref": "#/definitions/alarm_variables" + } + }, + "400": { + "description": "Bad request - the body will include a message stating what is wrong." + }, + "404": { + "description": "No chart with the given id is found." 
+ }, + "500": { + "description": "Internal server error. This usually means the server is out of memory." + } + } + } + }, "/data": { "get": { "summary": "Get collected data for a specific chart", @@ -631,7 +664,7 @@ { "name": "cmd", "in": "query", - "description": "DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors.", + "description": "DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors. LIST: Show active configuration.", "required": false, "type": "string", "enum": [ @@ -639,7 +672,8 @@ "SILENCE ALL", "DISABLE", "SILENCE", - "RESET" + "RESET", + "LIST" ] }, { @@ -951,6 +985,70 @@ } } }, + "alarm_variables": { + "type": "object", + "properties": { + "chart": { + "type": "string", + "description": "The unique id of the chart" + }, + "chart_name": { + "type": "string", + "description": "The name of the chart" + }, + "cnart_context": { + "type": "string", + "description": "The context of the chart. It is shared across multiple monitored software or hardware instances and used in alarm templates" + }, + "family": { + "type": "string", + "description": "The family of the chart." + }, + "host": { + "type": "string", + "description": "The host containing the chart." 
+ }, + "chart_variables": { + "type": "object", + "properties": { + "varname1": { + "type": "number", + "format": "float" + }, + "varname2": { + "type": "number", + "format": "float" + } + } + }, + "family_variables": { + "type": "object", + "properties": { + "varname1": { + "type": "number", + "format": "float" + }, + "varname2": { + "type": "number", + "format": "float" + } + } + }, + "host_variables": { + "type": "object", + "properties": { + "varname1": { + "type": "number", + "format": "float" + }, + "varname2": { + "type": "number", + "format": "float" + } + } + } + } + }, "dimension": { "type": "object", "properties": { @@ -1208,6 +1306,14 @@ "crit_parsed": { "type": "string" }, + "warn_repeat_every": { + "type": "integer", + "format": "int32" + }, + "crit_repeat_every": { + "type": "integer", + "format": "int32" + }, "green": { "type": "string", "format": "nullable" diff --git a/web/api/netdata-swagger.yaml b/web/api/netdata-swagger.yaml index c021efefa..3386e01a7 100644 --- a/web/api/netdata-swagger.yaml +++ b/web/api/netdata-swagger.yaml @@ -63,6 +63,28 @@ paths: $ref: '#/definitions/chart' '404': description: 'No chart with the given id is found.' + /alarm_variables: + get: + summary: 'List variables available to configure alarms for a chart' + description: 'Returns the basic information of a chart and all the variables that can be used in alarm and template health configurations for the particular chart or family' + parameters: + - name: chart + in: query + description: 'The id of the chart as returned by the /charts call.' + required: true + type: string + format: 'as returned by /charts' + responses: + '200': + description: 'A javascript object with information about the chart and the available variables' + schema: + $ref: '#/definitions/alarm_variables' + '400': + description: 'Bad request - the body will include a message stating what is wrong.' + '404': + description: 'No chart with the given id is found.' 
+ '500': + description: 'Internal server error. This usually means the server is out of memory.' /data: get: summary: 'Get collected data for a specific chart' @@ -415,10 +437,10 @@ paths: parameters: - name: cmd in: query - description: 'DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors.' + description: 'DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors. LIST: Show active configuration.' required: false type: string - enum: ['DISABLE ALL', 'SILENCE ALL', 'DISABLE', 'SILENCE', 'RESET'] + enum: ['DISABLE ALL', 'SILENCE ALL', 'DISABLE', 'SILENCE', 'RESET', 'LIST'] - name: alarm in: query description: 'The expression provided will match both `alarm` and `template` names.' @@ -638,6 +660,51 @@ definitions: red: type: number description: 'Chart health red trheshold' + alarm_variables: + type: object + properties: + chart: + type: string + description: 'The unique id of the chart' + chart_name: + type: string + description: 'The name of the chart' + cnart_context: + type: string + description: 'The context of the chart. It is shared across multiple monitored software or hardware instances and used in alarm templates' + family: + type: string + description: 'The family of the chart.' + host: + type: string + description: 'The host containing the chart.' 
+ chart_variables: + type: object + properties: + varname1: + type: number + format: float + varname2: + type: number + format: float + family_variables: + type: object + properties: + varname1: + type: number + format: float + varname2: + type: number + format: float + host_variables: + type: object + properties: + varname1: + type: number + format: float + varname2: + type: number + format: float dimension: type: object properties: @@ -825,6 +892,12 @@ definitions: type: string crit_parsed: type: string + warn_repeat_every: + type: integer + format: int32 + crit_repeat_every: + type: integer + format: int32 green: type: string format: nullable diff --git a/web/gui/Makefile.am b/web/gui/Makefile.am index 7d1ceef91..ef8aa05fd 100644 --- a/web/gui/Makefile.am +++ b/web/gui/Makefile.am @@ -61,6 +61,7 @@ dist_web_DATA = \ index.html \ main.css \ main.js \ + console.html \ infographic.html \ robots.txt \ refresh-badges.js \ @@ -69,12 +70,6 @@ dist_web_DATA = \ version.txt \ $(NULL) - -webconsoledir=$(webdir)/console -dist_webconsole_DATA = \ - console/index.html \ - $(NULL) - webstaticdir=$(webdir)/static/img dist_webstatic_DATA = \ static/img/netdata-logomark.svg \ diff --git a/web/gui/console.html b/web/gui/console.html new file mode 100644 index 000000000..942c8c3cd --- /dev/null +++ b/web/gui/console.html @@ -0,0 +1,72 @@ + + + + + Netdata Console + + + + + + + + + + + + + + + + + + + +
+ + + + + + \ No newline at end of file diff --git a/web/gui/console/index.html b/web/gui/console/index.html deleted file mode 100644 index 723201919..000000000 --- a/web/gui/console/index.html +++ /dev/null @@ -1,72 +0,0 @@ - - - - - Netdata Console - - - - - - - - - - - - - - - - - - - -
- - - - - - \ No newline at end of file diff --git a/web/gui/dashboard_info.js b/web/gui/dashboard_info.js index aab11ba34..0013311e8 100644 --- a/web/gui/dashboard_info.js +++ b/web/gui/dashboard_info.js @@ -267,6 +267,12 @@ netdataDashboard.menu = { info: 'Performance metrics for RetroShare. RetroShare is open source software for encrypted filesharing, serverless email, instant messaging, online chat, and BBS, based on a friend-to-friend network built on GNU Privacy Guard (GPG).' }, + 'riakkv': { + title: 'Riak KV', + icon: '', + info: 'Metrics for Riak KV, the distributed key-value store.' + }, + 'ipfs': { title: 'IPFS', icon: '', @@ -279,6 +285,13 @@ netdataDashboard.menu = { info: 'Performance metrics for PHP-FPM, an alternative FastCGI implementation for PHP.' }, + 'pihole': { + title: 'Pi-hole', + icon: '', + info: 'Metrics for Pi-hole, a black hole for Internet advertisements.' + + ' The metrics returned by Pi-Hole API is all from the last 24 hours.' + }, + 'portcheck': { title: 'Port Check', icon: '', @@ -324,7 +337,7 @@ netdataDashboard.menu = { 'web_log': { title: undefined, icon: '', - info: 'Information extracted from a server log file. web_log plugin incrementally parses the server log file to provide, in real-time, a break down of key server performance metrics. For web servers, an extended log file format may optionally be used (for nginx and apache) offering timing information and bandwidth for both requests and responses. web_log plugin may also be configured to provide a break down of requests per URL pattern (check /etc/netdata/python.d/web_log.conf).' + info: 'Information extracted from a server log file. web_log plugin incrementally parses the server log file to provide, in real-time, a break down of key server performance metrics. For web servers, an extended log file format may optionally be used (for nginx and apache) offering timing information and bandwidth for both requests and responses. 
web_log plugin may also be configured to provide a break down of requests per URL pattern (check /etc/netdata/python.d/web_log.conf).' }, 'named': { @@ -461,6 +474,18 @@ netdataDashboard.menu = { title: '', icon: '', info: 'Xen domain resource utilization metrics. Netdata reads this information using xenstat library which gives access to the resource usage information (CPU, memory, disk I/O, network) for a virtual machine.' + }, + + 'wmi': { + title: 'wmi', + icon: '', + info: undefined + }, + + 'perf': { + title: 'Perf Counters', + icon: '', + info: 'Performance Monitoring Counters (PMC). Data collected using perf_event_open() system call which utilises Hardware Performance Monitoring Units (PMU).' } }; @@ -515,7 +540,7 @@ netdataDashboard.submenu = { }, 'web_log.urls': { - info: 'Number of requests for each URL pattern defined in /etc/netdata/python.d/web_log.conf. This chart counts all requests matching the URL patterns defined, independently of the web server response codes (i.e. both successful and unsuccessful).' + info: 'Number of requests for each URL pattern defined in /etc/netdata/python.d/web_log.conf. This chart counts all requests matching the URL patterns defined, independently of the web server response codes (i.e. both successful and unsuccessful).' }, 'web_log.clients': { @@ -1163,6 +1188,10 @@ netdataDashboard.context = { '' }, + 'mysql.innodb_deadlocks': { + info: 'A deadlock happens when two or more transactions mutually hold and request for locks, creating a cycle of dependencies. For more information about how to minimize and handle deadlocks.' + }, + // ------------------------------------------------------------------------ // POSTGRESQL @@ -1806,7 +1835,7 @@ netdataDashboard.context = { }, 'web_log.clients_all': { - info: 'Unique client IPs accessing the web server since the last restart of netdata. This plugin keeps in memory all the unique IPs that have accessed the web server. 
On very busy web servers (several millions of unique IPs) you may want to disable this chart (check /etc/netdata/python.d/web_log.conf).' + info: 'Unique client IPs accessing the web server since the last restart of netdata. This plugin keeps in memory all the unique IPs that have accessed the web server. On very busy web servers (several millions of unique IPs) you may want to disable this chart (check /etc/netdata/python.d/web_log.conf).' }, // ------------------------------------------------------------------------ @@ -1937,7 +1966,7 @@ netdataDashboard.context = { }, 'web_log.squid_clients_all': { - info: 'Unique client IPs accessing squid since the last restart of netdata. This plugin keeps in memory all the unique IPs that have accessed the server. On very busy squid servers (several millions of unique IPs) you may want to disable this chart (check /etc/netdata/python.d/web_log.conf).' + info: 'Unique client IPs accessing squid since the last restart of netdata. This plugin keeps in memory all the unique IPs that have accessed the server. On very busy squid servers (several millions of unique IPs) you may want to disable this chart (check /etc/netdata/python.d/web_log.conf).' }, 'web_log.squid_transport_methods': { diff --git a/web/gui/demosites.html b/web/gui/demosites.html index f908e0b4c..e00fbbfdd 100644 --- a/web/gui/demosites.html +++ b/web/gui/demosites.html @@ -2,6 +2,7 @@ + NetData: Get control of your Linux Servers. Simple. Effective. Awesome. 
diff --git a/web/gui/favicon.ico b/web/gui/favicon.ico index 857c582d1..064032aee 100644 Binary files a/web/gui/favicon.ico and b/web/gui/favicon.ico differ diff --git a/web/gui/images/android-icon-144x144.png b/web/gui/images/android-icon-144x144.png index c3013cc96..69efa5a23 100644 Binary files a/web/gui/images/android-icon-144x144.png and b/web/gui/images/android-icon-144x144.png differ diff --git a/web/gui/images/android-icon-192x192.png b/web/gui/images/android-icon-192x192.png index 77d18d9cc..e57443575 100644 Binary files a/web/gui/images/android-icon-192x192.png and b/web/gui/images/android-icon-192x192.png differ diff --git a/web/gui/images/android-icon-36x36.png b/web/gui/images/android-icon-36x36.png index 74576f6ba..4ba804d9c 100644 Binary files a/web/gui/images/android-icon-36x36.png and b/web/gui/images/android-icon-36x36.png differ diff --git a/web/gui/images/android-icon-48x48.png b/web/gui/images/android-icon-48x48.png index 5666fa102..04970d4bb 100644 Binary files a/web/gui/images/android-icon-48x48.png and b/web/gui/images/android-icon-48x48.png differ diff --git a/web/gui/images/android-icon-72x72.png b/web/gui/images/android-icon-72x72.png index 7f7043f14..5cbc701e2 100644 Binary files a/web/gui/images/android-icon-72x72.png and b/web/gui/images/android-icon-72x72.png differ diff --git a/web/gui/images/android-icon-96x96.png b/web/gui/images/android-icon-96x96.png index 1bbf594de..21f27cea8 100644 Binary files a/web/gui/images/android-icon-96x96.png and b/web/gui/images/android-icon-96x96.png differ diff --git a/web/gui/images/apple-icon-114x114.png b/web/gui/images/apple-icon-114x114.png index 7d093e856..7993e055d 100644 Binary files a/web/gui/images/apple-icon-114x114.png and b/web/gui/images/apple-icon-114x114.png differ diff --git a/web/gui/images/apple-icon-120x120.png b/web/gui/images/apple-icon-120x120.png index d4c38e7b1..3fbe8fda3 100644 Binary files a/web/gui/images/apple-icon-120x120.png and b/web/gui/images/apple-icon-120x120.png 
differ diff --git a/web/gui/images/apple-icon-144x144.png b/web/gui/images/apple-icon-144x144.png index c3013cc96..8d465692b 100644 Binary files a/web/gui/images/apple-icon-144x144.png and b/web/gui/images/apple-icon-144x144.png differ diff --git a/web/gui/images/apple-icon-152x152.png b/web/gui/images/apple-icon-152x152.png index c92f38172..11a10723d 100644 Binary files a/web/gui/images/apple-icon-152x152.png and b/web/gui/images/apple-icon-152x152.png differ diff --git a/web/gui/images/apple-icon-180x180.png b/web/gui/images/apple-icon-180x180.png index 1a58fdbb2..314efb122 100644 Binary files a/web/gui/images/apple-icon-180x180.png and b/web/gui/images/apple-icon-180x180.png differ diff --git a/web/gui/images/apple-icon-57x57.png b/web/gui/images/apple-icon-57x57.png index 36c273ced..852836161 100644 Binary files a/web/gui/images/apple-icon-57x57.png and b/web/gui/images/apple-icon-57x57.png differ diff --git a/web/gui/images/apple-icon-60x60.png b/web/gui/images/apple-icon-60x60.png index c3c48c8bd..2662e85d6 100644 Binary files a/web/gui/images/apple-icon-60x60.png and b/web/gui/images/apple-icon-60x60.png differ diff --git a/web/gui/images/apple-icon-72x72.png b/web/gui/images/apple-icon-72x72.png index 7f7043f14..4a6b056e0 100644 Binary files a/web/gui/images/apple-icon-72x72.png and b/web/gui/images/apple-icon-72x72.png differ diff --git a/web/gui/images/apple-icon-76x76.png b/web/gui/images/apple-icon-76x76.png index b5e73cd4e..c2bf6c9f5 100644 Binary files a/web/gui/images/apple-icon-76x76.png and b/web/gui/images/apple-icon-76x76.png differ diff --git a/web/gui/images/apple-icon-precomposed.png b/web/gui/images/apple-icon-precomposed.png index f69945bf9..9c3e73ef4 100644 Binary files a/web/gui/images/apple-icon-precomposed.png and b/web/gui/images/apple-icon-precomposed.png differ diff --git a/web/gui/images/apple-icon.png b/web/gui/images/apple-icon.png index f69945bf9..9c3e73ef4 100644 Binary files a/web/gui/images/apple-icon.png and 
b/web/gui/images/apple-icon.png differ diff --git a/web/gui/images/banner-icon-144x144.png b/web/gui/images/banner-icon-144x144.png index c3013cc96..fef3dca16 100644 Binary files a/web/gui/images/banner-icon-144x144.png and b/web/gui/images/banner-icon-144x144.png differ diff --git a/web/gui/images/favicon-128.png b/web/gui/images/favicon-128.png new file mode 100644 index 000000000..5371f920c Binary files /dev/null and b/web/gui/images/favicon-128.png differ diff --git a/web/gui/images/favicon-16x16.png b/web/gui/images/favicon-16x16.png index 43eb188fe..5729f5a2d 100644 Binary files a/web/gui/images/favicon-16x16.png and b/web/gui/images/favicon-16x16.png differ diff --git a/web/gui/images/favicon-196x196.png b/web/gui/images/favicon-196x196.png new file mode 100644 index 000000000..a208c27fa Binary files /dev/null and b/web/gui/images/favicon-196x196.png differ diff --git a/web/gui/images/favicon-32x32.png b/web/gui/images/favicon-32x32.png index e657e9212..cdb0a4806 100644 Binary files a/web/gui/images/favicon-32x32.png and b/web/gui/images/favicon-32x32.png differ diff --git a/web/gui/images/favicon-96x96.png b/web/gui/images/favicon-96x96.png index 1bbf594de..dbe7dea24 100644 Binary files a/web/gui/images/favicon-96x96.png and b/web/gui/images/favicon-96x96.png differ diff --git a/web/gui/images/favicon.ico b/web/gui/images/favicon.ico index 7ed957252..064032aee 100644 Binary files a/web/gui/images/favicon.ico and b/web/gui/images/favicon.ico differ diff --git a/web/gui/images/ms-icon-144x144.png b/web/gui/images/ms-icon-144x144.png index c3013cc96..8d465692b 100644 Binary files a/web/gui/images/ms-icon-144x144.png and b/web/gui/images/ms-icon-144x144.png differ diff --git a/web/gui/images/ms-icon-150x150.png b/web/gui/images/ms-icon-150x150.png index f0cf41287..4683d56a3 100644 Binary files a/web/gui/images/ms-icon-150x150.png and b/web/gui/images/ms-icon-150x150.png differ diff --git a/web/gui/images/ms-icon-310x150.png b/web/gui/images/ms-icon-310x150.png 
new file mode 100644 index 000000000..5d4ac57b6 Binary files /dev/null and b/web/gui/images/ms-icon-310x150.png differ diff --git a/web/gui/images/ms-icon-310x310.png b/web/gui/images/ms-icon-310x310.png index 4f5f7e621..bdb591b24 100644 Binary files a/web/gui/images/ms-icon-310x310.png and b/web/gui/images/ms-icon-310x310.png differ diff --git a/web/gui/images/ms-icon-36x36.png b/web/gui/images/ms-icon-36x36.png new file mode 100644 index 000000000..e251302ed Binary files /dev/null and b/web/gui/images/ms-icon-36x36.png differ diff --git a/web/gui/images/ms-icon-70x70.png b/web/gui/images/ms-icon-70x70.png index 70012c61f..5371f920c 100644 Binary files a/web/gui/images/ms-icon-70x70.png and b/web/gui/images/ms-icon-70x70.png differ diff --git a/web/gui/images/netdata-logomark.svg b/web/gui/images/netdata-logomark.svg index 87fb2bda7..18152fb7f 100644 --- a/web/gui/images/netdata-logomark.svg +++ b/web/gui/images/netdata-logomark.svg @@ -1,3 +1,8 @@ - - - + + + + + + + + \ No newline at end of file diff --git a/web/gui/index.html b/web/gui/index.html index c9dd89b22..4a8647dd9 100644 --- a/web/gui/index.html +++ b/web/gui/index.html @@ -15,8 +15,7 @@ - - + Redirecting to safety connection, case your browser does not support redirection, please click here."); + w->response.code = 301; + break; + } +#endif case HTTP_VALIDATION_NOT_SUPPORTED: debug(D_WEB_CLIENT_ACCESS, "%llu: Cannot understand '%s'.", w->id, w->response.data->buffer); @@ -1373,9 +1510,11 @@ ssize_t web_client_send_chunk_header(struct web_client *w, size_t len) { debug(D_DEFLATE, "%llu: OPEN CHUNK of %zu bytes (hex: %zx).", w->id, len, len); char buf[24]; - sprintf(buf, "%zX\r\n", len); - - ssize_t bytes = send(w->ofd, buf, strlen(buf), 0); + ssize_t bytes; + bytes = (ssize_t)sprintf(buf, "%zX\r\n", len); + buf[bytes] = 0x00; + + bytes = web_client_send_data(w,buf,strlen(buf),0); if(bytes > 0) { debug(D_DEFLATE, "%llu: Sent chunk header %zd bytes.", w->id, bytes); w->stats_sent_bytes += bytes; @@ 
-1397,7 +1536,8 @@ ssize_t web_client_send_chunk_close(struct web_client *w) { //debug(D_DEFLATE, "%llu: CLOSE CHUNK.", w->id); - ssize_t bytes = send(w->ofd, "\r\n", 2, 0); + ssize_t bytes; + bytes = web_client_send_data(w,"\r\n",2,0); if(bytes > 0) { debug(D_DEFLATE, "%llu: Sent chunk suffix %zd bytes.", w->id, bytes); w->stats_sent_bytes += bytes; @@ -1419,7 +1559,8 @@ ssize_t web_client_send_chunk_finalize(struct web_client *w) { //debug(D_DEFLATE, "%llu: FINALIZE CHUNK.", w->id); - ssize_t bytes = send(w->ofd, "\r\n0\r\n\r\n", 7, 0); + ssize_t bytes; + bytes = web_client_send_data(w,"\r\n0\r\n\r\n",7,0); if(bytes > 0) { debug(D_DEFLATE, "%llu: Sent chunk suffix %zd bytes.", w->id, bytes); w->stats_sent_bytes += bytes; @@ -1533,7 +1674,7 @@ ssize_t web_client_send_deflate(struct web_client *w) debug(D_WEB_CLIENT, "%llu: Sending %zu bytes of data (+%zd of chunk header).", w->id, w->response.zhave - w->response.zsent, t); - len = send(w->ofd, &w->response.zbuffer[w->response.zsent], (size_t) (w->response.zhave - w->response.zsent), MSG_DONTWAIT); + len = web_client_send_data(w,&w->response.zbuffer[w->response.zsent], (size_t) (w->response.zhave - w->response.zsent), MSG_DONTWAIT); if(len > 0) { w->stats_sent_bytes += len; w->response.zsent += len; @@ -1589,7 +1730,7 @@ ssize_t web_client_send(struct web_client *w) { return 0; } - bytes = send(w->ofd, &w->response.data->buffer[w->response.sent], w->response.data->len - w->response.sent, MSG_DONTWAIT); + bytes = web_client_send_data(w,&w->response.data->buffer[w->response.sent], w->response.data->len - w->response.sent, MSG_DONTWAIT); if(likely(bytes > 0)) { w->stats_sent_bytes += bytes; w->response.sent += bytes; @@ -1664,11 +1805,26 @@ ssize_t web_client_receive(struct web_client *w) if(unlikely(w->mode == WEB_CLIENT_MODE_FILECOPY)) return web_client_read_file(w); + ssize_t bytes; + ssize_t left = w->response.data->size - w->response.data->len; + // do we have any space for more data? 
buffer_need_bytes(w->response.data, NETDATA_WEB_REQUEST_RECEIVE_SIZE); - ssize_t left = w->response.data->size - w->response.data->len; - ssize_t bytes = recv(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1), MSG_DONTWAIT); +#ifdef ENABLE_HTTPS + if ( (!web_client_check_unix(w)) && (netdata_srv_ctx) ) { + if ( ( w->ssl.conn ) && (!w->ssl.flags)) { + bytes = SSL_read(w->ssl.conn, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1)); + }else { + bytes = recv(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1), MSG_DONTWAIT); + } + } + else{ + bytes = recv(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1), MSG_DONTWAIT); + } +#else + bytes = recv(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t) (left - 1), MSG_DONTWAIT); +#endif if(likely(bytes > 0)) { w->stats_received_bytes += bytes; diff --git a/web/server/web_client.h b/web/server/web_client.h index 4263e252a..0a57e8d8e 100644 --- a/web/server/web_client.h +++ b/web/server/web_client.h @@ -129,6 +129,7 @@ struct web_client { char decoded_url[NETDATA_WEB_REQUEST_URL_SIZE + 1]; // we decode the URL in this buffer char last_url[NETDATA_WEB_REQUEST_URL_SIZE+1]; // we keep a copy of the decoded URL here + char host[256]; struct timeval tv_in, tv_ready; @@ -153,6 +154,9 @@ struct web_client { // STATIC-THREADED WEB SERVER MEMBERS size_t pollinfo_slot; // POLLINFO slot of the web client size_t pollinfo_filecopy_slot; // POLLINFO slot of the file read +#ifdef ENABLE_HTTPS + struct netdata_ssl ssl; +#endif }; extern uid_t web_files_uid(void); diff --git a/web/server/web_client_cache.c b/web/server/web_client_cache.c index ab470560e..763e7e96a 100644 --- a/web/server/web_client_cache.c +++ b/web/server/web_client_cache.c @@ -6,6 +6,18 @@ // ---------------------------------------------------------------------------- // allocate and free web_clients +#ifdef ENABLE_HTTPS + +static void 
web_client_reuse_ssl(struct web_client *w) { + if (netdata_srv_ctx) { + if (w->ssl.conn) { + SSL_clear(w->ssl.conn); + } + } +} +#endif + + static void web_client_zero(struct web_client *w) { // zero everything about it - but keep the buffers @@ -35,6 +47,14 @@ static void web_client_free(struct web_client *w) { buffer_free(w->response.header); buffer_free(w->response.data); freez(w->user_agent); +#ifdef ENABLE_HTTPS + if ((!web_client_check_unix(w)) && ( netdata_srv_ctx )) { + if (w->ssl.conn) { + SSL_free(w->ssl.conn); + w->ssl.conn = NULL; + } + } +#endif freez(w); } @@ -159,12 +179,25 @@ struct web_client *web_client_get_from_cache_or_allocate() { if(w->prev) w->prev->next = w->next; if(w->next) w->next->prev = w->prev; web_clients_cache.avail_count--; +#ifdef ENABLE_HTTPS + web_client_reuse_ssl(w); + SSL *ssl = w->ssl.conn; +#endif web_client_zero(w); web_clients_cache.reused++; +#ifdef ENABLE_HTTPS + w->ssl.conn = ssl; + w->ssl.flags = NETDATA_SSL_START; + debug(D_WEB_CLIENT_ACCESS,"Reusing SSL structure with (w->ssl = NULL, w->accepted = %d)",w->ssl.flags); +#endif } else { // allocate it w = web_client_alloc(); +#ifdef ENABLE_HTTPS + w->ssl.flags = NETDATA_SSL_START; + debug(D_WEB_CLIENT_ACCESS,"Starting SSL structure with (w->ssl = NULL, w->accepted = %d)",w->ssl.flags); +#endif web_clients_cache.allocated++; } @@ -205,6 +238,11 @@ void web_client_release(struct web_client *w) { if (w->ifd != -1) close(w->ifd); if (w->ofd != -1 && w->ofd != w->ifd) close(w->ofd); w->ifd = w->ofd = -1; +#ifdef ENABLE_HTTPS + web_client_reuse_ssl(w); + w->ssl.flags = NETDATA_SSL_START; +#endif + } // unlink it from the used diff --git a/web/server/web_server.c b/web/server/web_server.c index 11f7edf8a..9e51c81fe 100644 --- a/web/server/web_server.c +++ b/web/server/web_server.c @@ -138,5 +138,3 @@ void web_client_initialize_connection(struct web_client *w) { web_client_cache_verify(0); } - - -- cgit v1.2.3