diff options
65 files changed, 2472 insertions, 678 deletions
diff --git a/.gitignore b/.gitignore index afb0c67bd..0d8ba70f0 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,9 @@ tests/alarm_repetition/alarm.sh tests/template_dimension/template_dim.sh # tests and temp files +test-driver +**/tests/*_testdriver +**/tests/*_testdriver.trs python.d/python-modules-installer.sh # documentation generated files diff --git a/.travis.yml b/.travis.yml index efe1fb601..3833be236 100644 --- a/.travis.yml +++ b/.travis.yml @@ -57,16 +57,31 @@ stages: # Nightly operations - name: Nightly operations if: branch = master AND type = cron + - name: Nightly release if: branch = master AND type = cron + - name: Trigger deb and rpm package build (nightly release) + if: branch = master AND type = cron # Scheduled releases - name: Support activities on main branch if: branch = master AND type != pull_request AND type != cron - - name: Publish for release # We don't run on release candidates - if: branch = master AND type != pull_request AND type != cron AND commit_message =~ /\[netdata (release candidate|(major|minor|patch) release)\]/ AND tag !~ /(-rc)/ + - name: Publish for release + if: >- + branch = master + AND type != pull_request + AND type != cron + AND tag !~ /(-rc)/ + AND commit_message =~ /\[netdata (release candidate|(major|minor|patch) release)\]/ + - name: Trigger deb and rpm package build (release) + if: >- + branch = master + AND type != pull_request + AND type != cron + AND tag !~ /(-rc)/ + AND commit_message =~ /\[netdata (release candidate|(major|minor|patch) release)\]/ # Build DEB packages under special conditions - name: Package ubuntu/* and debian/* @@ -522,14 +537,11 @@ jobs: - .travis/draft_release.sh after_failure: post_message "TRAVIS_MESSAGE" "<!here> Draft release submission failed" - - name: Trigger .RPM and .DEB package generation - before_script: post_message "TRAVIS_MESSAGE" "Starting RPM and DEB package generation for release" "${NOTIF_CHANNEL}" - script: - - .travis/trigger_package_generation.sh - 
after_failure: post_message "TRAVIS_MESSAGE" "<!here> Stable release package generation produced errors" "${NOTIF_CHANNEL}" - git: - depth: false + - stage: Trigger deb and rpm package build (release) + name: Trigger deb and rpm package build + script: .travis/trigger_package_generation.sh + after_failure: post_message "TRAVIS_MESSAGE" "<!here> Failed to trigger deb and rpm package build during release" "${NOTIF_CHANNEL}" # This is the nightly pre-execution step (Jobs, preparatory steps for nightly, etc) @@ -610,14 +622,6 @@ jobs: - ALLOW_SOFT_FAILURE_HERE=true - ARCHS=aarch64 - - name: Trigger .RPM and .DEB package generation - before_script: post_message "TRAVIS_MESSAGE" "Starting RPM and DEB package generation for nightlies" "${NOTIF_CHANNEL}" - script: - - .travis/trigger_package_generation.sh "[Build latest]" - after_failure: post_message "TRAVIS_MESSAGE" "<!here> Nightly package generation produced errors" "${NOTIF_CHANNEL}" - git: - depth: false - - name: Create nightly release artifacts, publish to GCS before_script: post_message "TRAVIS_MESSAGE" "Starting artifacts generation for nightlies" "${NOTIF_CHANNEL}" script: @@ -668,3 +672,8 @@ jobs: branch: master condition: -d "artifacts" && ${TRAVIS_REPO_SLUG} = "netdata/netdata" after_deploy: rm -f .travis/gcs-credentials.json + + - stage: Trigger deb and rpm package build (nightly release) + name: Trigger deb and rpm package build + script: .travis/trigger_package_generation.sh "[Build latest]" + after_failure: post_message "TRAVIS_MESSAGE" "<!here> Failed to trigger deb and rpm package build during nightly release" "${NOTIF_CHANNEL}" diff --git a/CHANGELOG.md b/CHANGELOG.md index d2c1e8a08..c8b8d97cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +## [v1.18.1](https://github.com/netdata/netdata/tree/v1.18.1) (2019-10-18) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.18.0...v1.18.1) + +**Merged pull requests:** + +- Fix build when CMocka isn't installed 
[\#7129](https://github.com/netdata/netdata/pull/7129) ([vlvkobal](https://github.com/vlvkobal)) +- Fixing broken links in docs [\#7123](https://github.com/netdata/netdata/pull/7123) ([joelhans](https://github.com/joelhans)) +- Convert recursion timings to miliseconds. [\#7121](https://github.com/netdata/netdata/pull/7121) ([Ferroin](https://github.com/Ferroin)) +- Fix upgrade path from v1.17.1 to v1.18.x for deb packages [\#7118](https://github.com/netdata/netdata/pull/7118) ([knatsakis](https://github.com/knatsakis)) +- Fix CPU charts in apps plugin on FreeBSD [\#7115](https://github.com/netdata/netdata/pull/7115) ([vlvkobal](https://github.com/vlvkobal)) +- unbound: fix init [\#7112](https://github.com/netdata/netdata/pull/7112) ([ilyam8](https://github.com/ilyam8)) +- Add VMware VMXNET3 driver to the default interafaces list [\#7109](https://github.com/netdata/netdata/pull/7109) ([samm-git](https://github.com/samm-git)) +- megacli: search binary and sudo check fix [\#7108](https://github.com/netdata/netdata/pull/7108) ([ilyam8](https://github.com/ilyam8)) +- Run the triggers for deb and rpm package build in separate stages [\#7105](https://github.com/netdata/netdata/pull/7105) ([knatsakis](https://github.com/knatsakis)) +- Fix segmentation fault in FreeBSD when statsd is disabled [\#7102](https://github.com/netdata/netdata/pull/7102) ([vlvkobal](https://github.com/vlvkobal)) +- Clang warnings [\#7090](https://github.com/netdata/netdata/pull/7090) ([thiagoftsm](https://github.com/thiagoftsm)) +- SimpleService: change chart suppress msg level to info [\#7085](https://github.com/netdata/netdata/pull/7085) ([ilyam8](https://github.com/ilyam8)) +- 7040 enable stable channel option [\#7082](https://github.com/netdata/netdata/pull/7082) ([knatsakis](https://github.com/knatsakis)) +- fix\(freeipmi\): Update frequency config check [\#7078](https://github.com/netdata/netdata/pull/7078) ([stevenh](https://github.com/stevenh)) +- Fix problems with names when alarm is 
created [\#7069](https://github.com/netdata/netdata/pull/7069) ([thiagoftsm](https://github.com/thiagoftsm)) +- Fix dbengine not working when mmap fails [\#7065](https://github.com/netdata/netdata/pull/7065) ([mfundul](https://github.com/mfundul)) +- Fix typo in health\_alarm\_notify.conf [\#7062](https://github.com/netdata/netdata/pull/7062) ([sz4bi](https://github.com/sz4bi)) +- Fix size of a zeroed block [\#7061](https://github.com/netdata/netdata/pull/7061) ([vlvkobal](https://github.com/vlvkobal)) +- Partial fix for \#7039 [\#7060](https://github.com/netdata/netdata/pull/7060) ([knatsakis](https://github.com/knatsakis)) +- feat\(reaper\): Add process reaper support [\#7059](https://github.com/netdata/netdata/pull/7059) ([stevenh](https://github.com/stevenh)) +- Disable slabinfo plugin by default [\#7056](https://github.com/netdata/netdata/pull/7056) ([vlvkobal](https://github.com/vlvkobal)) +- Add release 1.18.0 to news [\#7054](https://github.com/netdata/netdata/pull/7054) ([cakrit](https://github.com/cakrit)) +- Fix BSD/pfSense documentation [\#7041](https://github.com/netdata/netdata/pull/7041) ([thiagoftsm](https://github.com/thiagoftsm)) +- Add dbengine RAM usage statistics [\#7038](https://github.com/netdata/netdata/pull/7038) ([mfundul](https://github.com/mfundul)) +- Don't write an HTTP response 204 to logs [\#7035](https://github.com/netdata/netdata/pull/7035) ([vlvkobal](https://github.com/vlvkobal)) +- Implement hangouts chat notifications [\#7013](https://github.com/netdata/netdata/pull/7013) ([hendrikhofstadt](https://github.com/hendrikhofstadt)) +- Documenting the structure of the data responses. 
[\#7012](https://github.com/netdata/netdata/pull/7012) ([amoss](https://github.com/amoss)) +- Tutorials to support v1.18 features [\#6993](https://github.com/netdata/netdata/pull/6993) ([joelhans](https://github.com/joelhans)) +- Add CMocka unit tests [\#6985](https://github.com/netdata/netdata/pull/6985) ([vlvkobal](https://github.com/vlvkobal)) + ## [v1.18.0](https://github.com/netdata/netdata/tree/v1.18.0) (2019-10-10) [Full Changelog](https://github.com/netdata/netdata/compare/v1.17.1...v1.18.0) @@ -93,8 +129,6 @@ - Fixes netdata group deletion on linux for uninstall script [\#6645](https://github.com/netdata/netdata/pull/6645) ([mbarper](https://github.com/mbarper)) - Gearman plugin for Netdata [\#6567](https://github.com/netdata/netdata/pull/6567) ([agronick](https://github.com/agronick)) - Create a template for all dimensions [\#6560](https://github.com/netdata/netdata/pull/6560) ([thiagoftsm](https://github.com/thiagoftsm)) -- Center the chart on timeframe when an alarm is raised [\#6391](https://github.com/netdata/netdata/pull/6391) ([thiagoftsm](https://github.com/thiagoftsm)) -- Cppcheck fixes [\#6386](https://github.com/netdata/netdata/pull/6386) ([ac000](https://github.com/ac000)) ## [v1.17.1](https://github.com/netdata/netdata/tree/v1.17.1) (2019-09-12) @@ -189,8 +223,6 @@ - mongodb: ssl connection [\#6546](https://github.com/netdata/netdata/pull/6546) ([ilyam8](https://github.com/ilyam8)) - Add MongoDB backend [\#6524](https://github.com/netdata/netdata/pull/6524) ([vlvkobal](https://github.com/vlvkobal)) - Netdata Cloud documentation [\#6476](https://github.com/netdata/netdata/pull/6476) ([joelhans](https://github.com/joelhans)) -- Variable Granularity support for data collection [\#6430](https://github.com/netdata/netdata/pull/6430) ([mfundul](https://github.com/mfundul)) -- \(re-open\) ZRAM info collector module \(proc.plugin\) [\#6424](https://github.com/netdata/netdata/pull/6424) ([RaZeR-RBI](https://github.com/RaZeR-RBI)) ## 
[v1.16.1](https://github.com/netdata/netdata/tree/v1.16.1) (2019-07-31) @@ -231,53 +263,11 @@ - Fix nodes menu sizing \(responsive\) [\#6455](https://github.com/netdata/netdata/pull/6455) ([builat](https://github.com/builat)) - Add netdata haproxy documentation page [\#6454](https://github.com/netdata/netdata/pull/6454) ([johnramsden](https://github.com/johnramsden)) - Fix CRC and I/O error handling in dbengine [\#6452](https://github.com/netdata/netdata/pull/6452) ([mfundul](https://github.com/mfundul)) -- Stop docs icon from linking to streaming page instead of docs root [\#6445](https://github.com/netdata/netdata/pull/6445) ([joelhans](https://github.com/joelhans)) -- Add more supported backends to the documentation [\#6443](https://github.com/netdata/netdata/pull/6443) ([vlvkobal](https://github.com/vlvkobal)) -- netdata/packaging: Remove Ventureer from demo sites [\#6442](https://github.com/netdata/netdata/pull/6442) ([paulkatsoulakis](https://github.com/paulkatsoulakis)) -- Safer container names [\#6441](https://github.com/netdata/netdata/pull/6441) ([ViViDboarder](https://github.com/ViViDboarder)) -- Update docs health monitoring and health management api [\#6435](https://github.com/netdata/netdata/pull/6435) ([jghaanstra](https://github.com/jghaanstra)) -- Fix issue with HTML docs generation [\#6433](https://github.com/netdata/netdata/pull/6433) ([cakrit](https://github.com/cakrit)) -- rethinkdb collector new driver support [\#6431](https://github.com/netdata/netdata/pull/6431) ([ilyam8](https://github.com/ilyam8)) -- New 'homepage' for documentation site [\#6428](https://github.com/netdata/netdata/pull/6428) ([joelhans](https://github.com/joelhans)) -- Utf8 Badge Fix And URL Parser International Support \(initial\) [\#6426](https://github.com/netdata/netdata/pull/6426) ([underhood](https://github.com/underhood)) -- Styling improvements to documentation [\#6425](https://github.com/netdata/netdata/pull/6425) ([joelhans](https://github.com/joelhans)) -- 
Netdata/packaging: Add documentation for binary packages, plus draft table for distributions support [\#6422](https://github.com/netdata/netdata/pull/6422) ([paulkatsoulakis](https://github.com/paulkatsoulakis)) -- netdata/packaging/doc: Update documentation dependencies [\#6421](https://github.com/netdata/netdata/pull/6421) ([paulkatsoulakis](https://github.com/paulkatsoulakis)) -- Add global configuration option for zero metrics [\#6419](https://github.com/netdata/netdata/pull/6419) ([vlvkobal](https://github.com/vlvkobal)) -- Updated logos in the infographic and remaining favicons [\#6417](https://github.com/netdata/netdata/pull/6417) ([cakrit](https://github.com/cakrit)) -- netdata/packaging: Fix RPM packaging workflow issues, plus draft changes for .DEB packaging [\#6415](https://github.com/netdata/netdata/pull/6415) ([paulkatsoulakis](https://github.com/paulkatsoulakis)) -- SSL vs. TLS consistency and clarification in documentation [\#6414](https://github.com/netdata/netdata/pull/6414) ([joelhans](https://github.com/joelhans)) -- Add more codeowners to the core [\#6413](https://github.com/netdata/netdata/pull/6413) ([vlvkobal](https://github.com/vlvkobal)) -- Add news of v1.16.0 to main README [\#6411](https://github.com/netdata/netdata/pull/6411) ([cakrit](https://github.com/cakrit)) -- Update Running-behind-apache.md [\#6406](https://github.com/netdata/netdata/pull/6406) ([Steve8291](https://github.com/Steve8291)) -- Fix Web API Health documentation [\#6404](https://github.com/netdata/netdata/pull/6404) ([thiagoftsm](https://github.com/thiagoftsm)) -- Snapshot uniqueId fix [\#6400](https://github.com/netdata/netdata/pull/6400) ([jacekkolasa](https://github.com/jacekkolasa)) -- Make use of GCC's \_\_attribute\_\_\(\(unused\)\) [\#6392](https://github.com/netdata/netdata/pull/6392) ([ac000](https://github.com/ac000)) -- Change default installation to stable in documentation [\#6388](https://github.com/netdata/netdata/pull/6388) 
([joelhans](https://github.com/joelhans)) -- Daemon fix double kills of collection threads on shutdown [\#6387](https://github.com/netdata/netdata/pull/6387) ([emmrk](https://github.com/emmrk)) -- Add apps grouping debug messages [\#6375](https://github.com/netdata/netdata/pull/6375) ([vlvkobal](https://github.com/vlvkobal)) ## [v1.16.0](https://github.com/netdata/netdata/tree/v1.16.0) (2019-07-08) [Full Changelog](https://github.com/netdata/netdata/compare/v1.15.0...v1.16.0) -**Merged pull requests:** - -- Ignore /dev and /run space/inode usage [\#6399](https://github.com/netdata/netdata/pull/6399) ([vlvkobal](https://github.com/vlvkobal)) -- Update favicon with new logo [\#6398](https://github.com/netdata/netdata/pull/6398) ([cakrit](https://github.com/cakrit)) -- Update apps\_groups.conf for time group [\#6397](https://github.com/netdata/netdata/pull/6397) ([mbarper](https://github.com/mbarper)) -- Update to icons [\#6396](https://github.com/netdata/netdata/pull/6396) ([ivorjvr](https://github.com/ivorjvr)) -- Changed links from my-netdata.io to netdata.cloud [\#6389](https://github.com/netdata/netdata/pull/6389) ([joelhans](https://github.com/joelhans)) -- alarm-notify.sh should respect the cloud base url setting [\#6383](https://github.com/netdata/netdata/pull/6383) ([ladakis](https://github.com/ladakis)) -- Add a check for a macro declaration for the perf plugin [\#6382](https://github.com/netdata/netdata/pull/6382) ([vlvkobal](https://github.com/vlvkobal)) -- Add a .gitattributes file [\#6381](https://github.com/netdata/netdata/pull/6381) ([ac000](https://github.com/ac000)) -- Health fix double Free Corruption [\#6379](https://github.com/netdata/netdata/pull/6379) ([thiagoftsm](https://github.com/thiagoftsm)) -- Health giving wrong message [\#6377](https://github.com/netdata/netdata/pull/6377) ([thiagoftsm](https://github.com/thiagoftsm)) -- Health could not read properly the health silencers file [\#6374](https://github.com/netdata/netdata/pull/6374) 
([thiagoftsm](https://github.com/thiagoftsm)) -- Add more debug messages for pluginsd pipe errors [\#6373](https://github.com/netdata/netdata/pull/6373) ([vlvkobal](https://github.com/vlvkobal)) -- Improve documentation about file descriptors and systemd configuration. [\#6372](https://github.com/netdata/netdata/pull/6372) ([mfundul](https://github.com/mfundul)) -- netdata/packaging: Netdata binary packages generation - spec file refinement, support for nightlies \(RPM\) [\#6369](https://github.com/netdata/netdata/pull/6369) ([paulkatsoulakis](https://github.com/paulkatsoulakis)) - ## [v1.15.0](https://github.com/netdata/netdata/tree/v1.15.0) (2019-05-22) [Full Changelog](https://github.com/netdata/netdata/compare/v1.14.0...v1.15.0) diff --git a/CMakeLists.txt b/CMakeLists.txt index 75a5427a3..857f5b9af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -302,6 +302,7 @@ set(LIBNETDATA_FILES libnetdata/inlined.h libnetdata/libnetdata.c libnetdata/libnetdata.h + libnetdata/required_dummies.h libnetdata/locks/locks.c libnetdata/locks/locks.h libnetdata/log/log.c @@ -876,3 +877,25 @@ IF(ENABLE_PLUGIN_CGROUP_NETWORK) ELSE() message(STATUS "cgroup-network: disabled (requires Linux)") ENDIF() + + +# ----------------------------------------------------------------------------- +# Unit tests + +if(UNIT_TESTING) + message(STATUS "Looking for CMocka which is required for unit testing") + find_package(CMocka REQUIRED) + include(CTest) + +if(BUILD_TESTING) + add_executable(str2ld_testdriver libnetdata/tests/test_str2ld.c) + target_link_libraries(str2ld_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) + add_test(NAME test_str2ld COMMAND str2ld_testdriver) + + add_executable(storage_number_testdriver libnetdata/storage_number/tests/test_storage_number.c) + target_link_libraries(storage_number_testdriver libnetdata ${NETDATA_COMMON_LIBRARIES} ${CMOCKA_LIBRARIES}) + add_test(NAME test_storage_number COMMAND storage_number_testdriver) + + 
set_target_properties(str2ld_testdriver storage_number_testdriver PROPERTIES RUNTIME_OUTPUT_DIRECTORY tests) +endif() +endif() diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 2cfdee4e3..032f34ff9 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -10,7 +10,7 @@ This agreement is part of the legal framework of the open-source ecosystem that adds some red tape, but protects both the contributor and the project. To understand why this is needed, please read [a well-written chapter from -Karl Fogel’s Producing Open Source Software on CLAs](https://producingoss.com/en/copyright-assignment.html). +Karl Fogel’s Producing Open Source Software on CLAs](https://producingoss.com/en/contributor-agreements.html). By signing this agreement, you do not change your rights to use your own contributions for any other purpose. @@ -129,5 +129,6 @@ This is the list of contributors that have signed this agreement: |@skrzyp1|Jerzy S.|| |@akwan|Alan Kwan|| |@underhood|Timotej Šiškovič|| +|@stevenh|Steven Hartland|steven.hartland@multiplay.co.uk| [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2FCONTRIBUTORS&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) diff --git a/Makefile.am b/Makefile.am index 35d9712cb..d9da7b8a0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -137,6 +137,7 @@ LIBNETDATA_FILES = \ libnetdata/inlined.h \ libnetdata/libnetdata.c \ libnetdata/libnetdata.h \ + libnetdata/required_dummies.h \ libnetdata/locks/locks.c \ libnetdata/locks/locks.h \ libnetdata/log/log.c \ @@ -637,3 +638,24 @@ if ENABLE_BACKEND_MONGODB netdata_SOURCES += $(MONGODB_BACKEND_FILES) netdata_LDADD += $(OPTIONAL_MONGOC_LIBS) endif + +if ENABLE_UNITTESTS + check_PROGRAMS = \ + libnetdata/tests/str2ld_testdriver \ + libnetdata/storage_number/tests/storage_number_testdriver \ + $(NULL) + + TESTS = $(check_PROGRAMS) + + 
libnetdata_tests_str2ld_testdriver_SOURCES = \ + libnetdata/tests/test_str2ld.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + libnetdata_tests_str2ld_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) + + libnetdata_storage_number_tests_storage_number_testdriver_SOURCES = \ + libnetdata/storage_number/tests/test_storage_number.c \ + $(LIBNETDATA_FILES) \ + $(NULL) + libnetdata_storage_number_tests_storage_number_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS) +endif @@ -152,6 +152,26 @@ not just visualize metrics. ## News +`Oct 10th, 2019` - **[Netdata v1.18.0 released!](https://github.com/netdata/netdata/releases)** + +Release v1.18.0 contains 5 new collectors, 16 bug fixes, 27 improvements, and 20 documentation updates. + +The **database engine** is now the default method of storing metrics in Netdata. You immediately get more efficient and configurable long-term metrics storage without any work on your part. By saving recent metrics in RAM and "spilling" historical metrics to disk for long-term storage, the database engine is laying the foundation for many more improvements to distributed metrics. + +We even have a [tutorial](https://docs.netdata.cloud/docs/tutorials/longer-metrics-storage/) on switching to the database engine and getting the most from it. Or, just read up on [how performant](https://docs.netdata.cloud/database/engine/#evaluation) the database engine really is. + +Both our `python.d` and `go.d` plugins now have more **intelligent auto-detection** by periodically dump a list of active modules to disk. When Netdata starts, such as after a reboot, the plugins use this list of known services to re-establish metrics collection much more reliably. No more worrying if the service or application you need to monitor starts up minutes after Netdata. + +Two of our new collectors will help those with Hadoop big data infrastructures. 
The **HDFS and Zookeeper collection modules** come with essential alarms requested by our community and Netdata's auto-detection capabilities to keep the required configuration to an absolute minimum. Read up on the process via our [HDFS and Zookeeper tutorial](https://docs.netdata.cloud/docs/tutorials/monitor-hadoop-cluster/). + +Speaking of new collectors—we also added the ability to collect metrics from SLAB cache, Gearman, and vCenter Server Appliances. + +Before v1.18, if you wanted to create alarms for each dimension in a single chart, you needed to write separate entities for each dimension—not very efficient or user-friendly. New **dimension templates** fix that hassle. Now, a single entity can automatically generate alarms for any number of dimensions in a chart, even those you weren't aware of! Our [tutorial on dimension templates](https://docs.netdata.cloud/docs/tutorials/dimension-templates/) has all the details. + +v1.18 brings support for installing Netdata on offline or air-gapped systems. To help users comply with strict security policies, our installation scripts can now install Netdata using a previously-downloaded tarball and checksums instead of downloading them at runtime. We have guides for installing offline via `kickstart.sh` or `kickstart-static64.sh` in our [installation documentation](https://docs.netdata.cloud/packaging/installer/#offline-installations). We're excited to bring real-time monitoring to once-inaccessible systems! + +--- + `Sep 12th, 2019` - **[Netdata v1.17.1 released!](https://github.com/netdata/netdata/releases)** Release v1.17.1 contains 2 bug fixes, 6 improvements, and 2 documentation updates. @@ -356,7 +376,7 @@ This is what you should expect from Netdata: ### Health Monitoring & Alarms - **Sophisticated alerting** - comes with hundreds of alarms, **out of the box**! Supports dynamic thresholds, hysteresis, alarm templates, multiple role-based notification methods. 
-- **Notifications**: [alerta.io](health/notifications/alerta/), [amazon sns](health/notifications/awssns/), [discordapp.com](health/notifications/discord/), [email](health/notifications/email/), [flock.com](health/notifications/flock/), [irc](health/notifications/irc/), [kavenegar.com](health/notifications/kavenegar/), [messagebird.com](health/notifications/messagebird/), [pagerduty.com](health/notifications/pagerduty/), [prowl](health/notifications/prowl/), [pushbullet.com](health/notifications/pushbullet/), [pushover.net](health/notifications/pushover/), [rocket.chat](health/notifications/rocketchat/), [slack.com](health/notifications/slack/), [smstools3](health/notifications/smstools3/), [syslog](health/notifications/syslog/), [telegram.org](health/notifications/telegram/), [twilio.com](health/notifications/twilio/), [web](health/notifications/web/) and [custom notifications](health/notifications/custom/). +- **Notifications**: [alerta.io](health/notifications/alerta/), [amazon sns](health/notifications/awssns/), [discordapp.com](health/notifications/discord/), [email](health/notifications/email/), [flock.com](health/notifications/flock/), [hangouts](health/notifications/hangouts/), [irc](health/notifications/irc/), [kavenegar.com](health/notifications/kavenegar/), [messagebird.com](health/notifications/messagebird/), [pagerduty.com](health/notifications/pagerduty/), [prowl](health/notifications/prowl/), [pushbullet.com](health/notifications/pushbullet/), [pushover.net](health/notifications/pushover/), [rocket.chat](health/notifications/rocketchat/), [slack.com](health/notifications/slack/), [smstools3](health/notifications/smstools3/), [syslog](health/notifications/syslog/), [telegram.org](health/notifications/telegram/), [twilio.com](health/notifications/twilio/), [web](health/notifications/web/) and [custom notifications](health/notifications/custom/). 
### Integrations diff --git a/backends/prometheus/backend_prometheus.c b/backends/prometheus/backend_prometheus.c index 67342ea7a..03323ac54 100644 --- a/backends/prometheus/backend_prometheus.c +++ b/backends/prometheus/backend_prometheus.c @@ -780,7 +780,7 @@ int process_prometheus_remote_write_response(BUFFER *b) { const char *s = buffer_tostring(b); int len = buffer_strlen(b); - // do nothing with HTTP response 200 + // do nothing with HTTP responses 200 or 204 while(!isspace(*s) && len) { s++; @@ -789,7 +789,7 @@ int process_prometheus_remote_write_response(BUFFER *b) { s++; len--; - if(likely(len > 4 && !strncmp(s, "200 ", 4))) + if(likely(len > 4 && (!strncmp(s, "200 ", 4) || !strncmp(s, "204 ", 4)))) return 0; else return discard_response(b, "prometheus remote write"); diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 4dcbd38b7..93159406e 100644 --- a/collectors/apps.plugin/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -2569,7 +2569,7 @@ static int collect_data_for_all_processes(void) { size_t new_procbase_size; - int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_ALL }; + int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_PROC }; if (unlikely(sysctl(mib, 3, NULL, &new_procbase_size, NULL, 0))) { error("sysctl error: Can't get processes data size"); return 0; diff --git a/collectors/freebsd.plugin/freebsd_getifaddrs.c b/collectors/freebsd.plugin/freebsd_getifaddrs.c index 7e2293e43..72fa908ce 100644 --- a/collectors/freebsd.plugin/freebsd_getifaddrs.c +++ b/collectors/freebsd.plugin/freebsd_getifaddrs.c @@ -144,7 +144,7 @@ int do_getifaddrs(int update_every, usec_t dt) { (void)dt; #define DEFAULT_EXLUDED_INTERFACES "lo*" -#define DEFAULT_PHYSICAL_INTERFACES "igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet*" +#define DEFAULT_PHYSICAL_INTERFACES "igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx*" #define CONFIG_SECTION_GETIFADDRS "plugin:freebsd:getifaddrs" static int enable_new_interfaces = -1; diff --git 
a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c index 271547ab3..bd9226631 100644 --- a/collectors/freebsd.plugin/freebsd_sysctl.c +++ b/collectors/freebsd.plugin/freebsd_sysctl.c @@ -470,7 +470,7 @@ int do_dev_cpu_temperature(int update_every, usec_t dt) { pcpu_temperature = reallocz(pcpu_temperature, sizeof(int) * number_of_cpus); mib = reallocz(mib, sizeof(int) * number_of_cpus * 4); if (unlikely(number_of_cpus > old_number_of_cpus)) - memset(&mib[old_number_of_cpus * 4], 0, 4 * (number_of_cpus - old_number_of_cpus)); + memset(&mib[old_number_of_cpus * 4], 0, sizeof(int) * (number_of_cpus - old_number_of_cpus) * 4); } for (i = 0; i < number_of_cpus; i++) { if (unlikely(!(mib[i * 4]))) diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c index ba1fbffae..74274ea21 100644 --- a/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -1790,7 +1790,7 @@ int main (int argc, char **argv) { errno = 0; - if(freq > netdata_update_every) + if(freq >= netdata_update_every) netdata_update_every = freq; else if(freq) diff --git a/collectors/plugins.d/plugins_d.c b/collectors/plugins.d/plugins_d.c index 85b670df8..edbdb5730 100644 --- a/collectors/plugins.d/plugins_d.c +++ b/collectors/plugins.d/plugins_d.c @@ -647,7 +647,7 @@ static void pluginsd_worker_thread_cleanup(void *arg) { if (cd->pid) { siginfo_t info; info("killing child process pid %d", cd->pid); - if (killpid(cd->pid, SIGTERM) != -1) { + if (killpid(cd->pid) != -1) { info("waiting for child process pid %d to exit...", cd->pid); waitid(P_PID, (id_t) cd->pid, &info, WEXITED); } @@ -738,7 +738,7 @@ void *pluginsd_worker_thread(void *arg) { info("connected to '%s' running on pid %d", cd->fullfilename, cd->pid); count = pluginsd_process(localhost, cd, fp, 0); error("'%s' (pid %d) disconnected after %zu successful data collections (ENDs).", cd->fullfilename, cd->pid, count); - 
killpid(cd->pid, SIGTERM); + killpid(cd->pid); int worker_ret_code = mypclose(fp, cd->pid); @@ -779,6 +779,9 @@ void *pluginsd_main(void *ptr) { int scan_frequency = (int) config_get_number(CONFIG_SECTION_PLUGINS, "check for new plugins every", 60); if(scan_frequency < 1) scan_frequency = 1; + // disable some plugins by default + config_get_boolean(CONFIG_SECTION_PLUGINS, "slabinfo", CONFIG_BOOLEAN_NO); + // store the errno for each plugins directory // so that we don't log broken directories on each loop int directory_errors[PLUGINSD_MAX_DIRECTORIES] = { 0 }; diff --git a/collectors/python.d.plugin/megacli/megacli.chart.py b/collectors/python.d.plugin/megacli/megacli.chart.py index 3805a100e..4872eab80 100644 --- a/collectors/python.d.plugin/megacli/megacli.chart.py +++ b/collectors/python.d.plugin/megacli/megacli.chart.py @@ -163,8 +163,8 @@ class Battery: class Megacli: def __init__(self): self.s = find_binary('sudo') - self.m = find_binary('megacli') - self.sudo_check = [self.s, '-n', '-v'] + self.m = find_binary('megacli') or find_binary('MegaCli') # Binary on FreeBSD is MegaCli + self.sudo_check = [self.s, '-n', '-l'] self.disk_info = [self.s, '-n', self.m, '-LDPDInfo', '-aAll', '-NoLog'] self.battery_info = [self.s, '-n', self.m, '-AdpBbuCmd', '-a0', '-NoLog'] diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py index 088bf119e..4dfd226b0 100644 --- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py @@ -230,7 +230,7 @@ class SimpleService(PythonDLimitedLogger, object): continue elif self.charts.cleanup and chart.penalty >= self.charts.cleanup: chart.obsolete() - self.error("chart '{0}' was suppressed due to non updating".format(chart.name)) + self.info("chart '{0}' was suppressed due to non 
updating".format(chart.name)) continue ok = chart.update(data, interval) diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py index 3b94fcdf2..337bf57d8 100644 --- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py @@ -14,6 +14,12 @@ except ImportError: else: _TLS_SUPPORT = True +if _TLS_SUPPORT: + try: + PROTOCOL_TLS = ssl.PROTOCOL_TLS + except AttributeError: + PROTOCOL_TLS = ssl.PROTOCOL_SSLv23 + from bases.FrameworkServices.SimpleService import SimpleService @@ -80,15 +86,18 @@ class SocketService(SimpleService): if self.tls: try: self.debug('Encapsulating socket with TLS') + self.debug('Using keyfile: {0}, certfile: {1}, cert_reqs: {2}, ssl_version: {3}'.format( + self.key, self.cert, ssl.CERT_NONE, PROTOCOL_TLS + )) self._sock = ssl.wrap_socket(self._sock, keyfile=self.key, certfile=self.cert, server_side=False, cert_reqs=ssl.CERT_NONE, - ssl_version=ssl.PROTOCOL_TLS, + ssl_version=PROTOCOL_TLS, ) - except (socket.error, ssl.SSLError) as error: - self.error('failed to wrap socket : {0}'.format(error)) + except (socket.error, ssl.SSLError, IOError, OSError) as error: + self.error('failed to wrap socket : {0}'.format(repr(error))) self._disconnect() self.__socket_config = None return False @@ -167,7 +176,8 @@ class SocketService(SimpleService): if self._connect2socket(res): break - except Exception: + except Exception as error: + self.error('unhandled exception during connect : {0}'.format(repr(error))) self._sock = None self.__socket_config = None diff --git a/collectors/python.d.plugin/springboot/README.md b/collectors/python.d.plugin/springboot/README.md index 75cfa22ee..37b4dd7cb 100644 --- a/collectors/python.d.plugin/springboot/README.md +++ b/collectors/python.d.plugin/springboot/README.md @@ -63,7 +63,7 @@ 
public class HeapPoolMetrics implements PublicMetrics { } ``` -Please refer [Spring Boot Actuator: Production-ready features](https://docs.spring.io/spring-boot/docs/current/reference/html/production-ready.html) and [81. Actuator - Part IX. ‘How-to’ guides](https://docs.spring.io/spring-boot/docs/current/reference/html/howto-actuator.html) for more information. +Please refer [Spring Boot Actuator: Production-ready Features](https://docs.spring.io/spring-boot/docs/current/reference/html/production-ready-features.html#production-ready) and [81. Actuator - Part IX. ‘How-to’ guides](https://docs.spring.io/spring-boot/docs/current/reference/html/howto.html#howto-actuator) for more information. ## Charts diff --git a/collectors/python.d.plugin/unbound/unbound.chart.py b/collectors/python.d.plugin/unbound/unbound.chart.py index 6e5a22c58..590de4c98 100644 --- a/collectors/python.d.plugin/unbound/unbound.chart.py +++ b/collectors/python.d.plugin/unbound/unbound.chart.py @@ -32,10 +32,10 @@ CHARTS = { ] }, 'recursion': { - 'options': [None, 'Recursion Timings', 'seconds', 'Unbound', 'unbound.recursion', 'line'], + 'options': [None, 'Recursion Timings', 'milliseconds', 'Unbound', 'unbound.recursion', 'line'], 'lines': [ - ['recursive_avg', 'average', 'absolute', 1, PRECISION], - ['recursive_med', 'median', 'absolute', 1, PRECISION] + ['recursive_avg', 'average', 'absolute', 1, 1], + ['recursive_med', 'median', 'absolute', 1, 1] ] }, 'reqlist': { @@ -83,11 +83,11 @@ PER_THREAD_CHARTS = { ] }, '_recursion': { - 'options': [None, '{longname} Recursion Timings', 'seconds', 'Recursive Timings', + 'options': [None, '{longname} Recursion Timings', 'milliseconds', 'Recursive Timings', 'unbound.threads.recursion', 'line'], 'lines': [ - ['{shortname}_recursive_avg', 'average', 'absolute', 1, PRECISION], - ['{shortname}_recursive_med', 'median', 'absolute', 1, PRECISION] + ['{shortname}_recursive_avg', 'average', 'absolute', 1, 1], + ['{shortname}_recursive_med', 'median', 'absolute', 
1, 1] ] }, '_reqlist': { @@ -103,7 +103,6 @@ PER_THREAD_CHARTS = { } } - # This maps the Unbound stat names to our names and precision requiremnets. STAT_MAP = { 'total.num.queries_ip_ratelimited': ('ratelimit', 1), @@ -118,6 +117,7 @@ STAT_MAP = { 'total.requestlist.exceeded': ('reqlist_exceeded', 1), 'total.requestlist.current.all': ('reqlist_current', 1), 'total.requestlist.current.user': ('reqlist_user', 1), + # Unbound reports recursion timings as fractional seconds, but we want to show them as milliseconds. 'total.recursion.time.avg': ('recursive_avg', PRECISION), 'total.recursion.time.median': ('recursive_med', PRECISION), 'msg.cache.count': ('cache_message', 1), @@ -142,11 +142,16 @@ PER_THREAD_STAT_MAP = { '{shortname}.requestlist.exceeded': ('{shortname}_reqlist_exceeded', 1), '{shortname}.requestlist.current.all': ('{shortname}_reqlist_current', 1), '{shortname}.requestlist.current.user': ('{shortname}_reqlist_user', 1), + # Unbound reports recursion timings as fractional seconds, but we want to show them as milliseconds. '{shortname}.recursion.time.avg': ('{shortname}_recursive_avg', PRECISION), '{shortname}.recursion.time.median': ('{shortname}_recursive_med', PRECISION) } +def is_readable(name): + return os.access(name, os.R_OK) + + # Used to actually generate per-thread charts. 
def _get_perthread_info(thread): sname = 'thread{0}'.format(thread) @@ -203,25 +208,8 @@ class Service(SocketService): self.debug('Using certificate: {0}'.format(self.cert)) def _auto_config(self): - if self.ubconf and os.access(self.ubconf, os.R_OK): - self.debug('Unbound config: {0}'.format(self.ubconf)) - conf = dict() - try: - conf = load_config(self.ubconf) - except Exception as error: - self.error("error on loading '{0}' : {1}".format(self.ubconf, error)) - if self.ext is None: - if 'extended-statistics' in conf['server']: - self.ext = conf['server']['extended-statistics'] - if 'remote-control' in conf: - if conf['remote-control'].get('control-use-cert', False): - self.key = self.key or conf['remote-control'].get('control-key-file') - self.cert = self.cert or conf['remote-control'].get('control-cert-file') - self.port = self.port or conf['remote-control'].get('control-port') - else: - self.unix_socket = self.unix_socket or conf['remote-control'].get('control-interface') - else: - self.debug('Unbound configuration not found.') + self.load_unbound_config() + if not self.key: self.key = '/etc/unbound/unbound_control.key' if not self.cert: @@ -229,6 +217,38 @@ class Service(SocketService): if not self.port: self.port = 8953 + def load_unbound_config(self): + if not (self.ubconf and is_readable(self.ubconf)): + self.debug('Unbound configuration not found.') + return + + self.debug('Loading Unbound config: {0}'.format(self.ubconf)) + + try: + conf = load_config(self.ubconf) + except Exception as error: + self.error("error on loading '{0}' : {1}".format(self.ubconf, error)) + return + + srv = conf.get('server') + if self.ext is None: + if srv and 'extended-statistics' in srv: + self.ext = srv['extended-statistics'] + + rc = conf.get('remote-control') + if not (rc and isinstance(rc, dict)): + return + + if rc.get('control-use-cert', False): + self.key = self.key or rc.get('control-key-file') + self.cert = self.cert or rc.get('control-cert-file') + self.port = 
self.port or rc.get('control-port') + else: + ci = rc.get('control-interface', str()) + is_socket = '/' in ci + if is_socket: + self.unix_socket = ci + def _generate_perthread_charts(self): tmporder = list() for thread in range(0, self.threads): @@ -239,6 +259,14 @@ class Service(SocketService): self.order.extend(sorted(tmporder)) def check(self): + if not is_readable(self.key): + self.error("ssl key '{0}' is not readable".format(self.key)) + return False + + if not is_readable(self.cert): + self.error("ssl certificate '{0}' is not readable".format(self.certificate)) + return False + # Check if authentication is working. self._connect() result = bool(self._sock) @@ -268,12 +296,6 @@ class Service(SocketService): self.request = tmp return result - @staticmethod - def _check_raw_data(data): - # The server will close the connection when it's done sending - # data, so just keep looping until that happens. - return False - def _get_data(self): raw = self._get_raw_data() data = dict() @@ -288,3 +310,9 @@ class Service(SocketService): else: self.warning('Received no data from socket.') return data + + @staticmethod + def _check_raw_data(data): + # The server will close the connection when it's done sending + # data, so just keep looping until that happens. + return False diff --git a/collectors/slabinfo.plugin/README.md b/collectors/slabinfo.plugin/README.md index e21802397..444cd8e38 100644 --- a/collectors/slabinfo.plugin/README.md +++ b/collectors/slabinfo.plugin/README.md @@ -4,10 +4,12 @@ SLAB is a cache mechanism used by the Kernel to avoid fragmentation. Each internal structure (process, file descriptor, inode...) is stored within a SLAB. - ## configuring Netdata for slabinfo -There is currently no configuration needed. +The plugin is disabled by default because it collects and displays a huge amount of metrics. +To enable it set `slabinfo = yes` in the `plugins` section of the `netdata.conf` configuration file. 
+ +There is currently no configuration needed for the plugin itself. As `/proc/slabinfo` is only readable by root, this plugin is setuid root. diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c index 78f0e9807..7468f2746 100644 --- a/collectors/statsd.plugin/statsd.c +++ b/collectors/statsd.plugin/statsd.c @@ -2222,7 +2222,7 @@ void *statsd_main(void *ptr) { // ---------------------------------------------------------------------------------------------------------------- // statsd setup - if(!statsd.enabled) return NULL; + if(!statsd.enabled) goto cleanup; statsd_listen_sockets_setup(); if(!statsd.sockets.opened) { diff --git a/collectors/tc.plugin/plugin_tc.c b/collectors/tc.plugin/plugin_tc.c index 50383f4ce..9245b0857 100644 --- a/collectors/tc.plugin/plugin_tc.c +++ b/collectors/tc.plugin/plugin_tc.c @@ -851,12 +851,11 @@ static void tc_main_cleanup(void *ptr) { if(tc_child_pid) { info("TC: killing with SIGTERM tc-qos-helper process %d", tc_child_pid); - if(killpid(tc_child_pid, SIGTERM) != -1) { + if(killpid(tc_child_pid) != -1) { siginfo_t info; info("TC: waiting for tc plugin child process pid %d to exit...", tc_child_pid); waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED); - // info("TC: finished tc plugin child process pid %d.", tc_child_pid); } tc_child_pid = 0; diff --git a/configure.ac b/configure.ac index a966f0ea6..ea91a7299 100644 --- a/configure.ac +++ b/configure.ac @@ -1130,6 +1130,31 @@ AC_SUBST([OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS]) AC_SUBST([OPTIONAL_MONGOC_CFLAGS]) AC_SUBST([OPTIONAL_MONGOC_LIBS]) +# ----------------------------------------------------------------------------- +# Check if cmocka is available - needed for unit testing + +AC_ARG_ENABLE( + [unit-tests], + [AS_HELP_STRING([--disable-unit-tests], + [Disables building and running the unit tests suite])], + [], + [enable_unit_tests="yes"] +) + +PKG_CHECK_MODULES( + [CMOCKA], + [cmocka], + [have_cmocka="yes"], + [AC_MSG_NOTICE([CMocka not found on 
the system. Unit tests disabled])] +) +AM_CONDITIONAL([ENABLE_UNITTESTS], [test "${enable_unit_tests}" = "yes" -a "${have_cmocka}" = "yes" ]) +AC_SUBST([ENABLE_UNITTESTS]) + +TEST_CFLAGS="${CFLAGS} ${CMOCKA_CFLAGS}" +TEST_LIBS="${CMOCKA_LIBS}" + +AC_SUBST([TEST_CFLAGS]) +AC_SUBST([TEST_LIBS]) AC_CONFIG_FILES([ Makefile @@ -1172,6 +1197,7 @@ AC_CONFIG_FILES([ health/Makefile health/notifications/Makefile libnetdata/Makefile + libnetdata/tests/Makefile libnetdata/adaptive_resortable_list/Makefile libnetdata/avl/Makefile libnetdata/buffer/Makefile @@ -1187,6 +1213,7 @@ AC_CONFIG_FILES([ libnetdata/socket/Makefile libnetdata/statistical/Makefile libnetdata/storage_number/Makefile + libnetdata/storage_number/tests/Makefile libnetdata/threads/Makefile libnetdata/url/Makefile libnetdata/json/Makefile diff --git a/contrib/debian/control b/contrib/debian/control index be9a77860..b4dfa683d 100644 --- a/contrib/debian/control +++ b/contrib/debian/control @@ -50,6 +50,7 @@ Depends: adduser, libprotoc10, ${misc:Depends}, ${shlibs:Depends} +Pre-Depends: dpkg (>= 1.17.14) Description: real-time charts for system monitoring Netdata is a daemon that collects data in realtime (per second) and presents a web site to view and analyze them. The presentation diff --git a/contrib/debian/control.buster b/contrib/debian/control.buster index 1166883be..6b9d0dc50 100644 --- a/contrib/debian/control.buster +++ b/contrib/debian/control.buster @@ -50,6 +50,7 @@ Depends: adduser, libprotoc17, ${misc:Depends}, ${shlibs:Depends} +Pre-Depends: dpkg (>= 1.17.14) Description: real-time charts for system monitoring Netdata is a daemon that collects data in realtime (per second) and presents a web site to view and analyze them. 
The presentation diff --git a/contrib/debian/control.jessie b/contrib/debian/control.jessie index 79db7839c..4ffd25b55 100644 --- a/contrib/debian/control.jessie +++ b/contrib/debian/control.jessie @@ -48,6 +48,7 @@ Depends: adduser, libprotoc10, ${misc:Depends}, ${shlibs:Depends} +Pre-Depends: dpkg (>= 1.17.14) Description: real-time charts for system monitoring Netdata is a daemon that collects data in realtime (per second) and presents a web site to view and analyze them. The presentation diff --git a/contrib/debian/control.trusty b/contrib/debian/control.trusty index 918d2ad35..5edcb95fb 100644 --- a/contrib/debian/control.trusty +++ b/contrib/debian/control.trusty @@ -48,6 +48,7 @@ Depends: adduser, libprotoc8, ${misc:Depends}, ${shlibs:Depends} +Pre-Depends: dpkg (>= 1.17.14) Description: real-time charts for system monitoring Netdata is a daemon that collects data in realtime (per second) and presents a web site to view and analyze them. The presentation diff --git a/contrib/debian/control.xenial b/contrib/debian/control.xenial index 412b389e8..c562e8ef9 100644 --- a/contrib/debian/control.xenial +++ b/contrib/debian/control.xenial @@ -50,6 +50,7 @@ Depends: adduser, libprotoc9v5, ${misc:Depends}, ${shlibs:Depends} +Pre-Depends: dpkg (>= 1.17.14) Description: real-time charts for system monitoring Netdata is a daemon that collects data in realtime (per second) and presents a web site to view and analyze them. 
The presentation diff --git a/contrib/debian/netdata.postinst.in b/contrib/debian/netdata.postinst.in index 44b53ccfb..602f21c72 100644 --- a/contrib/debian/netdata.postinst.in +++ b/contrib/debian/netdata.postinst.in @@ -2,6 +2,19 @@ set -e +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/.well-known /usr/share/netdata/www/.well-known 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/css /usr/share/netdata/www/css 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/fonts /usr/share/netdata/www/fonts 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/images /usr/share/netdata/www/images 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/lib /usr/share/netdata/www/lib 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/static /usr/share/netdata/www/static 1.18.1~ netdata -- "$@" + case "$1" in configure) if [ -z "$2" ]; then diff --git a/contrib/debian/netdata.postrm b/contrib/debian/netdata.postrm index 94e82bcd4..de93dad1a 100644 --- a/contrib/debian/netdata.postrm +++ b/contrib/debian/netdata.postrm @@ -2,6 +2,19 @@ set -e +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/.well-known /usr/share/netdata/www/.well-known 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/css /usr/share/netdata/www/css 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/fonts /usr/share/netdata/www/fonts 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/images /usr/share/netdata/www/images 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/lib /usr/share/netdata/www/lib 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/static /usr/share/netdata/www/static 1.18.1~ netdata -- "$@" + 
case "$1" in remove) ;; diff --git a/contrib/debian/netdata.preinst b/contrib/debian/netdata.preinst new file mode 100644 index 000000000..c4da80fcc --- /dev/null +++ b/contrib/debian/netdata.preinst @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/.well-known /usr/share/netdata/www/.well-known 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/css /usr/share/netdata/www/css 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/fonts /usr/share/netdata/www/fonts 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/images /usr/share/netdata/www/images 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/lib /usr/share/netdata/www/lib 1.18.1~ netdata -- "$@" +dpkg-maintscript-helper dir_to_symlink \ + /var/lib/netdata/www/static /usr/share/netdata/www/static 1.18.1~ netdata -- "$@" diff --git a/daemon/Makefile.am b/daemon/Makefile.am index e020e517b..ee1b53d09 100644 --- a/daemon/Makefile.am +++ b/daemon/Makefile.am @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES= $(srcdir)/Makefile.in +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in CLEANFILES = \ anonymous-statistics.sh \ $(NULL) diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index 53b7546f2..2bcc5c9fd 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -534,11 +534,30 @@ void global_statistics_charts(void) { // ---------------------------------------------------------------- #ifdef ENABLE_DBENGINE - if (localhost->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { - unsigned long long stats_array[RRDENG_NR_STATS]; + RRDHOST *host; + unsigned long long stats_array[RRDENG_NR_STATS] = {0}; + unsigned long long local_stats_array[RRDENG_NR_STATS]; + unsigned hosts_with_dbengine = 0, i; + + rrd_rdlock(); + 
rrdhost_foreach_read(host) { + if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + ++hosts_with_dbengine; + /* get localhost's DB engine's statistics */ + rrdeng_get_33_statistics(host->rrdeng_ctx, local_stats_array); + for (i = 0 ; i < RRDENG_NR_STATS ; ++i) { + /* aggregate statistics across hosts */ + stats_array[i] += local_stats_array[i]; + } + } + } + rrd_unlock(); - /* get localhost's DB engine's statistics */ - rrdeng_get_33_statistics(localhost->rrdeng_ctx, stats_array); + if (hosts_with_dbengine) { + /* deduplicate global statistics by getting the ones from the last host */ + stats_array[30] = local_stats_array[30]; + stats_array[31] = local_stats_array[31]; + stats_array[32] = local_stats_array[32]; // ---------------------------------------------------------------- @@ -639,7 +658,7 @@ void global_statistics_charts(void) { static RRDSET *st_pg_cache_pages = NULL; static RRDDIM *rd_descriptors = NULL; static RRDDIM *rd_populated = NULL; - static RRDDIM *rd_commited = NULL; + static RRDDIM *rd_committed = NULL; static RRDDIM *rd_insertions = NULL; static RRDDIM *rd_deletions = NULL; static RRDDIM *rd_backfills = NULL; @@ -663,7 +682,7 @@ void global_statistics_charts(void) { rd_descriptors = rrddim_add(st_pg_cache_pages, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_populated = rrddim_add(st_pg_cache_pages, "populated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_commited = rrddim_add(st_pg_cache_pages, "commited", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_committed = rrddim_add(st_pg_cache_pages, "committed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_insertions = rrddim_add(st_pg_cache_pages, "insertions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rd_deletions = rrddim_add(st_pg_cache_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); rd_backfills = rrddim_add(st_pg_cache_pages, "backfills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -674,7 +693,7 @@ void global_statistics_charts(void) { rrddim_set_by_pointer(st_pg_cache_pages, 
rd_descriptors, (collected_number)stats_array[27]); rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_commited, (collected_number)stats_array[4]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_committed, (collected_number)stats_array[4]); rrddim_set_by_pointer(st_pg_cache_pages, rd_insertions, (collected_number)stats_array[5]); rrddim_set_by_pointer(st_pg_cache_pages, rd_deletions, (collected_number)stats_array[6]); rrddim_set_by_pointer(st_pg_cache_pages, rd_backfills, (collected_number)stats_array[9]); @@ -818,6 +837,63 @@ void global_statistics_charts(void) { rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4); rrdset_done(st_fd); } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_ram_usage = NULL; + static RRDDIM *rd_cached = NULL; + static RRDDIM *rd_pinned = NULL; + static RRDDIM *rd_metadata = NULL; + + collected_number cached_pages, pinned_pages, API_producers, populated_pages, metadata, pages_on_disk, + page_cache_descriptors; + + if (unlikely(!st_ram_usage)) { + st_ram_usage = rrdset_create_localhost( + "netdata" + , "dbengine_ram" + , NULL + , "dbengine" + , NULL + , "NetData DB engine RAM usage" + , "MiB" + , "netdata" + , "stats" + , 130509 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_cached = rrddim_add(st_ram_usage, "cache", NULL, 1, 256, RRD_ALGORITHM_ABSOLUTE); + rd_pinned = rrddim_add(st_ram_usage, "collectors", NULL, 1, 256, RRD_ALGORITHM_ABSOLUTE); + rd_metadata = rrddim_add(st_ram_usage, "metadata", NULL, 1, 1048576, RRD_ALGORITHM_ABSOLUTE); + } + else + rrdset_next(st_ram_usage); + + API_producers = (collected_number)stats_array[0]; + pages_on_disk = (collected_number)stats_array[2]; + populated_pages = (collected_number)stats_array[3]; + page_cache_descriptors = (collected_number)stats_array[27]; + + if (API_producers * 2 > populated_pages) { + 
pinned_pages = API_producers; + } else{ + pinned_pages = API_producers * 2; + } + cached_pages = populated_pages - pinned_pages; + + metadata = page_cache_descriptors * sizeof(struct page_cache_descr); + metadata += pages_on_disk * sizeof(struct rrdeng_page_descr); + /* This is an empirical estimation for Judy array indexing and extent structures */ + metadata += pages_on_disk * 58; + + rrddim_set_by_pointer(st_ram_usage, rd_cached, cached_pages); + rrddim_set_by_pointer(st_ram_usage, rd_pinned, pinned_pages); + rrddim_set_by_pointer(st_ram_usage, rd_metadata, metadata); + rrdset_done(st_ram_usage); + } } #endif diff --git a/daemon/main.c b/daemon/main.c index 4189ac7bd..0e56654db 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -146,46 +146,28 @@ void web_server_config_options(void) { } -int killpid(pid_t pid, int signal) -{ - int ret = -1; +// killpid kills pid with SIGTERM. +int killpid(pid_t pid) { + int ret; debug(D_EXIT, "Request to kill pid %d", pid); errno = 0; - if(kill(pid, 0) == -1) { + ret = kill(pid, SIGTERM); + if (ret == -1) { switch(errno) { case ESRCH: - error("Request to kill pid %d, but it is not running.", pid); - break; + // We wanted the process to exit so just let the caller handle. 
+ return ret; case EPERM: - error("Request to kill pid %d, but I do not have enough permissions.", pid); + error("Cannot kill pid %d, but I do not have enough permissions.", pid); break; default: - error("Request to kill pid %d, but I received an error.", pid); + error("Cannot kill pid %d, but I received an error.", pid); break; } } - else { - errno = 0; - ret = kill(pid, signal); - if(ret == -1) { - switch(errno) { - case ESRCH: - error("Cannot kill pid %d, but it is not running.", pid); - break; - - case EPERM: - error("Cannot kill pid %d, but I do not have enough permissions.", pid); - break; - - default: - error("Cannot kill pid %d, but I received an error.", pid); - break; - } - } - } return ret; } diff --git a/daemon/main.h b/daemon/main.h index 687155981..9d9f4ef0f 100644 --- a/daemon/main.h +++ b/daemon/main.h @@ -41,7 +41,7 @@ struct netdata_static_thread { }; extern void cancel_main_threads(void); -extern int killpid(pid_t pid, int signal); +extern int killpid(pid_t pid); extern void netdata_cleanup_and_exit(int ret) NORETURN; extern void send_statistics(const char *action, const char *action_result, const char *action_data); diff --git a/daemon/signals.c b/daemon/signals.c index 71f271887..5378b04e5 100644 --- a/daemon/signals.c +++ b/daemon/signals.c @@ -2,6 +2,8 @@ #include "common.h" +static int reaper_enabled = 0; + typedef enum signal_action { NETDATA_SIGNAL_END_OF_LIST, NETDATA_SIGNAL_IGNORE, @@ -10,6 +12,7 @@ typedef enum signal_action { NETDATA_SIGNAL_LOG_ROTATE, NETDATA_SIGNAL_RELOAD_HEALTH, NETDATA_SIGNAL_FATAL, + NETDATA_SIGNAL_CHILD, } SIGNAL_ACTION; static struct { @@ -26,6 +29,7 @@ static struct { { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE }, { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH }, { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL }, + { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD }, // terminator { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST } @@ -42,7 +46,7 @@ static void signal_handler(int signo) { char buffer[200 + 
1]; snprintfz(buffer, 200, "\nSIGNAL HANLDER: received: %s. Oops! This is bad!\n", signals_waiting[i].name); if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) { - // nothing to do - we cannot write but there is no way to complaint about it + // nothing to do - we cannot write but there is no way to complain about it ; } } @@ -74,15 +78,33 @@ void signals_init(void) { struct sigaction sa; sa.sa_flags = 0; + // Enable process tracking / reaper if running as init (pid == 1). + // This prevents zombie processes when running in a container. + if (getpid() == 1) { + info("SIGNAL: Enabling reaper"); + myp_init(); + reaper_enabled = 1; + } else { + info("SIGNAL: Not enabling reaper"); + } + // ignore all signals while we run in a signal handler sigfillset(&sa.sa_mask); int i; for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { - if(signals_waiting[i].action == NETDATA_SIGNAL_IGNORE) + switch (signals_waiting[i].action) { + case NETDATA_SIGNAL_IGNORE: sa.sa_handler = SIG_IGN; - else + break; + case NETDATA_SIGNAL_CHILD: + if (reaper_enabled == 0) + continue; + // FALLTHROUGH + default: sa.sa_handler = signal_handler; + break; + } if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name); @@ -100,6 +122,76 @@ void signals_reset(void) { if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name); } + + if (reaper_enabled == 1) + myp_free(); +} + +// reap_child reaps the child identified by pid. +static void reap_child(pid_t pid) { + siginfo_t i; + + errno = 0; + debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid); + if (waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) { + if (errno != ECHILD) + error("SIGNAL: Failed to wait for: %d", pid); + else + debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid); + return; + } else if (i.si_pid == 0) { + // Process didn't exit, this shouldn't happen. 
+ return; + } + + switch (i.si_code) { + case CLD_EXITED: + debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, i.si_status); + break; + case CLD_KILLED: + debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, i.si_status); + break; + case CLD_DUMPED: + debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, i.si_status); + break; + case CLD_STOPPED: + debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, i.si_status); + break; + case CLD_TRAPPED: + debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, i.si_status); + break; + case CLD_CONTINUED: + debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, i.si_status); + break; + default: + debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status); + } +} + +// reap_children reaps all pending children which are not managed by myp. +static void reap_children() { + siginfo_t i; + + while (1 == 1) { + // Identify which process caused the signal so we can determine + // if we need to reap a re-parented process. + i.si_pid = 0; + if (waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1) { + if (errno != ECHILD) // This shouldn't happen with WNOHANG but does. + error("SIGNAL: Failed to wait"); + return; + } else if (i.si_pid == 0) { + // No child exited. + return; + } else if (myp_reap(i.si_pid) == 0) { + // myp managed, sleep for a short time to avoid busy wait while + // this is handled by myp. + usleep(10000); + } else { + // Unknown process, likely a re-parented child, reap it. + reap_child(i.si_pid); + } + } } void signals_handle(void) { @@ -157,6 +249,11 @@ void signals_handle(void) { case NETDATA_SIGNAL_FATAL: fatal("SIGNAL: Received %s. netdata now exits.", name); + case NETDATA_SIGNAL_CHILD: + debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", name); + reap_children(); + break; + default: info("SIGNAL: Received %s. No signal handler configured. 
Ignoring it.", name); break; diff --git a/database/rrddim.c b/database/rrddim.c index 8ab5a7237..0032940ce 100644 --- a/database/rrddim.c +++ b/database/rrddim.c @@ -160,7 +160,7 @@ void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host) { RRDCALC *rrdc; for (rrdc = host->alarms_with_foreach; rrdc ; rrdc = rrdc->next) { if (simple_pattern_matches(rrdc->spdim, rd->id) || simple_pattern_matches(rrdc->spdim, rd->name)) { - if (!strcmp(rrdc->chart, st->name)) { + if (rrdc->hash_chart == st->hash_name || !strcmp(rrdc->chart, st->name) || !strcmp(rrdc->chart, st->id)) { char *usename = alarm_name_with_dim(rrdc->name, strlen(rrdc->name), rd->name, strlen(rd->name)); if (usename) { if(rrdcalc_exists(host, st->name, usename, 0, 0)){ @@ -217,9 +217,9 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", st->cache_dir, filename); if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP || - memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) { + memory_mode == RRD_MEMORY_MODE_RAM) { rd = (RRDDIM *)mymmap( - (memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE)?NULL:fullfilename + (memory_mode == RRD_MEMORY_MODE_RAM) ? NULL : fullfilename , size , ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE) , 1 @@ -240,7 +240,7 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte struct timeval now; now_realtime_timeval(&now); - if(memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) { + if(memory_mode == RRD_MEMORY_MODE_RAM) { memset(rd, 0, size); } else { @@ -292,7 +292,10 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte if(unlikely(!rd)) { // if we didn't manage to get a mmap'd dimension, just create one rd = callocz(1, size); - rd->rrd_memory_mode = (memory_mode == RRD_MEMORY_MODE_NONE) ? 
RRD_MEMORY_MODE_NONE : RRD_MEMORY_MODE_ALLOC; + if (memory_mode == RRD_MEMORY_MODE_DBENGINE) + rd->rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + else + rd->rrd_memory_mode = (memory_mode == RRD_MEMORY_MODE_NONE) ? RRD_MEMORY_MODE_NONE : RRD_MEMORY_MODE_ALLOC; } rd->memsize = size; @@ -460,7 +463,6 @@ void rrddim_free(RRDSET *st, RRDDIM *rd) case RRD_MEMORY_MODE_SAVE: case RRD_MEMORY_MODE_MAP: case RRD_MEMORY_MODE_RAM: - case RRD_MEMORY_MODE_DBENGINE: debug(D_RRD_CALLS, "Unmapping dimension '%s'.", rd->name); freez((void *)rd->id); freez(rd->cache_filename); @@ -469,6 +471,7 @@ void rrddim_free(RRDSET *st, RRDDIM *rd) case RRD_MEMORY_MODE_ALLOC: case RRD_MEMORY_MODE_NONE: + case RRD_MEMORY_MODE_DBENGINE: debug(D_RRD_CALLS, "Removing dimension '%s'.", rd->name); freez((void *)rd->id); freez(rd->cache_filename); diff --git a/database/rrdset.c b/database/rrdset.c index 26df8d737..e96d707be 100644 --- a/database/rrdset.c +++ b/database/rrdset.c @@ -369,13 +369,13 @@ void rrdset_free(RRDSET *st) { case RRD_MEMORY_MODE_SAVE: case RRD_MEMORY_MODE_MAP: case RRD_MEMORY_MODE_RAM: - case RRD_MEMORY_MODE_DBENGINE: debug(D_RRD_CALLS, "Unmapping stats '%s'.", st->name); munmap(st, st->memsize); break; case RRD_MEMORY_MODE_ALLOC: case RRD_MEMORY_MODE_NONE: + case RRD_MEMORY_MODE_DBENGINE: freez(st); break; } @@ -569,9 +569,9 @@ RRDSET *rrdset_create_custom( snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", cache_dir); if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP || - memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) { + memory_mode == RRD_MEMORY_MODE_RAM) { st = (RRDSET *) mymmap( - (memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE)?NULL:fullfilename + (memory_mode == RRD_MEMORY_MODE_RAM) ? NULL : fullfilename , size , ((memory_mode == RRD_MEMORY_MODE_MAP) ? 
MAP_SHARED : MAP_PRIVATE) , 0 @@ -602,7 +602,7 @@ RRDSET *rrdset_create_custom( st->alarms = NULL; st->flags = 0x00000000; - if(memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) { + if(memory_mode == RRD_MEMORY_MODE_RAM) { memset(st, 0, size); } else { diff --git a/docs/getting-started.md b/docs/getting-started.md index ce3558192..44e257383 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -159,14 +159,13 @@ Find the `SEND_EMAIL="YES"` line and change it to `SEND_EMAIL="NO"`. ## Change how long Netdata stores metrics -By default, Netdata stores 1 hour of historical metrics and uses about 25MB of RAM. +By default, Netdata uses a database engine that uses RAM to store recent metrics. For long-term metrics storage, the database +engine uses a "spill to disk" feature that also takes advantage of available disk space and keeps RAM usage low. -If that's not enough for you, Netdata is quite adaptable to long-term storage of your system's metrics. +The database engine allows you to store a much larger dataset than your system's available RAM. -There are two quick ways to increase the depth of historical metrics: increase the `history` value for the round-robin -that's enabled by default, or switch to the database engine. - -We have a tutorial that walks you through both options: [**Changing how long Netdata stores +If you're not sure whether you're using the database engine, or want to tweak the default settings to store even more +historical metrics, check out our tutorial: [**Changing how long Netdata stores metrics**](../docs/tutorials/longer-metrics-storage.md). 
**What's next?**: diff --git a/docs/tutorials/dimension-templates.md b/docs/tutorials/dimension-templates.md new file mode 100644 index 000000000..67648b9d6 --- /dev/null +++ b/docs/tutorials/dimension-templates.md @@ -0,0 +1,169 @@ +# Use dimension templates to create dynamic alarms + +Your ability to monitor the health of your systems and applications relies on your ability to create and maintain +the best set of alarms for your particular needs. + +In v1.18 of Netdata, we introduced **dimension templates** for alarms, which simplifies the process of writing [alarm +entities](../../health/README.md#entities-in-the-health-files) for charts with many dimensions. + +Dimension templates can condense many individual entities into one—no more copy-pasting one entity and changing the +`alarm`/`template` and `lookup` lines for each dimension you'd like to monitor. + +They are, however, an advanced health monitoring feature. For more basic instructions on creating your first alarm, +check out our [health monitoring documentation](../../health/), which also includes +[examples](../../health/README.md#examples). + +## The fundamentals of `foreach` + +Our dimension templates update creates a new `foreach` parameter to the existing [`lookup` +line](../../health/README.md#alarm-line-lookup). This is where the magic happens. + +You use the `foreach` parameter to specify which dimensions you want to monitor with this single alarm. You can separate +them with a comma (`,`) or a pipe (`|`). You can also use a [Netdata simple pattern](../../libnetdata/simple_pattern/README.md) +to create many alarms with a regex-like syntax. + +The `foreach` parameter _has_ to be the last parameter in your `lookup` line, and if you have both `of` and `foreach` in +the same `lookup` line, Netdata will ignore the `of` parameter and use `foreach` instead. + +Let's get into some examples so you can see how the new parameter works. 
+ +> ⚠️ The following entities are examples to showcase the functionality and syntax of dimension templates. They are not +> meant to be run as-is on production systems. + +## Condensing entities with `foreach` + +Let's say you want to monitor the `system`, `user`, and `nice` dimensions in your system's overall CPU utilization. +Before dimension templates, you would need the following three entities: + +```yaml + alarm: cpu_system + on: system.cpu +lookup: average -10m percentage of system + every: 1m + warn: $this > 50 + crit: $this > 80 + + alarm: cpu_user + on: system.cpu +lookup: average -10m percentage of user + every: 1m + warn: $this > 50 + crit: $this > 80 + + alarm: cpu_nice + on: system.cpu +lookup: average -10m percentage of nice + every: 1m + warn: $this > 50 + crit: $this > 80 +``` + +With dimension templates, you can condense these into a single alarm. Take note of the `alarm` and `lookup` lines. + +```yaml + alarm: cpu_template + on: system.cpu +lookup: average -10m percentage foreach system,user,nice + every: 1m + warn: $this > 50 + crit: $this > 80 +``` + +The `alarm` line specifies the naming scheme Netdata will use. You can use whatever naming scheme you'd like, with `.` +and `_` being the only allowed symbols. + +The `lookup` line has changed from `of` to `foreach`, and we're now passing three dimensions. + +In this example, Netdata will create three alarms with the names `cpu_template_system`, `cpu_template_user`, and +`cpu_template_nice`. Every minute, each alarm will use the same database query to calculate the average CPU usage for +the `system`, `user`, and `nice` dimensions over the last 10 minutes and send out alarms if necessary. + +You can find these three alarms active by clicking on the **Alarms** button in the top navigation, and then clicking on +the **All** tab and scrolling to the **system - cpu** collapsible section. 
+ +![Three new alarms created from the dimension template](https://user-images.githubusercontent.com/1153921/66218994-29523800-e67f-11e9-9bcb-9bca23e2c554.png) + +Let's look at some other examples of how `foreach` works so you can best apply it in your configurations. + +### Using a Netdata simple pattern in `foreach` + +In the last example, we used `foreach system,user,nice` to create three distinct alarms using dimension templates. But +what if you want to quickly create alarms for _all_ the dimensions of a given chart? + +Use a [simple pattern](../../libnetdata/simple_pattern/README.md)! One example of a simple pattern is a single wildcard +(`*`). + +Instead of monitoring system CPU usage, let's monitor per-application CPU usage using the `apps.cpu` chart. Passing a +wildcard as the simple pattern tells Netdata to create a separate alarm for _every_ process on your system: + +```yaml + alarm: app_cpu + on: apps.cpu +lookup: average -10m percentage foreach * + every: 1m + warn: $this > 50 + crit: $this > 80 +``` + +This entity will now create alarms for every dimension in the `apps.cpu` chart. Given that most `apps.cpu` charts have +10 or more dimensions, using the wildcard ensures you catch every CPU-hogging process. + +To learn more about how to use simple patterns with dimension templates, see our [simple patterns +documentation](../../libnetdata/simple_pattern/README.md). + +## Using `foreach` with alarm templates + +Dimension templates also work with [alarm templates](../../health/README.md#entities-in-the-health-files). Alarm +templates help you create alarms for all the charts with a given context—for example, all the cores of your system's +CPU. + +By combining the two, you can create dozens of individual alarms with a single template entity. Here's how you would +create alarms for the `system`, `user`, and `nice` dimensions for every chart in the `cpu.cpu` context—or, in other +words, every CPU core. 
+ +```yaml +template: cpu_template + on: cpu.cpu + lookup: average -10m percentage foreach system,user,nice + every: 1m + warn: $this > 50 + crit: $this > 80 +``` + +On a system with a 6-core, 12-thread Ryzen 5 1600 CPU, this one entity creates alarms on the following charts and +dimensions: + +- `cpu.cpu0` + - `cpu_template_user` + - `cpu_template_system` + - `cpu_template_nice` +- `cpu.cpu1` + - `cpu_template_user` + - `cpu_template_system` + - `cpu_template_nice` +- `cpu.cpu2` + - `cpu_template_user` + - `cpu_template_system` + - `cpu_template_nice` +- ... +- `cpu.cpu11` + - `cpu_template_user` + - `cpu_template_system` + - `cpu_template_nice` + +And here's how just a few of those dimension template-generated alarms look in the Netdata dashboard. + +![A few of the created alarms in the Netdata dashboard](https://user-images.githubusercontent.com/1153921/66219669-708cf880-e680-11e9-8b3a-7bfe178fa28b.png) + +All in all, this single entity creates 36 individual alarms. Much easier than writing 36 separate entities in your +health configuration files! + +## What's next? + +We hope you're excited about the possibilities of using dimension templates! Maybe they'll inspire you to build new +alarms that will help you better monitor the health of your systems. + +Or, at the very least, simplify your configuration files. + +For information about other advanced features in Netdata's health monitoring toolkit, check out our [health +documentation](../../health/). And if you have some cool alarms you built using dimension templates, diff --git a/docs/tutorials/longer-metrics-storage.md b/docs/tutorials/longer-metrics-storage.md index e227f5bda..1ef3f8d8e 100644 --- a/docs/tutorials/longer-metrics-storage.md +++ b/docs/tutorials/longer-metrics-storage.md @@ -7,30 +7,27 @@ Many people think Netdata can only store about an hour's worth of real-time metr configuration today. 
With the right settings, Netdata is quite capable of efficiently storing hours or days worth of historical, per-second metrics without having to rely on a [backend](../../backends/). -This tutorial gives two options for configuring Netdata to store more metrics. We recommend the [**database -engine**](#using-the-database-engine), as it will soon be the default configuration. However, you can stick with the -current default **round-robin database** if you prefer. +This tutorial gives two options for configuring Netdata to store more metrics. **We recommend the default [database +engine](#using-the-database-engine)**, but you can stick with or switch to the round-robin database if you prefer. Let's get started. ## Using the database engine The database engine uses RAM to store recent metrics while also using a "spill to disk" feature that takes advantage of -available disk space for long-term metrics storage.This feature of the database engine allows you to store a much larger -dataset than your system's available RAM. +available disk space for long-term metrics storage. This feature of the database engine allows you to store a much +larger dataset than your system's available RAM. -The database engine will eventually become the default method of retaining metrics, but until then, you can switch to -the database engine by changing a single option. - -Edit your `netdata.conf` file and change the `memory mode` setting to `dbengine`: +The database engine is currently the default method of storing metrics, but if you're not sure which database you're +using, check out your `netdata.conf` file and look for the `memory mode` setting: ```conf [global] memory mode = dbengine ``` -Next, restart Netdata. On Linux systems, we recommend running `sudo service netdata restart`. You're now using the -database engine! +If `memory mode` is set to anything but `dbengine`, change it and restart Netdata using the standard command for +restarting services on your system. 
You're now using the database engine! > Learn more about how we implemented the database engine, and our vision for its future, on our blog: [_How and why > we're bringing long-term storage to Netdata_](https://blog.netdata.cloud/posts/db-engine/). @@ -55,10 +52,11 @@ size` and `dbengine disk space`. `dbengine disk space` sets the maximum disk space (again, in MiB) the database engine will use for storing compressed metrics. -Based on our testing, these default settings will retain about two day's worth of metrics when Netdata collects 2,000 -metrics every second. +Based on our testing, these default settings will retain about a day's worth of metrics when Netdata collects roughly +4,000 metrics every second. If you increase either `page cache size` or `dbengine disk space`, Netdata will retain even +more historical metrics. -If you'd like to change these options, read more about the [database engine's memory +But before you change these options too dramatically, read up on the [database engine's memory footprint](../../database/engine/README.md#memory-requirements). With the database engine active, you can back up your `/var/cache/netdata/dbengine/` folder to another location for @@ -69,15 +67,18 @@ aren't ready to make the move. ## Using the round-robin database -By default, Netdata uses a round-robin database to store 1 hour of per-second metrics. Here's the default setting for -`history` in the `netdata.conf` file that comes pre-installed with Netdata. +In previous versions, Netdata used a round-robin database to store 1 hour of per-second metrics. + +To see if you're still using this database, or if you would like to switch to it, open your `netdata.conf` file and see +if `memory mode` option is set to `save`. ```conf [global] - history = 3600 + memory mode = save ``` -One hour has 3,600 seconds, hence the `3600` value! +If `memory mode` is set to `save`, then you're using the round-robin database. 
If so, the `history` option is set to +`3600`, which is the equivalent to 3,600 seconds, or one hour. To increase your historical metrics, you can increase `history` to the number of seconds you'd like to store: diff --git a/docs/tutorials/monitor-hadoop-cluster.md b/docs/tutorials/monitor-hadoop-cluster.md new file mode 100644 index 000000000..6d6e79708 --- /dev/null +++ b/docs/tutorials/monitor-hadoop-cluster.md @@ -0,0 +1,197 @@ +# Monitor a Hadoop cluster with Netdata + +Hadoop, an [Apache project](https://hadoop.apache.org/), is a framework for processing large sets of data across a +distributed cluster of systems. + +And while Hadoop is designed to be a highly-available and fault-tolerant service, those who operate a Hadoop cluster +will want to monitor the health and performance of their [Hadoop Distributed File System +(HDFS)](https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html) and [Zookeeper](https://zookeeper.apache.org/) +implementations. + +Netdata comes with built-in and pre-configured support for monitoring both HDFS and Zookeeper. + +This tutorial assumes you have a Hadoop cluster, with HDFS and Zookeeper, running already. If you don't, please follow +the [official Hadoop +instructions](http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html) or an +alternative, like the guide available from +[DigitalOcean](https://www.digitalocean.com/community/tutorials/how-to-install-hadoop-in-stand-alone-mode-on-ubuntu-18-04). + +For more specifics on the collection modules used in this tutorial, read the respective pages in our documentation: + +- [HDFS](../../collectors/go.d.plugin/modules/hdfs/README.md) +- [Zookeeper](../../collectors/go.d.plugin/modules/zookeeper/README.md) + +## Set up your HDFS and Zookeeper installations + +As with all data sources, Netdata can auto-detect HDFS and Zookeeper nodes if you installed them using the standard +installation procedure. 
+ +For Netdata to collect HDFS metrics, it needs to be able to access the node's `/jmx` endpoint. You can test whether an +JMX endpoint is accessible by using `curl HDFS-IP:PORT/jmx`. For a NameNode, you should see output similar to the +following: + +```json +{ + "beans" : [ { + "name" : "Hadoop:service=NameNode,name=JvmMetrics", + "modelerType" : "JvmMetrics", + "MemNonHeapUsedM" : 65.67851, + "MemNonHeapCommittedM" : 67.3125, + "MemNonHeapMaxM" : -1.0, + "MemHeapUsedM" : 154.46341, + "MemHeapCommittedM" : 215.0, + "MemHeapMaxM" : 843.0, + "MemMaxM" : 843.0, + "GcCount" : 15, + "GcTimeMillis" : 305, + "GcNumWarnThresholdExceeded" : 0, + "GcNumInfoThresholdExceeded" : 0, + "GcTotalExtraSleepTime" : 92, + "ThreadsNew" : 0, + "ThreadsRunnable" : 6, + "ThreadsBlocked" : 0, + "ThreadsWaiting" : 7, + "ThreadsTimedWaiting" : 34, + "ThreadsTerminated" : 0, + "LogFatal" : 0, + "LogError" : 0, + "LogWarn" : 2, + "LogInfo" : 348 + }, + { ... } + ] +} +``` + +The JSON result for a DataNode's `/jmx` endpoint is slightly different: + +```json +{ + "beans" : [ { + "name" : "Hadoop:service=DataNode,name=DataNodeActivity-dev-slave-01.dev.loc +al-9866", + "modelerType" : "DataNodeActivity-dev-slave-01.dev.local-9866", + "tag.SessionId" : null, + "tag.Context" : "dfs", + "tag.Hostname" : "dev-slave-01.dev.local", + "BytesWritten" : 500960407, + "TotalWriteTime" : 463, + "BytesRead" : 80689178, + "TotalReadTime" : 41203, + "BlocksWritten" : 16, + "BlocksRead" : 16, + "BlocksReplicated" : 4, + ... + }, + { ... } + ] +} +``` + +If Netdata can't access the `/jmx` endpoint for either a NameNode or DataNode, it will not be able to auto-detect and +collect metrics from your HDFS implementation. + +Zookeeper auto-detection relies on an accessible client port and a whitelisted `mntr` command. 
For more details on +`mntr`, see Zookeeper's documentation on [cluster +options](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_clusterOptions) and [Zookeeper +commands](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_zkCommands). + +## Configure the HDFS and Zookeeper modules + +To configure Netdata's HDFS module, navigate to your Netdata directory (typically at `/etc/netdata/`) and use +`edit-config` to initialize and edit your HDFS configuration file. + +```bash +cd /etc/netdata/ +sudo ./edit-config go.d/hdfs.conf +``` + +At the bottom of the file, you will see two example jobs, both of which are commented out: + +```yaml +# [ JOBS ] +#jobs: +# - name: namenode +# url: http://127.0.0.1:9870/jmx +# +# - name: datanode +# url: http://127.0.0.1:9864/jmx +``` + +Uncomment these lines and edit the `url` value(s) according to your setup. Now's the time to add any other configuration +details, which you can find inside of the `hdfs.conf` file itself. Most production implementations will require TLS +certificates. + +The result for a simple HDFS setup, running entirely on `localhost` and without certificate authentication, might look +like this: + +```yaml +# [ JOBS ] +jobs: + - name: namenode + url: http://127.0.0.1:9870/jmx + + - name: datanode + url: http://127.0.0.1:9864/jmx +``` + +At this point, Netdata should be configured to collect metrics from your HDFS servers. Let's move on to Zookeeper. + +Next, use `edit-config` again to initialize/edit your `zookeeper.conf` file. + +```bash +cd /etc/netdata/ +sudo ./edit-config go.d/zookeeper.conf +``` + +As with the `hdfs.conf` file, head to the bottom, uncomment the example jobs, and tweak the `address` values according +to your setup. Again, you may need to add additional configuration options, like TLS certificates. + +```yaml +jobs: + - name : local + address : 127.0.0.1:2181 + + - name : remote + address : 203.0.113.10:2182 +``` + +Finally, restart Netdata. 
+ +```sh +sudo service netdata restart +``` + +Upon restart, Netdata should recognize your HDFS/Zookeeper servers, enable the HDFS and Zookeeper modules, and begin +showing real-time metrics for both in your Netdata dashboard. 🎉 + +## Configuring HDFS and Zookeeper alarms + +The Netdata community helped us create sane defaults for alarms related to both HDFS and Zookeeper. You may want to +investigate these to ensure they work well with your Hadoop implementation. + +- [HDFS alarms](https://raw.githubusercontent.com/netdata/netdata/master/health/health.d/hdfs.conf) +- [Zookeeper alarms](https://raw.githubusercontent.com/netdata/netdata/master/health/health.d/zookeeper.conf) + +You can also access/edit these files directly with `edit-config`: + +```bash +sudo /etc/netdata/edit-config health.d/hdfs.conf +sudo /etc/netdata/edit-config health.d/zookeeper.conf +``` + +For more information about editing the defaults or writing new alarm entities, see our [health monitoring +documentation](../../health/README.md). + +## What's next? + +If you're having issues with Netdata auto-detecting your HDFS/Zookeeper servers, or want to help improve how Netdata +collects or presents metrics from these services, feel free to [file an +issue](https://github.com/netdata/netdata/issues/new?labels=bug%2C+needs+triage&template=bug_report.md). + +- Read up on the [HDFS configuration + file](https://github.com/netdata/go.d.plugin/blob/master/config/go.d/hdfs.conf) to understand how to configure + global options or per-job options, such as username/password, TLS certificates, timeouts, and more. +- Read up on the [Zookeeper configuration + file](https://github.com/netdata/go.d.plugin/blob/master/config/go.d/zookeeper.conf) to understand how to configure + global options or per-job options, timeouts, TLS certificates, and more. 
diff --git a/health/notifications/Makefile.am b/health/notifications/Makefile.am index e3970de04..efce90bda 100644 --- a/health/notifications/Makefile.am +++ b/health/notifications/Makefile.am @@ -31,6 +31,7 @@ include awssns/Makefile.inc include discord/Makefile.inc include email/Makefile.inc include flock/Makefile.inc +include hangouts/Makefile.inc include irc/Makefile.inc include kavenegar/Makefile.inc include messagebird/Makefile.inc @@ -44,4 +45,4 @@ include syslog/Makefile.inc include telegram/Makefile.inc include twilio/Makefile.inc include web/Makefile.inc -include custom/Makefile.inc
\ No newline at end of file +include custom/Makefile.inc diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in index 509a8e88d..b56c67e65 100755 --- a/health/notifications/alarm-notify.sh.in +++ b/health/notifications/alarm-notify.sh.in @@ -33,6 +33,7 @@ # - syslog messages by @Ferroin # - Microsoft Team notification by @tioumen # - RocketChat notifications by @Hermsi1337 #3777 +# - Google Hangouts Chat notifications by @EnzoAkira and @hendrikhofstadt # ----------------------------------------------------------------------------- # testing notifications @@ -166,6 +167,7 @@ prowl awssns rocketchat sms +hangouts " # ----------------------------------------------------------------------------- @@ -359,6 +361,9 @@ IRC_NICKNAME= IRC_REALNAME= IRC_NETWORK= +# hangouts configs +declare -A HANGOUTS_WEBHOOK_URI + # load the stock and user configuration files # these will overwrite the variables above @@ -491,6 +496,9 @@ filter_recipient_by_criticality() { # check irc [ -z "${IRC_NETWORK}" ] && SEND_IRC="NO" +# check hangouts +[ ${#HANGOUTS_WEBHOOK_URI[@]} -eq 0 ] && SEND_HANGOUTS="NO" + # check fleep #shellcheck disable=SC2153 { [ -z "${FLEEP_SERVER}" ] || [ -z "${FLEEP_SENDER}" ]; } && SEND_FLEEP="NO" @@ -511,6 +519,7 @@ if [ "${SEND_PUSHOVER}" = "YES" ] || [ "${SEND_KAFKA}" = "YES" ] || [ "${SEND_FLEEP}" = "YES" ] || [ "${SEND_PROWL}" = "YES" ] || + [ "${SEND_HANGOUTS}" = "YES" ] || [ "${SEND_CUSTOM}" = "YES" ] || [ "${SEND_MSTEAM}" = "YES" ]; then # if we need curl, check for the curl command @@ -536,6 +545,7 @@ if [ "${SEND_PUSHOVER}" = "YES" ] || SEND_KAFKA="NO" SEND_FLEEP="NO" SEND_PROWL="NO" + SEND_HANGOUTS="NO" SEND_CUSTOM="NO" fi fi @@ -659,6 +669,7 @@ for method in "${SEND_EMAIL}" \ "${SEND_PROWL}" \ "${SEND_CUSTOM}" \ "${SEND_IRC}" \ + "${SEND_HANGOUTS}" \ "${SEND_AWSSNS}" \ "${SEND_SYSLOG}" \ "${SEND_SMS}" \ @@ -1771,6 +1782,112 @@ send_sms() { } # 
----------------------------------------------------------------------------- +# hangouts sender + +send_hangouts() { + local rooms="${1}" httpcode sent=0 room color payload webhook + + [ "${SEND_HANGOUTS}" != "YES" ] && return 1 + + case "${status}" in + WARNING) color="#ffa700" ;; + CRITICAL) color="#d62d20" ;; + CLEAR) color="#008744" ;; + *) color="#777777" ;; + esac + + for room in ${rooms}; do + if [ -z "${HANGOUTS_WEBHOOK_URI[$room]}" ] ; then + info "Can't send Hangouts notification for: ${host} ${chart}.${name} to room ${room}. HANGOUTS_WEBHOOK_URI[$room] not defined" + else + webhook="${HANGOUTS_WEBHOOK_URI[$room]}" + payload="$( + cat <<EOF + { + "cards": [ + { + "header": { + "title": "Netdata on ${host}", + "imageUrl": "${images_base_url}/images/banner-icon-144x144.png", + "imageStyle": "IMAGE" + }, + "sections": [ + { + "header": "<b>${host}</b>", + "widgets": [ + { + "keyValue": { + "topLabel": "Status Message", + "content": "<b>${status_message}</b>", + "contentMultiline": "true", + "iconUrl": "${image}", + "onClick": { + "openLink": { + "url": "${goto_url}" + } + } + } + }, + { + "keyValue": { + "topLabel": "${chart} | ${family}", + "content": "<font color=${color}>${alarm}</font>", + "contentMultiline": "true" + } + } + ] + }, + { + "widgets": [ + { + "textParagraph": { + "text": "<font color=\"#0057e7\">@ ${date}\n<b>${info}</b></font>" + } + } + ] + }, + { + "widgets": [ + { + "buttons": [ + { + "textButton": { + "text": "Go to ${host}", + "onClick": { + "openLink": { + "url": "${goto_url}" + } + } + } + } + ] + } + ] + } + ] + } + ] + } +EOF + )" + + httpcode=$(docurl -H "Content-Type: application/json" -X POST -d "${payload}" "${webhook}") + + if [ "${httpcode}" = "200" ]; then + info "sent hangouts notification for: ${host} ${chart}.${name} is ${status} to '${room}'" + sent=$((sent + 1)) + else + error "failed to send hangouts notification for: ${host} ${chart}.${name} is ${status} to '${room}', with HTTP error code ${httpcode}." 
+ fi + fi + done + + [ ${sent} -gt 0 ] && return 0 + + return 1 +} + +# ----------------------------------------------------------------------------- # prepare the content of the notification # the url to send the user on click @@ -1889,6 +2006,15 @@ send_slack "${SLACK_WEBHOOK_URL}" "${to_slack}" SENT_SLACK=$? # ----------------------------------------------------------------------------- +# send the hangouts notification + +# hangouts aggregates posts from the same room +# so we use "${host} ${status}" as the room, to make them diff + +send_hangouts "${to_hangouts}" +SENT_HANGOUTS=$? + +# ----------------------------------------------------------------------------- # send the Microsoft notification # Microsoft team aggregates posts from the same username @@ -2270,6 +2396,7 @@ for state in "${SENT_EMAIL}" \ "${SENT_PUSHOVER}" \ "${SENT_TELEGRAM}" \ "${SENT_SLACK}" \ + "${SENT_HANGOUTS}" \ "${SENT_ROCKETCHAT}" \ "${SENT_ALERTA}" \ "${SENT_FLOCK}" \ diff --git a/health/notifications/hangouts/Makefile.inc b/health/notifications/hangouts/Makefile.inc new file mode 100644 index 000000000..835154fe8 --- /dev/null +++ b/health/notifications/hangouts/Makefile.inc @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_noinst_DATA += \ + hangouts/README.md \ + hangouts/Makefile.inc \ + $(NULL) diff --git a/health/notifications/hangouts/README.md b/health/notifications/hangouts/README.md new file mode 100644 index 000000000..25dccad61 --- /dev/null +++ b/health/notifications/hangouts/README.md @@ -0,0 +1,33 @@ +# Hangouts Chat + +This is what you will get: +![Netdata on Hangouts](https://user-images.githubusercontent.com/1153921/66427166-47de6900-e9c8-11e9-8322-b4b03f084dc1.png) +To receive notifications in Google Hangouts, you need the following in your Hangouts setup: + +1. 
One or more rooms +2. An **incoming webhook** for each room + +How to create an incoming webhook: +https://developers.google.com/hangouts/chat/how-tos/webhooks + +Set the webhook URIs and room names in `health_alarm_notify.conf`. To edit it on your system, run `/etc/netdata/edit-config health_alarm_notify.conf`: + +``` +#------------------------------------------------------------------------------ +# hangouts (google hangouts chat) global notification options +# enable/disable sending hangouts notifications +SEND_HANGOUTS="YES" +# On Hangouts, in the room you choose, create an incoming webhook, +# copy the link and paste it below and also identify the room name. +# Without it, netdata cannot send hangouts notifications to that room. +# HANGOUTS_WEBHOOK_URI[ROOM_NAME]="URLforroom1" +HANGOUTS_WEBHOOK_URI[systems]="https://chat.googleapis.com/v1/spaces/AAAAXXXXXXX/..." +HANGOUTS_WEBHOOK_URI[development]="https://chat.googleapis.com/v1/spaces/AAAAYYYYY/..." +# if DEFAULT_RECIPIENT_HANGOUTS is not configured, +# notifications won't be sent to hangouts rooms. +# DEFAULT_RECIPIENT_HANGOUTS="systems development|critical" +DEFAULT_RECIPIENT_HANGOUTS="sysadmin devops alarms|critical" +``` +You can define multiple rooms like this: `sysadmin devops alarms|critical`. + +The keywords `sysadmin`, `devops` and `alarms` are Hangouts rooms. diff --git a/health/notifications/health_alarm_notify.conf b/health/notifications/health_alarm_notify.conf index 60621df27..955dd25b5 100755 --- a/health/notifications/health_alarm_notify.conf +++ b/health/notifications/health_alarm_notify.conf @@ -21,6 +21,7 @@ # - messages to a local or remote syslog daemon # - message to Microsoft Team (through webhook) # - message to Rocket.Chat (through webhook) +# - message to Google Hangouts Chat (through webhook) # # The 'to' line given at netdata alarms defines a *role*, so that many # people can be notified for each role. 
@@ -176,6 +177,7 @@ sendsms="" # kavenegar : "09155555555 09177777777|critical" # pd : "<pd_service_key_1> <pd_service_key_2>|critical" # irc : "<irc_channel_1> <irc_channel_2>|critical" +# hangouts : "alarms disasters|critical" # # If a recipient is set to empty string, the default recipient of the given # notification method (email, pushover, telegram, slack, alerta, etc) will be used. @@ -228,6 +230,27 @@ DEFAULT_RECIPIENT_EMAIL="root" #EMAIL_PLAINTEXT_ONLY="YES" #------------------------------------------------------------------------------ +# hangouts (google hangouts chat) global notification options + +# enable/disable sending hangouts notifications +SEND_HANGOUTS="YES" + +# On Hangouts, in the room you choose, create an incoming webhook, +# copy the link and paste it below and also give it a room name. +# Without it, netdata cannot send hangouts notifications to that room. +# You will then use the same room name in your recipients list. For each URI, you need +# HANGOUTS_WEBHOOK_URI[room_name]="WEBHOOK_URI" +# e.g. to define systems and development rooms/recipients: +# HANGOUTS_WEBHOOK_URI[systems]="URLforroom1" +# HANGOUTS_WEBHOOK_URI[development]="URLforroom2" + +# if a DEFAULT_RECIPIENT_HANGOUTS is not configured, +# notifications won't be send to hangouts rooms. 
For the example above, +# a valid recipients list is the following +# DEFAULT_RECIPIENT_HANGOUTS="systems development|critical" +DEFAULT_RECIPIENT_HANGOUTS="" + +#------------------------------------------------------------------------------ # pushover (pushover.net) global notification options # multiple recipients can be given like this: @@ -375,10 +398,10 @@ SEND_SLACK="YES" SLACK_WEBHOOK_URL="" # if a role's recipients are not configured, a notification will be send to: -# - A slack channel (syntax: '#channel' or 'channel') +# - A slack channel (syntax: '#channel' or 'channel') # - A slack user (syntax: '@user') # - The channel or user defined in slack for the webhook (syntax: '#') -# empty = do not send a notification for unconfigured roles +# empty = do not send a notification for unconfigured roles DEFAULT_RECIPIENT_SLACK="" #------------------------------------------------------------------------------ @@ -640,7 +663,7 @@ SEND_SYSLOG="NO" # configuration to change that needs to happen in the syslog daemon # configuration, not here. -# This controls which facility is used by defalt for logging. Defaults +# This controls which facility is used by default for logging. Defaults # to local6. 
SYSLOG_FACILITY='' @@ -808,6 +831,8 @@ custom_sender() { role_recipients_email[sysadmin]="${DEFAULT_RECIPIENT_EMAIL}" +role_recipients_hangouts[sysadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" + role_recipients_pushover[sysadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" role_recipients_pushbullet[sysadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" @@ -853,6 +878,8 @@ role_recipients_rocketchat[sysadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" role_recipients_email[domainadmin]="${DEFAULT_RECIPIENT_EMAIL}" +role_recipients_hangouts[domainadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" + role_recipients_pushover[domainadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" role_recipients_pushbullet[domainadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" @@ -901,6 +928,8 @@ role_recipients_sms[domainadmin]="${DEFAULT_RECIPIENT_SMS}" role_recipients_email[dba]="${DEFAULT_RECIPIENT_EMAIL}" +role_recipients_hangouts[dba]="${DEFAULT_RECIPIENT_HANGOUTS}" + role_recipients_pushover[dba]="${DEFAULT_RECIPIENT_PUSHOVER}" role_recipients_pushbullet[dba]="${DEFAULT_RECIPIENT_PUSHBULLET}" @@ -949,6 +978,8 @@ role_recipients_sms[dba]="${DEFAULT_RECIPIENT_SMS}" role_recipients_email[webmaster]="${DEFAULT_RECIPIENT_EMAIL}" +role_recipients_hangouts[webmaster]="${DEFAULT_RECIPIENT_HANGOUTS}" + role_recipients_pushover[webmaster]="${DEFAULT_RECIPIENT_PUSHOVER}" role_recipients_pushbullet[webmaster]="${DEFAULT_RECIPIENT_PUSHBULLET}" @@ -997,6 +1028,8 @@ role_recipients_sms[webmaster]="${DEFAULT_RECIPIENT_SMS}" role_recipients_email[proxyadmin]="${DEFAULT_RECIPIENT_EMAIL}" +role_recipients_hangouts[proxyadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" + role_recipients_pushover[proxyadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" role_recipients_pushbullet[proxyadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" @@ -1046,6 +1079,8 @@ role_recipients_sms[proxyadmin]="${DEFAULT_RECIPIENT_SMS}" role_recipients_email[sitemgr]="${DEFAULT_RECIPIENT_EMAIL}" +role_recipients_hangouts[sitemgr]="${DEFAULT_RECIPIENT_HANGOUTS}" + 
role_recipients_pushover[sitemgr]="${DEFAULT_RECIPIENT_PUSHOVER}" role_recipients_pushbullet[sitemgr]="${DEFAULT_RECIPIENT_PUSHBULLET}" diff --git a/libnetdata/Makefile.am b/libnetdata/Makefile.am index 87f12b32c..7dc808fab 100644 --- a/libnetdata/Makefile.am +++ b/libnetdata/Makefile.am @@ -23,6 +23,7 @@ SUBDIRS = \ storage_number \ threads \ url \ + tests \ $(NULL) dist_noinst_DATA = \ diff --git a/libnetdata/health/health.c b/libnetdata/health/health.c index a70f284b1..e03538db3 100644 --- a/libnetdata/health/health.c +++ b/libnetdata/health/health.c @@ -112,7 +112,7 @@ int health_silencers_json_read_callback(JSON_ENTRY *e) case JSON_OBJECT: #ifndef ENABLE_JSONC e->callback_function = health_silencers_json_read_callback; - if(e->name && strcmp(e->name,"")) { + if(strcmp(e->name,"")) { // init silencer debug(D_HEALTH, "JSON: Got object with a name, initializing new silencer for %s",e->name); #endif diff --git a/libnetdata/json/json.c b/libnetdata/json/json.c index 7c5adca3d..3ccc561c6 100644 --- a/libnetdata/json/json.c +++ b/libnetdata/json/json.c @@ -284,18 +284,13 @@ size_t json_walk_primitive(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e) * @param t the tokens * @param nest the length of structure t * @param start the first position - * @param e the output structure. + * @param e the structure with values and callback to be used inside the function. 
* * @return It returns the array length */ size_t json_walk_array(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e) { - JSON_ENTRY ne = { - .name = "", - .fullname = "", - .callback_data = NULL, - .callback_function = NULL - }; + JSON_ENTRY ne; char old = js[t[start].end]; js[t[start].end] = '\0'; @@ -315,7 +310,7 @@ size_t json_walk_array(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_E start++; for(i = 0; i < size ; i++) { ne.pos = i; - if (!e->name || !e->fullname || strlen(e->name) > JSON_NAME_LEN - 24 || strlen(e->fullname) > JSON_FULLNAME_LEN -24) { + if (strlen(e->name) > JSON_NAME_LEN - 24 || strlen(e->fullname) > JSON_FULLNAME_LEN -24) { info("JSON: JSON walk_array ignoring element with name:%s fullname:%s",e->name, e->fullname); continue; } diff --git a/libnetdata/popen/popen.c b/libnetdata/popen/popen.c index 906b10535..1c4ae64d6 100644 --- a/libnetdata/popen/popen.c +++ b/libnetdata/popen/popen.c @@ -2,46 +2,79 @@ #include "../libnetdata.h" -/* +static pthread_mutex_t myp_lock; +static int myp_tracking = 0; + struct mypopen { pid_t pid; - FILE *fp; struct mypopen *next; struct mypopen *prev; }; static struct mypopen *mypopen_root = NULL; -static void mypopen_add(FILE *fp, pid_t *pid) { - struct mypopen *mp = malloc(sizeof(struct mypopen)); - if(!mp) { - fatal("Cannot allocate %zu bytes", sizeof(struct mypopen)) +// myp_add_lock takes the lock if we're tracking. +static void myp_add_lock(void) { + if (myp_tracking == 0) return; - } - mp->fp = fp; + netdata_mutex_lock(&myp_lock); +} + +// myp_add_unlock release the lock if we're tracking. +static void myp_add_unlock(void) { + if (myp_tracking == 0) + return; + + netdata_mutex_unlock(&myp_lock); +} + +// myp_add_locked adds pid if we're tracking. +// myp_add_lock must have been called previously. 
+static void myp_add_locked(pid_t pid) { + struct mypopen *mp; + + if (myp_tracking == 0) + return; + + mp = mallocz(sizeof(struct mypopen)); mp->pid = pid; - mp->next = popen_root; + + mp->next = mypopen_root; mp->prev = NULL; - if(mypopen_root) mypopen_root->prev = mp; + if (mypopen_root != NULL) + mypopen_root->prev = mp; mypopen_root = mp; + netdata_mutex_unlock(&myp_lock); } -static void mypopen_del(FILE *fp) { +// myp_del deletes pid if we're tracking. +static void myp_del(pid_t pid) { struct mypopen *mp; - for(mp = mypopen_root; mp; mp = mp->next) - if(mp->fd == fp) break; + if (myp_tracking == 0) + return; - if(!mp) error("Cannot find mypopen() file pointer in open childs."); - else { - if(mp->next) mp->next->prev = mp->prev; - if(mp->prev) mp->prev->next = mp->next; - if(mypopen_root == mp) mypopen_root = mp->next; - free(mp); + netdata_mutex_lock(&myp_lock); + for (mp = mypopen_root; mp != NULL; mp = mp->next) { + if (mp->pid == pid) { + if (mp->next != NULL) + mp->next->prev = mp->prev; + if (mp->prev != NULL) + mp->prev->next = mp->next; + if (mypopen_root == mp) + mypopen_root = mp->next; + freez(mp); + break; + } } + + if (mp == NULL) + error("Cannot find pid %d.", pid); + + netdata_mutex_unlock(&myp_lock); } -*/ + #define PIPE_READ 0 #define PIPE_WRITE 1 @@ -58,7 +91,7 @@ static inline FILE *custom_popene(const char *command, volatile pid_t *pidptr, c posix_spawnattr_t attr; posix_spawn_file_actions_t fa; - if(pipe(pipefd) == -1) + if (pipe(pipefd) == -1) return NULL; if ((fp = fdopen(pipefd[PIPE_READ], "r")) == NULL) { goto error_after_pipe; @@ -66,7 +99,7 @@ static inline FILE *custom_popene(const char *command, volatile pid_t *pidptr, c // Mark all files to be closed by the exec() stage of posix_spawn() int i; - for(i = (int) (sysconf(_SC_OPEN_MAX) - 1); i >= 0; i--) + for (i = (int) (sysconf(_SC_OPEN_MAX) - 1); i >= 0; i--) if(i != STDIN_FILENO && i != STDERR_FILENO) (void)fcntl(i, F_SETFD, FD_CLOEXEC); @@ -92,10 +125,16 @@ static inline FILE 
*custom_popene(const char *command, volatile pid_t *pidptr, c } else { error("posix_spawnattr_init() failed."); } + + // Take the lock while we fork to ensure we don't race with SIGCHLD + // delivery on a process which exits quickly. + myp_add_lock(); if (!posix_spawn(&pid, "/bin/sh", &fa, &attr, spawn_argv, env)) { *pidptr = pid; + myp_add_locked(pid); debug(D_CHILDS, "Spawned command: '%s' on pid %d from parent pid %d.", command, pid, getpid()); } else { + myp_add_unlock(); error("Failed to spawn command: '%s' from parent pid %d.", command, getpid()); fclose(fp); fp = NULL; @@ -128,6 +167,60 @@ error_after_pipe: // See man environ extern char **environ; +// myp_init should be called by apps which act as init +// (pid 1) so that processes created by mypopen and mypopene +// are tracked. This enables the reaper to ignore processes +// which will be handled internally, by calling myp_reap, to +// avoid issues with already reaped processes during wait calls. +// +// Callers should call myp_free() to clean up resources. +void myp_init(void) { + info("process tracking enabled."); + myp_tracking = 1; + + if (netdata_mutex_init(&myp_lock) != 0) { + fatal("myp_init() mutex init failed."); + } +} + +// myp_free cleans up any resources allocated for process +// tracking. +void myp_free(void) { + struct mypopen *mp, *next; + + if (myp_tracking == 0) + return; + + netdata_mutex_lock(&myp_lock); + for (mp = mypopen_root; mp != NULL; mp = next) { + next = mp->next; + freez(mp); + } + + mypopen_root = NULL; + myp_tracking = 0; + netdata_mutex_unlock(&myp_lock); +} + +// myp_reap returns 1 if pid should be reaped, 0 otherwise. 
+int myp_reap(pid_t pid) { + struct mypopen *mp; + + if (myp_tracking == 0) + return 0; + + netdata_mutex_lock(&myp_lock); + for (mp = mypopen_root; mp != NULL; mp = mp->next) { + if (mp->pid == pid) { + netdata_mutex_unlock(&myp_lock); + return 0; + } + } + netdata_mutex_unlock(&myp_lock); + + return 1; +} + FILE *mypopen(const char *command, volatile pid_t *pidptr) { return custom_popene(command, pidptr, environ); } @@ -137,9 +230,10 @@ FILE *mypopene(const char *command, volatile pid_t *pidptr, char **env) { } int mypclose(FILE *fp, pid_t pid) { - debug(D_EXIT, "Request to mypclose() on pid %d", pid); + int ret; + siginfo_t info; - /*mypopen_del(fp);*/ + debug(D_EXIT, "Request to mypclose() on pid %d", pid); // close the pipe fd // this is required in musl @@ -151,9 +245,11 @@ int mypclose(FILE *fp, pid_t pid) { errno = 0; - siginfo_t info; - if(waitid(P_PID, (id_t) pid, &info, WEXITED) != -1) { - switch(info.si_code) { + ret = waitid(P_PID, (id_t) pid, &info, WEXITED); + myp_del(pid); + + if (ret != -1) { + switch (info.si_code) { case CLD_EXITED: if(info.si_status) error("child pid %d exited with code %d.", info.si_pid, info.si_status); diff --git a/libnetdata/popen/popen.h b/libnetdata/popen/popen.h index 90d4b829b..32f64e460 100644 --- a/libnetdata/popen/popen.h +++ b/libnetdata/popen/popen.h @@ -11,6 +11,9 @@ extern FILE *mypopen(const char *command, volatile pid_t *pidptr); extern FILE *mypopene(const char *command, volatile pid_t *pidptr, char **env); extern int mypclose(FILE *fp, pid_t pid); +extern void myp_init(void); +extern void myp_free(void); +extern int myp_reap(pid_t pid); extern void signals_unblock(void); extern void signals_reset(void); diff --git a/libnetdata/required_dummies.h b/libnetdata/required_dummies.h new file mode 100644 index 000000000..aa87e3964 --- /dev/null +++ b/libnetdata/required_dummies.h @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LIB_DUMMIES_H +#define NETDATA_LIB_DUMMIES_H 1 + +// 
callback required by fatal() +void netdata_cleanup_and_exit(int ret) +{ + exit(ret); +} + +void send_statistics(const char *action, const char *action_result, const char *action_data) +{ + (void)action; + (void)action_result; + (void)action_data; + return; +} + +// callbacks required by popen() +void signals_block(void){}; +void signals_unblock(void){}; +void signals_reset(void){}; + +// callback required by eval() +int health_variable_lookup(const char *variable, uint32_t hash, struct rrdcalc *rc, calculated_number *result) +{ + (void)variable; + (void)hash; + (void)rc; + (void)result; + return 0; +}; + +// required by get_system_cpus() +char *netdata_configured_host_prefix = ""; + +#endif // NETDATA_LIB_DUMMIES_H diff --git a/libnetdata/storage_number/Makefile.am b/libnetdata/storage_number/Makefile.am index 1cb69ed99..349dd71f1 100644 --- a/libnetdata/storage_number/Makefile.am +++ b/libnetdata/storage_number/Makefile.am @@ -3,6 +3,9 @@ AUTOMAKE_OPTIONS = subdir-objects MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +SUBDIRS = \ + tests \ + $(NULL) dist_noinst_DATA = \ README.md \ diff --git a/libnetdata/storage_number/tests/Makefile.am b/libnetdata/storage_number/tests/Makefile.am new file mode 100644 index 000000000..babdcf0df --- /dev/null +++ b/libnetdata/storage_number/tests/Makefile.am @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in diff --git a/libnetdata/storage_number/tests/test_storage_number.c b/libnetdata/storage_number/tests/test_storage_number.c new file mode 100644 index 000000000..61a0c1880 --- /dev/null +++ b/libnetdata/storage_number/tests/test_storage_number.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../../libnetdata.h" +#include "../../required_dummies.h" +#include <setjmp.h> +#include <cmocka.h> + +static void test_number_pinting(void **state) +{ + (void)state; + + char value[50]; + + print_calculated_number(value, 
0); + assert_string_equal(value, "0"); + + print_calculated_number(value, 0.0000001); + assert_string_equal(value, "0.0000001"); + + print_calculated_number(value, 0.00000009); + assert_string_equal(value, "0.0000001"); + + print_calculated_number(value, 0.000000001); + assert_string_equal(value, "0"); + + print_calculated_number(value, 99.99999999999999999); + assert_string_equal(value, "100"); + + print_calculated_number(value, -99.99999999999999999); + assert_string_equal(value, "-100"); + + print_calculated_number(value, 123.4567890123456789); + assert_string_equal(value, "123.456789"); + + print_calculated_number(value, 9999.9999999); + assert_string_equal(value, "9999.9999999"); + + print_calculated_number(value, -9999.9999999); + assert_string_equal(value, "-9999.9999999"); +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_number_pinting) + }; + + return cmocka_run_group_tests_name("storage_number", tests, NULL, NULL); +} diff --git a/libnetdata/tests/Makefile.am b/libnetdata/tests/Makefile.am new file mode 100644 index 000000000..babdcf0df --- /dev/null +++ b/libnetdata/tests/Makefile.am @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in diff --git a/libnetdata/tests/test_str2ld.c b/libnetdata/tests/test_str2ld.c new file mode 100644 index 000000000..9d59f6c0e --- /dev/null +++ b/libnetdata/tests/test_str2ld.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" +#include "../required_dummies.h" +#include <setjmp.h> +#include <cmocka.h> + +static void test_str2ld(void **state) +{ + (void)state; + char *values[] = { + "1.2345678", + "-35.6", + "0.00123", + "23842384234234.2", + ".1", + "1.2e-10", + "hello", + "1wrong", + "nan", + "inf", + NULL + }; + + for (int i = 0; values[i]; i++) { + char *e_mine = "hello", *e_sys = "world"; + LONG_DOUBLE mine = str2ld(values[i], &e_mine); + LONG_DOUBLE 
sys = strtold(values[i], &e_sys); + + if (isnan(mine)) + assert_true(isnan(sys)); + else if (isinf(mine)) + assert_true(isinf(sys)); + else if (mine != sys) + assert_false(abs(mine - sys) > 0.000001); + + assert_ptr_equal(e_mine, e_sys); + } +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_str2ld) + }; + + return cmocka_run_group_tests_name("str2ld", tests, NULL, NULL); +} diff --git a/netdata-installer.sh b/netdata-installer.sh index 4c3aeee71..bf7ac4048 100755 --- a/netdata-installer.sh +++ b/netdata-installer.sh @@ -1050,63 +1050,20 @@ END echo >&2 "Uninstall script copied to: ${TPUT_RED}${TPUT_BOLD}${NETDATA_PREFIX}/usr/libexec/netdata/netdata-uninstaller.sh${TPUT_RESET}" echo >&2 -progress "Install netdata updater tool" - -if [ -f "${NETDATA_PREFIX}"/usr/libexec/netdata-updater.sh ]; then - echo >&2 "Removing updater from previous location" - rm -f "${NETDATA_PREFIX}"/usr/libexec/netdata-updater.sh -fi +# ----------------------------------------------------------------------------- +progress "Install (but not enable) netdata updater tool" +cleanup_old_netdata_updater || run_failed "Cannot cleanup old netdata updater tool." +install_netdata_updater || run_failed "Cannot install netdata updater tool." 
-if [ -f "${INSTALLER_DIR}/packaging/installer/netdata-updater.sh" ]; then - sed "s|THIS_SHOULD_BE_REPLACED_BY_INSTALLER_SCRIPT|${NETDATA_USER_CONFIG_DIR}/.environment|" "${INSTALLER_DIR}/packaging/installer/netdata-updater.sh" > "${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh" || exit 1 +progress "Check if we must enable/disable the netdata updater tool" +if [ "${AUTOUPDATE}" = "1" ]; then + enable_netdata_updater || run_failed "Cannot enable netdata updater tool" else - sed "s|THIS_SHOULD_BE_REPLACED_BY_INSTALLER_SCRIPT|${NETDATA_USER_CONFIG_DIR}/.environment|" "${NETDATA_SOURCE_DIR}/packaging/installer/netdata-updater.sh" > "${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh" || exit 1 + disable_netdata_updater || run_failed "Cannot disable netdata updater tool" fi -chmod 0755 ${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh -echo >&2 "Update script is located at ${TPUT_GREEN}${TPUT_BOLD}${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh${TPUT_RESET}" -echo >&2 - -# Figure out the cron directory for the distro -crondir= -[ -d "/etc/periodic/daily" ] && crondir="/etc/periodic/daily" -[ -d "/etc/cron.daily" ] && crondir="/etc/cron.daily" - -if [ -z "${crondir}" ]; then - echo >&2 "Cannot figure out the cron directory to handle netdata-updater.sh activation/deactivation" -elif [ "${UID}" -ne "0" ]; then - # We cant touch cron if we are not running as root - echo >&2 "You need to run the installer as root for auto-updating via cron." -else - progress "Check if we must enable/disable the netdata updater" - if [ "${AUTOUPDATE}" = "1" ]; then - if [ -f "${crondir}/netdata-updater.sh" ]; then - progress "Removing incorrect netdata-updater filename in cron" - rm -f "${crondir}/netdata-updater.sh" - fi - - echo >&2 "Adding to cron" - - rm -f "${crondir}/netdata-updater" - ln -sf "${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh" "${crondir}/netdata-updater" - - echo >&2 "Auto-updating has been enabled. 
Updater script linked to: ${TPUT_RED}${TPUT_BOLD}${crondir}/netdata-update${TPUT_RESET}" - echo >&2 - echo >&2 "${TPUT_DIM}${TPUT_BOLD}netdata-updater.sh${TPUT_RESET}${TPUT_DIM} works from cron. It will trigger an email from cron" - echo >&2 "only if it fails (it should not print anything when it can update netdata).${TPUT_RESET}" - else - echo >&2 "You chose *NOT* to enable auto-update, removing any links to the updater from cron (it may have happened if you are reinstalling)" - echo >&2 - - if [ -f "${crondir}/netdata-updater" ]; then - echo >&2 "Removing cron reference: ${crondir}/netdata-updater" - rm -f "${crondir}/netdata-updater" - else - echo >&2 "Did not find any cron entries to remove" - fi - fi -fi +# ----------------------------------------------------------------------------- progress "Wrap up environment set up" # Save environment variables diff --git a/packaging/installer/README.md b/packaging/installer/README.md index 22633895f..eb6d89423 100644 --- a/packaging/installer/README.md +++ b/packaging/installer/README.md @@ -333,7 +333,7 @@ This is how to install the latest Netdata version from sources on FreeBSD: ```sh # install required packages -pkg install bash e2fsprogs-libuuid git curl autoconf automake pkgconf pidof +pkg install bash e2fsprogs-libuuid git curl autoconf automake pkgconf pidof Judy liblz4 libuv json-c # download Netdata git clone https://github.com/netdata/netdata.git --depth=100 @@ -347,15 +347,17 @@ cd netdata To install Netdata on pfSense, run the following commands (within a shell or under the **Diagnostics/Command** prompt within the pfSense web interface). -Note that the first four packages are downloaded from the pfSense repository for maintaining compatibility with pfSense, Netdata and Python are downloaded from the FreeBSD repository. +Note that the first four packages are downloaded from the pfSense repository for maintaining compatibility with pfSense, Netdata, Judy and Python are downloaded from the FreeBSD repository. 
```sh pkg install pkgconf pkg install bash pkg install e2fsprogs-libuuid pkg install libuv +pkg add http://pkg.freebsd.org/FreeBSD:11:amd64/latest/All/Judy-1.0.5_2.txz pkg add http://pkg.freebsd.org/FreeBSD:11:amd64/latest/All/python36-3.6.9.txz -pkg add http://pkg.freebsd.org/FreeBSD:11:amd64/latest/All/netdata-1.15.0.txz +ln -s /usr/local/lib/libjson-c.so /usr/local/lib/libjson-c.so.4 +pkg add http://pkg.freebsd.org/FreeBSD:11:amd64/latest/All/netdata-1.17.1.txz ``` **Note:** If you receive a ` Not Found` error during the last two commands above, you will either need to manually look in the [repo folder](http://pkg.freebsd.org/FreeBSD:11:amd64/latest/All/) for the latest available package and use its URL instead, or you can try manually changing the netdata version in the URL to the latest version. diff --git a/packaging/installer/functions.sh b/packaging/installer/functions.sh index 2e3044340..03dd8dc89 100644 --- a/packaging/installer/functions.sh +++ b/packaging/installer/functions.sh @@ -727,3 +727,100 @@ safe_sha256sum() { fatal "I could not find a suitable checksum binary to use" fi } + +get_crondir() { + crondir= + [ -d "/etc/periodic/daily" ] && crondir="/etc/periodic/daily" + [ -d "/etc/cron.daily" ] && crondir="/etc/cron.daily" + + echo "${crondir}" +} + +check_crondir_permissions() { + if [ -z "${1}" ]; then + echo >&2 "Cannot figure out the cron directory to handle netdata-updater.sh activation/deactivation" + return 1 + elif [ "${UID}" -ne "0" ]; then + # We cant touch cron if we are not running as root + echo >&2 "You need to run the installer as root for auto-updating via cron" + return 1 + fi + + return 0 +} + +install_netdata_updater() { + if [ "${INSTALLER_DIR}" ] && [ -f "${INSTALLER_DIR}/packaging/installer/netdata-updater.sh" ]; then + cat "${INSTALLER_DIR}/packaging/installer/netdata-updater.sh" > "${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh" || return 1 + fi + + if [ "${NETDATA_SOURCE_DIR}" ] && [ -f 
"${NETDATA_SOURCE_DIR}/packaging/installer/netdata-updater.sh" ]; then + cat "${NETDATA_SOURCE_DIR}/packaging/installer/netdata-updater.sh" > "${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh" || return 1 + fi + + sed -e "s|THIS_SHOULD_BE_REPLACED_BY_INSTALLER_SCRIPT|${NETDATA_USER_CONFIG_DIR}/.environment|" -i "${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh" || return 1 + + chmod 0755 ${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh + echo >&2 "Update script is located at ${TPUT_GREEN}${TPUT_BOLD}${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh${TPUT_RESET}" + echo >&2 + + return 0 +} + +cleanup_old_netdata_updater() { + if [ -f "${NETDATA_PREFIX}"/usr/libexec/netdata-updater.sh ]; then + echo >&2 "Removing updater from deprecated location" + rm -f "${NETDATA_PREFIX}"/usr/libexec/netdata-updater.sh + fi + + crondir="$(get_crondir)" + check_crondir_permissions "${crondir}" || return 1 + + if [ -f "${crondir}/netdata-updater.sh" ]; then + echo >&2 "Removing incorrect netdata-updater filename in cron" + rm -f "${crondir}/netdata-updater.sh" + fi + + return 0 +} + +enable_netdata_updater() { + crondir="$(get_crondir)" + check_crondir_permissions "${crondir}" || return 1 + + echo >&2 "Adding to cron" + + rm -f "${crondir}/netdata-updater" + ln -sf "${NETDATA_PREFIX}/usr/libexec/netdata/netdata-updater.sh" "${crondir}/netdata-updater" + + echo >&2 "Auto-updating has been enabled. Updater script linked to: ${TPUT_RED}${TPUT_BOLD}${crondir}/netdata-update${TPUT_RESET}" + echo >&2 + echo >&2 "${TPUT_DIM}${TPUT_BOLD}netdata-updater.sh${TPUT_RESET}${TPUT_DIM} works from cron. 
It will trigger an email from cron" + echo >&2 "only if it fails (it should not print anything when it can update netdata).${TPUT_RESET}" + echo >&2 + + return 0 +} + +disable_netdata_updater() { + crondir="$(get_crondir)" + check_crondir_permissions "${crondir}" || return 1 + + echo >&2 "You chose *NOT* to enable auto-update, removing any links to the updater from cron (it may have happened if you are reinstalling)" + echo >&2 + + if [ -f "${crondir}/netdata-updater" ]; then + echo >&2 "Removing cron reference: ${crondir}/netdata-updater" + echo >&2 + rm -f "${crondir}/netdata-updater" + else + echo >&2 "Did not find any cron entries to remove" + echo >&2 + fi + + return 0 +} + +set_netdata_updater_channel() { + sed -e "s/^RELEASE_CHANNEL=.*/RELEASE_CHANNEL=\"${RELEASE_CHANNEL}\"/" -i "${NETDATA_USER_CONFIG_DIR}/.environment" +} diff --git a/packaging/makeself/install-or-update.sh b/packaging/makeself/install-or-update.sh index 9796eb085..393525554 100755 --- a/packaging/makeself/install-or-update.sh +++ b/packaging/makeself/install-or-update.sh @@ -9,6 +9,9 @@ umask 002 # Be nice on production environments renice 19 $$ >/dev/null 2>/dev/null +NETDATA_PREFIX="/opt/netdata" +NETDATA_USER_CONFIG_DIR="${NETDATA_PREFIX}/etc/netdata" + # ----------------------------------------------------------------------------- if [ -d /opt/netdata/etc/netdata.old ]; then progress "Found old etc/netdata directory, reinstating this" @@ -21,16 +24,17 @@ if [ -d /opt/netdata/etc/netdata.old ]; then fi STARTIT=1 - -while [ ! -z "${1}" ] -do - if [ "${1}" = "--dont-start-it" ] - then - STARTIT=0 - else - echo >&2 "Unknown option '${1}'. Ignoring it." - fi - shift +AUTOUPDATE=0 +RELEASE_CHANNEL="nightly" + +while [ "${1}" ]; do + case "${1}" in + "--dont-start-it") STARTIT=0;; + "--auto-update"|"-u") AUTOUPDATE=1;; + "--stable-channel") RELEASE_CHANNEL="stable";; + *) echo >&2 "Unknown option '${1}'. 
Ignoring it.";; + esac + shift 1 done deleted_stock_configs=0 @@ -137,6 +141,22 @@ progress "Install netdata at system init" install_netdata_service || run_failed "Cannot install netdata init service." +set_netdata_updater_channel || run_failed "Cannot set netdata updater tool release channel to '${RELEASE_CHANNEL}'" + + +# ----------------------------------------------------------------------------- +progress "Install (but not enable) netdata updater tool" +cleanup_old_netdata_updater || run_failed "Cannot cleanup old netdata updater tool." +install_netdata_updater || run_failed "Cannot install netdata updater tool." + +progress "Check if we must enable/disable the netdata updater tool" +if [ "${AUTOUPDATE}" = "1" ]; then + enable_netdata_updater || run_failed "Cannot enable netdata updater tool" +else + disable_netdata_updater || run_failed "Cannot disable netdata updater tool" +fi + + # ----------------------------------------------------------------------------- progress "creating quick links" diff --git a/packaging/version b/packaging/version index f72019128..2a34c700c 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.18.0 +v1.18.1 diff --git a/web/api/netdata-swagger.json b/web/api/netdata-swagger.json index 1a0ec8638..8ec0a3121 100644 --- a/web/api/netdata-swagger.json +++ b/web/api/netdata-swagger.json @@ -21,13 +21,13 @@ "description": "The info endpoint returns basic information about netdata. 
It provides:\n* netdata version\n* netdata unique id\n* list of hosts mirrored (includes itself)\n* Operating System, Virtualization and Container technology information\n* List of active collector plugins and modules\n* number of alarms in the host\n * number of alarms in normal state\n * number of alarms in warning state\n * number of alarms in critical state\n", "responses": { "200": { - "description": "netdata basic information", + "description": "netdata basic information.", "schema": { "$ref": "#/definitions/info" } }, "503": { - "description": "netdata daemon not ready (used for health checks)" + "description": "netdata daemon not ready (used for health checks)." } } } @@ -38,7 +38,7 @@ "description": "The charts endpoint returns a summary about all charts stored in the netdata server.", "responses": { "200": { - "description": "An array of charts", + "description": "An array of charts.", "schema": { "type": "array", "items": { @@ -71,6 +71,9 @@ "$ref": "#/definitions/chart" } }, + "400": { + "description": "No chart id was supplied in the request." + }, "404": { "description": "No chart with the given id is found." 
} @@ -80,7 +83,7 @@ "/alarm_variables": { "get": { "summary": "List variables available to configure alarms for a chart", - "description": "Returns the basic information of a chart and all the variables that can be used in alarm and template health configurations for the particular chart or family", + "description": "Returns the basic information of a chart and all the variables that can be used in alarm and template health configurations for the particular chart or family.", "parameters": [ { "name": "chart", @@ -93,7 +96,7 @@ ], "responses": { "200": { - "description": "A javascript object with information about the chart and the available variables", + "description": "A javascript object with information about the chart and the available variables.", "schema": { "$ref": "#/definitions/alarm_variables" } @@ -113,7 +116,7 @@ "/data": { "get": { "summary": "Get collected data for a specific chart", - "description": "The Data endpoint returns data stored in the round robin database of a chart.\n", + "description": "The data endpoint returns data stored in the round robin database of a chart.", "parameters": [ { "name": "chart", @@ -128,7 +131,7 @@ { "name": "dimension", "in": "query", - "description": "zero, one or more dimension ids or names, as returned by the /chart call, separated with comma or pipe. Netdata simple patterns are supported.", + "description": "Zero, one or more dimension ids or names, as returned by the /chart call, separated with comma or pipe. Netdata simple patterns are supported.", "required": false, "type": "array", "items": { @@ -281,9 +284,9 @@ ], "responses": { "200": { - "description": "The call was successful. The response should include the data.", + "description": "The call was successful. The response includes the data in the format requested. Swagger2.0 does not process the discriminator field to show polymorphism. The response will be one of the sub-types of the data-schema according to the chosen format, e.g. 
json -> data_json.", "schema": { - "$ref": "#/definitions/chart" + "$ref": "#/definitions/data" } }, "400": { @@ -301,7 +304,7 @@ "/badge.svg": { "get": { "summary": "Generate a SVG image for a chart (or dimension)", - "description": "Successful responses are SVG images\n", + "description": "Successful responses are SVG images.", "parameters": [ { "name": "chart", @@ -316,7 +319,7 @@ { "name": "alarm", "in": "query", - "description": "the name of an alarm linked to the chart", + "description": "The name of an alarm linked to the chart.", "required": false, "type": "string", "format": "any text", @@ -325,7 +328,7 @@ { "name": "dimension", "in": "query", - "description": "zero, one or more dimension ids, as returned by the /chart call.", + "description": "Zero, one or more dimension ids, as returned by the /chart call.", "required": false, "type": "array", "items": { @@ -399,7 +402,7 @@ { "name": "label", "in": "query", - "description": "a text to be used as the label", + "description": "A text to be used as the label.", "required": false, "type": "string", "format": "any text", @@ -408,7 +411,7 @@ { "name": "units", "in": "query", - "description": "a text to be used as the units", + "description": "A text to be used as the units.", "required": false, "type": "string", "format": "any text", @@ -417,7 +420,7 @@ { "name": "label_color", "in": "query", - "description": "a color to be used for the background of the label", + "description": "A color to be used for the background of the label.", "required": false, "type": "string", "format": "any text", @@ -426,7 +429,7 @@ { "name": "value_color", "in": "query", - "description": "a color to be used for the background of the label. You can set multiple using a pipe with a condition each, like this: color<value|color>value|color:null The following operators are supported: >, <, >=, <=, =, :null (to check if no value exists).", + "description": "A color to be used for the background of the label. 
You can set multiple using a pipe with a condition each, like this: color<value|color>value|color:null The following operators are supported: >, <, >=, <=, =, :null (to check if no value exists).", "required": false, "type": "string", "format": "any text", @@ -435,7 +438,7 @@ { "name": "multiply", "in": "query", - "description": "multiply the value with this number for rendering it at the image (integer value required)", + "description": "Multiply the value with this number for rendering it at the image (integer value required).", "required": false, "type": "number", "format": "integer", @@ -444,7 +447,7 @@ { "name": "divide", "in": "query", - "description": "divide the value with this number for rendering it at the image (integer value required)", + "description": "Divide the value with this number for rendering it at the image (integer value required).", "required": false, "type": "number", "format": "integer", @@ -453,7 +456,7 @@ { "name": "scale", "in": "query", - "description": "set the scale of the badge (greater or equal to 100)", + "description": "Set the scale of the badge (greater or equal to 100).", "required": false, "type": "number", "format": "integer", @@ -479,12 +482,12 @@ "/allmetrics": { "get": { "summary": "Get a value of all the metrics maintained by netdata", - "description": "The charts endpoint returns the latest value of all charts and dimensions stored in the netdata server.", + "description": "The allmetrics endpoint returns the latest value of all charts and dimensions stored in the netdata server.", "parameters": [ { "name": "format", "in": "query", - "description": "The format of the response to be returned", + "description": "The format of the response to be returned.", "required": true, "type": "string", "enum": [ @@ -498,7 +501,7 @@ { "name": "help", "in": "query", - "description": "enable or disable HELP lines in prometheus output", + "description": "Enable or disable HELP lines in prometheus output.", "required": false, "type": 
"string", "enum": [ @@ -510,7 +513,7 @@ { "name": "types", "in": "query", - "description": "enable or disable TYPE lines in prometheus output", + "description": "Enable or disable TYPE lines in prometheus output.", "required": false, "type": "string", "enum": [ @@ -522,7 +525,7 @@ { "name": "timestamps", "in": "query", - "description": "enable or disable timestamps in prometheus output", + "description": "Enable or disable timestamps in prometheus output.", "required": false, "type": "string", "enum": [ @@ -546,7 +549,7 @@ { "name": "oldunits", "in": "query", - "description": "When enabled, netdata will show metric names for the default source=average as they appeared before 1.12, by using the legacy unit naming conventions", + "description": "When enabled, netdata will show metric names for the default source=average as they appeared before 1.12, by using the legacy unit naming conventions.", "required": false, "type": "string", "enum": [ @@ -586,7 +589,7 @@ { "name": "data", "in": "query", - "description": "Select the prometheus response data source. The default is controlled in netdata.conf", + "description": "Select the prometheus response data source. There is a setting in netdata.conf for the default.", "required": false, "type": "string", "enum": [ @@ -599,10 +602,10 @@ ], "responses": { "200": { - "description": "All the metrics returned in the format requested" + "description": "All the metrics returned in the format requested." }, "400": { - "description": "The format requested is not supported" + "description": "The format requested is not supported." 
} } } @@ -615,7 +618,7 @@ { "name": "all", "in": "query", - "description": "If passed, all enabled alarms are returned", + "description": "If passed, all enabled alarms are returned.", "required": false, "type": "boolean", "allowEmptyValue": true @@ -623,7 +626,7 @@ ], "responses": { "200": { - "description": "An object containing general info and a linked list of alarms", + "description": "An object containing general info and a linked list of alarms.", "schema": { "$ref": "#/definitions/alarms" } @@ -639,14 +642,14 @@ { "name": "after", "in": "query", - "description": "Passing the parameter after=UNIQUEID returns all the events in the alarm log that occurred after UNIQUEID. An automated series of calls would call the interface once without after=, store the last UNIQUEID of the returned set, and give it back to get incrementally the next events", + "description": "Passing the parameter after=UNIQUEID returns all the events in the alarm log that occurred after UNIQUEID. An automated series of calls would call the interface once without after=, store the last UNIQUEID of the returned set, and give it back to get incrementally the next events.", "required": false, "type": "integer" } ], "responses": { "200": { - "description": "An array of alarm log entries", + "description": "An array of alarm log entries.", "schema": { "type": "array", "items": { @@ -665,7 +668,7 @@ { "in": "query", "name": "context", - "description": "Specify context which should be checked", + "description": "Specify context which should be checked.", "required": false, "allowEmptyValue": true, "type": "array", @@ -680,7 +683,7 @@ { "in": "query", "name": "status", - "description": "Specify alarm status to count", + "description": "Specify alarm status to count.", "required": false, "allowEmptyValue": true, "type": "string", @@ -698,7 +701,7 @@ ], "responses": { "200": { - "description": "An object containing a count of alarms with given status for given contexts", + "description": "An object 
containing a count of alarms with given status for given contexts.", "schema": { "type": "array", "items": { @@ -741,7 +744,7 @@ { "name": "chart", "in": "query", - "description": "Chart ids/names, as shown on the dashboard. These will match the `on` entry of a configured `alarm`", + "description": "Chart ids/names, as shown on the dashboard. These will match the `on` entry of a configured `alarm`.", "type": "string" }, { @@ -765,7 +768,7 @@ ], "responses": { "200": { - "description": "A plain text response based on the result of the command" + "description": "A plain text response based on the result of the command." }, "403": { "description": "Bearer authentication error." @@ -790,7 +793,7 @@ }, "mirrored_hosts": { "type": "array", - "description": "list of hosts mirrored of the server (include itself).", + "description": "List of hosts mirrored of the server (include itself).", "items": { "type": "string" }, @@ -801,83 +804,83 @@ }, "os_name": { "type": "string", - "description": "Operating System Name", + "description": "Operating System Name.", "example": "Manjaro Linux" }, "os_id": { "type": "string", - "description": "Operating System ID", + "description": "Operating System ID.", "example": "manjaro" }, "os_id_like": { "type": "string", - "description": "Known OS similar to this OS", + "description": "Known OS similar to this OS.", "example": "arch" }, "os_version": { "type": "string", - "description": "Operating System Version", + "description": "Operating System Version.", "example": "18.0.4" }, "os_version_id": { "type": "string", - "description": "Operating System Version ID", + "description": "Operating System Version ID.", "example": "unknown" }, "os_detection": { "type": "string", - "description": "OS parameters detection method", + "description": "OS parameters detection method.", "example": "Mixed" }, "kernel_name": { "type": "string", - "description": "Kernel Name", + "description": "Kernel Name.", "example": "Linux" }, "kernel_version": { "type": 
"string", - "description": "Kernel Version", + "description": "Kernel Version.", "example": "4.19.32-1-MANJARO" }, "architecture": { "type": "string", - "description": "Kernel architecture", + "description": "Kernel architecture.", "example": "x86_64" }, "virtualization": { "type": "string", - "description": "Virtualization Type", + "description": "Virtualization Type.", "example": "kvm" }, "virt_detection": { "type": "string", - "description": "Virtualization detection method", + "description": "Virtualization detection method.", "example": "systemd-detect-virt" }, "container": { "type": "string", - "description": "Container technology", + "description": "Container technology.", "example": "docker" }, "container_detection": { "type": "string", - "description": "Container technology detection method", + "description": "Container technology detection method.", "example": "dockerenv" }, "collectors": { "type": "array", "items": { "type": "object", - "description": "Array of collector plugins and modules", + "description": "Array of collector plugins and modules.", "properties": { "plugin": { "type": "string", - "description": "Collector plugin", + "description": "Collector plugin.", "example": "python.d.plugin" }, "module": { "type": "string", - "description": "Module of the collector plugin", + "description": "Module of the collector plugin.", "example": "dockerd" } } @@ -885,19 +888,19 @@ }, "alarms": { "type": "object", - "description": "number of alarms in the server.", + "description": "Number of alarms in the server.", "properties": { "normal": { "type": "integer", - "description": "number of alarms in normal state." + "description": "Number of alarms in normal state." }, "warning": { "type": "integer", - "description": "number of alarms in warning state." + "description": "Number of alarms in warning state." }, "critical": { "type": "integer", - "description": "number of alarms in critical state." + "description": "Number of alarms in critical state." 
} } } @@ -914,6 +917,15 @@ "type": "string", "description": "netdata version of the server." }, + "release_channel": { + "type": "string", + "description": "The release channel of the build on the server.", + "example": "nightly" + }, + "timezone": { + "type": "string", + "description": "The current timezone on the server." + }, "os": { "type": "string", "description": "The netdata server host operating system.", @@ -927,6 +939,10 @@ "type": "number", "description": "The duration, in seconds, of the round robin database maintained by netdata." }, + "memory_mode": { + "type": "string", + "description": "The name of the database memory mode on the server." + }, "update_every": { "type": "number", "description": "The default update frequency of the netdata server. All charts have an update frequency equal or bigger than this." @@ -963,11 +979,11 @@ "properties": { "id": { "type": "string", - "description": "The unique id of the chart" + "description": "The unique id of the chart." }, "name": { "type": "string", - "description": "The name of the chart" + "description": "The name of the chart." }, "type": { "type": "string", @@ -1041,11 +1057,11 @@ }, "green": { "type": "number", - "description": "Chart health green threshold" + "description": "Chart health green threshold." }, "red": { "type": "number", - "description": "Chart health red trheshold" + "description": "Chart health red threshold." } } }, @@ -1054,15 +1070,15 @@ "properties": { "chart": { "type": "string", - "description": "The unique id of the chart" + "description": "The unique id of the chart." }, "chart_name": { "type": "string", - "description": "The name of the chart" + "description": "The name of the chart." }, "cnart_context": { "type": "string", - "description": "The context of the chart. It is shared across multiple monitored software or hardware instances and used in alarm templates" + "description": "The context of the chart. 
It is shared across multiple monitored software or hardware instances and used in alarm templates." }, "family": { "type": "string", @@ -1126,24 +1142,26 @@ "properties": { "name": { "type": "string", - "description": "The name of the dimension" + "description": "The name of the dimension." } } }, - "json_wrap": { + "data": { "type": "object", + "discriminator": "format", + "description": "Response will contain the appropriate subtype, e.g. data_json depending on the requested format.", "properties": { "api": { "type": "number", - "description": "The API version this conforms to, currently 1" + "description": "The API version this conforms to, currently 1." }, "id": { "type": "string", - "description": "The unique id of the chart" + "description": "The unique id of the chart." }, "name": { "type": "string", - "description": "The name of the chart" + "description": "The name of the chart." }, "update_every": { "type": "number", @@ -1224,12 +1242,158 @@ "$ref": "#/definitions/chart_variables" } } - }, - "result": { - "description": "The result requested, in the format requested." } } }, + "data_json": { + "description": "Data response in json format.", + "allOf": [ + { + "$ref": "#/definitions/data" + }, + { + "properties": { + "result": { + "type": "object", + "properties": { + "labels": { + "description": "The dimensions retrieved from the chart.", + "type": "array", + "items": { + "type": "string" + } + }, + "data": { + "description": "The data requested, one element per sample with each element containing the values of the dimensions described in the labels value.", + "type": "array", + "items": { + "type": "number" + } + } + }, + "description": "The result requested, in the format requested." 
+ } + } + } + ] + }, + "data_flat": { + "description": "Data response in csv / tsv / tsv-excel / ssv / ssv-comma / markdown / html formats.", + "allOf": [ + { + "$ref": "#/definitions/data" + }, + { + "properties": { + "result": { + "type": "string" + } + } + } + ] + }, + "data_array": { + "description": "Data response in array format.", + "allOf": [ + { + "$ref": "#/definitions/data" + }, + { + "properties": { + "result": { + "type": "array", + "items": { + "type": "number" + } + } + } + } + ] + }, + "data_csvjsonarray": { + "description": "Data response in csvjsonarray format.", + "allOf": [ + { + "$ref": "#/definitions/data" + }, + { + "properties": { + "result": { + "description": "The first inner array contains strings showing the labels of each column, each subsequent array contains the values for each point in time.", + "type": "array", + "items": { + "type": "array", + "items": {} + } + } + } + } + ] + }, + "data_datatable": { + "description": "Data response in datatable / datasource formats (suitable for Google Charts).", + "allOf": [ + { + "$ref": "#/definitions/data" + }, + { + "properties": { + "result": { + "type": "object", + "properties": { + "cols": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "description": "Always empty - for future use." + }, + "label": { + "description": "The dimension returned from the chart." + }, + "pattern": { + "description": "Always empty - for future use." + }, + "type": { + "description": "The type of data in the column / chart-dimension." + }, + "p": { + "description": "Contains any annotations for the column." + } + }, + "required": [ + "id", + "label", + "pattern", + "type" + ] + } + }, + "rows": { + "type": "array", + "items": { + "type": "object", + "properties": { + "c": { + "type": "array", + "items": { + "properties": { + "v": { + "description": "Each value in the row is represented by an object named `c` with five v fields: data, null, null, 0, the value. 
This format is fixed by the Google Charts API." + } + } + } + } + } + } + } + } + } + } + } + ] + }, "alarms": { "type": "object", "properties": { @@ -1259,7 +1423,7 @@ }, "name": { "type": "string", - "description": "Full alarm name" + "description": "Full alarm name." }, "chart": { "type": "string" @@ -1269,7 +1433,7 @@ }, "active": { "type": "boolean", - "description": "Will be false only if the alarm is disabled in the configuration" + "description": "Will be false only if the alarm is disabled in the configuration." }, "disabled": { "type": "boolean", @@ -1524,4 +1688,4 @@ } } } -}
\ No newline at end of file +} diff --git a/web/api/netdata-swagger.yaml b/web/api/netdata-swagger.yaml index 50e661400..d92ef1d71 100644 --- a/web/api/netdata-swagger.yaml +++ b/web/api/netdata-swagger.yaml @@ -1,8 +1,7 @@ -# SPDX-License-Identifier: GPL-3.0-or-later swagger: '2.0' info: title: NetData API - description: 'Real-time performance and health monitoring.' + description: Real-time performance and health monitoring. version: 1.11.1_rolling host: registry.my-netdata.io schemes: @@ -28,90 +27,113 @@ paths: * number of alarms in critical state responses: '200': - description: netdata basic information + description: netdata basic information. schema: $ref: '#/definitions/info' '503': - description: netdata daemon not ready (used for health checks) + description: netdata daemon not ready (used for health checks). /charts: get: - summary: 'Get a list of all charts available at the server' - description: 'The charts endpoint returns a summary about all charts stored in the netdata server.' + summary: Get a list of all charts available at the server + description: >- + The charts endpoint returns a summary about all charts stored in the + netdata server. responses: '200': - description: 'An array of charts' + description: An array of charts. schema: type: array items: $ref: '#/definitions/chart_summary' /chart: get: - summary: 'Get info about a specific chart' - description: 'The Chart endpoint returns detailed information about a chart.' + summary: Get info about a specific chart + description: The Chart endpoint returns detailed information about a chart. parameters: - name: chart in: query - description: 'The id of the chart as returned by the /charts call.' + description: The id of the chart as returned by the /charts call. 
required: true type: string - format: 'as returned by /charts' - default: 'system.cpu' + format: as returned by /charts + default: system.cpu responses: '200': - description: 'A javascript object with detailed information about the chart.' + description: A javascript object with detailed information about the chart. schema: $ref: '#/definitions/chart' + '400': + description: No chart id was supplied in the request. '404': - description: 'No chart with the given id is found.' + description: No chart with the given id is found. /alarm_variables: get: - summary: 'List variables available to configure alarms for a chart' - description: 'Returns the basic information of a chart and all the variables that can be used in alarm and template health configurations for the particular chart or family' + summary: List variables available to configure alarms for a chart + description: >- + Returns the basic information of a chart and all the variables that can + be used in alarm and template health configurations for the particular + chart or family. parameters: - name: chart in: query - description: 'The id of the chart as returned by the /charts call.' + description: The id of the chart as returned by the /charts call. required: true type: string - format: 'as returned by /charts' + format: as returned by /charts responses: '200': - description: 'A javascript object with information about the chart and the available variables' - schema: - $ref: '#/definitions/alarm_variables' + description: >- + A javascript object with information about the chart and the + available variables. + schema: + $ref: '#/definitions/alarm_variables' '400': - description: 'Bad request - the body will include a message stating what is wrong.' + description: Bad request - the body will include a message stating what is wrong. '404': - description: 'No chart with the given id is found.' + description: No chart with the given id is found. '500': - description: 'Internal server error. 
This usually means the server is out of memory.' + description: >- + Internal server error. This usually means the server is out of + memory. /data: get: - summary: 'Get collected data for a specific chart' - description: | - The Data endpoint returns data stored in the round robin database of a chart. + summary: Get collected data for a specific chart + description: >- + The data endpoint returns data stored in the round robin database of a + chart. parameters: - name: chart in: query - description: 'The id of the chart as returned by the /charts call.' + description: The id of the chart as returned by the /charts call. required: true type: string - format: 'as returned by /charts' + format: as returned by /charts allowEmptyValue: false default: system.cpu - name: dimension in: query - description: 'zero, one or more dimension ids or names, as returned by the /chart call, separated with comma or pipe. Netdata simple patterns are supported.' + description: >- + Zero, one or more dimension ids or names, as returned by the /chart + call, separated with comma or pipe. Netdata simple patterns are + supported. required: false type: array items: type: string collectionFormat: pipes - format: 'as returned by /charts' + format: as returned by /charts allowEmptyValue: false - name: after in: query - description: 'This parameter can either be an absolute timestamp specifying the starting point of the data to be returned, or a relative number of seconds (negative, relative to parameter: before). Netdata will assume it is a relative number if it is less that 3 years (in seconds). Netdata will adapt this parameter to the boundaries of the round robin database. The default is the beginning of the round robin database (i.e. by default netdata will attempt to return data for the entire database).' 
+ description: >- + This parameter can either be an absolute timestamp specifying the + starting point of the data to be returned, or a relative number of + seconds (negative, relative to parameter: before). Netdata will + assume it is a relative number if it is less than 3 years (in + seconds). Netdata will adapt this parameter to the boundaries of the + round robin database. The default is the beginning of the round + robin database (i.e. by default netdata will attempt to return data + for the entire database). required: true type: number format: integer @@ -119,14 +141,25 @@ paths: default: -600 - name: before in: query - description: 'This parameter can either be an absolute timestamp specifying the ending point of the data to be returned, or a relative number of seconds (negative), relative to the last collected timestamp. Netdata will assume it is a relative number if it is less than 3 years (in seconds). Netdata will adapt this parameter to the boundaries of the round robin database. The default is zero (i.e. the timestamp of the last value collected).' + description: >- + This parameter can either be an absolute timestamp specifying the + ending point of the data to be returned, or a relative number of + seconds (negative), relative to the last collected timestamp. + Netdata will assume it is a relative number if it is less than 3 + years (in seconds). Netdata will adapt this parameter to the + boundaries of the round robin database. The default is zero (i.e. + the timestamp of the last value collected). required: false type: number format: integer default: 0 - name: points in: query - description: 'The number of points to be returned. If not given, or it is <= 0, or it is bigger than the points stored in the round robin database for this chart for the given duration, all the available collected values for the given duration will be returned.' + description: >- + The number of points to be returned. 
If not given, or it is <= 0, or + it is bigger than the points stored in the round robin database for + this chart for the given duration, all the available collected + values for the given duration will be returned. required: true type: number format: integer @@ -134,15 +167,31 @@ paths: default: 20 - name: group in: query - description: 'The grouping method. If multiple collected values are to be grouped in order to return fewer points, this parameters defines the method of grouping. methods supported "min", "max", "average", "sum", "incremental-sum". "max" is actually calculated on the absolute value collected (so it works for both positive and negative dimesions to return the most extreme value in either direction).' + description: >- + The grouping method. If multiple collected values are to be grouped + in order to return fewer points, this parameter defines the method + of grouping. Supported methods include "min", "max", "average", "sum", + "incremental-sum". "max" is actually calculated on the absolute + value collected (so it works for both positive and negative + dimensions to return the most extreme value in either direction). required: true type: string - enum: [ 'min', 'max', 'average', 'median', 'stddev', 'sum', 'incremental-sum' ] - default: 'average' + enum: + - min + - max + - average + - median + - stddev + - sum + - incremental-sum + default: average allowEmptyValue: false - name: gtime in: query - description: 'The grouping number of seconds. This is used in conjunction with group=average to change the units of metrics (ie when the data is per-second, setting gtime=60 will turn them to per-minute).' + description: >- + The grouping number of seconds. This is used in conjunction with + group=average to change the units of metrics (ie when the data is + per-second, setting gtime=60 will turn them to per-minute). 
required: false type: number format: integer @@ -150,22 +199,54 @@ paths: default: 0 - name: format in: query - description: 'The format of the data to be returned.' + description: The format of the data to be returned. required: true type: string - enum: [ 'json', 'jsonp', 'csv', 'tsv', 'tsv-excel', 'ssv', 'ssvcomma', 'datatable', 'datasource', 'html', 'markdown', 'array', 'csvjsonarray' ] + enum: + - json + - jsonp + - csv + - tsv + - tsv-excel + - ssv + - ssvcomma + - datatable + - datasource + - html + - markdown + - array + - csvjsonarray default: json allowEmptyValue: false - name: options in: query - description: 'Options that affect data generation.' + description: Options that affect data generation. required: false type: array items: type: string - enum: [ 'nonzero', 'flip', 'jsonwrap', 'min2max', 'seconds', 'milliseconds', 'abs', 'absolute', 'absolute-sum', 'null2zero', 'objectrows', 'google_json', 'percentage', 'unaligned', 'match-ids', 'match-names', 'showcustomvars' ] + enum: + - nonzero + - flip + - jsonwrap + - min2max + - seconds + - milliseconds + - abs + - absolute + - absolute-sum + - null2zero + - objectrows + - google_json + - percentage + - unaligned + - match-ids + - match-names + - showcustomvars collectionFormat: pipes - default: [seconds, jsonwrap] + default: + - seconds + - jsonwrap allowEmptyValue: false - name: callback in: query @@ -175,61 +256,81 @@ paths: allowEmptyValue: true - name: filename in: query - description: 'Add Content-Disposition: attachment; filename=<filename> header to the response, that will instruct the browser to save the response with the given filename.' + description: >- + Add Content-Disposition: attachment; filename=<filename> header to + the response, that will instruct the browser to save the response + with the given filename. 
required: false type: string allowEmptyValue: true - name: tqx in: query - description: '[Google Visualization API](https://developers.google.com/chart/interactive/docs/dev/implementing_data_source?hl=en) formatted parameter.' + description: >- + [Google Visualization + API](https://developers.google.com/chart/interactive/docs/dev/implementing_data_source?hl=en) + formatted parameter. required: false type: string allowEmptyValue: true responses: '200': - description: 'The call was successful. The response should include the data.' + description: >- + The call was successful. The response includes the data in the + format requested. Swagger2.0 does not process the discriminator + field to show polymorphism. The response will be one of the + sub-types of the data-schema according to the chosen format, e.g. + json -> data_json. schema: - $ref: '#/definitions/chart' + $ref: '#/definitions/data' '400': - description: 'Bad request - the body will include a message stating what is wrong.' + description: Bad request - the body will include a message stating what is wrong. '404': - description: 'No chart with the given id is found.' + description: No chart with the given id is found. '500': - description: 'Internal server error. This usually means the server is out of memory.' + description: >- + Internal server error. This usually means the server is out of + memory. /badge.svg: get: - summary: 'Generate a SVG image for a chart (or dimension)' - description: | - Successful responses are SVG images + summary: Generate a SVG image for a chart (or dimension) + description: Successful responses are SVG images. parameters: - name: chart in: query - description: 'The id of the chart as returned by the /charts call.' + description: The id of the chart as returned by the /charts call. 
required: true type: string - format: 'as returned by /charts' + format: as returned by /charts allowEmptyValue: false default: system.cpu - name: alarm in: query - description: 'the name of an alarm linked to the chart' + description: The name of an alarm linked to the chart. required: false type: string - format: 'any text' + format: any text allowEmptyValue: true - name: dimension in: query - description: 'zero, one or more dimension ids, as returned by the /chart call.' + description: 'Zero, one or more dimension ids, as returned by the /chart call.' required: false type: array items: type: string collectionFormat: pipes - format: 'as returned by /charts' + format: as returned by /charts allowEmptyValue: false - name: after in: query - description: 'This parameter can either be an absolute timestamp specifying the starting point of the data to be returned, or a relative number of seconds, to the last collected timestamp. Netdata will assume it is a relative number if it is smaller than the duration of the round robin database for this chart. So, if the round robin database is 3600 seconds, any value from -3600 to 3600 will trigger relative arithmetics. Netdata will adapt this parameter to the boundaries of the round robin database.' + description: >- + This parameter can either be an absolute timestamp specifying the + starting point of the data to be returned, or a relative number of + seconds, to the last collected timestamp. Netdata will assume it is + a relative number if it is smaller than the duration of the round + robin database for this chart. So, if the round robin database is + 3600 seconds, any value from -3600 to 3600 will trigger relative + arithmetics. Netdata will adapt this parameter to the boundaries of + the round robin database. 
required: true type: number format: integer @@ -237,267 +338,391 @@ paths: default: -600 - name: before in: query - description: 'This parameter can either be an absolute timestamp specifying the ending point of the data to be returned, or a relative number of seconds, to the last collected timestamp. Netdata will assume it is a relative number if it is smaller than the duration of the round robin database for this chart. So, if the round robin database is 3600 seconds, any value from -3600 to 3600 will trigger relative arithmetics. Netdata will adapt this parameter to the boundaries of the round robin database.' + description: >- + This parameter can either be an absolute timestamp specifying the + ending point of the data to be returned, or a relative number of + seconds, to the last collected timestamp. Netdata will assume it is + a relative number if it is smaller than the duration of the round + robin database for this chart. So, if the round robin database is + 3600 seconds, any value from -3600 to 3600 will trigger relative + arithmetics. Netdata will adapt this parameter to the boundaries of + the round robin database. required: false type: number format: integer default: 0 - name: group in: query - description: 'The grouping method. If multiple collected values are to be grouped in order to return fewer points, this parameters defines the method of grouping. methods are supported "min", "max", "average", "sum", "incremental-sum". "max" is actually calculated on the absolute value collected (so it works for both positive and negative dimesions to return the most extreme value in either direction).' + description: >- + The grouping method. If multiple collected values are to be grouped + in order to return fewer points, this parameter defines the method + of grouping. Supported methods include "min", "max", "average", "sum", + "incremental-sum". 
"max" is actually calculated on the absolute + value collected (so it works for both positive and negative + dimensions to return the most extreme value in either direction). required: true type: string - enum: [ 'min', 'max', 'average', 'median', 'stddev', 'sum', 'incremental-sum' ] - default: 'average' + enum: + - min + - max + - average + - median + - stddev + - sum + - incremental-sum + default: average allowEmptyValue: false - name: options in: query - description: 'Options that affect data generation.' + description: Options that affect data generation. required: false type: array items: type: string - enum: [ 'abs', 'absolute', 'display-absolute', 'absolute-sum', 'null2zero', 'percentage', 'unaligned' ] + enum: + - abs + - absolute + - display-absolute + - absolute-sum + - null2zero + - percentage + - unaligned collectionFormat: pipes - default: ['absolute'] + default: + - absolute allowEmptyValue: true - name: label in: query - description: 'a text to be used as the label' + description: A text to be used as the label. required: false type: string - format: 'any text' + format: any text allowEmptyValue: true - name: units in: query - description: 'a text to be used as the units' + description: A text to be used as the units. required: false type: string - format: 'any text' + format: any text allowEmptyValue: true - name: label_color in: query - description: 'a color to be used for the background of the label' + description: A color to be used for the background of the label. required: false type: string - format: 'any text' + format: any text allowEmptyValue: true - name: value_color in: query - description: 'a color to be used for the background of the label. You can set multiple using a pipe with a condition each, like this: color<value|color>value|color:null The following operators are supported: >, <, >=, <=, =, :null (to check if no value exists).' + description: >- + A color to be used for the background of the label. 
You can set + multiple using a pipe with a condition each, like this: + color<value|color>value|color:null The following operators are + supported: >, <, >=, <=, =, :null (to check if no value exists). required: false type: string - format: 'any text' + format: any text allowEmptyValue: true - name: multiply in: query - description: 'multiply the value with this number for rendering it at the image (integer value required)' + description: >- + Multiply the value with this number for rendering it at the image + (integer value required). required: false type: number format: integer allowEmptyValue: true - name: divide in: query - description: 'divide the value with this number for rendering it at the image (integer value required)' + description: >- + Divide the value with this number for rendering it at the image + (integer value required). required: false type: number format: integer allowEmptyValue: true - name: scale in: query - description: 'set the scale of the badge (greater or equal to 100)' + description: Set the scale of the badge (greater or equal to 100). required: false type: number format: integer allowEmptyValue: true responses: '200': - description: 'The call was successful. The response should be an SVG image.' + description: The call was successful. The response should be an SVG image. '400': - description: 'Bad request - the body will include a message stating what is wrong.' + description: Bad request - the body will include a message stating what is wrong. '404': - description: 'No chart with the given id is found.' + description: No chart with the given id is found. '500': - description: 'Internal server error. This usually means the server is out of memory.' + description: >- + Internal server error. This usually means the server is out of + memory. 
/allmetrics: get: - summary: 'Get a value of all the metrics maintained by netdata' - description: 'The charts endpoint returns the latest value of all charts and dimensions stored in the netdata server.' + summary: Get a value of all the metrics maintained by netdata + description: >- + The allmetrics endpoint returns the latest value of all charts and + dimensions stored in the netdata server. parameters: - name: format in: query - description: 'The format of the response to be returned' + description: The format of the response to be returned. required: true type: string - enum: [ 'shell', 'prometheus', 'prometheus_all_hosts', 'json' ] - default: 'shell' + enum: + - shell + - prometheus + - prometheus_all_hosts + - json + default: shell - name: help in: query - description: 'enable or disable HELP lines in prometheus output' + description: Enable or disable HELP lines in prometheus output. required: false type: string - enum: [ 'yes', 'no' ] + enum: + - 'yes' + - 'no' default: 'no' - name: types in: query - description: 'enable or disable TYPE lines in prometheus output' + description: Enable or disable TYPE lines in prometheus output. required: false type: string - enum: [ 'yes', 'no' ] + enum: + - 'yes' + - 'no' default: 'no' - name: timestamps in: query - description: 'enable or disable timestamps in prometheus output' + description: Enable or disable timestamps in prometheus output. required: false type: string - enum: [ 'yes', 'no' ] + enum: + - 'yes' + - 'no' default: 'yes' - name: names in: query - description: 'When enabled netdata will report dimension names. When disabled netdata will report dimension IDs. The default is controlled in netdata.conf.' + description: >- + When enabled netdata will report dimension names. When disabled + netdata will report dimension IDs. The default is controlled in + netdata.conf. 
required: false type: string - enum: [ 'yes', 'no' ] + enum: + - 'yes' + - 'no' default: 'yes' - name: oldunits in: query - description: 'When enabled, netdata will show metric names for the default source=average as they appeared before 1.12, by using the legacy unit naming conventions' + description: >- + When enabled, netdata will show metric names for the default + source=average as they appeared before 1.12, by using the legacy + unit naming conventions. required: false type: string - enum: [ 'yes', 'no' ] + enum: + - 'yes' + - 'no' default: 'yes' - name: hideunits in: query - description: 'When enabled, netdata will not include the units in the metric names, for the default source=average.' + description: >- + When enabled, netdata will not include the units in the metric + names, for the default source=average. required: false type: string - enum: [ 'yes', 'no' ] + enum: + - 'yes' + - 'no' default: 'yes' - name: server in: query - description: 'Set a distinct name of the client querying prometheus metrics. Netdata will use the client IP if this is not set.' + description: >- + Set a distinct name of the client querying prometheus metrics. + Netdata will use the client IP if this is not set. required: false type: string - format: 'any text' + format: any text - name: prefix in: query - description: 'Prefix all prometheus metrics with this string.' + description: Prefix all prometheus metrics with this string. required: false type: string - format: 'any text' + format: any text - name: data in: query - description: 'Select the prometheus response data source. The default is controlled in netdata.conf' + description: >- + Select the prometheus response data source. There is a setting in + netdata.conf for the default. 
required: false type: string - enum: [ 'as-collected', 'average', 'sum' ] - default: 'average' + enum: + - as-collected + - average + - sum + default: average responses: '200': - description: 'All the metrics returned in the format requested' + description: All the metrics returned in the format requested. '400': - description: 'The format requested is not supported' + description: The format requested is not supported. /alarms: get: - summary: 'Get a list of active or raised alarms on the server' - description: 'The alarms endpoint returns the list of all raised or enabled alarms on the netdata server. Called without any parameters, the raised alarms in state WARNING or CRITICAL are returned. By passing "?all", all the enabled alarms are returned.' + summary: Get a list of active or raised alarms on the server + description: >- + The alarms endpoint returns the list of all raised or enabled alarms on + the netdata server. Called without any parameters, the raised alarms in + state WARNING or CRITICAL are returned. By passing "?all", all the + enabled alarms are returned. parameters: - name: all in: query - description: 'If passed, all enabled alarms are returned' + description: 'If passed, all enabled alarms are returned.' required: false type: boolean allowEmptyValue: true responses: '200': - description: 'An object containing general info and a linked list of alarms' + description: An object containing general info and a linked list of alarms. schema: $ref: '#/definitions/alarms' /alarm_log: get: - summary: 'Retrieves the entries of the alarm log' - description: 'Returns an array of alarm_log entries, with historical information on raised and cleared alarms.' + summary: Retrieves the entries of the alarm log + description: >- + Returns an array of alarm_log entries, with historical information on + raised and cleared alarms. 
parameters: - name: after in: query - description: 'Passing the parameter after=UNIQUEID returns all the events in the alarm log that occurred after UNIQUEID. An automated series of calls would call the interface once without after=, store the last UNIQUEID of the returned set, and give it back to get incrementally the next events' + description: >- + Passing the parameter after=UNIQUEID returns all the events in the + alarm log that occurred after UNIQUEID. An automated series of calls + would call the interface once without after=, store the last + UNIQUEID of the returned set, and give it back to get incrementally + the next events. required: false type: integer responses: '200': - description: 'An array of alarm log entries' + description: An array of alarm log entries. schema: type: array items: $ref: '#/definitions/alarm_log_entry' /alarm_count: get: - summary: 'Get an overall status of the chart' - description: "Checks multiple charts with the same context and counts number of alarms with given status." + summary: Get an overall status of the chart + description: >- + Checks multiple charts with the same context and counts number of alarms + with given status. parameters: - in: query name: context - description: "Specify context which should be checked" + description: Specify context which should be checked. required: false allowEmptyValue: true type: array items: type: string collectionFormat: pipes - default: ['system.cpu'] + default: + - system.cpu - in: query name: status - description: "Specify alarm status to count" + description: Specify alarm status to count. 
required: false allowEmptyValue: true type: string - enum: ['REMOVED', 'UNDEFINED', 'UNINITIALIZED', 'CLEAR', 'RAISED', 'WARNING', 'CRITICAL'] - default: 'RAISED' + enum: + - REMOVED + - UNDEFINED + - UNINITIALIZED + - CLEAR + - RAISED + - WARNING + - CRITICAL + default: RAISED responses: '200': - description: 'An object containing a count of alarms with given status for given contexts' + description: >- + An object containing a count of alarms with given status for given + contexts. schema: type: array - items: + items: type: number '500': - description: 'Internal server error. This usually means the server is out of memory.' + description: >- + Internal server error. This usually means the server is out of + memory. /manage/health: get: - summary: 'Accesses the health management API to control health checks and notifications at runtime.' - description: 'Available from Netdata v1.12 and above, protected via bearer authorization. Especially useful for maintenance periods, the API allows you to disable health checks completely, silence alarm notifications, or Disable/Silence specific alarms that match selectors on alarm/template name, chart, context, host and family. For the simple disable/silence all scenaria, only the cmd parameter is required. The other parameters are used to define alarm selectors. For more information and examples, refer to the netdata documentation.' + summary: >- + Accesses the health management API to control health checks and + notifications at runtime. + description: >- + Available from Netdata v1.12 and above, protected via bearer + authorization. Especially useful for maintenance periods, the API allows + you to disable health checks completely, silence alarm notifications, or + Disable/Silence specific alarms that match selectors on alarm/template + name, chart, context, host and family. For the simple disable/silence + all scenaria, only the cmd parameter is required. The other parameters + are used to define alarm selectors. 
For more information and examples, + refer to the netdata documentation. parameters: - name: cmd in: query - description: 'DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors. LIST: Show active configuration.' + description: >- + DISABLE ALL: No alarm criteria are evaluated, nothing is written in + the alarm log. SILENCE ALL: No notifications are sent. RESET: Return + to the default state. DISABLE/SILENCE: Set the mode to be used for + the alarms matching the criteria of the alarm selectors. LIST: Show + active configuration. required: false type: string - enum: ['DISABLE ALL', 'SILENCE ALL', 'DISABLE', 'SILENCE', 'RESET', 'LIST'] + enum: + - DISABLE ALL + - SILENCE ALL + - DISABLE + - SILENCE + - RESET + - LIST - name: alarm in: query - description: 'The expression provided will match both `alarm` and `template` names.' + description: >- + The expression provided will match both `alarm` and `template` + names. type: string - name: chart in: query - description: 'Chart ids/names, as shown on the dashboard. These will match the `on` entry of a configured `alarm`' + description: >- + Chart ids/names, as shown on the dashboard. These will match the + `on` entry of a configured `alarm`. type: string - name: context in: query - description: 'Chart context, as shown on the dashboard. These will match the `on` entry of a configured `template`.' - type: string + description: >- + Chart context, as shown on the dashboard. These will match the `on` + entry of a configured `template`. + type: string - name: hosts in: query - description: 'The hostnames that will need to match.' - type: string + description: The hostnames that will need to match. + type: string - name: families in: query - description: 'The alarm families.' - type: string + description: The alarm families. 
+ type: string responses: '200': - description: 'A plain text response based on the result of the command' + description: A plain text response based on the result of the command. '403': - description: 'Bearer authentication error.' + description: Bearer authentication error. definitions: info: type: object @@ -512,7 +737,7 @@ definitions: example: 24e9fe3c-f2ac-11e8-bafc-0242ac110002 mirrored_hosts: type: array - description: list of hosts mirrored of the server (include itself). + description: List of hosts mirrored of the server (include itself). items: type: string example: @@ -520,169 +745,215 @@ definitions: - host2.example.com os_name: type: string - description: Operating System Name - example: Manjaro Linux + description: Operating System Name. + example: Manjaro Linux os_id: type: string - description: Operating System ID + description: Operating System ID. example: manjaro os_id_like: type: string - description: Known OS similar to this OS + description: Known OS similar to this OS. example: arch os_version: type: string - description: Operating System Version + description: Operating System Version. example: 18.0.4 os_version_id: type: string - description: Operating System Version ID + description: Operating System Version ID. example: unknown os_detection: type: string - description: OS parameters detection method + description: OS parameters detection method. example: Mixed kernel_name: type: string - description: Kernel Name + description: Kernel Name. example: Linux kernel_version: type: string - description: Kernel Version + description: Kernel Version. example: 4.19.32-1-MANJARO architecture: type: string - description: Kernel architecture + description: Kernel architecture. example: x86_64 virtualization: type: string - description: Virtualization Type + description: Virtualization Type. example: kvm virt_detection: type: string - description: Virtualization detection method + description: Virtualization detection method. 
example: systemd-detect-virt container: type: string - description: Container technology + description: Container technology. example: docker container_detection: type: string - description: Container technology detection method + description: Container technology detection method. example: dockerenv collectors: type: array items: type: object - description: Array of collector plugins and modules + description: Array of collector plugins and modules. properties: plugin: type: string - description: Collector plugin + description: Collector plugin. example: python.d.plugin module: type: string - description: Module of the collector plugin + description: Module of the collector plugin. example: dockerd alarms: type: object - description: number of alarms in the server. + description: Number of alarms in the server. properties: normal: type: integer - description: number of alarms in normal state. + description: Number of alarms in normal state. warning: type: integer - description: number of alarms in warning state. + description: Number of alarms in warning state. critical: type: integer - description: number of alarms in critical state. + description: Number of alarms in critical state. chart_summary: type: object properties: hostname: type: string - description: 'The hostname of the netdata server.' + description: The hostname of the netdata server. version: type: string - description: 'netdata version of the server.' + description: netdata version of the server. + release_channel: + type: string + description: The release channel of the build on the server. + example: nightly + timezone: + type: string + description: The current timezone on the server. os: type: string - description: 'The netdata server host operating system.' - enum: [ 'macos', 'linux', 'freebsd' ] + description: The netdata server host operating system. 
+ enum: + - macos + - linux + - freebsd history: type: number - description: 'The duration, in seconds, of the round robin database maintained by netdata.' + description: >- + The duration, in seconds, of the round robin database maintained by + netdata. + memory_mode: + type: string + description: The name of the database memory mode on the server. update_every: type: number - description: 'The default update frequency of the netdata server. All charts have an update frequency equal or bigger than this.' + description: >- + The default update frequency of the netdata server. All charts have an + update frequency equal or bigger than this. charts: type: object - description: 'An object containing all the chart objects available at the netdata server. This is used as an indexed array. The key of each chart object is the id of the chart.' + description: >- + An object containing all the chart objects available at the netdata + server. This is used as an indexed array. The key of each chart object + is the id of the chart. properties: key: $ref: '#/definitions/chart' charts_count: type: number - description: 'The number of charts.' + description: The number of charts. dimensions_count: type: number - description: 'The total number of dimensions.' + description: The total number of dimensions. alarms_count: type: number - description: 'The number of alarms.' + description: The number of alarms. rrd_memory_bytes: type: number - description: 'The size of the round robin database in bytes.' + description: The size of the round robin database in bytes. chart: type: object properties: id: type: string - description: 'The unique id of the chart' + description: The unique id of the chart. name: type: string - description: 'The name of the chart' + description: The name of the chart. type: type: string - description: 'The type of the chart. Types are not handled by netdata. You can use this field for anything you like.' + description: >- + The type of the chart. 
Types are not handled by netdata. You can use + this field for anything you like. family: type: string - description: 'The family of the chart. Families are not handled by netdata. You can use this field for anything you like.' + description: >- + The family of the chart. Families are not handled by netdata. You can + use this field for anything you like. title: type: string - description: 'The title of the chart.' + description: The title of the chart. priority: type: string - description: 'The relative priority of the chart. NetData does not care about priorities. This is just an indication of importance for the chart viewers to sort charts of higher priority (lower number) closer to the top. Priority sorting should only be used among charts of the same type or family.' + description: >- + The relative priority of the chart. NetData does not care about + priorities. This is just an indication of importance for the chart + viewers to sort charts of higher priority (lower number) closer to the + top. Priority sorting should only be used among charts of the same + type or family. enabled: type: boolean - description: 'True when the chart is enabled. Disabled charts do not currently collect values, but they may have historical values available.' + description: >- + True when the chart is enabled. Disabled charts do not currently + collect values, but they may have historical values available. units: type: string - description: 'The unit of measurement for the values of all dimensions of the chart.' + description: The unit of measurement for the values of all dimensions of the chart. data_url: type: string - description: 'The absolute path to get data values for this chart. You are expected to use this path as the base when constructing the URL to fetch data values for this chart.' + description: >- + The absolute path to get data values for this chart. You are expected + to use this path as the base when constructing the URL to fetch data + values for this chart. 
chart_type: type: string - description: 'The chart type.' - enum: [ 'line', 'area', 'stacked' ] + description: The chart type. + enum: + - line + - area + - stacked duration: type: number - description: 'The duration, in seconds, of the round robin database maintained by netdata.' + description: >- + The duration, in seconds, of the round robin database maintained by + netdata. first_entry: type: number - description: 'The UNIX timestamp of the first entry (the oldest) in the round robin database.' + description: >- + The UNIX timestamp of the first entry (the oldest) in the round robin + database. last_entry: type: number - description: 'The UNIX timestamp of the latest entry in the round robin database.' + description: The UNIX timestamp of the latest entry in the round robin database. update_every: type: number - description: 'The update frequency of this chart, in seconds. One value every this amount of time is kept in the round robin database.' + description: >- + The update frequency of this chart, in seconds. One value every this + amount of time is kept in the round robin database. dimensions: type: object - description: 'An object containing all the chart dimensions available for the chart. This is used as an indexed array. The key of the object the id of the dimension.' + description: >- + An object containing all the chart dimensions available for the chart. + This is used as an indexed array. The key of the object the id of the + dimension. properties: key: $ref: '#/definitions/dimension' @@ -693,28 +964,30 @@ definitions: $ref: '#/definitions/chart_variables' green: type: number - description: 'Chart health green threshold' + description: Chart health green threshold. red: type: number - description: 'Chart health red trheshold' + description: Chart health red threshold. alarm_variables: type: object properties: chart: type: string - description: 'The unique id of the chart' + description: The unique id of the chart. 
chart_name: type: string - description: 'The name of the chart' + description: The name of the chart. cnart_context: type: string - description: 'The context of the chart. It is shared across multiple monitored software or hardware instances and used in alarm templates' + description: >- + The context of the chart. It is shared across multiple monitored + software or hardware instances and used in alarm templates. family: type: string - description: 'The family of the chart.' + description: The family of the chart. host: type: string - description: 'The host containing the chart.' + description: The host containing the chart. chart_variables: type: object properties: @@ -752,192 +1025,317 @@ definitions: properties: name: type: string - description: 'The name of the dimension' - json_wrap: + description: The name of the dimension. + data: type: object + discriminator: format + description: >- + Response will contain the appropriate subtype, e.g. data_json depending on + the requested format. properties: api: type: number - description: 'The API version this conforms to, currently 1' + description: 'The API version this conforms to, currently 1.' id: type: string - description: 'The unique id of the chart' + description: The unique id of the chart. name: type: string - description: 'The name of the chart' + description: The name of the chart. update_every: type: number - description: 'The update frequency of this chart, in seconds. One value every this amount of time is kept in the round robin database (indepedently of the current view).' + description: >- + The update frequency of this chart, in seconds. One value every this + amount of time is kept in the round robin database (indepedently of + the current view). view_update_every: type: number - description: 'The current view appropriate update frequency of this chart, in seconds. There is no point to request chart refreshes, using the same settings, more frequently than this.' 
+ description: >- + The current view appropriate update frequency of this chart, in + seconds. There is no point to request chart refreshes, using the same + settings, more frequently than this. first_entry: type: number - description: 'The UNIX timestamp of the first entry (the oldest) in the round robin database (indepedently of the current view).' + description: >- + The UNIX timestamp of the first entry (the oldest) in the round robin + database (indepedently of the current view). last_entry: type: number - description: 'The UNIX timestamp of the latest entry in the round robin database (indepedently of the current view).' + description: >- + The UNIX timestamp of the latest entry in the round robin database + (indepedently of the current view). after: type: number - description: 'The UNIX timestamp of the first entry (the oldest) returned in this response.' + description: >- + The UNIX timestamp of the first entry (the oldest) returned in this + response. before: type: number - description: 'The UNIX timestamp of the latest entry returned in this response.' + description: The UNIX timestamp of the latest entry returned in this response. min: type: number - description: 'The minimum value returned in the current view. This can be used to size the y-series of the chart.' + description: >- + The minimum value returned in the current view. This can be used to + size the y-series of the chart. max: type: number - description: 'The maximum value returned in the current view. This can be used to size the y-series of the chart.' + description: >- + The maximum value returned in the current view. This can be used to + size the y-series of the chart. dimension_names: - description: 'The dimension names of the chart as returned in the current view.' + description: The dimension names of the chart as returned in the current view. type: array items: type: string dimension_ids: - description: 'The dimension IDs of the chart as returned in the current view.' 
+ description: The dimension IDs of the chart as returned in the current view. type: array items: type: string latest_values: - description: 'The latest values collected for the chart (indepedently of the current view).' + description: >- + The latest values collected for the chart (indepedently of the current + view). type: array items: type: string view_latest_values: - description: 'The latest values returned with this response.' + description: The latest values returned with this response. type: array items: type: string dimensions: type: number - description: 'The number of dimensions returned.' + description: The number of dimensions returned. points: type: number - description: 'The number of rows / points returned.' + description: The number of rows / points returned. format: type: string - description: 'The format of the result returned.' + description: The format of the result returned. chart_variables: type: object properties: key: $ref: '#/definitions/chart_variables' - result: - description: 'The result requested, in the format requested.' + data_json: + description: Data response in json format. + allOf: + - $ref: '#/definitions/data' + - properties: + result: + type: object + properties: + labels: + description: The dimensions retrieved from the chart. + type: array + items: + type: string + data: + description: >- + The data requested, one element per sample with each element + containing the values of the dimensions described in the + labels value. + type: array + items: + type: number + description: 'The result requested, in the format requested.' + data_flat: + description: >- + Data response in csv / tsv / tsv-excel / ssv / ssv-comma / markdown / html + formats. + allOf: + - $ref: '#/definitions/data' + - properties: + result: + type: string + data_array: + description: Data response in array format. 
+ allOf: + - $ref: '#/definitions/data' + - properties: + result: + type: array + items: + type: number + data_csvjsonarray: + description: Data response in csvjsonarray format. + allOf: + - $ref: '#/definitions/data' + - properties: + result: + description: >- + The first inner array contains strings showing the labels of each + column, each subsequent array contains the values for each point + in time. + type: array + items: + type: array + items: {} + data_datatable: + description: >- + Data response in datatable / datasource formats (suitable for Google + Charts). + allOf: + - $ref: '#/definitions/data' + - properties: + result: + type: object + properties: + cols: + type: array + items: + type: object + properties: + id: + description: Always empty - for future use. + label: + description: The dimension returned from the chart. + pattern: + description: Always empty - for future use. + type: + description: The type of data in the column / chart-dimension. + p: + description: Contains any annotations for the column. + required: + - id + - label + - pattern + - type + rows: + type: array + items: + type: object + properties: + c: + type: array + items: + properties: + v: + description: >- + Each value in the row is represented by an object + named `c` with five v fields: data, null, null, 0, + the value. This format is fixed by the Google + Charts API. alarms: type: object - properties: - hostname: + properties: + hostname: type: string - latest_alarm_log_unique_id: + latest_alarm_log_unique_id: type: integer format: int32 - status: + status: type: boolean - now: + now: type: integer format: int32 - alarms: + alarms: type: object - properties: - chart-name.alarm-name: + properties: + chart-name.alarm-name: type: object - properties: - id: + properties: + id: type: integer format: int32 - name: + name: type: string - description: Full alarm name - chart: + description: Full alarm name. 
+ chart: type: string - family: + family: type: string - active: + active: type: boolean - description: Will be false only if the alarm is disabled in the configuration - disabled: + description: >- + Will be false only if the alarm is disabled in the + configuration. + disabled: type: boolean - description: Whether the health check for this alarm has been disabled via a health command API DISABLE command. - silenced: + description: >- + Whether the health check for this alarm has been disabled via + a health command API DISABLE command. + silenced: type: boolean - description: Whether notifications for this alarm have been silenced via a health command API SILENCE command. - exec: + description: >- + Whether notifications for this alarm have been silenced via a + health command API SILENCE command. + exec: type: string - recipient: + recipient: type: string - source: + source: type: string - units: + units: type: string - info: + info: type: string - status: + status: type: string - last_status_change: + last_status_change: type: integer format: int32 - last_updated: + last_updated: type: integer format: int32 - next_update: + next_update: type: integer format: int32 - update_every: + update_every: type: integer format: int32 - delay_up_duration: + delay_up_duration: type: integer format: int32 - delay_down_duration: + delay_down_duration: type: integer format: int32 - delay_max_duration: + delay_max_duration: type: integer format: int32 - delay_multiplier: + delay_multiplier: type: integer format: int32 - delay: + delay: type: integer format: int32 - delay_up_to_timestamp: + delay_up_to_timestamp: type: integer format: int32 - value_string: + value_string: type: string - no_clear_notification: + no_clear_notification: type: boolean - lookup_dimensions: + lookup_dimensions: type: string - db_after: + db_after: type: integer format: int32 - db_before: + db_before: type: integer format: int32 - lookup_method: + lookup_method: type: string - lookup_after: + 
lookup_after: type: integer format: int32 - lookup_before: + lookup_before: type: integer format: int32 - lookup_options: + lookup_options: type: string - calc: + calc: type: string - calc_parsed: + calc_parsed: type: string - warn: + warn: type: string - warn_parsed: + warn_parsed: type: string - crit: + crit: type: string - crit_parsed: + crit_parsed: type: string warn_repeat_every: type: integer @@ -945,90 +1343,90 @@ definitions: crit_repeat_every: type: integer format: int32 - green: + green: type: string format: nullable - red: + red: type: string format: nullable - value: + value: type: number alarm_log_entry: type: object - properties: - hostname: + properties: + hostname: type: string - unique_id: + unique_id: type: integer format: int32 - alarm_id: + alarm_id: type: integer format: int32 - alarm_event_id: + alarm_event_id: type: integer format: int32 - name: + name: type: string - chart: + chart: type: string - family: + family: type: string - processed: + processed: type: boolean - updated: + updated: type: boolean - exec_run: + exec_run: type: integer format: int32 - exec_failed: + exec_failed: type: boolean - exec: + exec: type: string - recipient: + recipient: type: string - exec_code: + exec_code: type: integer format: int32 - source: + source: type: string - units: + units: type: string - when: + when: type: integer format: int32 - duration: + duration: type: integer format: int32 - non_clear_duration: + non_clear_duration: type: integer format: int32 - status: + status: type: string - old_status: + old_status: type: string - delay: + delay: type: integer format: int32 - delay_up_to_timestamp: + delay_up_to_timestamp: type: integer format: int32 - updated_by_id: + updated_by_id: type: integer format: int32 - updates_id: + updates_id: type: integer format: int32 - value_string: + value_string: type: string - old_value_string: + old_value_string: type: string - silenced: + silenced: type: string - info: + info: type: string - value: + value: type: 
string format: nullable - old_value: + old_value: type: string format: nullable diff --git a/web/api/queries/query.c b/web/api/queries/query.c index 6f186d3ac..b0eb826d6 100644 --- a/web/api/queries/query.c +++ b/web/api/queries/query.c @@ -1559,7 +1559,7 @@ RRDR *rrd2rrdr( rrd_update_every, first_entry_t, last_entry_t); #ifdef ENABLE_DBENGINE - if ((st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)) { + if (st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { struct rrdeng_region_info *region_info_array; unsigned regions, max_interval; |