From 0915b3ef56dfac3113cce55a59a5765dc94976be Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 14:34:54 +0200 Subject: Adding upstream version 2.13.6. Signed-off-by: Daniel Baumann --- doc/.gitignore | 2 + doc/01-about.md | 70 + doc/02-installation.md | 667 +++ doc/02-installation.md.d/01-Debian.md | 3 + doc/02-installation.md.d/02-Ubuntu.md | 3 + doc/02-installation.md.d/03-Raspbian.md | 3 + doc/02-installation.md.d/04-Fedora.md | 2 + doc/02-installation.md.d/05-CentOS.md | 3 + doc/02-installation.md.d/06-RHEL.md | 3 + doc/02-installation.md.d/07-OpenSUSE.md | 3 + doc/02-installation.md.d/08-SLES.md | 3 + doc/02-installation.md.d/09-Amazon-Linux.md | 3 + doc/02-installation.md.d/10-Windows.md | 3 + doc/03-monitoring-basics.md | 3262 +++++++++++ doc/04-configuration.md | 737 +++ doc/05-service-monitoring.md | 998 ++++ doc/06-distributed-monitoring.md | 3555 ++++++++++++ doc/07-agent-based-monitoring.md | 485 ++ doc/08-advanced-topics.md | 1208 ++++ doc/09-object-types.md | 1972 +++++++ doc/10-icinga-template-library.md | 6022 ++++++++++++++++++++ doc/11-cli-commands.md | 734 +++ doc/12-icinga2-api.md | 2996 ++++++++++ doc/13-addons.md | 258 + doc/14-features.md | 1482 +++++ doc/15-troubleshooting.md | 1914 +++++++ doc/16-upgrading-icinga-2.md | 950 +++ doc/17-language-reference.md | 1371 +++++ doc/18-library-reference.md | 1960 +++++++ doc/19-technical-concepts.md | 2217 +++++++ doc/20-script-debugger.md | 177 + doc/21-development.md | 2698 +++++++++ doc/22-selinux.md | 312 + doc/23-migrating-from-icinga-1x.md | 1585 ++++++ doc/24-appendix.md | 695 +++ doc/CMakeLists.txt | 20 + doc/icinga2.8 | 99 + doc/images/addons/dashing_icinga2.png | Bin 0 -> 883821 bytes .../addons/icinga_certificate_monitoring.png | Bin 0 -> 251618 bytes doc/images/addons/icinga_reporting.png | Bin 0 -> 312814 bytes doc/images/addons/icingaweb2_businessprocess.png | Bin 0 -> 181635 bytes doc/images/addons/icingaweb2_grafana.png | Bin 0 -> 573452 bytes 
doc/images/addons/icingaweb2_graphite.png | Bin 0 -> 252196 bytes doc/images/addons/icingaweb2_maps.png | Bin 0 -> 543265 bytes doc/images/addons/nano-syntax.png | Bin 0 -> 48641 bytes doc/images/addons/vim-syntax.png | Bin 0 -> 38385 bytes .../advanced-topics/flapping-state-graph.png | Bin 0 -> 8132 bytes ...cinga2_external_checks_freshness_icingaweb2.png | Bin 0 -> 30046 bytes .../icingaweb2_downtime_handled.png | Bin 0 -> 39967 bytes doc/images/api/icinga2_api_powershell_ise.png | Bin 0 -> 503887 bytes .../configuration/icinga_web_local_server.png | Bin 0 -> 302951 bytes .../development/windows_boost_build_dev_cmd.png | Bin 0 -> 14058 bytes .../development/windows_builds_gitlab_pipeline.png | Bin 0 -> 18092 bytes .../development/windows_powershell_posh_git.png | Bin 0 -> 10374 bytes .../windows_visual_studio_installer_01.png | Bin 0 -> 93643 bytes .../windows_visual_studio_installer_02.png | Bin 0 -> 96993 bytes .../windows_visual_studio_installer_03.png | Bin 0 -> 87282 bytes .../development/windows_visual_studio_tabs_c++.png | Bin 0 -> 15837 bytes ...ed_monitoring_agent_checks_command_endpoint.png | Bin 0 -> 91755 bytes .../icinga2_distributed_monitoring_endpoints.png | Bin 0 -> 75860 bytes .../icinga2_distributed_monitoring_roles.png | Bin 0 -> 114197 bytes ...istributed_monitoring_satellite_config_sync.png | Bin 0 -> 88721 bytes ..._monitoring_scenario_ha_masters_with_agents.png | Bin 0 -> 137403 bytes ...nitoring_scenarios_master_satellites_agents.png | Bin 0 -> 147103 bytes ...ted_monitoring_scenarios_master_with_agents.png | Bin 0 -> 127139 bytes .../icinga2_distributed_monitoring_zones.png | Bin 0 -> 120164 bytes ..._distributed_windows_client_disk_icingaweb2.png | Bin 0 -> 104924 bytes ...buted_windows_nscp_api_drivesize_icingaweb2.png | Bin 0 -> 38947 bytes ...distributed_windows_nscp_counter_icingaweb2.png | Bin 0 -> 28292 bytes .../icinga2_windows_running_service.png | Bin 0 -> 72832 bytes .../icinga2_windows_setup_installer_01.png | Bin 0 -> 105133 
bytes .../icinga2_windows_setup_installer_02.png | Bin 0 -> 16884 bytes .../icinga2_windows_setup_installer_03.png | Bin 0 -> 8091 bytes .../icinga2_windows_setup_installer_04.png | Bin 0 -> 8140 bytes .../icinga2_windows_setup_installer_05.png | Bin 0 -> 92081 bytes .../icinga2_windows_setup_wizard_01.png | Bin 0 -> 76748 bytes .../icinga2_windows_setup_wizard_02.png | Bin 0 -> 86004 bytes ...icinga2_windows_setup_wizard_02_global_zone.png | Bin 0 -> 84387 bytes .../icinga2_windows_setup_wizard_03.png | Bin 0 -> 78295 bytes .../icinga2_windows_setup_wizard_04.png | Bin 0 -> 69902 bytes ...icinga2_windows_setup_wizard_05_nsclient_01.png | Bin 0 -> 117259 bytes ...icinga2_windows_setup_wizard_05_nsclient_02.png | Bin 0 -> 17894 bytes ...icinga2_windows_setup_wizard_05_nsclient_03.png | Bin 0 -> 15445 bytes ...icinga2_windows_setup_wizard_05_nsclient_04.png | Bin 0 -> 19891 bytes ...icinga2_windows_setup_wizard_05_nsclient_05.png | Bin 0 -> 18101 bytes ...icinga2_windows_setup_wizard_05_nsclient_06.png | Bin 0 -> 11510 bytes ...icinga2_windows_setup_wizard_05_nsclient_07.png | Bin 0 -> 32661 bytes ...a2_windows_setup_wizard_06_finish_no_ticket.png | Bin 0 -> 59909 bytes ..._windows_setup_wizard_06_finish_with_ticket.png | Bin 0 -> 57735 bytes ...icinga2_windows_setup_wizard_examine_config.png | Bin 0 -> 45359 bytes doc/images/icingadb/icingadb-architecture.png | Bin 0 -> 563761 bytes doc/images/icingadb/icingadb-daemon.png | Bin 0 -> 527021 bytes doc/images/icingadb/icingadb-icinga2.png | Bin 0 -> 529555 bytes doc/images/icingadb/icingadb-redis.png | Bin 0 -> 526785 bytes doc/scroll.js | 16 + doc/update-links.py | 41 + doc/win-dev.ps1 | 97 + 97 files changed, 38629 insertions(+) create mode 100644 doc/.gitignore create mode 100644 doc/01-about.md create mode 100644 doc/02-installation.md create mode 100644 doc/02-installation.md.d/01-Debian.md create mode 100644 doc/02-installation.md.d/02-Ubuntu.md create mode 100644 doc/02-installation.md.d/03-Raspbian.md create 
mode 100644 doc/02-installation.md.d/04-Fedora.md create mode 100644 doc/02-installation.md.d/05-CentOS.md create mode 100644 doc/02-installation.md.d/06-RHEL.md create mode 100644 doc/02-installation.md.d/07-OpenSUSE.md create mode 100644 doc/02-installation.md.d/08-SLES.md create mode 100644 doc/02-installation.md.d/09-Amazon-Linux.md create mode 100644 doc/02-installation.md.d/10-Windows.md create mode 100644 doc/03-monitoring-basics.md create mode 100644 doc/04-configuration.md create mode 100644 doc/05-service-monitoring.md create mode 100644 doc/06-distributed-monitoring.md create mode 100644 doc/07-agent-based-monitoring.md create mode 100644 doc/08-advanced-topics.md create mode 100644 doc/09-object-types.md create mode 100644 doc/10-icinga-template-library.md create mode 100644 doc/11-cli-commands.md create mode 100644 doc/12-icinga2-api.md create mode 100644 doc/13-addons.md create mode 100644 doc/14-features.md create mode 100644 doc/15-troubleshooting.md create mode 100644 doc/16-upgrading-icinga-2.md create mode 100644 doc/17-language-reference.md create mode 100644 doc/18-library-reference.md create mode 100644 doc/19-technical-concepts.md create mode 100644 doc/20-script-debugger.md create mode 100644 doc/21-development.md create mode 100644 doc/22-selinux.md create mode 100644 doc/23-migrating-from-icinga-1x.md create mode 100644 doc/24-appendix.md create mode 100644 doc/CMakeLists.txt create mode 100644 doc/icinga2.8 create mode 100644 doc/images/addons/dashing_icinga2.png create mode 100644 doc/images/addons/icinga_certificate_monitoring.png create mode 100644 doc/images/addons/icinga_reporting.png create mode 100644 doc/images/addons/icingaweb2_businessprocess.png create mode 100644 doc/images/addons/icingaweb2_grafana.png create mode 100644 doc/images/addons/icingaweb2_graphite.png create mode 100644 doc/images/addons/icingaweb2_maps.png create mode 100644 doc/images/addons/nano-syntax.png create mode 100644 doc/images/addons/vim-syntax.png 
create mode 100644 doc/images/advanced-topics/flapping-state-graph.png create mode 100644 doc/images/advanced-topics/icinga2_external_checks_freshness_icingaweb2.png create mode 100644 doc/images/advanced-topics/icingaweb2_downtime_handled.png create mode 100644 doc/images/api/icinga2_api_powershell_ise.png create mode 100644 doc/images/configuration/icinga_web_local_server.png create mode 100644 doc/images/development/windows_boost_build_dev_cmd.png create mode 100644 doc/images/development/windows_builds_gitlab_pipeline.png create mode 100644 doc/images/development/windows_powershell_posh_git.png create mode 100644 doc/images/development/windows_visual_studio_installer_01.png create mode 100644 doc/images/development/windows_visual_studio_installer_02.png create mode 100644 doc/images/development/windows_visual_studio_installer_03.png create mode 100644 doc/images/development/windows_visual_studio_tabs_c++.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_agent_checks_command_endpoint.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_endpoints.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_roles.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_satellite_config_sync.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenario_ha_masters_with_agents.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_satellites_agents.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_with_agents.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_monitoring_zones.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_windows_client_disk_icingaweb2.png create mode 100644 
doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_api_drivesize_icingaweb2.png create mode 100644 doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_counter_icingaweb2.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_running_service.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_installer_01.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_installer_02.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_installer_03.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_installer_04.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_installer_05.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_01.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02_global_zone.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_03.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_04.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_01.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_02.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_03.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_04.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_05.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_06.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_07.png create mode 100644 
doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_no_ticket.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_with_ticket.png create mode 100644 doc/images/distributed-monitoring/icinga2_windows_setup_wizard_examine_config.png create mode 100644 doc/images/icingadb/icingadb-architecture.png create mode 100644 doc/images/icingadb/icingadb-daemon.png create mode 100644 doc/images/icingadb/icingadb-icinga2.png create mode 100644 doc/images/icingadb/icingadb-redis.png create mode 100644 doc/scroll.js create mode 100755 doc/update-links.py create mode 100644 doc/win-dev.ps1 (limited to 'doc') diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000..e60e194 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,2 @@ +build +*.rst diff --git a/doc/01-about.md b/doc/01-about.md new file mode 100644 index 0000000..582e226 --- /dev/null +++ b/doc/01-about.md @@ -0,0 +1,70 @@ +# About Icinga 2 + +## What is Icinga 2? + +[Icinga](https://icinga.com/products/) is a monitoring system which checks +the availability of your network resources, notifies users of outages, and generates +performance data for reporting. + +Scalable and extensible, Icinga can monitor large, complex environments across +multiple locations. + +Icinga 2 is the monitoring server and requires [Icinga Web 2](https://icinga.com/products/) +on top in your Icinga Stack. The [configuration](https://icinga.com/products/configuration/) +can be easily managed with either the [Icinga Director](https://icinga.com/docs/director/latest/), +config management tools or plain text within the [Icinga DSL](04-configuration.md#configuration). 
+ + +![Icinga 2 Distributed Master and Satellites with Agents](images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_satellites_agents.png) + +## Start with Icinga + +* [Installation](02-installation.md#installation) +* [Monitoring Basics](03-monitoring-basics.md#monitoring-basics) +* [Configuration](04-configuration.md#configuration) +* [Distributed Monitoring](06-distributed-monitoring.md#distributed-monitoring) +* [Addons, Integrations and Features](13-addons.md#addons) +* [Troubleshooting](15-troubleshooting.md#troubleshooting) +* [Upgrading](16-upgrading-icinga-2.md#upgrading-icinga-2) + +Once Icinga Server and Web are running in your distributed environment, +make sure to check out the many [Icinga modules](https://icinga.com/docs/) +for even better monitoring. + +## What's New + +You can follow the development and release milestones on [GitHub](https://github.com/icinga/icinga2/issues). +Please follow our release announcements on [icinga.com](https://icinga.com/blog/) too. + +## Support + +Check the project website at [icinga.com](https://icinga.com) for status updates. Join the +[community channels](https://icinga.com/community/) for questions +or get in touch for [professional support](https://icinga.com/subscription/). + +## Contribute + +There are many ways to contribute to Icinga -- whether it be sending patches, +testing, reporting bugs or reviewing and updating the documentation. Every +contribution is appreciated! + +Please continue reading in the [Contributing chapter](https://github.com/Icinga/icinga2/blob/master/CONTRIBUTING.md). + +### Security Issues + +For reporting security issues please visit [this page](https://icinga.com/contact/security/). + +### Icinga 2 Development + +The Git repository is located on [GitHub](https://github.com/Icinga/icinga2). + +Icinga 2 is written in C++ and can be built on Linux/Unix and Windows. +Read more about development builds in the [development chapter](21-development.md#development). 
+ + +## License + +Icinga 2 and the Icinga 2 documentation are licensed under the terms of the GNU +General Public License Version 2. You will find a copy of this license in the +LICENSE file included in the source package. + diff --git a/doc/02-installation.md b/doc/02-installation.md new file mode 100644 index 0000000..d7ab43f --- /dev/null +++ b/doc/02-installation.md @@ -0,0 +1,667 @@ + +# Installation + +This tutorial is a step-by-step introduction to installing Icinga 2. +It assumes that you are familiar with the operating system you're using to install Icinga 2. + +Please follow the steps listed for your operating system. Packages for distributions other than the ones +listed here may also be available. Please refer to [icinga.com/get-started/download](https://icinga.com/get-started/download/#community) +for a full list of available community repositories. + +## Upgrade + +In case you are upgrading an existing setup, please make sure to +follow the [upgrade documentation](16-upgrading-icinga-2.md#upgrading-icinga-2). + + +## Add Icinga Package Repository + +We recommend using our official repositories. Here's how to add them to your system: + + + +### Debian Repository + +```bash +apt-get update +apt-get -y install apt-transport-https wget gnupg + +wget -O - https://packages.icinga.com/icinga.key | apt-key add - + +DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release); \ + echo "deb https://packages.icinga.com/debian icinga-${DIST} main" > \ + /etc/apt/sources.list.d/${DIST}-icinga.list + echo "deb-src https://packages.icinga.com/debian icinga-${DIST} main" >> \ + /etc/apt/sources.list.d/${DIST}-icinga.list + +apt-get update +``` + +#### Debian Backports Repository + +This repository is required for Debian Stretch since Icinga v2.11. 
+ +Debian Stretch: + +```bash +DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release); \ + echo "deb https://deb.debian.org/debian ${DIST}-backports main" > \ + /etc/apt/sources.list.d/${DIST}-backports.list + +apt-get update +``` + + + + +### Ubuntu Repository + +```bash +apt-get update +apt-get -y install apt-transport-https wget gnupg + +wget -O - https://packages.icinga.com/icinga.key | apt-key add - + +. /etc/os-release; if [ ! -z ${UBUNTU_CODENAME+x} ]; then DIST="${UBUNTU_CODENAME}"; else DIST="$(lsb_release -c| awk '{print $2}')"; fi; \ + echo "deb https://packages.icinga.com/ubuntu icinga-${DIST} main" > \ + /etc/apt/sources.list.d/${DIST}-icinga.list + echo "deb-src https://packages.icinga.com/ubuntu icinga-${DIST} main" >> \ + /etc/apt/sources.list.d/${DIST}-icinga.list + +apt-get update +``` + + + +### Raspbian Repository + +```bash +apt-get update +apt-get -y install apt-transport-https wget gnupg + +wget -O - https://packages.icinga.com/icinga.key | apt-key add - + +DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release); \ + echo "deb https://packages.icinga.com/raspbian icinga-${DIST} main" > \ + /etc/apt/sources.list.d/icinga.list + echo "deb-src https://packages.icinga.com/raspbian icinga-${DIST} main" >> \ + /etc/apt/sources.list.d/icinga.list + +apt-get update +``` + + + +### CentOS Repository + +```bash +rpm --import https://packages.icinga.com/icinga.key +wget https://packages.icinga.com/centos/ICINGA-release.repo -O /etc/yum.repos.d/ICINGA-release.repo +``` + +The packages for CentOS depend on other packages which are distributed +as part of the [EPEL repository](https://fedoraproject.org/wiki/EPEL): + +```bash +yum install epel-release +``` + + + +### RHEL Repository + +!!! info + + A paid repository subscription is required for RHEL repositories. 
Get more information on + [icinga.com/subscription](https://icinga.com/subscription) + + Don't forget to fill in the username and password section with your credentials in the local .repo file. + +```bash +rpm --import https://packages.icinga.com/icinga.key +wget https://packages.icinga.com/subscription/rhel/ICINGA-release.repo -O /etc/yum.repos.d/ICINGA-release.repo +``` + +If you are using RHEL you need to additionally enable the `codeready-builder` +repository before installing the [EPEL rpm package](https://fedoraproject.org/wiki/EPEL#How_can_I_use_these_extra_packages.3F). + +#### RHEL 8 or Later + +```bash +ARCH=$(/bin/arch) +OSVER=$(. /etc/os-release; echo "${VERSION_ID%%.*}") + +subscription-manager repos --enable "codeready-builder-for-rhel-${OSVER}-${ARCH}-rpms" + +dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-${OSVER}.noarch.rpm +``` + +#### RHEL 7 + +```bash +subscription-manager repos --enable rhel-7-server-optional-rpms + +yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm +``` + + + + +### Fedora Repository + +```bash +rpm --import https://packages.icinga.com/icinga.key +dnf install https://packages.icinga.com/fedora/icinga-rpm-release-$(. /etc/os-release; echo "$VERSION_ID")-latest.noarch.rpm +``` + + + +### SLES Repository + +!!! info + + A paid repository subscription is required for SLES repositories. Get more information on + [icinga.com/subscription](https://icinga.com/subscription) + + Don't forget to fill in the username and password section with your credentials in the local .repo file. 
+ +```bash +rpm --import https://packages.icinga.com/icinga.key + +zypper ar https://packages.icinga.com/subscription/sles/ICINGA-release.repo +zypper ref +``` + +You need to additionally add the `PackageHub` repository to fulfill dependencies: + +```bash +source /etc/os-release + +SUSEConnect -p PackageHub/$VERSION_ID/x86_64 +``` + + + +### openSUSE Repository + +```bash +rpm --import https://packages.icinga.com/icinga.key + +zypper ar https://packages.icinga.com/openSUSE/ICINGA-release.repo +zypper ref +``` + +You need to additionally add the `server:monitoring` repository to fulfill dependencies: + +```bash +zypper ar https://download.opensuse.org/repositories/server:/monitoring/15.3/server:monitoring.repo +``` + + + +### Amazon Linux 2 Repository + +!!! info + + A paid repository subscription is required for Amazon Linux repositories. Get more information on + [icinga.com/subscription](https://icinga.com/subscription) + + Don't forget to fill in the username and password section with your credentials in the local .repo file. + +```bash +rpm --import https://packages.icinga.com/icinga.key +wget https://packages.icinga.com/subscription/amazon/ICINGA-release.repo -O /etc/yum.repos.d/ICINGA-release.repo +``` + +The packages for Amazon Linux 2 depend on other packages which are distributed +as part of the [EPEL repository](https://fedoraproject.org/wiki/EPEL). + +```bash +yum install epel-release +``` + + + +### Icinga for Windows Repository +[Icinga for Windows](https://icinga.com/docs/icinga-for-windows/latest/doc/000-Introduction/) is the recommended +way to install and update Icinga 2 on Windows. + +We provide a dedicated repository for Windows to simplify the installation. Please refer to the official +[Icinga for Windows installation docs](https://icinga.com/docs/icinga-for-windows/latest/doc/110-Installation/01-Getting-Started/) + + +## Install Icinga 2 + +You can install Icinga 2 by using your distribution's package manager +to install the `icinga2` package. 
The following commands must be executed +with `root` permissions unless noted otherwise. + + +!!! tip + + If you have [SELinux](22-selinux.md) enabled, the package `icinga2-selinux` is also required. + + + + +#### Debian / Ubuntu / Raspbian + +```bash +apt-get install icinga2 +``` + + + + +#### CentOS + +!!! info + + Note that installing Icinga 2 is only supported on CentOS 7 as CentOS 8 is EOL. + +```bash +yum install icinga2 +systemctl enable icinga2 +systemctl start icinga2 +``` + + + +#### RHEL 8 or Later + +```bash +dnf install icinga2 +systemctl enable icinga2 +systemctl start icinga2 +``` + +#### RHEL 7 + +```bash +yum install icinga2 +systemctl enable icinga2 +systemctl start icinga2 +``` + + + + +#### Fedora + +```bash +dnf install icinga2 +systemctl enable icinga2 +systemctl start icinga2 +``` + + + + +#### SLES / openSUSE + +```bash +zypper install icinga2 +``` + + + + +#### Amazon Linux 2 + +```bash +yum install icinga2 +systemctl enable icinga2 +systemctl start icinga2 +``` + + +### Systemd Service + +The majority of supported distributions use systemd. The Icinga 2 packages automatically install the necessary +systemd unit files. + +If you're stuck with configuration errors, you can manually invoke the +[configuration validation](11-cli-commands.md#config-validation). + +```bash +icinga2 daemon -C +``` + +!!! tip + + If you are running into fork errors with systemd enabled distributions, + please check the [troubleshooting chapter](15-troubleshooting.md#check-fork-errors). + + +## Set up Check Plugins + +Without plugins Icinga 2 does not know how to check external services. The +[Monitoring Plugins Project](https://www.monitoring-plugins.org/) provides +an extensive set of plugins which can be used with Icinga 2 to check whether +services are working properly. + +These plugins are required to make the [example configuration](04-configuration.md#configuring-icinga2-overview) +work out-of-the-box. 
+ +Depending on which directory your plugins are installed into you may need to +update the global `PluginDir` constant in your [Icinga 2 configuration](04-configuration.md#constants-conf). +This constant is used by the check command definitions contained in the Icinga Template Library +to determine where to find the plugin binaries. + +!!! tip + + Please refer to the [service monitoring](05-service-monitoring.md#service-monitoring-plugins) chapter for details about how to integrate + additional check plugins into your Icinga 2 setup. + + + + +#### Debian / Ubuntu / Raspbian + +```bash +apt-get install monitoring-plugins +``` + + + + +#### CentOS + +The packages for CentOS depend on other packages which are distributed as part of the EPEL repository. + +```bash +yum install nagios-plugins-all +``` + + + + +#### RHEL + +The packages for RHEL depend on other packages which are distributed as part of the EPEL repository. + +#### RHEL 8 or Later + +```bash +dnf install nagios-plugins-all +``` + +#### RHEL 7 + +```bash +yum install nagios-plugins-all +``` + + + + +#### Fedora + +```bash +dnf install nagios-plugins-all +``` + + + + +#### SLES / openSUSE + +The packages depend on other packages which are distributed +as part of the [server:monitoring repository](https://build.opensuse.org/project/repositories/server:monitoring). +Please make sure to enable this repository beforehand. + +```bash +zypper install monitoring-plugins +``` + + + + +#### Amazon Linux 2 + +The packages for Amazon Linux 2 depend on other packages which are distributed as part of the EPEL repository. + +```bash +amazon-linux-extras install epel + +yum install nagios-plugins-all +``` + + +## Set up Icinga 2 API + +Almost every Icinga 2 setup requires the Icinga 2 API as Icinga Web connects to it, Icinga DB requires it, +and it enables cluster communication functionality for highly available and distributed setups. + +!!! 
info + + If you set up a highly available and/or distributed Icinga monitoring environment, please read the + [Distributed Monitoring](06-distributed-monitoring.md#distributed-monitoring) chapter as + the commands to set up the API are different from those for a single-node setup. + +See the [API](12-icinga2-api.md#icinga2-api-setup) chapter for details, +or follow the steps below to set up the API quickly: + +Run the following command to: + +* enable the `api` feature, +* set up certificates, and +* add the API user `root` with an auto-generated password in the configuration file + `/etc/icinga2/conf.d/api-users.conf`. + +```bash +icinga2 api setup +``` + +Restart Icinga 2 for these changes to take effect. + +```bash +systemctl restart icinga2 +``` + + +## Set up Icinga DB + +Icinga DB is a set of components for publishing, synchronizing and +visualizing monitoring data in the Icinga ecosystem, consisting of: + +* Icinga 2 with its `icingadb` feature enabled, + responsible for publishing monitoring data to a Redis server, i.e. configuration and its runtime updates, + check results, state changes, downtimes, acknowledgements, notifications, and other events such as flapping +* The [Icinga DB daemon](https://icinga.com/docs/icinga-db), + which synchronizes the data between the Redis server and a database +* And Icinga Web with the + [Icinga DB Web](https://icinga.com/docs/icinga-db-web) module enabled, + which connects to both Redis and the database to display and work with the most up-to-date data + +![Icinga DB Architecture](images/icingadb/icingadb-architecture.png) + +!!! info + + Setting up Icinga 2's Icinga DB feature is only required for Icinga 2 master nodes or single-node setups. + +### Set up Redis Server + +Redis server version 6.2 or later is required. + +!!! info + + This guide sets up the `icingadb-redis` package provided by Icinga, + which ships a current Redis Server version and is preconfigured for the Icinga DB components. 
+ Using your own Redis server setup is supported as long as the version requirements are met. + +![Icinga DB Redis](images/icingadb/icingadb-redis.png) + +!!! tip + + Although the Redis server can run anywhere in an Icinga environment, + we recommend installing it where the corresponding Icinga 2 node is running to + keep latency between the components low. + +#### Install Icinga DB Redis Package + +Use your distribution's package manager to install the `icingadb-redis` package as follows: + + + +##### Amazon Linux 2 + +```bash +yum install icingadb-redis +``` + + + + +##### CentOS + + +!!! info + + Note that installing Icinga DB Redis is only supported on CentOS 7 as CentOS 8 is EOL. + +```bash +yum install icingadb-redis +``` + + + + +##### Debian / Ubuntu + +```bash +apt-get install icingadb-redis +``` + + + +##### RHEL 8 or Later + +```bash +dnf install icingadb-redis +``` + +##### RHEL 7 + +```bash +yum install icingadb-redis +``` + + + + +##### SLES + +```bash +zypper install icingadb-redis +``` + + +#### Run Icinga DB Redis + +The `icingadb-redis` package automatically installs the necessary systemd unit files to run Icinga DB Redis. +Please run the following command to enable and start its service: + +```bash +systemctl enable --now icingadb-redis +``` + +#### Enable Remote Redis Connections + +By default, `icingadb-redis` only listens on `127.0.0.1`. If Icinga Web or Icinga 2 is running on another node, +remote access to the Redis server must be allowed. This requires the following directives to be set in +the `/etc/icingadb-redis/icingadb-redis.conf` configuration file: + +* Set `protected-mode` to `no`, i.e. `protected-mode no` +* Set `bind` to the desired binding interface or bind all interfaces, e.g. `bind 0.0.0.0` + +!!! warning + + By default, Redis has no authentication preventing others from accessing it. 
+ When opening Redis to an external interface, make sure to set a password, set up appropriate firewall rules, + or configure TLS with certificate authentication on Redis and its consumers, + i.e. Icinga 2, Icinga DB and Icinga Web. + +Restart Icinga DB Redis for these changes to take effect: + +```bash +systemctl restart icingadb-redis +``` + +### Enable Icinga DB Feature + +With the [Icinga DB feature](14-features.md#icinga-db) enabled, +Icinga 2 publishes all of its monitoring data to the Redis server. This includes configuration and +its runtime updates via the Icinga 2 API, check results, state changes, downtimes, acknowledgments, notifications and +other events such as flapping. + +![Icinga DB Icinga 2](images/icingadb/icingadb-icinga2.png) + +Icinga 2 installs the feature configuration file to `/etc/icinga2/features-available/icingadb.conf`, +pre-configured for a local setup. +Update this file in case Redis is running on a different host or to set credentials. +All available settings are explained in the [Icinga DB object](09-object-types.md#icingadb) chapter. + +!!! important + + For single-node and high-availability setups, please read the note about the + [environment ID](https://icinga.com/docs/icinga-db/latest/doc/05-Distributed-Setups/#environment-id), + which is common to all Icinga DB components and generated by the Icinga DB feature. + +To enable the `icingadb` feature use the following command: + +```bash +icinga2 feature enable icingadb +``` + +Restart Icinga 2 for these changes to take effect: + +```bash +systemctl restart icinga2 +``` + +### Install Icinga DB Daemon + +After installing Icinga 2, setting up a Redis server and enabling the `icingadb` feature, +the Icinga DB daemon that synchronizes monitoring data between the Redis server and a database is now set up. + +![Icinga DB Daemon](images/icingadb/icingadb-daemon.png) + +!!! 
tip + + Although the Icinga DB daemon can run anywhere in an Icinga environment, + we recommend installing it where the corresponding Icinga 2 node and Redis server are running to + keep latency between the components low. + +The Icinga DB daemon package is also included in the Icinga repository, and since it is already set up, +you have completed the instructions here and can proceed to + +[install the Icinga DB daemon on Amazon Linux](https://icinga.com/docs/icinga-db/latest/doc/02-Installation/01-Amazon-Linux/#installing-icinga-db-package), + + +[install the Icinga DB daemon on CentOS](https://icinga.com/docs/icinga-db/latest/doc/02-Installation/02-CentOS/#installing-icinga-db-package), + + +[install the Icinga DB daemon on Debian](https://icinga.com/docs/icinga-db/latest/doc/02-Installation/03-Debian/#installing-icinga-db-package), + + +[install the Icinga DB daemon on RHEL](https://icinga.com/docs/icinga-db/latest/doc/02-Installation/04-RHEL/#installing-icinga-db-package), + + +[install the Icinga DB daemon on SLES](https://icinga.com/docs/icinga-db/latest/doc/02-Installation/05-SLES/#installing-icinga-db-package), + + +[install the Icinga DB daemon on Ubuntu](https://icinga.com/docs/icinga-db/latest/doc/02-Installation/06-Ubuntu/#installing-icinga-db-package), + +which will also guide you through the setup of the database and Icinga DB Web. 
+ + +## Backup + +Ensure to include the following in your backups: + +* Configuration files in `/etc/icinga2` +* Certificate files in `/var/lib/icinga2/ca` (Master CA key pair) and `/var/lib/icinga2/certs` (node certificates) +* Runtime files in `/var/lib/icinga2` + + diff --git a/doc/02-installation.md.d/01-Debian.md b/doc/02-installation.md.d/01-Debian.md new file mode 100644 index 0000000..d3e3143 --- /dev/null +++ b/doc/02-installation.md.d/01-Debian.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on Debian + + diff --git a/doc/02-installation.md.d/02-Ubuntu.md b/doc/02-installation.md.d/02-Ubuntu.md new file mode 100644 index 0000000..aa099d8 --- /dev/null +++ b/doc/02-installation.md.d/02-Ubuntu.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on Ubuntu + + diff --git a/doc/02-installation.md.d/03-Raspbian.md b/doc/02-installation.md.d/03-Raspbian.md new file mode 100644 index 0000000..fc48d6c --- /dev/null +++ b/doc/02-installation.md.d/03-Raspbian.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on Raspbian + + diff --git a/doc/02-installation.md.d/04-Fedora.md b/doc/02-installation.md.d/04-Fedora.md new file mode 100644 index 0000000..1f12276 --- /dev/null +++ b/doc/02-installation.md.d/04-Fedora.md @@ -0,0 +1,2 @@ + + diff --git a/doc/02-installation.md.d/05-CentOS.md b/doc/02-installation.md.d/05-CentOS.md new file mode 100644 index 0000000..4d766b2 --- /dev/null +++ b/doc/02-installation.md.d/05-CentOS.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on CentOS + + diff --git a/doc/02-installation.md.d/06-RHEL.md b/doc/02-installation.md.d/06-RHEL.md new file mode 100644 index 0000000..568251a --- /dev/null +++ b/doc/02-installation.md.d/06-RHEL.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on RHEL + + diff --git a/doc/02-installation.md.d/07-OpenSUSE.md b/doc/02-installation.md.d/07-OpenSUSE.md new file mode 100644 index 0000000..347831e --- /dev/null +++ b/doc/02-installation.md.d/07-OpenSUSE.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on openSUSE + + diff --git a/doc/02-installation.md.d/08-SLES.md 
b/doc/02-installation.md.d/08-SLES.md new file mode 100644 index 0000000..aa2646d --- /dev/null +++ b/doc/02-installation.md.d/08-SLES.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on SLES + + diff --git a/doc/02-installation.md.d/09-Amazon-Linux.md b/doc/02-installation.md.d/09-Amazon-Linux.md new file mode 100644 index 0000000..ec1d986 --- /dev/null +++ b/doc/02-installation.md.d/09-Amazon-Linux.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on Amazon Linux + + diff --git a/doc/02-installation.md.d/10-Windows.md b/doc/02-installation.md.d/10-Windows.md new file mode 100644 index 0000000..64bc687 --- /dev/null +++ b/doc/02-installation.md.d/10-Windows.md @@ -0,0 +1,3 @@ +# Install Icinga 2 on Windows + + diff --git a/doc/03-monitoring-basics.md b/doc/03-monitoring-basics.md new file mode 100644 index 0000000..c7f468a --- /dev/null +++ b/doc/03-monitoring-basics.md @@ -0,0 +1,3262 @@ +# Monitoring Basics + +This part of the Icinga 2 documentation provides an overview of all the basic +monitoring concepts you need to know to run Icinga 2. +Keep in mind these examples are made with a Linux server. If you are +using Windows, you will need to change the services accordingly. See the [ITL reference](10-icinga-template-library.md#windows-plugins) + for further information. + +## Attribute Value Types + +The Icinga 2 configuration uses different value types for attributes. 
+ + Type | Example + -------------------------------------------------------|--------------------------------------------------------- + [Number](17-language-reference.md#numeric-literals) | `5` + [Duration](17-language-reference.md#duration-literals) | `1m` + [String](17-language-reference.md#string-literals) | `"These are notes"` + [Boolean](17-language-reference.md#boolean-literals) | `true` + [Array](17-language-reference.md#array) | `[ "value1", "value2" ]` + [Dictionary](17-language-reference.md#dictionary) | `{ "key1" = "value1", "key2" = false }` + +It is important to use the correct value type for object attributes +as otherwise the [configuration validation](11-cli-commands.md#config-validation) will fail. + +## Hosts and Services + +Icinga 2 can be used to monitor the availability of hosts and services. Hosts +and services can be virtually anything which can be checked in some way: + +* Network services (HTTP, SMTP, SNMP, SSH, etc.) +* Printers +* Switches or routers +* Temperature sensors +* Other local or network-accessible services + +Host objects provide a mechanism to group services that are running +on the same physical device. + +Here is an example of a host object which defines two child services: + +``` +object Host "my-server1" { + address = "10.0.0.1" + check_command = "hostalive" +} + +object Service "ping4" { + host_name = "my-server1" + check_command = "ping4" +} + +object Service "http" { + host_name = "my-server1" + check_command = "http" +} +``` + +The example creates two services `ping4` and `http` which belong to the +host `my-server1`. + +It also specifies that the host should perform its own check using the `hostalive` +check command. + +The `address` attribute is used by check commands to determine which network +address is associated with the host object. + +Details on troubleshooting check problems can be found [here](15-troubleshooting.md#troubleshooting). 
+ +### Host States + +Hosts can be in any one of the following states: + + Name | Description + ------------|-------------- + UP | The host is available. + DOWN | The host is unavailable. + +### Service States + +Services can be in any one of the following states: + + Name | Description + ------------|-------------- + OK | The service is working properly. + WARNING | The service is experiencing some problems but is still considered to be in working condition. + CRITICAL | The check successfully determined that the service is in a critical state. + UNKNOWN | The check could not determine the service's state. + +### Check Result State Mapping + +[Check plugins](05-service-monitoring.md#service-monitoring-plugins) return +with an exit code which is converted into a state number. +Services map the states directly while hosts will treat `0` or `1` as `UP` +for example. + + Value | Host State | Service State + ------|------------|-------------- + 0 | Up | OK + 1 | Up | Warning + 2 | Down | Critical + 3 | Down | Unknown + +### Hard and Soft States + +When detecting a problem with a host/service, Icinga re-checks the object a number of +times (based on the `max_check_attempts` and `retry_interval` settings) before sending +notifications. This ensures that no unnecessary notifications are sent for +transient failures. During this time the object is in a `SOFT` state. + +After all re-checks have been executed and the object is still in a non-OK +state, the host/service switches to a `HARD` state and notifications are sent. + + Name | Description + ------------|-------------- + HARD | The host/service's state hasn't recently changed. `check_interval` applies here. + SOFT | The host/service has recently changed state and is being re-checked with `retry_interval`. + +### Host and Service Checks + +Hosts and services determine their state by running checks in a regular interval. 
+ +``` +object Host "router" { + check_command = "hostalive" + address = "10.0.0.1" +} +``` + +The `hostalive` command is one of several built-in check commands. It sends ICMP +echo requests to the IP address specified in the `address` attribute to determine +whether a host is online. + +> **Tip** +> +> `hostalive` is the same as `ping` but with different default thresholds. +> Both use the `ping` CLI command to execute sequential checks. +> +> If you need faster ICMP checks, look into the [icmp](10-icinga-template-library.md#plugin-check-command-icmp) CheckCommand. + +A number of other [built-in check commands](10-icinga-template-library.md#icinga-template-library) are also +available. In addition to these commands the next few chapters will explain in +detail how to set up your own check commands. + +#### Host Check Alternatives + +If the host is not reachable with ICMP, HTTP, etc. you can +also use the [dummy](10-icinga-template-library.md#itl-dummy) CheckCommand to set a default state. + +``` +object Host "dummy-host" { + check_command = "dummy" + vars.dummy_state = 0 //Up + vars.dummy_text = "Everything OK." +} +``` + +This method is also used when you send in [external check results](08-advanced-topics.md#external-check-results). + +A more advanced technique is to calculate an overall state +based on all services. This is described [here](08-advanced-topics.md#access-object-attributes-at-runtime-cluster-check). 
+ + +## Templates + +Templates may be used to apply a set of identical attributes to more than one +object: + +``` +template Service "generic-service" { + max_check_attempts = 3 + check_interval = 5m + retry_interval = 1m + enable_perfdata = true +} + +apply Service "ping4" { + import "generic-service" + + check_command = "ping4" + + assign where host.address +} + +apply Service "ping6" { + import "generic-service" + + check_command = "ping6" + + assign where host.address6 +} +``` + + +In this example the `ping4` and `ping6` services inherit properties from the +template `generic-service`. + +Objects as well as templates themselves can import an arbitrary number of +other templates. Attributes inherited from a template can be overridden in the +object if necessary. + +You can also import existing non-template objects. + +> **Note** +> +> Templates and objects share the same namespace, i.e. you can't define a template +> that has the same name as an object. + + +### Multiple Templates + +The following example uses [custom variables](03-monitoring-basics.md#custom-variables) which +are provided in each template. The `web-server` template is used as the +base template for any host providing web services. In addition to that it +specifies the custom variable `webserver_type`, e.g. `apache`. Since this +template is also the base template, we import the `generic-host` template here. +This provides the `check_command` attribute by default and we don't need +to set it anywhere later on. + +``` +template Host "web-server" { + import "generic-host" + vars = { + webserver_type = "apache" + } +} +``` + +The `wp-server` host template specifies a WordPress instance and sets +the `application_type` custom variable. Please note the `+=` [operator](17-language-reference.md#dictionary-operators) +which adds [dictionary](17-language-reference.md#dictionary) items, +but does not override any previous `vars` attribute. 
+ +``` +template Host "wp-server" { + vars += { + application_type = "wordpress" + } +} +``` + +The final host object imports both templates. The order is important here: +First the base template `web-server` is added to the object, then additional +attributes are imported from the `wp-server` template. + +``` +object Host "wp.example.com" { + import "web-server" + import "wp-server" + + address = "192.168.56.200" +} +``` + +If you want to override specific attributes inherited from templates, you can +specify them on the host object. + +``` +object Host "wp1.example.com" { + import "web-server" + import "wp-server" + + vars.webserver_type = "nginx" //overrides attribute from base template + + address = "192.168.56.201" +} +``` + + + + +## Custom Variables + +In addition to built-in object attributes you can define your own custom +attributes inside the `vars` attribute. + +> **Tip** +> +> This is called `custom variables` throughout the documentation, backends and web interfaces. +> +> Older documentation versions referred to this as `custom attribute`. + +The following example specifies the key `ssh_port` as a custom +variable and assigns an integer value. + +``` +object Host "localhost" { + check_command = "ssh" + vars.ssh_port = 2222 +} +``` + +`vars` is a [dictionary](17-language-reference.md#dictionary) where you +can set specific keys to values. The example above uses the shorter +[indexer](17-language-reference.md#indexer) syntax. 
+ +An alternative representation can be written like this: + +``` + vars = { + ssh_port = 2222 + } +``` + +or + +``` + vars["ssh_port"] = 2222 +``` + +### Custom Variable Values + +Valid values for custom variables include: + +* [Strings](17-language-reference.md#string-literals), [numbers](17-language-reference.md#numeric-literals) and [booleans](17-language-reference.md#boolean-literals) +* [Arrays](17-language-reference.md#array) and [dictionaries](17-language-reference.md#dictionary) +* [Functions](03-monitoring-basics.md#custom-variables-functions) + +You can also define nested values such as dictionaries in dictionaries. + +This example defines the custom variable `disks` as a dictionary. +The first key, `disk /`, is itself set to a dictionary +with one key-value pair. + +``` + vars.disks["disk /"] = { + disk_partitions = "/" + } +``` + +This can be written as resolved structure like this: + +``` + vars = { + disks = { + "disk /" = { + disk_partitions = "/" + } + } + } +``` + +Keep this in mind when trying to access specific sub-keys +in apply rules or functions. + +Another example which is shown in the example configuration: + +``` + vars.notification["mail"] = { + groups = [ "icingaadmins" ] + } +``` + +This defines the `notification` custom variable as a dictionary +with the key `mail`. Its value is a dictionary with the key `groups` +which itself has an array as value. Note: This array is exactly +what the `user_groups` attribute for [notification apply rules](03-monitoring-basics.md#using-apply-notifications) +expects. + +``` + vars.notification = { + mail = { + groups = [ + "icingaadmins" + ] + } + } +``` + + + + +### Functions as Custom Variables + +Icinga 2 lets you specify [functions](17-language-reference.md#functions) for custom variables. 
+The special case here is that whenever Icinga 2 needs the value for such a custom variable it runs +the function and uses whatever value the function returns: + +``` +object CheckCommand "random-value" { + command = [ PluginDir + "/check_dummy", "0", "$text$" ] + + vars.text = {{ Math.random() * 100 }} +} +``` + +This example uses the [abbreviated lambda syntax](17-language-reference.md#nullary-lambdas). + +These functions have access to a number of variables: + + Variable | Description + -------------|--------------- + user | The User object (for notifications). + service | The Service object (for service checks/notifications/event handlers). + host | The Host object. + command | The command object (e.g. a CheckCommand object for checks). + +Here's an example: + +``` +vars.text = {{ host.check_interval }} +``` + +In addition to these variables the [macro](18-library-reference.md#scoped-functions-macro) function can be used to retrieve the +value of arbitrary macro expressions: + +``` +vars.text = {{ + if (macro("$address$") == "127.0.0.1") { + log("Running a check for localhost!") + } + + return "Some text" +}} +``` + +The `resolve_arguments` function can be used to resolve a command and its arguments much in +the same fashion Icinga does this for the `command` and `arguments` attributes for +commands. The `by_ssh` command uses this functionality to let users specify a +command and arguments that should be executed via SSH: + +``` +arguments = { + "-C" = {{ + var command = macro("$by_ssh_command$") + var arguments = macro("$by_ssh_arguments$") + + if (typeof(command) == String && !arguments) { + return command + } + + var escaped_args = [] + for (arg in resolve_arguments(command, arguments)) { + escaped_args.add(escape_shell_arg(arg)) + } + return escaped_args.join(" ") + }} + ... +} +``` + +Accessing object attributes at runtime inside these functions is described in the +[advanced topics](08-advanced-topics.md#access-object-attributes-at-runtime) chapter. 
+ + +## Runtime Macros + +Macros can be used to access other objects' attributes and [custom variables](03-monitoring-basics.md#custom-variables) +at runtime. For example they are used in command definitions to figure out +which IP address a check should be run against: + +``` +object CheckCommand "my-ping" { + command = [ PluginDir + "/check_ping" ] + + arguments = { + "-H" = "$ping_address$" + "-w" = "$ping_wrta$,$ping_wpl$%" + "-c" = "$ping_crta$,$ping_cpl$%" + "-p" = "$ping_packets$" + } + + // Resolve from a host attribute, or custom variable. + vars.ping_address = "$address$" + + // Default values + vars.ping_wrta = 100 + vars.ping_wpl = 5 + + vars.ping_crta = 250 + vars.ping_cpl = 10 + + vars.ping_packets = 5 +} + +object Host "router" { + check_command = "my-ping" + address = "10.0.0.1" +} +``` + +In this example we are using the `$address$` macro to refer to the host's `address` +attribute. + +We can also directly refer to custom variables, e.g. by using `$ping_wrta$`. Icinga +automatically tries to find the closest match for the attribute you specified. The +exact rules for this are explained in the next section. + +> **Note** +> +> When using the `$` sign as single character you must escape it with an +> additional dollar character (`$$`). + + +### Evaluation Order + +When executing commands Icinga 2 checks the following objects in this order to look +up macros and their respective values: + +1. User object (only for notifications) +2. Service object +3. Host object +4. Command object +5. Global custom variables in the `Vars` constant + +This execution order allows you to define default values for custom variables +in your command objects. 
+ +Here's how you can override the custom variable `ping_packets` from the previous +example: + +``` +object Service "ping" { + host_name = "localhost" + check_command = "my-ping" + + vars.ping_packets = 10 // Overrides the default value of 5 given in the command +} +``` + +If a custom variable isn't defined anywhere, an empty value is used and a warning is +written to the Icinga 2 log. + +You can also directly refer to a specific attribute -- thereby ignoring these evaluation +rules -- by specifying the full attribute name: + +``` +$service.vars.ping_wrta$ +``` + +This retrieves the value of the `ping_wrta` custom variable for the service. This +returns an empty value if the service does not have such a custom variable no matter +whether another object such as the host has this attribute. + + +### Host Runtime Macros + +The following host custom variables are available in all commands that are executed for +hosts or services: + + Name | Description + -----------------------------|-------------- + host.name | The name of the host object. + host.display\_name | The value of the `display_name` attribute. + host.state | The host's current state. Can be one of `UNREACHABLE`, `UP` and `DOWN`. + host.state\_id | The host's current state. Can be one of `0` (up), `1` (down) and `2` (unreachable). + host.state\_type | The host's current state type. Can be one of `SOFT` and `HARD`. + host.check\_attempt | The current check attempt number. + host.max\_check\_attempts | The maximum number of checks which are executed before changing to a hard state. + host.last\_state | The host's previous state. Can be one of `UNREACHABLE`, `UP` and `DOWN`. + host.last\_state\_id | The host's previous state. Can be one of `0` (up), `1` (down) and `2` (unreachable). + host.last\_state\_type | The host's previous state type. Can be one of `SOFT` and `HARD`. + host.last\_state\_change | The last state change's timestamp. + host.downtime\_depth | The number of active downtimes. 
+ host.duration\_sec | The time since the last state change. + host.latency | The host's check latency. + host.execution\_time | The host's check execution time. + host.output | The last check's output. + host.perfdata | The last check's performance data. + host.last\_check | The timestamp when the last check was executed. + host.check\_source | The monitoring instance that performed the last check. + host.num\_services | Number of services associated with the host. + host.num\_services\_ok | Number of services associated with the host which are in an `OK` state. + host.num\_services\_warning | Number of services associated with the host which are in a `WARNING` state. + host.num\_services\_unknown | Number of services associated with the host which are in an `UNKNOWN` state. + host.num\_services\_critical | Number of services associated with the host which are in a `CRITICAL` state. + +In addition to these specific runtime macros [host object](09-object-types.md#objecttype-host) +attributes can be accessed too. + +### Service Runtime Macros + +The following service macros are available in all commands that are executed for +services: + + Name | Description + -----------------------------|-------------- + service.name | The short name of the service object. + service.display\_name | The value of the `display_name` attribute. + service.check\_command | The short name of the command along with any arguments to be used for the check. + service.state | The service's current state. Can be one of `OK`, `WARNING`, `CRITICAL` and `UNKNOWN`. + service.state\_id | The service's current state. Can be one of `0` (ok), `1` (warning), `2` (critical) and `3` (unknown). + service.state\_type | The service's current state type. Can be one of `SOFT` and `HARD`. + service.check\_attempt | The current check attempt number. + service.max\_check\_attempts | The maximum number of checks which are executed before changing to a hard state. 
+ service.last\_state | The service's previous state. Can be one of `OK`, `WARNING`, `CRITICAL` and `UNKNOWN`. + service.last\_state\_id | The service's previous state. Can be one of `0` (ok), `1` (warning), `2` (critical) and `3` (unknown). + service.last\_state\_type | The service's previous state type. Can be one of `SOFT` and `HARD`. + service.last\_state\_change | The last state change's timestamp. + service.downtime\_depth | The number of active downtimes. + service.duration\_sec | The time since the last state change. + service.latency | The service's check latency. + service.execution\_time | The service's check execution time. + service.output | The last check's output. + service.perfdata | The last check's performance data. + service.last\_check | The timestamp when the last check was executed. + service.check\_source | The monitoring instance that performed the last check. + +In addition to these specific runtime macros [service object](09-object-types.md#objecttype-service) +attributes can be accessed too. + +### Command Runtime Macros + +The following custom variables are available in all commands: + + Name | Description + -----------------------|-------------- + command.name | The name of the command object. + +### User Runtime Macros + +The following custom variables are available in all commands that are executed for +users: + + Name | Description + -----------------------|-------------- + user.name | The name of the user object. + user.display\_name | The value of the `display_name` attribute. + +In addition to these specific runtime macros [user object](09-object-types.md#objecttype-user) +attributes can be accessed too. + +### Notification Runtime Macros + + Name | Description + -----------------------|-------------- + notification.type | The type of the notification. + notification.author | The author of the notification comment if existing. + notification.comment | The comment of the notification if existing. 
+ +In addition to these specific runtime macros [notification object](09-object-types.md#objecttype-notification) +attributes can be accessed too. + +### Global Runtime Macros + +The following macros are available in all executed commands: + + Name | Description + -------------------------|-------------- + icinga.timet | Current UNIX timestamp. + icinga.long\_date\_time | Current date and time including timezone information. Example: `2014-01-03 11:23:08 +0000` + icinga.short\_date\_time | Current date and time. Example: `2014-01-03 11:23:08` + icinga.date | Current date. Example: `2014-01-03` + icinga.time | Current time including timezone information. Example: `11:23:08 +0000` + icinga.uptime | Current uptime of the Icinga 2 process. + +The following macros provide global statistics: + + Name | Description + ------------------------------------|------------------------------------ + icinga.num\_services\_ok | Current number of services in state 'OK'. + icinga.num\_services\_warning | Current number of services in state 'Warning'. + icinga.num\_services\_critical | Current number of services in state 'Critical'. + icinga.num\_services\_unknown | Current number of services in state 'Unknown'. + icinga.num\_services\_pending | Current number of pending services. + icinga.num\_services\_unreachable | Current number of unreachable services. + icinga.num\_services\_flapping | Current number of flapping services. + icinga.num\_services\_in\_downtime | Current number of services in downtime. + icinga.num\_services\_acknowledged | Current number of acknowledged service problems. + icinga.num\_hosts\_up | Current number of hosts in state 'Up'. + icinga.num\_hosts\_down | Current number of hosts in state 'Down'. + icinga.num\_hosts\_unreachable | Current number of unreachable hosts. + icinga.num\_hosts\_pending | Current number of pending hosts. + icinga.num\_hosts\_flapping | Current number of flapping hosts. 
+ icinga.num\_hosts\_in\_downtime | Current number of hosts in downtime. + icinga.num\_hosts\_acknowledged | Current number of acknowledged host problems. + + +## Apply Rules + +Several object types require an object relation, e.g. [Service](09-object-types.md#objecttype-service), +[Notification](09-object-types.md#objecttype-notification), [Dependency](09-object-types.md#objecttype-dependency), +[ScheduledDowntime](09-object-types.md#objecttype-scheduleddowntime) objects. The +object relations are documented in the linked chapters. + +If, for example, you create a service object, you have to specify the [host_name](09-object-types.md#objecttype-service) +attribute and reference an existing host object. + +``` +object Service "ping4" { + check_command = "ping4" + host_name = "icinga2-agent1.localdomain" +} +``` + +This isn't comfortable when managing a huge set of configuration objects which could +[match](03-monitoring-basics.md#using-apply-expressions) on a common pattern. + +Instead you want to use **[apply](17-language-reference.md#apply) rules**. + +If you want basic monitoring for all your hosts, add a `ping4` service apply rule +for all hosts which have the `address` attribute specified. Just one rule for 1000 hosts +instead of 1000 service objects. Apply rules will automatically generate them for you. + +``` +apply Service "ping4" { + check_command = "ping4" + assign where host.address +} +``` + +More explanations on assign where expressions can be found [here](03-monitoring-basics.md#using-apply-expressions). + +### Apply Rules: Prerequisites + +Before you start with apply rules keep the following in mind: + +* Define the best match. + * A set of unique [custom variables](03-monitoring-basics.md#custom-variables) for these hosts/services? + * Or [group](03-monitoring-basics.md#groups) memberships, e.g. a host being a member of a hostgroup which should have a service set? 
+ * A generic pattern [match](18-library-reference.md#global-functions-match) on the host/service name? + * [Multiple expressions combined](03-monitoring-basics.md#using-apply-expressions) with `&&` or `||` [operators](17-language-reference.md#expression-operators) +* All expressions must return a boolean value (e.g. an empty string is equal to `false`) + +More specific object type requirements are described in these chapters: + +* [Apply services to hosts](03-monitoring-basics.md#using-apply-services) +* [Apply notifications to hosts and services](03-monitoring-basics.md#using-apply-notifications) +* [Apply dependencies to hosts and services](03-monitoring-basics.md#using-apply-dependencies) +* [Apply scheduled downtimes to hosts and services](03-monitoring-basics.md#using-apply-scheduledowntimes) + +### Apply Rules: Usage Examples + +You can set/override object attributes in apply rules using the respectively available +objects in that scope (host and/or service objects). + +``` +vars.application_type = host.vars.application_type +``` + +[Custom variables](03-monitoring-basics.md#custom-variables) can also store +nested dictionaries and arrays. That way you can use them for not only matching +for their existence or values in apply expressions, but also assign +("inherit") their values into the generated objects from apply rules. + +Remember the examples shown for [custom variable values](03-monitoring-basics.md#custom-variables-values): + +``` + vars.notification["mail"] = { + groups = [ "icingaadmins" ] + } +``` + +You can do two things here: + +* Check for the existence of the `notification` custom variable and its nested dictionary key `mail`. +If this is boolean true, the notification object will be generated. +* Assign the value of the `groups` key to the `user_groups` attribute. + +``` +apply Notification "mail-icingaadmin" to Host { + [...] 
+ + user_groups = host.vars.notification.mail.groups + + assign where host.vars.notification.mail +} + +``` + +A more advanced example is to use [apply rules with for loops on arrays or +dictionaries](03-monitoring-basics.md#using-apply-for) provided by +[custom variables](03-monitoring-basics.md#custom-variables) or groups. + +Remember the examples shown for [custom variable values](03-monitoring-basics.md#custom-variables-values): + +``` + vars.disks["disk /"] = { + disk_partitions = "/" + } +``` + +You can iterate over all dictionary keys defined in `disks`. +You can optionally use the value to specify additional object attributes. + +``` +apply Service for (disk => config in host.vars.disks) { + [...] + + vars.disk_partitions = config.disk_partitions +} +``` + +Please read the [apply for chapter](03-monitoring-basics.md#using-apply-for) +for more specific insights. + + +> **Tip** +> +> Building configuration in that dynamic way requires detailed information +> of the generated objects. Use the `object list` [CLI command](11-cli-commands.md#cli-command-object) +> after successful [configuration validation](11-cli-commands.md#config-validation). + + +### Apply Rules Expressions + +You can use simple or advanced combinations of apply rule expressions. Each +expression must evaluate into the boolean `true` value. An empty string +will be for instance interpreted as `false`. In a similar fashion undefined +attributes will return `false`. + +Returns `false`: + +``` +assign where host.vars.attribute_does_not_exist +``` + +Multiple `assign where` condition rows are evaluated as `OR` condition. + +You can combine multiple expressions for matching only a subset of objects. In some cases, +you want to be able to add more than one assign/ignore where expression which matches +a specific condition. To achieve this you can use the logical `and` and `or` operators. 
+ +#### Apply Rules Expressions Examples + +Assign a service to a specific host in a host group [array](18-library-reference.md#array-type) using the [in operator](17-language-reference.md#expression-operators): + +``` +assign where "hostgroup-dev" in host.groups +``` + +Assign an object when a custom variable is [equal](17-language-reference.md#expression-operators) to a value: + +``` +assign where host.vars.application_type == "database" + +assign where service.vars.sms_notify == true +``` + +Assign an object if a dictionary [contains](18-library-reference.md#dictionary-contains) a given key: + +``` +assign where host.vars.app_dict.contains("app") +``` + +Match the host name by using a [case insensitive match](18-library-reference.md#global-functions-match): + +``` +assign where match("webserver*", host.name) +``` + +Match the host name by using a [regular expression](18-library-reference.md#global-functions-regex). Please note the [escaped](17-language-reference.md#string-literals-escape-sequences) backslash character: + +``` +assign where regex("^webserver-[\\d+]", host.name) +``` + +[Match](18-library-reference.md#global-functions-match) all `*mysql*` patterns in the host name and (`&&`) custom variable `prod_mysql_db` +matches the `db-*` pattern. All hosts with the custom variable `test_server` set to `true` +should be ignored, or any host name ending with `*internal` pattern.
+ +``` +object HostGroup "mysql-server" { + display_name = "MySQL Server" + + assign where match("*mysql*", host.name) && match("db-*", host.vars.prod_mysql_db) + ignore where host.vars.test_server == true + ignore where match("*internal", host.name) +} +``` + +Similar example for advanced notification apply rule filters: If the service +attribute `notes` [matches](18-library-reference.md#global-functions-match) the `has gold support 24x7` string `AND` one of the +two condition passes, either the `customer` host custom variable is set to `customer-xy` +`OR` the host custom variable `always_notify` is set to `true`. + +The notification is ignored for services whose host name ends with `*internal` +`OR` the `priority` custom variable is [less than](17-language-reference.md#expression-operators) `2`. + +``` +template Notification "cust-xy-notification" { + users = [ "noc-xy", "mgmt-xy" ] + command = "mail-service-notification" +} + +apply Notification "notify-cust-xy-mysql" to Service { + import "cust-xy-notification" + + assign where match("*has gold support 24x7*", service.notes) && (host.vars.customer == "customer-xy" || host.vars.always_notify == true) + ignore where match("*internal", host.name) || (service.vars.priority < 2 && host.vars.is_clustered == true) +} +``` + +More advanced examples are covered [here](08-advanced-topics.md#use-functions-assign-where). + +### Apply Services to Hosts + +The sample configuration already includes a detailed example in [hosts.conf](04-configuration.md#hosts-conf) +and [services.conf](04-configuration.md#services-conf) for this use case. + +The example for `ssh` applies a service object to all hosts with the `address` +attribute being defined and the custom variable `os` set to the string `Linux` in `vars`. 
+ +``` +apply Service "ssh" { + import "generic-service" + + check_command = "ssh" + + assign where host.address && host.vars.os == "Linux" +} +``` + +Other detailed examples are used in their respective chapters, for example +[apply services with custom command arguments](03-monitoring-basics.md#command-passing-parameters). + +### Apply Notifications to Hosts and Services + +Notifications are applied to specific targets (`Host` or `Service`) and work in a similar +manner: + +``` +apply Notification "mail-noc" to Service { + import "mail-service-notification" + + user_groups = [ "noc" ] + + assign where host.vars.notification.mail +} +``` + +In this example the `mail-noc` notification will be created as object for all services having the +`notification.mail` custom variable defined. The notification command is set to `mail-service-notification` +and all members of the user group `noc` will get notified. + +It is also possible to generally apply a notification template and dynamically overwrite values from +the template by checking for custom variables. 
This can be achieved by using [conditional statements](17-language-reference.md#conditional-statements): + +``` +apply Notification "host-mail-noc" to Host { + import "mail-host-notification" + + // replace interval inherited from `mail-host-notification` template with new notfication interval set by a host custom variable + if (host.vars.notification_interval) { + interval = host.vars.notification_interval + } + + // same with notification period + if (host.vars.notification_period) { + period = host.vars.notification_period + } + + // Send SMS instead of email if the host's custom variable `notification_type` is set to `sms` + if (host.vars.notification_type == "sms") { + command = "sms-host-notification" + } else { + command = "mail-host-notification" + } + + user_groups = [ "noc" ] + + assign where host.address +} +``` + +In the example above the notification template `mail-host-notification` +contains all relevant notification settings. +The apply rule is applied on all host objects where the `host.address` is defined. + +If the host object has a specific custom variable set, its value is inherited +into the local notification object scope, e.g. `host.vars.notification_interval`, +`host.vars.notification_period` and `host.vars.notification_type`. +This overwrites attributes already specified in the imported `mail-host-notification` +template. + +The corresponding host object could look like this: + +``` +object Host "host1" { + import "host-linux-prod" + display_name = "host1" + address = "192.168.1.50" + vars.notification_interval = 1h + vars.notification_period = "24x7" + vars.notification_type = "sms" +} +``` + +### Apply Dependencies to Hosts and Services + +Detailed examples can be found in the [dependencies](03-monitoring-basics.md#dependencies) chapter. + +### Apply Recurring Downtimes to Hosts and Services + +The sample configuration includes an example in [downtimes.conf](04-configuration.md#downtimes-conf). 
+ +Detailed examples can be found in the [recurring downtimes](08-advanced-topics.md#recurring-downtimes) chapter. + + +### Using Apply For Rules + +Next to the standard way of using [apply rules](03-monitoring-basics.md#using-apply) +there is the requirement of applying objects based on a set (array or +dictionary) using [apply for](17-language-reference.md#apply-for) expressions. + +The sample configuration already includes a detailed example in [hosts.conf](04-configuration.md#hosts-conf) +and [services.conf](04-configuration.md#services-conf) for this use case. + +Take the following example: A host provides the snmp oids for different service check +types. This could look like the following example: + +``` +object Host "router-v6" { + check_command = "hostalive" + address6 = "2001:db8:1234::42" + + vars.oids["if01"] = "1.1.1.1.1" + vars.oids["temp"] = "1.1.1.1.2" + vars.oids["bgp"] = "1.1.1.1.5" +} +``` + +The idea is to create service objects for `if01` and `temp` but not `bgp`. +The oid value should also be used as service custom variable `snmp_oid`. +This is the command argument required by the [snmp](10-icinga-template-library.md#plugin-check-command-snmp) +check command. +The service's `display_name` should be set to the identifier inside the dictionary, +e.g. `if01`. + +``` +apply Service for (identifier => oid in host.vars.oids) { + check_command = "snmp" + display_name = identifier + vars.snmp_oid = oid + + ignore where identifier == "bgp" //don't generate service for bgp checks +} +``` + +Icinga 2 evaluates the `apply for` rule for all objects with the custom variable +`oids` set. +It iterates over all dictionary items inside the `for` loop and evaluates the +`assign/ignore where` expressions. You can access the loop variable +in these expressions, e.g. to ignore specific values. + +In this example the `bgp` identifier is ignored. This avoids to generate +unwanted services. 
A different approach would be to match the `oid` value with a +[regex](18-library-reference.md#global-functions-regex)/[wildcard match](18-library-reference.md#global-functions-match) pattern for example. + +``` + ignore where regex("^\d.\d.\d.\d.5$", oid) +``` + +> **Note** +> +> You don't need an `assign where` expression which checks for the existence of the +> `oids` custom variable. + +This method saves you from creating multiple apply rules. It also moves +the attribute specification logic from the service to the host. + + + + +#### Apply For and Custom Variable Override + +Imagine a different more advanced example: You are monitoring your network device (host) +with many interfaces (services). The following requirements/problems apply: + +* Each interface service should be named with a prefix and a name defined in your host object (which could be generated from your CMDB, etc.) +* Each interface has its own VLAN tag +* Some interfaces have QoS enabled +* Additional attributes such as `display_name` or `notes`, `notes_url` and `action_url` must be +dynamically generated. + + +> **Tip** +> +> Define the SNMP community as global constant in your [constants.conf](04-configuration.md#constants-conf) file. + +``` +const IftrafficSnmpCommunity = "public" +``` + +Define the `interfaces` [custom variable](03-monitoring-basics.md#custom-variables) +on the `cisco-catalyst-6509-34` host object and add three example interfaces as dictionary keys. 
+ +Specify additional attributes inside the nested dictionary +as learned with [custom variable values](03-monitoring-basics.md#custom-variables-values): + +``` +object Host "cisco-catalyst-6509-34" { + import "generic-host" + display_name = "Catalyst 6509 #34 VIE21" + address = "127.0.1.4" + + /* "GigabitEthernet0/2" is the interface name, + * and key name in service apply for later on + */ + vars.interfaces["GigabitEthernet0/2"] = { + /* define all custom variables with the + * same name required for command parameters/arguments + * in service apply (look into your CheckCommand definition) + */ + iftraffic_units = "g" + iftraffic_community = IftrafficSnmpCommunity + iftraffic_bandwidth = 1 + vlan = "internal" + qos = "disabled" + } + vars.interfaces["GigabitEthernet0/4"] = { + iftraffic_units = "g" + //iftraffic_community = IftrafficSnmpCommunity + iftraffic_bandwidth = 1 + vlan = "remote" + qos = "enabled" + } + vars.interfaces["MgmtInterface1"] = { + iftraffic_community = IftrafficSnmpCommunity + vlan = "mgmt" + interface_address = "127.99.0.100" #special management ip + } +} +``` + +Start with the apply for definition and iterate over `host.vars.interfaces`. +This is a dictionary and should use the variables `interface_name` as key +and `interface_config` as value for each generated object scope. + +`"if-"` specifies the object name prefix for each service which results +in `if-` for each iteration. + +``` +/* loop over the host.vars.interfaces dictionary + * for (key => value in dict) means `interface_name` as key + * and `interface_config` as value. Access config attributes + * with the indexer (`.`) character. + */ +apply Service "if-" for (interface_name => interface_config in host.vars.interfaces) { +``` + +Import the `generic-service` template, assign the [iftraffic](10-icinga-template-library.md#plugin-contrib-command-iftraffic) +`check_command`. Use the dictionary key `interface_name` to set a proper `display_name` +string for external interfaces. 
+ +``` + import "generic-service" + check_command = "iftraffic" + display_name = "IF-" + interface_name +``` + +The `interface_name` key's value is the same string used as command parameter for +`iftraffic`: + +``` + /* use the key as command argument (no duplication of values in host.vars.interfaces) */ + vars.iftraffic_interface = interface_name +``` + +Remember that `interface_config` is a nested dictionary. In the first iteration it looks +like this: + +``` +interface_config = { + iftraffic_units = "g" + iftraffic_community = IftrafficSnmpCommunity + iftraffic_bandwidth = 1 + vlan = "internal" + qos = "disabled" +} +``` + +Access the dictionary keys with the [indexer](17-language-reference.md#indexer) syntax +and assign them to custom variables used as command parameters for the `iftraffic` +check command. + +``` + /* map the custom variables as command arguments */ + vars.iftraffic_units = interface_config.iftraffic_units + vars.iftraffic_community = interface_config.iftraffic_community +``` + +If you just want to inherit all attributes specified inside the `interface_config` +dictionary, add it to the generated service custom variables like this: + +``` + /* the above can be achieved in a shorter fashion if the names inside host.vars.interfaces + * are the _exact_ same as required as command parameter by the check command + * definition. + */ + vars += interface_config +``` + +If the user did not specify default values for required service custom variables, +add them here. This also helps to avoid unwanted configuration validation errors or +runtime failures. Please read more about conditional statements [here](17-language-reference.md#conditional-statements). 
+ +``` + /* set a default value for units and bandwidth */ + if (interface_config.iftraffic_units == "") { + vars.iftraffic_units = "m" + } + if (interface_config.iftraffic_bandwidth == "") { + vars.iftraffic_bandwidth = 1 + } + if (interface_config.vlan == "") { + vars.vlan = "not set" + } + if (interface_config.qos == "") { + vars.qos = "not set" + } +``` + +If the host object did not specify a custom SNMP community, +set a default value specified by the [global constant](17-language-reference.md#constants) `IftrafficSnmpCommunity`. + +``` + /* set the global constant if not explicitely + * not provided by the `interfaces` dictionary on the host + */ + if (len(interface_config.iftraffic_community) == 0 || len(vars.iftraffic_community) == 0) { + vars.iftraffic_community = IftrafficSnmpCommunity + } +``` + +Use the provided values to [calculate](17-language-reference.md#expression-operators) +more object attributes which can be e.g. seen in external interfaces. + +``` + /* Calculate some additional object attributes after populating the `vars` dictionary */ + notes = "Interface check for " + interface_name + " (units: '" + interface_config.iftraffic_units + "') in VLAN '" + vars.vlan + "' with ' QoS '" + vars.qos + "'" + notes_url = "https://foreman.company.com/hosts/" + host.name + action_url = "https://snmp.checker.company.com/" + host.name + "/if-" + interface_name +} +``` + +> **Tip** +> +> Building configuration in that dynamic way requires detailed information +> of the generated objects. Use the `object list` [CLI command](11-cli-commands.md#cli-command-object) +> after successful [configuration validation](11-cli-commands.md#config-validation). + +Verify that the apply-for-rule successfully created the service objects with the +inherited custom variables: + +``` +# icinga2 daemon -C +# icinga2 object list --type Service --name *catalyst* + +Object 'cisco-catalyst-6509-34!if-GigabitEthernet0/2' of type 'Service': +...... 
+ * vars + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 59:3-59:26 + * iftraffic_bandwidth = 1 + * iftraffic_community = "public" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 53:3-53:65 + * iftraffic_interface = "GigabitEthernet0/2" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 49:3-49:43 + * iftraffic_units = "g" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 52:3-52:57 + * qos = "disabled" + * vlan = "internal" + + +Object 'cisco-catalyst-6509-34!if-GigabitEthernet0/4' of type 'Service': +... + * vars + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 59:3-59:26 + * iftraffic_bandwidth = 1 + * iftraffic_community = "public" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 53:3-53:65 + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 79:5-79:53 + * iftraffic_interface = "GigabitEthernet0/4" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 49:3-49:43 + * iftraffic_units = "g" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 52:3-52:57 + * qos = "enabled" + * vlan = "remote" + +Object 'cisco-catalyst-6509-34!if-MgmtInterface1' of type 'Service': +... 
+ * vars + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 59:3-59:26 + * iftraffic_bandwidth = 1 + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 66:5-66:32 + * iftraffic_community = "public" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 53:3-53:65 + * iftraffic_interface = "MgmtInterface1" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 49:3-49:43 + * iftraffic_units = "m" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 52:3-52:57 + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 63:5-63:30 + * interface_address = "127.99.0.100" + * qos = "not set" + % = modified in '/etc/icinga2/conf.d/iftraffic.conf', lines 72:5-72:24 + * vlan = "mgmt" +``` + +### Use Object Attributes in Apply Rules + +Since apply rules are evaluated after the generic objects, you +can reference existing host and/or service object attributes as +values for any object attribute specified in that apply rule. + +``` +object Host "opennebula-host" { + import "generic-host" + address = "10.1.1.2" + + vars.hosting["cust1"] = { + http_uri = "/shop" + customer_name = "Customer 1" + customer_id = "7568" + support_contract = "gold" + } + vars.hosting["cust2"] = { + http_uri = "/" + customer_name = "Customer 2" + customer_id = "7569" + support_contract = "silver" + } +} +``` + +`hosting` is a custom variable with the Dictionary value type. +This is mandatory to iterate with the `key => value` notation +in the below apply for rule. + +``` +apply Service for (customer => config in host.vars.hosting) { + import "generic-service" + check_command = "ping4" + + vars.qos = "disabled" + + vars += config + + vars.http_uri = "/" + customer + "/" + config.http_uri + + display_name = "Shop Check for " + vars.customer_name + "-" + vars.customer_id + + notes = "Support contract: " + vars.support_contract + " for Customer " + vars.customer_name + " (" + vars.customer_id + ")." 
+ + notes_url = "https://foreman.company.com/hosts/" + host.name + action_url = "https://snmp.checker.company.com/" + host.name + "/" + vars.customer_id +} +``` + +Each loop iteration has different values for `customer` and `config` +in the local scope. + +1. + +``` +customer = "cust1" +config = { + http_uri = "/shop" + customer_name = "Customer 1" + customer_id = "7568" + support_contract = "gold" +} +``` + +2. + +``` +customer = "cust2" +config = { + http_uri = "/" + customer_name = "Customer 2" + customer_id = "7569" + support_contract = "silver" +} +``` + +You can now add the `config` dictionary into `vars`. + +``` +vars += config +``` + +Now it looks like the following in the first iteration: + +``` +customer = "cust1" +vars = { + http_uri = "/shop" + customer_name = "Customer 1" + customer_id = "7568" + support_contract = "gold" +} +``` + +Remember, you know this structure already. Custom +attributes can also be accessed by using the [indexer](17-language-reference.md#indexer) +syntax. + +``` + vars.http_uri = ... + config.http_uri +``` + +can also be written as + +``` + vars += config + vars.http_uri = ... + vars.http_uri +``` + + +## Groups + +A group is a collection of similar objects. Groups are primarily used as a +visualization aid in web interfaces. + +Group membership is defined at the respective object itself.
If +you have a hostgroup named `windows` for example, and want to assign +specific hosts to this group for later viewing the group on your +alert dashboard, first create a HostGroup object: + +``` +object HostGroup "windows" { + display_name = "Windows Servers" +} +``` + +Then add your hosts to this group: + +``` +template Host "windows-server" { + groups += [ "windows" ] +} + +object Host "mssql-srv1" { + import "windows-server" + + vars.mssql_port = 1433 +} + +object Host "mssql-srv2" { + import "windows-server" + + vars.mssql_port = 1433 +} +``` + +This can be done for service and user groups the same way: + +``` +object UserGroup "windows-mssql-admins" { + display_name = "Windows MSSQL Admins" +} + +template User "generic-windows-mssql-users" { + groups += [ "windows-mssql-admins" ] +} + +object User "win-mssql-noc" { + import "generic-windows-mssql-users" + + email = "noc@example.com" +} + +object User "win-mssql-ops" { + import "generic-windows-mssql-users" + + email = "ops@example.com" +} +``` + +### Group Membership Assign + +Instead of manually assigning each object to a group you can also assign objects +to a group based on their attributes: + +``` +object HostGroup "prod-mssql" { + display_name = "Production MSSQL Servers" + + assign where host.vars.mssql_port && host.vars.prod_mysql_db + ignore where host.vars.test_server == true + ignore where match("*internal", host.name) +} +``` + +In this example all hosts with the `vars` attributes `mssql_port` and `prod_mysql_db` +will be added as members to the host group `prod-mssql`. However, all +hosts [matching](18-library-reference.md#global-functions-match) the string `\*internal` +or with the `test_server` attribute set to `true` are **not** added to this group. + +Details on the `assign where` syntax can be found in the +[Language Reference](17-language-reference.md#apply). + +## Notifications + +Notifications for service and host problems are an integral part of your +monitoring setup.
+ +When a host or service is in a downtime, a problem has been acknowledged or +the dependency logic determined that the host/service is unreachable, no +notifications are sent. You can configure additional type and state filters +refining the notifications being actually sent. + +There are many ways of sending notifications, e.g. by email, XMPP, +IRC, Twitter, etc. On its own Icinga 2 does not know how to send notifications. +Instead it relies on external mechanisms such as shell scripts to notify users. +More notification methods are listed in the [addons and plugins](13-addons.md#notification-scripts-interfaces) +chapter. + +A notification specification requires one or more users (and/or user groups) +who will be notified in case of problems. These users must have all custom +attributes defined which will be used in the `NotificationCommand` on execution. + +The user `icingaadmin` in the example below will get notified only on `Warning` and +`Critical` problems. In addition to that `Recovery` notifications are sent (they require +the `OK` state). + +``` +object User "icingaadmin" { + display_name = "Icinga 2 Admin" + enable_notifications = true + states = [ OK, Warning, Critical ] + types = [ Problem, Recovery ] + email = "icinga@localhost" +} +``` + +If you don't set the `states` and `types` configuration attributes for the `User` +object, notifications for all states and types will be sent. + +Details on troubleshooting notification problems can be found [here](15-troubleshooting.md#troubleshooting). + +> **Note** +> +> Make sure that the [notification](11-cli-commands.md#enable-features) feature is enabled +> in order to execute notification commands. + +You should choose which information you (and your notified users) are interested in +case of emergency, and also which information does not provide any value to you and +your environment. + +An example notification command is explained [here](03-monitoring-basics.md#notification-commands). 
+ +You can add all shared attributes to a `Notification` template which is inherited +to the defined notifications. That way you'll save duplicated attributes in each +`Notification` object. Attributes can be overridden locally. + +``` +template Notification "generic-notification" { + interval = 15m + + command = "mail-service-notification" + + states = [ Warning, Critical, Unknown ] + types = [ Problem, Acknowledgement, Recovery, Custom, FlappingStart, + FlappingEnd, DowntimeStart, DowntimeEnd, DowntimeRemoved ] + + period = "24x7" +} +``` + +The time period `24x7` is included as example configuration with Icinga 2. + +Use the `apply` keyword to create `Notification` objects for your services: + +``` +apply Notification "notify-cust-xy-mysql" to Service { + import "generic-notification" + + users = [ "noc-xy", "mgmt-xy" ] + + assign where match("*has gold support 24x7*", service.notes) && (host.vars.customer == "customer-xy" || host.vars.always_notify == true) + ignore where match("*internal", host.name) || (service.vars.priority < 2 && host.vars.is_clustered == true) +} +``` + + +Instead of assigning users to notifications, you can also add the `user_groups` +attribute with a list of user groups to the `Notification` object. Icinga 2 will +send notifications to all group members. + +> **Note** +> +> Only users who have been notified of a problem before (`Warning`, `Critical`, `Unknown` +states for services, `Down` for hosts) will receive `Recovery` notifications. + +Icinga 2 v2.10 allows you to configure `Acknowledgement` and/or `Recovery` +without a `Problem` notification. These notifications will be sent without +any problem notifications beforehand, and can be used for e.g. ticket systems. + +``` + types = [ Acknowledgement, Recovery ] +``` + +### Notifications: Users from Host/Service + +A common pattern is to store the users and user groups +on the host or service objects instead of the notification +object itself.
+ +The sample configuration provided in [hosts.conf](04-configuration.md#hosts-conf) and [notifications.conf](04-configuration.md#notifications-conf) +already provides an example for this use case. + +> **Tip** +> +> Please make sure to read the [apply](03-monitoring-basics.md#using-apply) and +> [custom variable values](03-monitoring-basics.md#custom-variables-values) chapter to +> fully understand these examples. + + +Specify the user and groups as nested custom variable on the host object: + +``` +object Host "icinga2-agent1.localdomain" { + [...] + + vars.notification["mail"] = { + groups = [ "icingaadmins" ] + users = [ "icingaadmin" ] + } + vars.notification["sms"] = { + users = [ "icingaadmin" ] + } +} +``` + +As you can see, there is the option to use two different notification +apply rules here: One for `mail` and one for `sms`. + +This example assigns the `users` and `groups` nested keys from the `notification` +custom variable to the actual notification object attributes. + +Since errors are hard to debug if host objects don't specify the required +configuration attributes, you can add a safety condition which logs which +host object is affected. + +``` +critical/config: Host 'icinga2-client3.localdomain' does not specify required user/user_groups configuration attributes for notification 'mail-icingaadmin'. +``` + +You can also use the [script debugger](20-script-debugger.md#script-debugger) for more advanced insights. + +``` +apply Notification "mail-host-notification" to Host { + [...] + + /* Log which host does not specify required user/user_groups attributes. This will fail immediately during config validation and help a lot.
*/ + if (len(host.vars.notification.mail.users) == 0 && len(host.vars.notification.mail.user_groups) == 0) { + log(LogCritical, "config", "Host '" + host.name + "' does not specify required user/user_groups configuration attributes for notification '" + name + "'.") + } + + users = host.vars.notification.mail.users + user_groups = host.vars.notification.mail.groups + + assign where host.vars.notification.mail && typeof(host.vars.notification.mail) == Dictionary +} + +apply Notification "sms-host-notification" to Host { + [...] + + /* Log which host does not specify required user/user_groups attributes. This will fail immediately during config validation and help a lot. */ + if (len(host.vars.notification.sms.users) == 0 && len(host.vars.notification.sms.user_groups) == 0) { + log(LogCritical, "config", "Host '" + host.name + "' does not specify required user/user_groups configuration attributes for notification '" + name + "'.") + } + + users = host.vars.notification.sms.users + user_groups = host.vars.notification.sms.groups + + assign where host.vars.notification.sms && typeof(host.vars.notification.sms) == Dictionary +} +``` + +The example above uses [typeof](18-library-reference.md#global-functions-typeof) as safety function to ensure that +the `mail` key really provides a dictionary as value. Otherwise +the configuration validation could fail if an admin adds something +like this on another host: + +``` + vars.notification.mail = "yes" +``` + + +You can also do a more fine granular assignment on the service object: + +``` +apply Service "http" { + [...] + + vars.notification["mail"] = { + groups = [ "icingaadmins" ] + users = [ "icingaadmin" ] + } + + [...] +} +``` + +This notification apply rule is different to the one above. The service +notification users and groups are inherited from the service and if not set, +from the host object. A default user is set too. + +``` +apply Notification "mail-service-notification" to Service { + [...] 
+ + if (service.vars.notification.mail.users) { + users = service.vars.notification.mail.users + } else if (host.vars.notification.mail.users) { + users = host.vars.notification.mail.users + } else { + /* Default user who receives everything. */ + users = [ "icingaadmin" ] + } + + if (service.vars.notification.mail.groups) { + user_groups = service.vars.notification.mail.groups + } else if (host.vars.notification.mail.groups) { + user_groups = host.vars.notification.mail.groups + } + + assign where ( host.vars.notification.mail && typeof(host.vars.notification.mail) == Dictionary ) || ( service.vars.notification.mail && typeof(service.vars.notification.mail) == Dictionary ) +} +``` + +### Notification Escalations + +When a problem notification is sent and a problem still exists at the time of re-notification +you may want to escalate the problem to the next support level. A different approach +is to configure the default notification by email, and escalate the problem via SMS +if not already solved. + +You can define notification start and end times as additional configuration +attributes making the `Notification` object a so-called `notification escalation`. +Using templates you can share the basic notification attributes such as users or the +`interval` (and override them for the escalation then). + +Using the example from above, you can define additional users being escalated for SMS +notifications between start and end time. + +``` +object User "icinga-oncall-2nd-level" { + display_name = "Icinga 2nd Level" + + vars.mobile = "+1 555 424642" +} + +object User "icinga-oncall-1st-level" { + display_name = "Icinga 1st Level" + + vars.mobile = "+1 555 424642" +} +``` + +Define an additional [NotificationCommand](03-monitoring-basics.md#notification-commands) for SMS notifications. + +> **Note** +> +> The example is not complete as there are many different SMS providers. 
+> Please note that sending SMS notifications will require an SMS provider +> or local hardware with an active SIM card. + +``` +object NotificationCommand "sms-notification" { + command = [ + PluginDir + "/send_sms_notification", + "$mobile$", + "..." +} +``` + +The two new notification escalations are added onto the local host +and its service `ping4` using the `generic-notification` template. +The user `icinga-oncall-2nd-level` will get notified by SMS (`sms-notification` +command) after `30m` until `1h`. + +> **Note** +> +> The `interval` was set to 15m in the `generic-notification` +> template example. Lower that value in your escalations by using a secondary +> template or by overriding the attribute directly in the `notifications` array +> position for `escalation-sms-2nd-level`. + +If the problem does not get resolved nor acknowledged preventing further notifications, +the `escalation-sms-1st-level` user will be escalated `1h` after the initial problem was +notified, but only for one hour (`2h` as `end` key for the `times` dictionary). + +``` +apply Notification "mail" to Service { + import "generic-notification" + + command = "mail-notification" + users = [ "icingaadmin" ] + + assign where service.name == "ping4" +} + +apply Notification "escalation-sms-2nd-level" to Service { + import "generic-notification" + + command = "sms-notification" + users = [ "icinga-oncall-2nd-level" ] + + times = { + begin = 30m + end = 1h + } + + assign where service.name == "ping4" +} + +apply Notification "escalation-sms-1st-level" to Service { + import "generic-notification" + + command = "sms-notification" + users = [ "icinga-oncall-1st-level" ] + + times = { + begin = 1h + end = 2h + } + + assign where service.name == "ping4" +} +``` + +### Notification Delay + +Sometimes the problem in question should not be announced when the notification is due +(the object reaching the `HARD` state), but after a certain period. 
In Icinga 2 +you can use the `times` dictionary and set `begin = 15m` as key and value if you want to +postpone the notification window for 15 minutes. Leave out the `end` key -- if not set, +Icinga 2 will not check against any end time for this notification. Make sure to +specify a relatively low notification `interval` to get notified soon enough again. + +``` +apply Notification "mail" to Service { + import "generic-notification" + + command = "mail-notification" + users = [ "icingaadmin" ] + + interval = 5m + + times.begin = 15m // delay notification window + + assign where service.name == "ping4" +} +``` + +### Disable Re-notifications + +If you prefer to be notified only once, you can disable re-notifications by setting the +`interval` attribute to `0`. + +``` +apply Notification "notify-once" to Service { + import "generic-notification" + + command = "mail-notification" + users = [ "icingaadmin" ] + + interval = 0 // disable re-notification + + assign where service.name == "ping4" +} +``` + +### Notification Filters by State and Type + +If there are no notification state and type filter attributes defined at the `Notification` +or `User` object, Icinga 2 assumes that all states and types are being notified. + +Available state and type filters for notifications are: + +``` +template Notification "generic-notification" { + + states = [ OK, Warning, Critical, Unknown ] + types = [ Problem, Acknowledgement, Recovery, Custom, FlappingStart, + FlappingEnd, DowntimeStart, DowntimeEnd, DowntimeRemoved ] +} +``` + + +## Commands + +Icinga 2 uses three different command object types to specify how +checks should be performed, notifications should be sent, and +events should be handled. + +### Check Commands + +[CheckCommand](09-object-types.md#objecttype-checkcommand) objects define the command line how +a check is called. 
+ +[CheckCommand](09-object-types.md#objecttype-checkcommand) objects are referenced by +[Host](09-object-types.md#objecttype-host) and [Service](09-object-types.md#objecttype-service) objects +using the `check_command` attribute. + +> **Note** +> +> Make sure that the [checker](11-cli-commands.md#enable-features) feature is enabled in order to +> execute checks. + +#### Integrate the Plugin with a CheckCommand Definition + +Unless you have done so already, download your check plugin and put it +into the [PluginDir](04-configuration.md#constants-conf) directory. The following example uses the +`check_mysql` plugin contained in the Monitoring Plugins package. + +The plugin path and all command arguments are made a list of +double-quoted string arguments for proper shell escaping. + +Call the `check_disk` plugin with the `--help` parameter to see +all available options. Our example defines warning (`-w`) and +critical (`-c`) thresholds for the disk usage. Without any +partition defined (`-p`) it will check all local partitions. + +``` +icinga@icinga2 $ /usr/lib64/nagios/plugins/check_mysql --help +... +This program tests connections to a MySQL server + +Usage: +check_mysql [-d database] [-H host] [-P port] [-s socket] +[-u user] [-p password] [-S] [-l] [-a cert] [-k key] +[-C ca-cert] [-D ca-dir] [-L ciphers] [-f optfile] [-g group] +``` + +Next step is to understand how [command parameters](03-monitoring-basics.md#command-passing-parameters) +are being passed from a host or service object, and add a [CheckCommand](09-object-types.md#objecttype-checkcommand) +definition based on these required parameters and/or default values. + +Please continue reading in the [plugins section](05-service-monitoring.md#service-monitoring-plugins) for additional integration examples. + +#### Passing Check Command Parameters from Host or Service + +Check command parameters are defined as custom variables which can be accessed as runtime macros +by the executed check command. 
+ +The check command parameters for ITL provided plugin check command definitions are documented +[here](10-icinga-template-library.md#icinga-template-library), for example +[disk](10-icinga-template-library.md#plugin-check-command-disk). + +In order to practice passing command parameters you should [integrate your own plugin](03-monitoring-basics.md#command-plugin-integration). + +The following example will use `check_mysql` provided by the [Monitoring Plugins](https://www.monitoring-plugins.org/). + +Define the default check command custom variables, for example `mysql_user` and `mysql_password` +(freely definable naming schema) and optional their default threshold values. You can +then use these custom variables as runtime macros for [command arguments](03-monitoring-basics.md#command-arguments) +on the command line. + +> **Tip** +> +> Use a common command type as prefix for your command arguments to increase +> readability. `mysql_user` helps understanding the context better than just +> `user` as argument. + +The default custom variables can be overridden by the custom variables +defined in the host or service using the check command `my-mysql`. The custom variables +can also be inherited from a parent template using additive inheritance (`+=`). + +``` +# vim /etc/icinga2/conf.d/commands.conf + +object CheckCommand "my-mysql" { + command = [ PluginDir + "/check_mysql" ] //constants.conf -> const PluginDir + + arguments = { + "-H" = "$mysql_host$" + "-u" = { + required = true + value = "$mysql_user$" + } + "-p" = "$mysql_password$" + "-P" = "$mysql_port$" + "-s" = "$mysql_socket$" + "-a" = "$mysql_cert$" + "-d" = "$mysql_database$" + "-k" = "$mysql_key$" + "-C" = "$mysql_ca_cert$" + "-D" = "$mysql_ca_dir$" + "-L" = "$mysql_ciphers$" + "-f" = "$mysql_optfile$" + "-g" = "$mysql_group$" + "-S" = { + set_if = "$mysql_check_slave$" + description = "Check if the slave thread is running properly." 
+ } + "-l" = { + set_if = "$mysql_ssl$" + description = "Use ssl encryption" + } + } + + vars.mysql_check_slave = false + vars.mysql_ssl = false + vars.mysql_host = "$address$" +} +``` + +The check command definition also sets `mysql_host` to the `$address$` default value. You can override +this command parameter if for example your MySQL host is not running on the same server's IP address. + +Make sure to pass all required command parameters, such as `mysql_user`, `mysql_password` and `mysql_database`. +`MysqlUsername` and `MysqlPassword` are specified as [global constants](04-configuration.md#constants-conf) +in this example. + +``` +# vim /etc/icinga2/conf.d/services.conf + +apply Service "mysql-icinga-db-health" { + import "generic-service" + + check_command = "my-mysql" + + vars.mysql_user = MysqlUsername + vars.mysql_password = MysqlPassword + + vars.mysql_database = "icinga" + vars.mysql_host = "192.168.33.11" + + assign where match("icinga2*", host.name) + ignore where host.vars.no_health_check == true +} +``` + + +Take a different example: The example host configuration in [hosts.conf](04-configuration.md#hosts-conf) +also applies an `ssh` service check. Your host's ssh port is not the default `22`, but set to `2022`. +You can pass the command parameter as custom variable `ssh_port` directly inside the service apply rule +inside [services.conf](04-configuration.md#services-conf): + +``` +apply Service "ssh" { + import "generic-service" + + check_command = "ssh" + vars.ssh_port = 2022 //custom command parameter + + assign where (host.address || host.address6) && host.vars.os == "Linux" +} +``` + +If you prefer this being configured at the host instead of the service, modify the host configuration +object instead. The runtime macro resolving order is described [here](03-monitoring-basics.md#macro-evaluation-order). + +``` +object Host "icinga2-agent1.localdomain" { +...
+ vars.ssh_port = 2022 +} +``` + +#### Passing Check Command Parameters Using Apply For + +The host `localhost` with the generated services from the `basic-partitions` dictionary (see +[apply for](03-monitoring-basics.md#using-apply-for) for details) checks a basic set of disk partitions +with modified custom variables (warning thresholds at `10%`, critical thresholds at `5%` +free disk space). + +The custom variable `disk_partition` can either hold a single string or an array of +string values for passing multiple partitions to the `check_disk` check plugin. + +``` +object Host "my-server" { + import "generic-host" + address = "127.0.0.1" + address6 = "::1" + + vars.local_disks["basic-partitions"] = { + disk_partitions = [ "/", "/tmp", "/var", "/home" ] + } +} + +apply Service for (disk => config in host.vars.local_disks) { + import "generic-service" + check_command = "my-disk" + + vars += config + + vars.disk_wfree = "10%" + vars.disk_cfree = "5%" +} +``` + + +More details on using arrays in custom variables can be found in +[this chapter](03-monitoring-basics.md#custom-variables). + + +#### Command Arguments + +Next to the short `command` array specified in the command object, +it is advised to define plugin/script parameters in the `arguments` +dictionary attribute. + +The value of the `--parameter` key itself is a dictionary with additional +keys. They allow to create generic command objects and are also for documentation +purposes, e.g. with the `description` field copying the plugin's help text in there. +The Icinga Director uses this field to show the argument's purpose when selecting it. + +``` + arguments = { + "--parameter" = { + description = "..." + value = "..." + } + } +``` + +Each argument is optional by default and is omitted if +the value is not set. + +Learn more about integrating plugins with CheckCommand +objects in [this chapter](05-service-monitoring.md#service-monitoring-plugin-checkcommand). 
+ +There are additional possibilities for creating a command only once, +with different parameters and arguments, shown below. + +##### Command Arguments: Value + +In order to find out about the command argument, call the plugin's help +or consult the README. + +``` +./check_systemd.py --help + +... + + -u UNIT, --unit UNIT Name of the systemd unit that is beeing tested. +``` + +Whenever the long parameter name is available, prefer this over the short one. + +``` + arguments = { + "--unit" = { + + } + } +``` + +Define a unique `prefix` for the command's specific arguments. Best practice is to follow this schema: + +``` +<command name>_<parameter name> +``` + +Therefore use `systemd_` as prefix, and use the long plugin parameter name `unit` inside the [runtime macro](03-monitoring-basics.md#runtime-macros) +syntax. + +``` + arguments = { + "--unit" = { + value = "$systemd_unit$" + } + } +``` + +In order to specify a default value, specify +a [custom variable](03-monitoring-basics.md#custom-variables) inside +the CheckCommand object. + +``` + vars.systemd_unit = "icinga2" +``` + +This value can be overridden from the host/service +object as command parameters. + + +##### Command Arguments: Description + +Best practice, also inside the [ITL](10-icinga-template-library.md#icinga-template-library), is to always +copy the command parameter help output into the `description` +field of your check command. + +Learn more about integrating plugins with CheckCommand +objects in [this chapter](05-service-monitoring.md#service-monitoring-plugin-checkcommand). + +With the [example above](03-monitoring-basics.md#command-arguments-value), +inspect the parameter's help text. + +``` +./check_systemd.py --help + +... + + -u UNIT, --unit UNIT Name of the systemd unit that is beeing tested. +``` + +Copy this into the command arguments `description` entry. + +``` + arguments = { + "--unit" = { + value = "$systemd_unit$" + description = "Name of the systemd unit that is beeing tested."
+ } + } +``` + +##### Command Arguments: Required + +Specifies whether this command argument is required, or not. By +default all arguments are optional. + +> **Tip** +> +> Good plugins provide optional parameters in square brackets, e.g. `[-w SECONDS]`. + +The `required` field can be toggled with a [boolean](17-language-reference.md#boolean-literals) value. + +``` + arguments = { + "--host" = { + value = "..." + description = "..." + required = true + } + } +``` + +Whenever the check is executed and the argument is missing, Icinga +logs an error. This allows to better debug configuration errors +instead of sometimes unreadable plugin errors when parameters are +missing. + +##### Command Arguments: Skip Key + +The `arguments` attribute requires a key, empty values are not allowed. +To overcome this for parameters which don't need the name in front of +the value, use the `skip_key` [boolean](17-language-reference.md#boolean-literals) toggle. + +``` + command = [ PrefixDir + "/bin/icingacli", "businessprocess", "process", "check" ] + + arguments = { + "--process" = { + value = "$icingacli_businessprocess_process$" + description = "Business process to monitor" + skip_key = true + required = true + order = -1 + } + } +``` + +The service specifies the [custom variable](03-monitoring-basics.md#custom-variables) `icingacli_businessprocess_process`. + +``` + vars.icingacli_businessprocess_process = "bp-shop-web" +``` + +This results in this command line without the `--process` parameter: + +```bash +'/bin/icingacli' 'businessprocess' 'process' 'check' 'bp-shop-web' +``` + +You can use this method to put everything into the `arguments` attribute +in a defined order and without keys. This avoids entries in the `command` +attributes too. + + +##### Command Arguments: Set If + +This can be used for the following scenarios: + +**Parameters without value, e.g. 
`--sni`.** + +``` + command = [ PluginDir + "/check_http"] + + arguments = { + "--sni" = { + set_if = "$http_sni$" + } + } +``` + +Whenever a host/service object sets the `http_sni` [custom variable](03-monitoring-basics.md#custom-variables) +to `true`, the parameter is added to the command line. + +```bash +'/usr/lib64/nagios/plugins/check_http' '--sni' +``` + +[Numeric](17-language-reference.md#numeric-literals) values are allowed too. + +**Parameters with value, but additionally controlled with an extra custom variable boolean flag.** + +The following example is taken from the [postgres](10-icinga-template-library.md#plugin-contrib-command-postgres) CheckCommand. The host +parameter should use a `value` but only whenever the `postgres_unixsocket` +[custom variable](03-monitoring-basics.md#custom-variables) is set to false. + +Note: `set_if` is using a runtime lambda function because the value +is evaluated at runtime. This is explained in [this chapter](08-advanced-topics.md#use-functions-object-config). + +``` + command = [ PluginContribDir + "/check_postgres.pl" ] + + arguments = { + "-H" = { + value = "$postgres_host$" + set_if = {{ macro("$postgres_unixsocket$") == false }} + description = "hostname(s) to connect to; defaults to none (Unix socket)" + } +``` + +An executed check for this host and services ... + +``` +object Host "postgresql-cluster" { + // ... + + vars.postgres_host = "192.168.56.200" + vars.postgres_unixsocket = false +} +``` + +... uses the following command line: + +```bash +'/usr/lib64/nagios/plugins/check_postgres.pl' '-H' '192.168.56.200' +``` + +Host/service objects which set `postgres_unixsocket` to `true` don't add the `-H` parameter +and its value to the command line. + +References: [abbreviated lambda syntax](17-language-reference.md#nullary-lambdas), [macro](18-library-reference.md#scoped-functions-macro). + +##### Command Arguments: Order + +Plugins may require parameters in a special order. One after the other, +or e.g. one parameter always in the first position.
+ +``` + arguments = { + "--first" = { + value = "..." + description = "..." + order = -5 + } + "--second" = { + value = "..." + description = "..." + order = -4 + } + "--last" = { + value = "..." + description = "..." + order = 99 + } + } +``` + +Keep in mind that positional arguments need to be tested thoroughly. + +##### Command Arguments: Repeat Key + +Parameters can use [Array](17-language-reference.md#array) as value type. Whenever Icinga encounters +an array, it repeats the parameter key and each value element by default. + +``` + command = [ NscpPath + "\\nscp.exe", "client" ] + + arguments = { + "-a" = { + value = "$nscp_arguments$" + description = "..." + repeat_key = true + } + } +``` + +On a host/service object, specify the `nscp_arguments` [custom variable](03-monitoring-basics.md#custom-variables) +as an array. + +``` + vars.nscp_arguments = [ "exclude=sppsvc", "exclude=ShellHWDetection" ] +``` + +This translates into the following command line: + +``` +nscp.exe 'client' '-a' 'exclude=sppsvc' '-a' 'exclude=ShellHWDetection' +``` + +If the plugin requires you to pass the list without repeating the key, +set `repeat_key = false` in the argument definition. + +``` + command = [ NscpPath + "\\nscp.exe", "client" ] + + arguments = { + "-a" = { + value = "$nscp_arguments$" + description = "..." + repeat_key = false + } + } +``` + +This translates into the following command line: + +``` +nscp.exe 'client' '-a' 'exclude=sppsvc' 'exclude=ShellHWDetection' +``` + + +##### Command Arguments: Key + +The `arguments` attribute requires unique keys. Sometimes, you'll +need to override this in the resulting command line with same key +names. Therefore you can specifically override the arguments key. + +``` +arguments = { + "--key1" = { + value = "..." + key = "-specialkey" + } + "--key2" = { + value = "..." + key = "-specialkey" + } +} +``` + +This results in the following command line: + +``` + '-specialkey' '...' '-specialkey' '...' 
+``` + +#### Environment Variables + +The `env` command object attribute specifies a list of environment variables with values calculated +from custom variables which should be exported as environment variables prior to executing the command. + +This is useful for example for hiding sensitive information on the command line output +when passing credentials to database checks: + +``` +object CheckCommand "mysql" { + command = [ PluginDir + "/check_mysql" ] + + arguments = { + "-H" = "$mysql_address$" + "-d" = "$mysql_database$" + } + + vars.mysql_address = "$address$" + vars.mysql_database = "icinga" + vars.mysql_user = "icinga_check" + vars.mysql_pass = "password" + + env.MYSQLUSER = "$mysql_user$" + env.MYSQLPASS = "$mysql_pass$" +} +``` + +The executed command line visible with `ps` or `top` looks like this and hides +the database credentials in the user's environment. + +```bash +/usr/lib/nagios/plugins/check_mysql -H 192.168.56.101 -d icinga +``` + +> **Note** +> +> If the CheckCommand also supports setting the parameter in the command line, +> ensure to use a different name for the custom variable. Otherwise Icinga 2 +> adds the command line parameter. + +If a specific CheckCommand object provided with the [Icinga Template Library](10-icinga-template-library.md#icinga-template-library) +needs additional environment variables, you can import it into a new custom +CheckCommand object and add additional `env` keys. Example for the [mysql_health](10-icinga-template-library.md#plugin-contrib-command-mysql_health) +CheckCommand: + +``` +object CheckCommand "mysql_health_env" { + import "mysql_health" + + // https://labs.consol.de/nagios/check_mysql_health/ + env.NAGIOS__SERVICEMYSQL_USER = "$mysql_health_env_username$" + env.NAGIOS__SERVICEMYSQL_PASS = "$mysql_health_env_password$" +} +``` + +Specify the custom variables `mysql_health_env_username` and `mysql_health_env_password` +in the service object then. 
+ +> **Note** +> +> Keep in mind that the values are still visible with the [debug console](11-cli-commands.md#cli-command-console) +> and the inspect mode in the [Icinga Director](https://icinga.com/docs/director/latest/). + +You can also set global environment variables in the application's +sysconfig configuration file, e.g. `HOME` or specific library paths +for Oracle. Beware that these environment variables can be used +by any CheckCommand object and executed plugin and can leak sensitive +information. + +### Notification Commands + +[NotificationCommand](09-object-types.md#objecttype-notificationcommand) +objects define how notifications are delivered to external interfaces +(email, XMPP, IRC, Twitter, etc.). +[NotificationCommand](09-object-types.md#objecttype-notificationcommand) +objects are referenced by [Notification](09-object-types.md#objecttype-notification) +objects using the `command` attribute. + +> **Note** +> +> Make sure that the [notification](11-cli-commands.md#enable-features) feature is enabled +> in order to execute notification commands. + +While it's possible to specify an entire notification command right +in the NotificationCommand object it is generally advisable to create a +shell script in the `/etc/icinga2/scripts` directory and have the +NotificationCommand object refer to that. + +A fresh Icinga 2 install comes with two example scripts for host +and service notifications by email. Based on the Icinga 2 runtime macros +(such as `$service.output$` for the current check output) it's possible +to send email to the user(s) associated with the notification itself +(`$user.email$`). Feel free to take these scripts as a starting point +for your own individual notification solution - and keep in mind that +nearly everything is technically possible. + +Information needed to generate notifications is passed to the scripts as +arguments.
The NotificationCommand objects `mail-host-notification` and +`mail-service-notification` correspond to the shell scripts +`mail-host-notification.sh` and `mail-service-notification.sh` in +`/etc/icinga2/scripts` and define default values for arguments. These +defaults can always be overwritten locally. + +> **Note** +> +> This example requires the `mail` binary installed on the Icinga 2 +> master. +> +> Depending on the distribution, you need a local mail transfer +> agent (MTA) such as Postfix, Exim or Sendmail in order +> to send emails. +> +> These tools virtually provide the `mail` binary executed +> by the notification scripts below. + +#### mail-host-notification + +The `mail-host-notification` NotificationCommand object uses the +example notification script located in `/etc/icinga2/scripts/mail-host-notification.sh`. + +Here is a quick overview of the arguments that can be used. See also [host runtime +macros](03-monitoring-basics.md#-host-runtime-macros) for further +information. + + Name | Description + -------------------------------|--------------------------------------- + `notification_date` | **Required.** Date and time. Defaults to `$icinga.long_date_time$`. + `notification_hostname` | **Required.** The host's `FQDN`. Defaults to `$host.name$`. + `notification_hostdisplayname` | **Required.** The host's display name. Defaults to `$host.display_name$`. + `notification_hostoutput` | **Required.** Output from host check. Defaults to `$host.output$`. + `notification_useremail` | **Required.** The notification's recipient(s). Defaults to `$user.email$`. + `notification_hoststate` | **Required.** Current state of host. Defaults to `$host.state$`. + `notification_type` | **Required.** Type of notification. Defaults to `$notification.type$`. + `notification_address` | **Optional.** The host's IPv4 address. Defaults to `$address$`. + `notification_address6` | **Optional.** The host's IPv6 address. Defaults to `$address6$`. 
+ `notification_author` | **Optional.** Comment author. Defaults to `$notification.author$`. + `notification_comment` | **Optional.** Comment text. Defaults to `$notification.comment$`. + `notification_from` | **Optional.** Define a valid From: string (e.g. `"Icinga 2 Host Monitoring <icinga@example.com>"`). Requires `GNU mailutils` (Debian/Ubuntu) or `mailx` (RHEL/SUSE). + `notification_icingaweb2url` | **Optional.** Define URL to your Icinga Web 2 (e.g. `"https://www.example.com/icingaweb2"`) + `notification_logtosyslog` | **Optional.** Set `true` to log notification events to syslog; useful for debugging. Defaults to `false`. + +#### mail-service-notification + +The `mail-service-notification` NotificationCommand object uses the +example notification script located in `/etc/icinga2/scripts/mail-service-notification.sh`. + +Here is a quick overview of the arguments that can be used. See also [service runtime +macros](03-monitoring-basics.md#-service-runtime-macros) for further +information. + + Name | Description + ----------------------------------|--------------------------------------- + `notification_date` | **Required.** Date and time. Defaults to `$icinga.long_date_time$`. + `notification_hostname` | **Required.** The host's `FQDN`. Defaults to `$host.name$`. + `notification_servicename` | **Required.** The service name. Defaults to `$service.name$`. + `notification_hostdisplayname` | **Required.** Host display name. Defaults to `$host.display_name$`. + `notification_servicedisplayname` | **Required.** Service display name. Defaults to `$service.display_name$`. + `notification_serviceoutput` | **Required.** Output from service check. Defaults to `$service.output$`. + `notification_useremail` | **Required.** The notification's recipient(s). Defaults to `$user.email$`. + `notification_servicestate` | **Required.** Current state of service. Defaults to `$service.state$`. + `notification_type` | **Required.** Type of notification. Defaults to `$notification.type$`.
+ `notification_address` | **Optional.** The host's IPv4 address. Defaults to `$address$`. + `notification_address6` | **Optional.** The host's IPv6 address. Defaults to `$address6$`. + `notification_author` | **Optional.** Comment author. Defaults to `$notification.author$`. + `notification_comment` | **Optional.** Comment text. Defaults to `$notification.comment$`. + `notification_from` | **Optional.** Define a valid From: string (e.g. `"Icinga 2 Host Monitoring <icinga@example.com>"`). Requires `GNU mailutils` (Debian/Ubuntu) or `mailx` (RHEL/SUSE). + `notification_icingaweb2url` | **Optional.** Define URL to your Icinga Web 2 (e.g. `"https://www.example.com/icingaweb2"`) + `notification_logtosyslog` | **Optional.** Set `true` to log notification events to syslog; useful for debugging. Defaults to `false`. + + +## Dependencies + +Icinga 2 uses host and service [Dependency](09-object-types.md#objecttype-dependency) objects +for determining their network reachability. + +A service can depend on a host, and vice versa. A service has an implicit +dependency (parent) to its host. A host to host dependency acts implicitly +as host parent relation. +When dependencies are calculated, not only the immediate parent is taken into +account but all parents are inherited. + +The `parent_host_name` and `parent_service_name` attributes are mandatory for +service dependencies, `parent_host_name` is required for host dependencies. +[Apply rules](03-monitoring-basics.md#using-apply) will allow you to +[determine these attributes](03-monitoring-basics.md#dependencies-apply-custom-variables) in a more +dynamic fashion if required. + +``` +parent_host_name = "core-router" +parent_service_name = "uplink-port" +``` + +Notifications are suppressed by default if a host or service becomes unreachable. +You can control that option by defining the `disable_notifications` attribute.
+ +``` +disable_notifications = false +``` + +If the dependency should be triggered in the parent object's soft state, you +need to set `ignore_soft_states` to `false`. + +The dependency state filter must be defined based on the parent object being +either a host (`Up`, `Down`) or a service (`OK`, `Warning`, `Critical`, `Unknown`). + +The following example will make the dependency fail and trigger it if the parent +object is **not** in one of these states: + +``` +states = [ OK, Critical, Unknown ] +``` + +> **In other words** +> +> If the parent service object changes into the `Warning` state, this +> dependency will fail and render all child objects (hosts or services) unreachable. + +You can determine the child's reachability by querying the `last_reachable` attribute +via the [REST API](12-icinga2-api.md#icinga2-api). + +> **Note** +> +> Reachability calculation depends on fresh and processed check results. If dependencies +> disable checks for child objects, this won't work reliably. + +### Implicit Dependencies for Services on Host + +Icinga 2 automatically adds an implicit dependency for services on their host. That way +service notifications are suppressed when a host is `DOWN` or `UNREACHABLE`. This dependency +does not overwrite other dependencies and implicitly sets `disable_notifications = true` and +`states = [ Up ]` for all service objects. + +Service checks are still executed. If you want to prevent them from happening, you can +apply the following dependency to all services setting their host as `parent_host_name` +and disabling the checks. `assign where true` matches on all `Service` objects. + +``` +apply Dependency "disable-host-service-checks" to Service { + disable_checks = true + assign where true +} +``` + +### Dependencies for Network Reachability + +A common scenario is the Icinga 2 server behind a router. 
Checking internet +access by pinging the Google DNS server `google-dns` is a common method, but +will fail in case the `dsl-router` host is down. Therefore the example below +defines a host dependency which acts implicitly as parent relation too. + +Furthermore the host may be reachable but ping probes are dropped by the +router's firewall. In case the `dsl-router`'s `ping4` service check fails, all +further checks for the `ping4` service on host `google-dns` service should +be suppressed. This is achieved by setting the `disable_checks` attribute to `true`. + +``` +object Host "dsl-router" { + import "generic-host" + address = "192.168.1.1" +} + +object Host "google-dns" { + import "generic-host" + address = "8.8.8.8" +} + +apply Service "ping4" { + import "generic-service" + + check_command = "ping4" + + assign where host.address +} + +apply Dependency "internet" to Host { + parent_host_name = "dsl-router" + disable_checks = true + disable_notifications = true + + assign where host.name != "dsl-router" +} + +apply Dependency "internet" to Service { + parent_host_name = "dsl-router" + parent_service_name = "ping4" + disable_checks = true + + assign where host.name != "dsl-router" +} +``` + + + + +### Apply Dependencies based on Custom Variables + +You can use [apply rules](03-monitoring-basics.md#using-apply) to set parent or +child attributes, e.g. `parent_host_name` to other objects' +attributes. + +A common example are virtual machines hosted on a master. The object +name of that master is auto-generated from your CMDB or VMWare inventory +into the host's custom variables (or a generic template for your +cloud). 
+ +Define your master host object: + +``` +/* your master */ +object Host "master.example.com" { + import "generic-host" +} +``` + +Add a generic template defining all common host attributes: + +``` +/* generic template for your virtual machines */ +template Host "generic-vm" { + import "generic-host" +} +``` + +Add a template for all hosts on your example.com cloud setting +custom variable `vm_parent` to `master.example.com`: + +``` +template Host "generic-vm-example.com" { + import "generic-vm" + vars.vm_parent = "master.example.com" +} +``` + +Define your guest hosts: + +``` +object Host "www.example1.com" { + import "generic-vm-example.com" +} + +object Host "www.example2.com" { + import "generic-vm-example.com" +} +``` + +Apply the host dependency to all child hosts importing the +`generic-vm` template and set the `parent_host_name` +to the previously defined custom variable `host.vars.vm_parent`. + +``` +apply Dependency "vm-host-to-parent-master" to Host { + parent_host_name = host.vars.vm_parent + assign where "generic-vm" in host.templates +} +``` + +You can extend this example, and make your services depend on the +`master.example.com` host too. Their local scope allows you to use +`host.vars.vm_parent` similar to the example above. + +``` +apply Dependency "vm-service-to-parent-master" to Service { + parent_host_name = host.vars.vm_parent + assign where "generic-vm" in host.templates +} +``` + +That way you don't need to wait for your guest hosts becoming +unreachable when the master host goes down. Instead the services +will detect their reachability immediately when executing checks. + +> **Note** +> +> This method with setting locally scoped variables only works in +> apply rules, but not in object definitions. + + +### Dependencies for Agent Checks + +Another good example are agent based checks.
You would define a health check +for the agent daemon responding to your requests, and make all other services +querying that daemon depend on that health check. + +``` +apply Service "agent-health" { + check_command = "cluster-zone" + + display_name = "cluster-health-" + host.name + + /* This follows the convention that the agent zone name is the FQDN which is the same as the host object name. */ + vars.cluster_zone = host.name + + assign where host.vars.agent_endpoint +} +``` + +Now, make all other agent based checks dependent on the OK state of the `agent-health` +service. + +``` +apply Dependency "agent-health-check" to Service { + parent_service_name = "agent-health" + + states = [ OK ] // Fail if the parent service state switches to NOT-OK + disable_notifications = true + + assign where host.vars.agent_endpoint // Automatically assigns all agent endpoint checks as child services on the matched host + ignore where service.name == "agent-health" // Avoid a self reference from child to parent +} + +``` + +This is described in detail in [this chapter](06-distributed-monitoring.md#distributed-monitoring-health-checks). + +### Event Commands + +Unlike notifications, event commands for hosts/services are called on every +check execution if one of these conditions matches: + +* The host/service is in a [soft state](03-monitoring-basics.md#hard-soft-states) +* The host/service state changes into a [hard state](03-monitoring-basics.md#hard-soft-states) +* The host/service state recovers from a [soft or hard state](03-monitoring-basics.md#hard-soft-states) to [OK](03-monitoring-basics.md#service-states)/[Up](03-monitoring-basics.md#host-states) + +[EventCommand](09-object-types.md#objecttype-eventcommand) objects are referenced by +[Host](09-object-types.md#objecttype-host) and [Service](09-object-types.md#objecttype-service) objects +with the `event_command` attribute. 
+ +Therefore the `EventCommand` object should define a command line +evaluating the current service state and other service runtime attributes +available through runtime variables. Runtime macros such as `$service.state_type$` +and `$service.state$` will be processed by Icinga 2 and help with fine-granular +triggered events. + +If the host/service is located on a client as [command endpoint](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) +the event command will be executed on the client itself (similar to the check +command). + +Common use case scenarios are a failing HTTP check which requires an immediate +restart via event command. Another example would be an application that is not +responding and therefore requires a restart. You can also use event handlers +to forward more details on state changes and events than the typical notification +alerts provide. + +#### Use Event Commands to Send Information from the Master + +This example sends a web request from the master node to an external tool +for every event triggered on a `businessprocess` service. + +Define an [EventCommand](09-object-types.md#objecttype-eventcommand) +object `send_to_businesstool` which sends state changes to the external tool. + +``` +object EventCommand "send_to_businesstool" { + command = [ + "/usr/bin/curl", + "-s", + "-X PUT" + ] + + arguments = { + "-H" = { + value ="$businesstool_url$" + skip_key = true + } + "-d" = "$businesstool_message$" + } + + vars.businesstool_url = "http://localhost:8080/businesstool" + vars.businesstool_message = "$host.name$ $service.name$ $service.state$ $service.state_type$ $service.check_attempt$" +} +``` + +Set the `event_command` attribute to `send_to_businesstool` on the Service. 
+ +``` +object Service "businessprocess" { + host_name = "businessprocess" + + check_command = "icingacli-businessprocess" + vars.icingacli_businessprocess_process = "icinga" + vars.icingacli_businessprocess_config = "training" + + event_command = "send_to_businesstool" +} +``` + +In order to test this scenario you can run: + +```bash +nc -l 8080 +``` + +This allows you to catch the web request. You can also enable the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) +and search for the event command execution log message. + +```bash +tail -f /var/log/icinga2/debug.log | grep EventCommand +``` + +Feed in a check result via REST API action [process-check-result](12-icinga2-api.md#icinga2-api-actions-process-check-result) +or via Icinga Web 2. + +Expected Result: + +``` +# nc -l 8080 +PUT /businesstool HTTP/1.1 +User-Agent: curl/7.29.0 +Host: localhost:8080 +Accept: */* +Content-Length: 47 +Content-Type: application/x-www-form-urlencoded + +businessprocess businessprocess CRITICAL SOFT 1 +``` + +#### Use Event Commands to Restart Service Daemon via Command Endpoint on Linux + +This example triggers a restart of the `httpd` service on the local system +when the `procs` service check executed via Command Endpoint fails. It only +triggers if the service state is `Critical` and attempts to restart the +service before a notification is sent. + +Requirements: + +* Icinga 2 as client on the remote node +* icinga user with sudo permissions to the httpd daemon + +Example on CentOS 7: + +``` +# visudo +icinga ALL=(ALL) NOPASSWD: /usr/bin/systemctl restart httpd +``` + +Note: Distributions might use a different name. On Debian/Ubuntu the service is called `apache2`. + +Define an [EventCommand](09-object-types.md#objecttype-eventcommand) object `restart_service` +which allows you to trigger local service restarts. Put it into a [global zone](06-distributed-monitoring.md#distributed-monitoring-global-zone-config-sync) +to sync its configuration to all clients. 
+ +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/global-templates/eventcommands.conf + +object EventCommand "restart_service" { + command = [ PluginDir + "/restart_service" ] + + arguments = { + "-s" = "$service.state$" + "-t" = "$service.state_type$" + "-a" = "$service.check_attempt$" + "-S" = "$restart_service$" + } + + vars.restart_service = "$procs_command$" +} +``` + +This event command triggers the following script which restarts the service. +The script is only executed if the service state is `CRITICAL`. Warning and Unknown states +are ignored as they do not indicate an immediate failure. + +``` +[root@icinga2-agent1.localdomain /]# vim /usr/lib64/nagios/plugins/restart_service + +#!/bin/bash + +while getopts "s:t:a:S:" opt; do + case $opt in + s) + servicestate=$OPTARG + ;; + t) + servicestatetype=$OPTARG + ;; + a) + serviceattempt=$OPTARG + ;; + S) + service=$OPTARG + ;; + esac +done + +if ( [ -z $servicestate ] || [ -z $servicestatetype ] || [ -z $serviceattempt ] || [ -z $service ] ); then + echo "USAGE: $0 -s servicestate -t servicestatetype -a serviceattempt -S service" + exit 3; +else + # Only restart on the third attempt of a critical event + if ( [ $servicestate == "CRITICAL" ] && [ $servicestatetype == "SOFT" ] && [ $serviceattempt -eq 3 ] ); then + sudo /usr/bin/systemctl restart $service + fi +fi + +[root@icinga2-agent1.localdomain /]# chmod +x /usr/lib64/nagios/plugins/restart_service +``` + +Add a service on the master node which is executed via command endpoint on the client. +Set the `event_command` attribute to `restart_service`, the name of the previously defined +EventCommand object. 
+ +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/icinga2-agent1.localdomain.conf + +object Service "Process httpd" { + check_command = "procs" + event_command = "restart_service" + max_check_attempts = 4 + + host_name = "icinga2-agent1.localdomain" + command_endpoint = "icinga2-agent1.localdomain" + + vars.procs_command = "httpd" + vars.procs_warning = "1:10" + vars.procs_critical = "1:" +} +``` + +In order to test this configuration just stop the `httpd` on the remote host `icinga2-agent1.localdomain`. + +``` +[root@icinga2-agent1.localdomain /]# systemctl stop httpd +``` + +You can enable the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) and search for the +executed command line. + +``` +[root@icinga2-agent1.localdomain /]# tail -f /var/log/icinga2/debug.log | grep restart_service +``` + +#### Use Event Commands to Restart Service Daemon via Command Endpoint on Windows + +This example triggers a restart of the `httpd` service on the remote system +when the `service-windows` service check executed via Command Endpoint fails. +It only triggers if the service state is `Critical` and attempts to restart the +service before a notification is sent. + +Requirements: + +* Icinga 2 as client on the remote node +* Icinga 2 service with permissions to execute Powershell scripts (which is the default) + +Define an [EventCommand](09-object-types.md#objecttype-eventcommand) object `restart_service-windows` +which allows to trigger local service restarts. Put it into a [global zone](06-distributed-monitoring.md#distributed-monitoring-global-zone-config-sync) +to sync its configuration to all clients. 
+ +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/global-templates/eventcommands.conf + +object EventCommand "restart_service-windows" { + command = [ + "C:\\Windows\\SysWOW64\\WindowsPowerShell\\v1.0\\powershell.exe", + PluginDir + "/restart_service.ps1" + ] + + arguments = { + "-ServiceState" = "$service.state$" + "-ServiceStateType" = "$service.state_type$" + "-ServiceAttempt" = "$service.check_attempt$" + "-Service" = "$restart_service$" + "; exit" = { + order = 99 + value = "$$LASTEXITCODE" + } + } + + vars.restart_service = "$service_win_service$" +} +``` + +This event command triggers the following script which restarts the service. +The script only is executed if the service state is `CRITICAL`. Warning and Unknown states +are ignored as they indicate not an immediate failure. + +Add the `restart_service.ps1` Powershell script into `C:\Program Files\Icinga2\sbin`: + +``` +param( + [string]$Service = '', + [string]$ServiceState = '', + [string]$ServiceStateType = '', + [int]$ServiceAttempt = '' + ) + +if (!$Service -Or !$ServiceState -Or !$ServiceStateType -Or !$ServiceAttempt) { + $scriptName = GCI $MyInvocation.PSCommandPath | Select -Expand Name; + Write-Host "USAGE: $scriptName -ServiceState servicestate -ServiceStateType servicestatetype -ServiceAttempt serviceattempt -Service service" -ForegroundColor red; + exit 3; +} + +# Only restart on the third attempt of a critical event +if ($ServiceState -eq "CRITICAL" -And $ServiceStateType -eq "SOFT" -And $ServiceAttempt -eq 3) { + Restart-Service $Service; +} + +exit 0; +``` + +Add a service on the master node which is executed via command endpoint on the client. +Set the `event_command` attribute to `restart_service-windows`, the name of the previously defined +EventCommand object. 
+ +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/icinga2-agent2.localdomain.conf + +object Service "Service httpd" { + check_command = "service-windows" + event_command = "restart_service-windows" + max_check_attempts = 4 + + host_name = "icinga2-agent2.localdomain" + command_endpoint = "icinga2-agent2.localdomain" + + vars.service_win_service = "httpd" +} +``` + +In order to test this configuration just stop the `httpd` on the remote host `icinga2-agent2.localdomain`. + +``` +C:> net stop httpd +``` + +You can enable the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) and search for the +executed command line in `C:\ProgramData\icinga2\var\log\icinga2\debug.log`. + + +#### Use Event Commands to Restart Service Daemon via SSH + +This example triggers a restart of the `httpd` daemon +via SSH when the `http` service check fails. + +Requirements: + +* SSH connection allowed (firewall, packet filters) +* icinga user with public key authentication +* icinga user with sudo permissions to restart the httpd daemon. 
+ +Example on Debian: + +``` +# ls /home/icinga/.ssh/ +authorized_keys + +# visudo +icinga ALL=(ALL) NOPASSWD: /etc/init.d/apache2 restart +``` + +Define a generic [EventCommand](09-object-types.md#objecttype-eventcommand) object `event_by_ssh` +which can be used for all event commands triggered using SSH: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/local_eventcommands.conf + +/* pass event commands through ssh */ +object EventCommand "event_by_ssh" { + command = [ PluginDir + "/check_by_ssh" ] + + arguments = { + "-H" = "$event_by_ssh_address$" + "-p" = "$event_by_ssh_port$" + "-C" = "$event_by_ssh_command$" + "-l" = "$event_by_ssh_logname$" + "-i" = "$event_by_ssh_identity$" + "-q" = { + set_if = "$event_by_ssh_quiet$" + } + "-w" = "$event_by_ssh_warn$" + "-c" = "$event_by_ssh_crit$" + "-t" = "$event_by_ssh_timeout$" + } + + vars.event_by_ssh_address = "$address$" + vars.event_by_ssh_quiet = false +} +``` + +The actual event command only passes the `event_by_ssh_command` attribute. +The `event_by_ssh_service` custom variable takes care of passing the correct +daemon name, while `test $service.state_id$ -gt 0` makes sure that the daemon +is only restarted when the service is not in an `OK` state. + +``` +object EventCommand "event_by_ssh_restart_service" { + import "event_by_ssh" + + //only restart the daemon if state > 0 (not-ok) + //requires sudo permissions for the icinga user + vars.event_by_ssh_command = "test $service.state_id$ -gt 0 && sudo systemctl restart $event_by_ssh_service$" +} +``` + + +Now set the `event_command` attribute to `event_by_ssh_restart_service` and tell it +which service should be restarted using the `event_by_ssh_service` attribute. 
+ +``` +apply Service "http" { + import "generic-service" + check_command = "http" + + event_command = "event_by_ssh_restart_service" + vars.event_by_ssh_service = "$host.vars.httpd_name$" + + //vars.event_by_ssh_logname = "icinga" + //vars.event_by_ssh_identity = "/home/icinga/.ssh/id_rsa.pub" + + assign where host.vars.httpd_name +} +``` + +Specify the `httpd_name` custom variable on the host to assign the +service and set the event handler service. + +``` +object Host "remote-http-host" { + import "generic-host" + address = "192.168.1.100" + + vars.httpd_name = "apache2" +} +``` + +In order to test this configuration just stop the `httpd` on the remote host `icinga2-agent1.localdomain`. + +``` +[root@icinga2-agent1.localdomain /]# systemctl stop httpd +``` + +You can enable the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) and search for the +executed command line. + +``` +[root@icinga2-agent1.localdomain /]# tail -f /var/log/icinga2/debug.log | grep by_ssh +``` diff --git a/doc/04-configuration.md b/doc/04-configuration.md new file mode 100644 index 0000000..316aaba --- /dev/null +++ b/doc/04-configuration.md @@ -0,0 +1,737 @@ +# Configuration + +The Icinga [configuration](https://icinga.com/products/configuration/) +can be easily managed with either the [Icinga Director](https://icinga.com/docs/director/latest/), +config management tools or plain text within the [Icinga DSL](04-configuration.md#configuration). + +Before looking into web based configuration or any sort of automation, +we recommend to start with the configuration files and fully understand +the possibilities of the Icinga DSL (Domain Specific Language). + +The package installation provides example configuration which already +monitors the local Icinga server. You can view the monitoring details +in Icinga Web. 
+ +![Icinga Web Local Server](images/configuration/icinga_web_local_server.png) + +The [Language Reference](17-language-reference.md#language-reference) chapter explains details +on value types (string, number, dictionaries, etc.) and the general configuration syntax. + +## Configuration Best Practice + +If you are ready to configure additional hosts, services, notifications, +dependencies, etc., you should think about the requirements first and then +decide for a possible strategy. + +There are many ways of creating Icinga 2 configuration objects: + +* The [Icinga Director](https://icinga.com/docs/director/latest/) as web based and/or automation configuration interface + * [Monitoring Automation with Icinga - The Director](https://icinga.com/2019/04/23/monitoring-automation-with-icinga-the-director/) +* Manually with your preferred editor, for example vi(m), nano, notepad, etc. +* Generated by a [configuration management tool](13-addons.md#configuration-tools) such as Puppet, Chef, Ansible, etc. +* A custom exporter script from your CMDB or inventory tool +* etc. + +Find the best strategy for your own configuration and ask yourself the following questions: + +* Do your hosts share a common group of services (for example linux hosts with disk, load, etc. checks)? +* Only a small set of users receives notifications and escalations for all hosts/services? + +If you can at least answer one of these questions with yes, look for the +[apply rules](03-monitoring-basics.md#using-apply) logic instead of defining objects on a per +host and service basis. + +* You are required to define specific configuration for each host/service? +* Does your configuration generation tool already know about the host-service-relationship? + +Then you should look for the object specific configuration setting `host_name` etc. accordingly. + +You decide on the "best" layout for configuration files and directories. 
Ensure that +the [icinga2.conf](04-configuration.md#icinga2-conf) configuration file includes them. + +Consider these ideas: + +* tree-based on locations, host groups, specific host attributes with sub levels of directories. +* flat `hosts.conf`, `services.conf`, etc. files for rule based configuration. +* generated configuration with one file per host and a global configuration for groups, users, etc. +* one big file generated from an external application (probably a bad idea for maintaining changes). +* your own. + +In either way of choosing the right strategy you should additionally check the following: + +* Are there any specific attributes describing the host/service you could set as `vars` custom variables? +You can later use them for applying assign/ignore rules, or export them into external interfaces. +* Put hosts into hostgroups, services into servicegroups and use these attributes for your apply rules. +* Use templates to store generic attributes for your objects and apply rules making your configuration more readable. +Details can be found in the [using templates](03-monitoring-basics.md#object-inheritance-using-templates) chapter. +* Apply rules may overlap. Keep a central place (for example, [services.conf](04-configuration.md#services-conf) or [notifications.conf](04-configuration.md#notifications-conf)) storing +the configuration instead of defining apply rules deep in your configuration tree. +* Every plugin used as check, notification or event command requires a `Command` definition. +Further details can be looked up in the [check commands](03-monitoring-basics.md#check-commands) chapter. + +If you are planning to use a distributed monitoring setup with master, satellite and client installations +take the configuration location into account too. Everything configured on the master, synced to all other +nodes? Or any specific local configuration (e.g. health checks)? 
+ +There is a detailed chapter on [distributed monitoring scenarios](06-distributed-monitoring.md#distributed-monitoring-scenarios). +Please ensure to have read the [introduction](06-distributed-monitoring.md#distributed-monitoring) at first glance. + +If you happen to have further questions, do not hesitate to join the +[community forum](https://community.icinga.com) +and ask community members for their experience and best practices. + +## Your Configuration + +If you prefer to organize your own local object tree, you can also remove +`include_recursive "conf.d"` from your icinga2.conf file. + +Create a new configuration directory, e.g. `objects.d` and include it +in your icinga2.conf file. + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/objects.d + +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/icinga2.conf + +/* Local object configuration on our master instance. */ +include_recursive "objects.d" +``` + +This approach is used by the [Icinga 2 Puppet module](https://icinga.com/products/integrations/puppet/). + +If you plan to set up a distributed setup with HA clusters and clients, please refer to [this chapter](06-distributed-monitoring.md#distributed-monitoring-top-down) +for examples with `zones.d` as configuration directory. + +## Configuration Overview + +### icinga2.conf + +An example configuration file is installed for you in `/etc/icinga2/icinga2.conf`. + +Here's a brief description of the example configuration: + +``` +/** +* Icinga 2 configuration file +* -- this is where you define settings for the Icinga application including +* which hosts/services to check. +* +* For an overview of all available configuration options please refer +* to the documentation that is distributed as part of Icinga 2. +*/ +``` + +Icinga 2 supports [C/C++-style comments](17-language-reference.md#comments). + +/** +* The constants.conf defines global constants. 
+*/ +include "constants.conf" + +The `include` directive can be used to include other files. + +``` +/** +* The zones.conf defines zones for a cluster setup. +* Not required for single instance setups. +*/ +include "zones.conf" +``` + +The [Icinga Template Library](10-icinga-template-library.md#icinga-template-library) provides a set of common templates +and [CheckCommand](03-monitoring-basics.md#check-commands) definitions. + +``` +/** +* The Icinga Template Library (ITL) provides a number of useful templates +* and command definitions. +* Common monitoring plugin command definitions are included separately. +*/ +include <itl> +include <plugins> +include <plugins-contrib> +include <manubulon> + +/** +* This includes the Icinga 2 Windows plugins. These command definitions +* are required on a master node when a client is used as command endpoint. +*/ +include <windows-plugins> + +/** +* This includes the NSClient++ check commands. These command definitions +* are required on a master node when a client is used as command endpoint. +*/ +include <nscp> + +/** +* The features-available directory contains a number of configuration +* files for features which can be enabled and disabled using the +* icinga2 feature enable / icinga2 feature disable CLI commands. +* These commands work by creating and removing symbolic links in +* the features-enabled directory. +*/ +include "features-enabled/*.conf" +``` + +This `include` directive takes care of including the configuration files for all +the features which have been enabled with `icinga2 feature enable`. See +[Enabling/Disabling Features](11-cli-commands.md#enable-features) for more details. + +``` +/** +* Although in theory you could define all your objects in this file +* the preferred way is to create separate directories and files in the conf.d +* directory. Each of these files must have the file extension ".conf". +*/ +include_recursive "conf.d" +``` + +You can put your own configuration files in the [conf.d](04-configuration.md#conf-d) directory. 
This +directive makes sure that all of your own configuration files are included. + +### constants.conf + +The `constants.conf` configuration file can be used to define global constants. + +By default, you need to make sure to set these constants: + +* The `PluginDir` constant must be set to the path where the [Monitoring Project](https://www.monitoring-plugins.org/) plugins are installed. +This constant is used by a number of +[built-in check command definitions](10-icinga-template-library.md#icinga-template-library). +* The `NodeName` constant defines your local node name. Should be set to FQDN which is the default +if not set. This constant is required for local host configuration, monitoring remote clients and +cluster setup. + +Example: + +``` +/* The directory which contains the plugins from the Monitoring Plugins project. */ +const PluginDir = "/usr/lib64/nagios/plugins" + +/* The directory which contains the Manubulon plugins. +* Check the documentation, chapter "SNMP Manubulon Plugin Check Commands", for details. +*/ +const ManubulonPluginDir = "/usr/lib64/nagios/plugins" + +/* Our local instance name. By default this is the server's hostname as returned by `hostname --fqdn`. +* This should be the common name from the API certificate. +*/ +//const NodeName = "localhost" + +/* Our local zone name. */ +const ZoneName = NodeName + +/* Secret key for remote node tickets */ +const TicketSalt = "" +``` + +The `ZoneName` and `TicketSalt` constants are required for remote client +and distributed setups. The `node setup/wizard` CLI tools take care of +populating these values. + +### zones.conf + +This file can be used to specify the required [Zone](09-object-types.md#objecttype-zone) +and [Endpoint](09-object-types.md#objecttype-endpoint) configuration object for +[distributed monitoring](06-distributed-monitoring.md#distributed-monitoring). + +By default the `NodeName` and `ZoneName` [constants](04-configuration.md#constants-conf) will be used. 
+ +It also contains several [global zones](06-distributed-monitoring.md#distributed-monitoring-global-zone-config-sync) +for distributed monitoring environments. + +Please ensure to modify this configuration with real names i.e. use the FQDN +mentioned in [this chapter](06-distributed-monitoring.md#distributed-monitoring-conventions) +for your `Zone` and `Endpoint` object names. + +### The conf.d Directory + +This directory contains **example configuration** which should help you get started +with monitoring the local host and its services. It is included in the +[icinga2.conf](04-configuration.md#icinga2-conf) configuration file by default. + +It can be used as reference example for your own configuration strategy. +Just keep in mind to include the main directories in the +[icinga2.conf](04-configuration.md#icinga2-conf) file. + +> **Note** +> +> You can remove the include directive in [icinga2.conf](04-configuration.md#icinga2-conf) +> if you prefer your own way of deploying Icinga 2 configuration. + +Further details on configuration best practice and how to build your +own strategy is described in [this chapter](04-configuration.md#configuration-best-practice). + +Available configuration files which are installed by default: + +* [hosts.conf](04-configuration.md#hosts-conf) +* [services.conf](04-configuration.md#services-conf) +* [users.conf](04-configuration.md#users-conf) +* [notifications.conf](04-configuration.md#notifications-conf) +* [commands.conf](04-configuration.md#commands-conf) +* [groups.conf](04-configuration.md#groups-conf) +* [templates.conf](04-configuration.md#templates-conf) +* [downtimes.conf](04-configuration.md#downtimes-conf) +* [timeperiods.conf](04-configuration.md#timeperiods-conf) +* [api-users.conf](04-configuration.md#api-users-conf) +* [app.conf](04-configuration.md#app-conf) + +#### hosts.conf + +The `hosts.conf` file contains an example host based on your +`NodeName` setting in [constants.conf](04-configuration.md#constants-conf). 
You +can use global constants for your object names instead of string +values. + +The `import` keyword is used to import the `generic-host` template which +takes care of setting up the host check command to `hostalive`. If you +require a different check command, you can override it in the object definition. + +The `vars` attribute can be used to define custom variables which are available +for check and notification commands. Most of the [Plugin Check Commands](10-icinga-template-library.md#icinga-template-library) +in the Icinga Template Library require an `address` attribute. + +The custom variable `os` is evaluated by the `linux-servers` group in +[groups.conf](04-configuration.md#groups-conf) making the local host a member. + +The example host will show you how to: + +* define http vhost attributes for the `http` service apply rule defined +in [services.conf](04-configuration.md#services-conf). +* define disks (all, specific `/`) and their attributes for the `disk` +service apply rule defined in [services.conf](04-configuration.md#services-conf). +* define notification types (`mail`) and set the groups attribute. This +will be used by notification apply rules in [notifications.conf](04-configuration.md#notifications-conf). + +If you've installed [Icinga Web 2](https://icinga.com/docs/icinga-web-2/latest/doc/02-Installation/), you can +uncomment the http vhost attributes and reload Icinga 2. The apply +rules in [services.conf](04-configuration.md#services-conf) will automatically +generate a new service checking the `/icingaweb2` URI using the `http` +check. + +``` +/* +* Host definitions with object attributes +* used for apply rules for Service, Notification, +* Dependency and ScheduledDowntime objects. +* +* Tip: Use `icinga2 object list --type Host` to +* list all host objects after running +* configuration validation (`icinga2 daemon -C`). +*/ + +/* + * This is an example host based on your + * local host's FQDN. 
Specify the NodeName + * constant in `constants.conf` or use your + * own description, e.g. "db-host-1". + */ + +object Host NodeName { + /* Import the default host template defined in `templates.conf`. */ + import "generic-host" + + /* Specify the address attributes for checks e.g. `ssh` or `http`. */ + address = "127.0.0.1" + address6 = "::1" + + /* Set custom variable `os` for hostgroup assignment in `groups.conf`. */ + vars.os = "Linux" + + /* Define http vhost attributes for service apply rules in `services.conf`. */ + vars.http_vhosts["http"] = { + http_uri = "/" + } + /* Uncomment if you've successfully installed Icinga Web 2. */ + //vars.http_vhosts["Icinga Web 2"] = { + // http_uri = "/icingaweb2" + //} + + /* Define disks and attributes for service apply rules in `services.conf`. */ + vars.disks["disk"] = { + /* No parameters. */ + } + vars.disks["disk /"] = { + disk_partitions = "/" + } + + /* Define notification mail attributes for notification apply rules in `notifications.conf`. */ + vars.notification["mail"] = { + /* The UserGroup `icingaadmins` is defined in `users.conf`. */ + groups = [ "icingaadmins" ] + } +} +``` + +This is only the host object definition. Now we'll need to make sure that this +host and your additional hosts are getting [services](04-configuration.md#services-conf) applied. + +> **Tip** +> +> If you don't understand all the attributes and how to use [apply rules](17-language-reference.md#apply), +> don't worry -- the [monitoring basics](03-monitoring-basics.md#monitoring-basics) chapter will explain +> that in detail. + +#### services.conf + +These service [apply rules](17-language-reference.md#apply) will show you how to monitor +the local host, but also allow you to re-use or modify them for +your own requirements. + +You should define all your service apply rules in `services.conf` +or any other central location keeping them organized. 
+ +By default, the local host will be monitored by the following services + +Service(s) | Applied on host(s) +--------------------------------------------|------------------------ +`load`, `procs`, `swap`, `users`, `icinga` | The `NodeName` host only. +`ping4`, `ping6` | All hosts with `address` resp. `address6` attribute. +`ssh` | All hosts with `address` and `vars.os` set to `Linux` +`http`, optional: `Icinga Web 2` | All hosts with custom variable `http_vhosts` defined as dictionary. +`disk`, `disk /` | All hosts with custom variable `disks` defined as dictionary. + +The Debian packages also include an additional `apt` service check applied to the local host. + +The command object `icinga` for the embedded health check is provided by the +[Icinga Template Library (ITL)](10-icinga-template-library.md#icinga-template-library) while `http_ip`, `ssh`, `load`, `processes`, +`users` and `disk` are all provided by the [Plugin Check Commands](10-icinga-template-library.md#icinga-template-library) +which we enabled earlier by including the `itl` and `plugins` configuration file. + + +Example `load` service apply rule: + +``` +apply Service "load" { +import "generic-service" + +check_command = "load" + +/* Used by the ScheduledDowntime apply rule in `downtimes.conf`. */ +vars.backup_downtime = "02:00-03:00" + +assign where host.name == NodeName +} +``` + +The `apply` keyword can be used to create new objects which are associated with +another group of objects. You can `import` existing templates, define (custom) +attributes. + +The custom variable `backup_downtime` is defined to a specific timerange string. +This variable value will be used for applying a `ScheduledDowntime` object to +these services in [downtimes.conf](04-configuration.md#downtimes-conf). + +In this example the `assign where` condition is a boolean expression which is +evaluated for all objects of type `Host` and a new service with name "load" +is created for each matching host. 
[Expression operators](17-language-reference.md#expression-operators) +may be used in `assign where` conditions. + +Multiple `assign where` conditions can be combined with `AND` using the `&&` operator +as shown in the `ssh` example: + +``` +apply Service "ssh" { + import "generic-service" + + check_command = "ssh" + + assign where host.address && host.vars.os == "Linux" +} +``` + +In this example, the service `ssh` is applied to all hosts having the `address` +attribute defined `AND` having the custom variable `os` set to the string +`Linux`. +You can modify this condition to match multiple expressions by combining `AND` +and `OR` using `&&` and `||` [operators](17-language-reference.md#expression-operators), for example +`assign where host.address && (host.vars.os == "Linux" || host.vars.os == "Windows")`. + + +A more advanced example is shown by the `http` and `disk` service apply +rules. While one `apply` rule for `ssh` will only create a service for matching +hosts, you can go one step further: Generate apply rules based on array items +or dictionary key-value pairs. + +The idea is simple: Your host in [hosts.conf](04-configuration.md#hosts-conf) defines the +`disks` dictionary as custom variable in `vars`. + +Remember the example from [hosts.conf](04-configuration.md#hosts-conf): + +``` +... + /* Define disks and attributes for service apply rules in `services.conf`. */ + vars.disks["disk"] = { + /* No parameters. */ + } + vars.disks["disk /"] = { + disk_partitions = "/" + } +... +``` + +This dictionary contains multiple service names we want to monitor. `disk` +should just check all available disks, while `disk /` will pass an additional +parameter `disk_partitions` to the check command. + +You'll recognize that the naming is important -- that's the very same name +as it is passed from a service to a check command argument. Read about services +and passing check commands in [this chapter](03-monitoring-basics.md#command-passing-parameters).
+ +Using `apply Service for` omits the service name, it will take the key stored in +the `disk` variable in `key => config` as new service object name. + +The `for` keyword expects a loop definition, for example `key => value in dictionary` +as known from Perl and other scripting languages. + +Once defined like this, the `apply` rule defined below will do the following: + +* only match hosts with `host.vars.disks` defined through the `assign where` condition +* loop through all entries in the `host.vars.disks` dictionary. That's `disk` and `disk /` as keys. +* call `apply` on each, and set the service object name from the provided key +* inside apply, the `generic-service` template is imported +* defining the [disk](10-icinga-template-library.md#plugin-check-command-disk) check command requiring command arguments like `disk_partitions` +* adding the `config` dictionary items to `vars`. Simply said, there's now `vars.disk_partitions` defined for the +generated service + +Configuration example: + +``` +apply Service for (disk => config in host.vars.disks) { + import "generic-service" + + check_command = "disk" + + vars += config +} +``` + +A similar example is used for the `http` services. That way you can make your +host the information provider for all apply rules. Define them once, and only +manage your hosts. + +Look into [notifications.conf](04-configuration.md#notifications-conf) how this technique is used +for applying notifications to hosts and services using their type and user +attributes. + +Don't forget to install the check plugins required by the hosts and services and their check commands. + +Further details on the monitoring configuration can be found in the +[monitoring basics](03-monitoring-basics.md#monitoring-basics) chapter. + +#### users.conf + +Defines the `icingaadmin` User and the `icingaadmins` UserGroup.
The latter is used in +[hosts.conf](04-configuration.md#hosts-conf) for defining a custom host attribute later used in +[notifications.conf](04-configuration.md#notifications-conf) for notification apply rules. + +``` +object User "icingaadmin" { + import "generic-user" + + display_name = "Icinga 2 Admin" + groups = [ "icingaadmins" ] + + email = "icinga@localhost" +} + +object UserGroup "icingaadmins" { + display_name = "Icinga 2 Admin Group" +} +``` + +#### notifications.conf + +Notifications for check alerts are an integral part of your +Icinga 2 monitoring stack. + +The examples in this file define two notification apply rules for hosts and services. +Both `apply` rules match on the same condition: They are only applied if the +nested dictionary attribute `notification.mail` is set. + +Please note that the `to` keyword is important in [notification apply rules](03-monitoring-basics.md#using-apply-notifications) +defining whether these notifications are applied to hosts or services. +The `import` keyword imports the specific mail templates defined in [templates.conf](04-configuration.md#templates-conf). + +The `interval` attribute is not explicitly set -- it [defaults to 30 minutes](09-object-types.md#objecttype-notification). + +By setting the `user_groups` to the value provided by the +respective [host.vars.notification.mail](04-configuration.md#hosts-conf) attribute we'll +implicitly use the `icingaadmins` UserGroup defined in [users.conf](04-configuration.md#users-conf).
+ +``` +apply Notification "mail-icingaadmin" to Host { + import "mail-host-notification" + + user_groups = host.vars.notification.mail.groups + users = host.vars.notification.mail.users + + assign where host.vars.notification.mail +} + +apply Notification "mail-icingaadmin" to Service { + import "mail-service-notification" + + user_groups = host.vars.notification.mail.groups + users = host.vars.notification.mail.users + + assign where host.vars.notification.mail +} +``` + +More details on defining notifications and their additional attributes such as +filters can be read in [this chapter](03-monitoring-basics.md#alert-notifications). + +#### commands.conf + +This is the place where your own command configuration can be defined. By default +it only contains the notification commands used by the notification templates defined in [templates.conf](04-configuration.md#templates-conf). + +You can freely customize these notification commands, and adapt them for your needs. +Read more on that topic [here](03-monitoring-basics.md#notification-commands). + +#### groups.conf + +The example host defined in [hosts.conf](04-configuration.md#hosts-conf) already has the +custom variable `os` set to `Linux` and is therefore automatically +a member of the host group `linux-servers`. + +This is done by using the [group assign](17-language-reference.md#group-assign) expressions similar +to previously seen [apply rules](03-monitoring-basics.md#using-apply). + +``` +object HostGroup "linux-servers" { + display_name = "Linux Servers" + + assign where host.vars.os == "Linux" +} + +object HostGroup "windows-servers" { + display_name = "Windows Servers" + + assign where host.vars.os == "Windows" +} +``` + +Service groups can be grouped together by similar pattern matches. +The [match function](18-library-reference.md#global-functions-match) expects a wildcard match string +and the attribute string to match with.
+ +``` +object ServiceGroup "ping" { + display_name = "Ping Checks" + + assign where match("ping*", service.name) +} + +object ServiceGroup "http" { + display_name = "HTTP Checks" + + assign where match("http*", service.check_command) +} + +object ServiceGroup "disk" { + display_name = "Disk Checks" + + assign where match("disk*", service.check_command) +} +``` + +#### templates.conf + +Most of the example configuration objects use generic global templates by +default: + +``` +template Host "generic-host" { + max_check_attempts = 5 + check_interval = 1m + retry_interval = 30s + + check_command = "hostalive" +} + +template Service "generic-service" { + max_check_attempts = 3 + check_interval = 1m + retry_interval = 30s +} +``` + +The `hostalive` check command is part of the +[Plugin Check Commands](10-icinga-template-library.md#icinga-template-library). + +``` +template Notification "mail-host-notification" { + command = "mail-host-notification" + + states = [ Up, Down ] + types = [ Problem, Acknowledgement, Recovery, Custom, + FlappingStart, FlappingEnd, + DowntimeStart, DowntimeEnd, DowntimeRemoved ] + + period = "24x7" +} + +template Notification "mail-service-notification" { + command = "mail-service-notification" + + states = [ OK, Warning, Critical, Unknown ] + types = [ Problem, Acknowledgement, Recovery, Custom, + FlappingStart, FlappingEnd, + DowntimeStart, DowntimeEnd, DowntimeRemoved ] + + period = "24x7" +} +``` + +More details on `Notification` object attributes can be found [here](09-object-types.md#objecttype-notification). + + +#### downtimes.conf + +The `load` service apply rule defined in [services.conf](04-configuration.md#services-conf) defines +the `backup_downtime` custom variable. + +The ScheduledDowntime apply rule uses this attribute to define the default value +for the time ranges required for recurring downtime slots. + +Learn more about downtimes in [this chapter](08-advanced-topics.md#downtimes). 
+ +``` +apply ScheduledDowntime "backup-downtime" to Service { + author = "icingaadmin" + comment = "Scheduled downtime for backup" + + ranges = { + monday = service.vars.backup_downtime + tuesday = service.vars.backup_downtime + wednesday = service.vars.backup_downtime + thursday = service.vars.backup_downtime + friday = service.vars.backup_downtime + saturday = service.vars.backup_downtime + sunday = service.vars.backup_downtime + } + + assign where service.vars.backup_downtime != "" +} +``` + +#### timeperiods.conf + +This file contains the default timeperiod definitions for `24x7`, `9to5` +and `never`. TimePeriod objects are referenced by `*period` +objects such as hosts, services or notifications. + + +#### api-users.conf + +Provides the default [ApiUser](09-object-types.md#objecttype-apiuser) object +named "root" for the [API authentication](12-icinga2-api.md#icinga2-api-authentication). + +#### app.conf + +Provides the default [IcingaApplication](09-object-types.md#objecttype-icingaapplication) +object named "app" for additional settings such as disabling notifications +globally, etc. diff --git a/doc/05-service-monitoring.md b/doc/05-service-monitoring.md new file mode 100644 index 0000000..f376309 --- /dev/null +++ b/doc/05-service-monitoring.md @@ -0,0 +1,998 @@ +# Service Monitoring + +The power of Icinga 2 lies in its modularity. There are thousands of +community plugins available next to the standard plugins provided by +the [Monitoring Plugins project](https://www.monitoring-plugins.org). + +Start your research on [Icinga Exchange](https://exchange.icinga.com) +and look which services are already [covered](05-service-monitoring.md#service-monitoring-overview). 
+ +The [requirements chapter](05-service-monitoring.md#service-monitoring-requirements) guides you +through the plugin setup, tests and their integration with an [existing](05-service-monitoring.md#service-monitoring-plugin-checkcommand) +or [new](05-service-monitoring.md#service-monitoring-plugin-checkcommand-new) CheckCommand object +and host/service objects inside the [Director](05-service-monitoring.md#service-monitoring-plugin-checkcommand-integration-director) +or [Icinga config files](05-service-monitoring.md#service-monitoring-plugin-checkcommand-integration-config-files). +It also adds hints on [modifying](05-service-monitoring.md#service-monitoring-plugin-checkcommand-modify) existing commands. + +Plugins follow the [Plugin API specification](05-service-monitoring.md#service-monitoring-plugin-api) +which is enriched with examples and also code examples to get you started with +[your own plugin](05-service-monitoring.md#service-monitoring-plugin-new). + + + +## Requirements + +### Plugins + +All existing Icinga or Nagios plugins work with Icinga 2. Community +plugins can be found for example on [Icinga Exchange](https://exchange.icinga.com). + +The recommended way of setting up these plugins is to copy them +into the `PluginDir` directory. + +If you have plugins with many dependencies, consider creating a +custom RPM/DEB package which handles the required libraries and binaries. + +Configuration management tools such as Puppet, Ansible, Chef or Saltstack +also help with automatically installing the plugins on different +operating systems. They can also help with installing the required +dependencies, e.g. Python libraries, Perl modules, etc. + +### Plugin Setup + +Good plugins provide installations and configuration instructions +in their docs and/or README on GitHub. + +Sometimes dependencies are not listed, or your distribution differs from the one +described. 
Try running the plugin after setup and [ensure it works](05-service-monitoring.md#service-monitoring-plugins-it-works). + +#### Ensure it works + +Prior to using the check plugin with Icinga 2 you should ensure that it is working properly +by trying to run it on the console using whichever user Icinga 2 is running as: + +RHEL/CentOS/Fedora + +```bash +sudo -u icinga /usr/lib64/nagios/plugins/check_mysql_health --help +``` + +Debian/Ubuntu + +```bash +sudo -u nagios /usr/lib/nagios/plugins/check_mysql_health --help +``` + +Additional libraries may be required for some plugins. Please consult the plugin +documentation and/or the included README file for installation instructions. +Sometimes plugins contain hard-coded paths to other components. Instead of changing +the plugin it might be easier to create a symbolic link to make sure it doesn't get +overwritten during the next update. + +Sometimes there are plugins which do not exactly fit your requirements. +In that case you can modify an existing plugin or just write your own. + +#### Plugin Dependency Errors + +Plugins can be scripts (Shell, Python, Perl, Ruby, PHP, etc.) +or compiled binaries (C, C++, Go). + +These scripts/binaries may require additional libraries +which must be installed on every system they are executed. + +> **Tip** +> +> Don't test the plugins on your master instance, instead +> do that on the satellites and clients which execute the +> checks. + +There are errors, now what? Typical errors are missing libraries, +binaries or packages. + +##### Python Example + +Example for a Python plugin which uses the `tinkerforge` module +to query a network service: + +``` +ImportError: No module named tinkerforge.ip_connection +``` + +Its [documentation](https://github.com/NETWAYS/check_tinkerforge#installation) +points to installing the `tinkerforge` Python module. 
+ +##### Perl Example + +Example for a Perl plugin which uses SNMP: + +``` +Can't locate Net/SNMP.pm in @INC (you may need to install the Net::SNMP module) +``` + +Prior to installing the Perl module via CPAN, look for a distribution +specific package, e.g. `libnet-snmp-perl` on Debian/Ubuntu or `perl-Net-SNMP` +on RHEL/CentOS. + + +#### Optional: Custom Path + +If you are not using the default `PluginDir` directory, you +can create a custom plugin directory and constant +and reference this in the created CheckCommand objects. + +Create a common directory e.g. `/opt/monitoring/plugins` +and install the plugin there. + +```bash +mkdir -p /opt/monitoring/plugins +cp check_snmp_int.pl /opt/monitoring/plugins +chmod +x /opt/monitoring/plugins/check_snmp_int.pl +``` + +Next create a new global constant, e.g. `CustomPluginDir` +in your [constants.conf](04-configuration.md#constants-conf) +configuration file: + +``` +vim /etc/icinga2/constants.conf + +const PluginDir = "/usr/lib/nagios/plugins" +const CustomPluginDir = "/opt/monitoring/plugins" +``` + +### CheckCommand Definition + +Each plugin requires a [CheckCommand](09-object-types.md#objecttype-checkcommand) object in your +configuration which can be used in the [Service](09-object-types.md#objecttype-service) or +[Host](09-object-types.md#objecttype-host) object definition. + +Please check if the Icinga 2 package already provides an +[existing CheckCommand definition](10-icinga-template-library.md#icinga-template-library). + +If that's the case, thoroughly check the required parameters and integrate the check command +into your host and service objects. Best practice is to run the plugin on the CLI +with the required parameters first. + +Example for database size checks with [check_mysql_health](10-icinga-template-library.md#plugin-contrib-command-mysql_health). 
+ +```bash +/usr/lib64/nagios/plugins/check_mysql_health --hostname '127.0.0.1' --username root --password icingar0xx --mode sql --name 'select sum(data_length + index_length) / 1024 / 1024 from information_schema.tables where table_schema = '\''icinga'\'';' '--name2' 'db_size' --units 'MB' --warning 4096 --critical 8192 +``` + +The parameter names inside the ITL commands follow the +`<command name>_<parameter name>` schema. + +#### Icinga Director Integration + +Navigate into `Commands > External Commands` and search for `mysql_health`. +Select `mysql_health` and navigate into the `Fields` tab. + +In order to access the parameters, the Director requires you to first +define the needed custom data fields: + +* `mysql_health_hostname` +* `mysql_health_username` and `mysql_health_password` +* `mysql_health_mode` +* `mysql_health_name`, `mysql_health_name2` and `mysql_health_units` +* `mysql_health_warning` and `mysql_health_critical` + +Create a new host template and object where you'll put generic +settings like `mysql_health_hostname` (if it differs from the host's +`address` attribute) and `mysql_health_username` and `mysql_health_password`. + +Create a new service template for `mysql-health` and set the `mysql_health` +as check command. You can also define a default for `mysql_health_mode`. + +Next, create a service apply rule or a new service set which gets assigned +to matching host objects. + + +#### Icinga Config File Integration + +Create or modify a host object which stores +the generic database defaults and prepares details +for a service apply for rule. + +``` +object Host "icinga2-master1.localdomain" { + check_command = "hostalive" + address = "..."
+ + // Database listens locally, not external + vars.mysql_health_hostname = "127.0.0.1" + + // Basic database size checks for Icinga DBs + vars.databases["icinga"] = { + mysql_health_warning = 4096 //MB + mysql_health_critical = 8192 //MB + } + vars.databases["icingaweb2"] = { + mysql_health_warning = 4096 //MB + mysql_health_critical = 8192 //MB + } +} +``` + +The host object prepares the database details and thresholds already +for advanced [apply for](03-monitoring-basics.md#using-apply-for) rules. It also uses +conditions to fetch host specified values, or set default values. + +``` +apply Service "db-size-" for (db_name => config in host.vars.databases) { + check_interval = 1m + retry_interval = 30s + + check_command = "mysql_health" + + if (config.mysql_health_username) { + vars.mysql_health_username = config.mysql_health_username + } else { + vars.mysql_health_username = "root" + } + if (config.mysql_health_password) { + vars.mysql_health_password = config.mysql_health_password + } else { + vars.mysql_health_password = "icingar0xx" + } + + vars.mysql_health_mode = "sql" + vars.mysql_health_name = "select sum(data_length + index_length) / 1024 / 1024 from information_schema.tables where table_schema = '" + db_name + "';" + vars.mysql_health_name2 = "db_size" + vars.mysql_health_units = "MB" + + if (config.mysql_health_warning) { + vars.mysql_health_warning = config.mysql_health_warning + } + if (config.mysql_health_critical) { + vars.mysql_health_critical = config.mysql_health_critical + } + + vars += config +} +``` + +#### New CheckCommand + +This chapter describes how to add a new CheckCommand object for a plugin. + +Please make sure to follow these conventions when adding a new command object definition: + +* Use [command arguments](03-monitoring-basics.md#command-arguments) whenever possible. The `command` attribute +must be an array in `[ ... ]` for shell escaping. +* Define a unique `prefix` for the command's specific arguments.
Best practice is to follow this schema: + +``` +<command name>_<parameter name> +``` + +That way you can safely set them on host/service level and you'll always know which command they control. +* Use command argument default values, e.g. for thresholds. +* Use [advanced conditions](09-object-types.md#objecttype-checkcommand) like `set_if` definitions. + +Before starting with the CheckCommand definition, please check +the existing objects available inside the ITL. They follow best +practices and are maintained by developers and our community. + +This example picks a new plugin called [check_systemd](https://exchange.icinga.com/joseffriedrich/check_systemd) +uploaded to Icinga Exchange in June 2019. + +First, [install](05-service-monitoring.md#service-monitoring-plugins-setup) the plugin and ensure +that [it works](05-service-monitoring.md#service-monitoring-plugins-it-works). Then run it with the +`--help` parameter to see the actual parameters (docs might be outdated). + +``` +./check_systemd.py --help + +usage: check_systemd.py [-h] [-c SECONDS] [-e UNIT | -u UNIT] [-v] [-V] + [-w SECONDS] + +... + +optional arguments: + -h, --help show this help message and exit + -c SECONDS, --critical SECONDS + Startup time in seconds to result in critical status. + -e UNIT, --exclude UNIT + Exclude a systemd unit from the checks. This option + can be applied multiple times. For example: -e mnt- + data.mount -e task.service. + -u UNIT, --unit UNIT Name of the systemd unit that is beeing tested. + -v, --verbose Increase output verbosity (use up to 3 times). + -V, --version show program's version number and exit + -w SECONDS, --warning SECONDS + Startup time in seconds to result in warning status. +``` + +The argument description is important, based on this you need to create the +command arguments. + +> **Tip** +> +> When you are using the Director, you can prepare the commands as files +> e.g. inside the `global-templates` zone.
Then run the kickstart wizard +> again to import the commands as external reference. +> +> If you prefer to use the Director GUI/CLI, please apply the steps +> in the `Add Command` form. + +Start with the basic plugin call without any parameters. + +``` +object CheckCommand "systemd" { // Plugin name without 'check_' prefix + command = [ PluginContribDir + "/check_systemd.py" ] // Use the 'PluginContribDir' constant, see the contributed ITL commands +} +``` + +Run a config validation to see if that works, `icinga2 daemon -C` + +Next, analyse the plugin parameters. Plugins with a good help output show +optional parameters in square brackets. This is the case for all parameters +for this plugin. If there are required parameters, use the `required` key +inside the argument. + +The `arguments` attribute is a dictionary which takes the parameters as keys. + +``` + arguments = { + "--unit" = { ... } + } +``` + +If there are long parameter names available, prefer them. This increases +readability in both the configuration as well as the executed command line. + +The argument value itself is a sub dictionary which has additional keys: + +* `value` which references the runtime macro string +* `description` where you copy the plugin parameter help text into +* `required`, `set_if`, etc. for advanced parameters, check the [CheckCommand object](09-object-types.md#objecttype-checkcommand) chapter. + +The runtime macro syntax is required to allow value extraction when +the command is executed. + +> **Tip** +> +> Inside the Director, store the new command first in order to +> unveil the `Arguments` tab. + +Best practice is to use the command name as prefix, in this specific +case e.g. `systemd_unit`. + +``` + arguments = { + "--unit" = { + value = "$systemd_unit$" // The service parameter would then be defined as 'vars.systemd_unit = "icinga2"' + description = "Name of the systemd unit that is beeing tested."
+ } + "--warning" = { + value = "$systemd_warning$" + description = "Startup time in seconds to result in warning status." + } + "--critical" = { + value = "$systemd_critical$" + description = "Startup time in seconds to result in critical status." + } + } +``` + +This may take a while -- validate the configuration in between up until +the CheckCommand definition is done. + +Then test and integrate it into your monitoring configuration. + +Remember: Do it once and right, and never touch the CheckCommand again. +Optional arguments allow different use cases and scenarios. + + +Once you have created your really good CheckCommand, please consider +sharing it with our community by creating a new PR on [GitHub](https://github.com/Icinga/icinga2/blob/master/CONTRIBUTING.md). +_Please also update the documentation for the ITL._ + + +> **Tip** +> +> Inside the Director, you can render the configuration in the Deployment +> section. Extract the static configuration object and use that as a source +> for sending it upstream. + + + +#### Modify Existing CheckCommand + +Sometimes an existing CheckCommand inside the ITL is missing a parameter. +Or you don't need a default parameter value being set. + +Instead of copying the entire configuration object, you can import +an object into another new object. + +``` +object CheckCommand "http-custom" { + import "http" // Import existing http object + + arguments += { // Use additive assignment to add missing parameters + "--key" = { + value = "$http_..." // Keep the parameter name the same as with http + } + } + + // Override default parameters + vars.http_address = "..." +} +``` + +This CheckCommand can then be referenced in your host/service object +definitions. + + +### Plugin API + +Icinga 2 supports the native plugin API specification from the Monitoring Plugins project. +It is defined in the [Monitoring Plugins](https://www.monitoring-plugins.org) guidelines. 
+ +The Icinga documentation revamps the specification into our +own guideline enriched with examples and best practices. + +#### Output + +The output should be as short and as detailed as possible. The +most common cases include: + +- Viewing a problem list in Icinga Web and dashboards +- Getting paged about a problem +- Receiving the alert on the CLI or forwarding it to external (ticket) systems + +Examples: + +``` +<STATUS>: <A short description what happened> + +OK: MySQL connection time is fine (0.0002s) +WARNING: MySQL connection time is slow (0.5s > 0.1s threshold) +CRITICAL: MySQL connection time is causing degraded performance (3s > 0.5s threshold) +``` + +Icinga supports reading multi-line output where Icinga Web +only shows the first line in the listings and everything in the detail view. + +Example for an end2end check with many smaller test cases integrated: + +``` +OK: Online banking works. +Testcase 1: Site reached. +Testcase 2: Attempted login, JS loads. +Testcase 3: Login succeeded. +Testcase 4: View current state works. +Testcase 5: Transactions fine. +``` + +If the extended output shouldn't be visible in your monitoring, but only for testing, +it is recommended to implement the `--verbose` plugin parameter to allow +developers and users to debug further. Check [here](05-service-monitoring.md#service-monitoring-plugin-api-verbose) +for more implementation tips. + +> **Tip** +> +> More debug output also helps when implementing your plugin. +> +> Best practice is to have the plugin parameter and handling implemented first, +> then add it anywhere you want to see more, e.g. from initial database connections +> to actual query results. + + +#### Status + +Value | Status | Description +------|-----------|------------------------------- +0 | OK | The check went fine and everything is considered working. +1 | Warning | The check is above the given warning threshold, or anything else is suspicious requiring attention before it breaks.
+2 | Critical | The check exceeded the critical threshold, or something really is broken and will harm the production environment. +3 | Unknown | Invalid parameters, low level resource errors (IO device busy, no fork resources, TCP sockets, etc.) preventing the actual check. Higher level errors such as DNS resolving, TCP connection timeouts should be treated as `Critical` instead. Whenever the plugin reaches its timeout (best practice) it should also terminate with `Unknown`. + +Keep in mind that these are service states. Icinga automatically maps +the [host state](03-monitoring-basics.md#check-result-state-mapping) from the returned plugin states. + +#### Thresholds + +A plugin calculates specific values and may decide about the exit state on its own. +This is done with thresholds - warning and critical values which are compared with +the actual value. Upon this logic, the exit state is determined. + +Imagine the following value and defined thresholds: + +``` +ptc_value = 57.8 + +warning = 50 +critical = 60 +``` + +Whenever `ptc_value` is higher than warning or critical, it should return +the appropriate [state](05-service-monitoring.md#service-monitoring-plugin-api-status). + +The threshold evaluation order also is important: + +* Critical thresholds are evaluated first and supersede everything else. +* Warning thresholds are evaluated second +* If no threshold is matched, return the OK state + +Avoid using hardcoded threshold values in your plugins, always +add them to the argument parser. + +Example for Python: + +```python +import argparse +import signal +import sys + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument("-w", "--warning", help="Warning threshold. Single value or range, e.g. '20:50'.") + parser.add_argument("-c", "--critical", help="Critical threshold. Single value or range, e.g.
'25:45'.") + + args = parser.parse_args() +``` + +Users might call plugins only with the critical threshold parameter, +leaving out the warning parameter. Keep this in mind when evaluating +the thresholds, always check if the parameters have been defined before. + +```python + if args.critical: + if ptc_value > args.critical: + print("CRITICAL - ...") + sys.exit(2) # Critical + + if args.warning: + if ptc_value > args.warning: + print("WARNING - ...") + sys.exit(1) # Warning + + print("OK - ...") + sys.exit(0) # OK +``` + +The above is a simplified example for printing the [output](05-service-monitoring.md#service-monitoring-plugin-api-output) +and using the [state](05-service-monitoring.md#service-monitoring-plugin-api-status) +as exit code. + +Before diving into the implementation, learn more about required +[performance data metrics](05-service-monitoring.md#service-monitoring-plugin-api-performance-data-metrics) +and more best practices below. + +##### Threshold Ranges + +Threshold ranges can be used to specify an alert window, e.g. whenever a calculated +value is between a lower and higher critical threshold. + +The schema for threshold ranges looks as follows. The `@` character in square brackets +is optional. + +``` +[@]start:end +``` + +There are a few requirements for ranges: + +* `start <= end`. Add a check in your code and let the user know about problematic values. + +``` +10:20 # OK + +30:10 # Error +``` + +* `start:` can be omitted if its value is 0. This is the default handling for single threshold values too. + +``` +10 # Every value < 0 or > 10, outside of 0..10 +``` + +* If `end` is omitted, assume end is infinity. + +``` +10: # < 10, outside of 10..∞ +``` + +* In order to specify negative infinity, use the `~` character. + +``` +~:10 # > 10, outside of -∞..10 +``` + +* Raise alert if value is outside of the defined range.
+ +``` +10:20 # < 10 or > 20, outside of 10..20 +``` + +* Start with `@` to raise an alert if the value is **inside** the defined range, inclusive start/end values. + +``` +@10:20 # >= 10 and <= 20, inside of 10..20 +``` + +Best practice is to either implement single threshold values, or fully support ranges. +This requires parsing the input parameter values, therefore look for existing libraries +already providing this functionality. + +[check_tinkerforge](https://github.com/NETWAYS/check_tinkerforge/blob/master/check_tinkerforge.py) +implements a simple parser to avoid dependencies. + + +#### Performance Data Metrics + +Performance data metrics must be appended to the plugin output with a preceding `|` character. +The schema is as follows: + +``` +<output> | 'label'=value[UOM];[warn];[crit];[min];[max] +``` + +The label should be encapsulated with single quotes. Avoid spaces or special characters such +as `%` in there, this could lead to problems with metric receivers such as Graphite. + +Labels must not include `'` and `=` characters. Keep the label length as short and unique as possible. + +Example: + +``` +'load1'=4.7 +``` + +Values must respect the C/POSIX locale and not implement e.g. German locale for floating point numbers with `,`. +Icinga sets `LC_NUMERIC=C` to enforce this locale on plugin execution. + +##### Unit of Measurement (UOM) + +``` +'rta'=12.445000ms 'pl'=0% +``` + +The UoMs are written as-is into the [core backends](14-features.md#core-backends) +(IDO, API). I.e. 12.445000ms remain 12.445000ms. + +In contrast, the [metric backends](14-features.md#metrics) +(Graphite, InfluxDB, etc.) get perfdata (including warn, crit, min, max) +normalized by Icinga. E.g. 12.445000ms become 0.012445 seconds. + +Some plugins change the UoM for different sizing, e.g. returning the disk usage in MB and later GB +for the same performance data label. This is to ensure that graphs always look the same.
+ +[Icinga DB](14-features.md#core-backends-icingadb) gets both the as-is and the normalized perfdata. + +What metric backends get... | ... from which perfdata UoMs (case-insensitive if possible) +----------------------------|--------------------------------------- +bytes (B) | B, KB, MB, ..., YB, KiB, MiB, ..., YiB +bits (b) | b, kb, mb, ..., yb, kib, mib, ..., yib +packets | packets +seconds (s) | ns, us, ms, s, m, h, d +percent | % +amperes (A) | nA, uA, mA, A, kA, MA, GA, ..., YA +ohms (O) | nO, uO, mO, O, kO, MO, GO, ..., YO +volts (V) | nV, uV, mV, V, kV, MV, GV, ..., YV +watts (W) | nW, uW, mW, W, kW, MW, GW, ..., YW +ampere seconds (As) | nAs, uAs, mAs, As, kAs, MAs, GAs, ..., YAs +ampere seconds | nAm, uAm, mAm, Am (ampere minutes), kAm, MAm, GAm, ..., YAm +ampere seconds | nAh, uAh, mAh, Ah (ampere hours), kAh, MAh, GAh, ..., YAh +watt hours | nWs, uWs, mWs, Ws (watt seconds), kWs, MWs, GWs, ..., YWs +watt hours | nWm, uWm, mWm, Wm (watt minutes), kWm, MWm, GWm, ..., YWm +watt hours (Wh) | nWh, uWh, mWh, Wh, kWh, MWh, GWh, ..., YWh +lumens | lm +decibel-milliwatts | dBm +grams (g) | ng, ug, mg, g, kg, t +degrees Celsius | C +degrees Fahrenheit | F +degrees Kelvin | K +liters (l) | ml, l, hl + +The UoM "c" represents a continuous counter (e.g. interface traffic counters). + +Unknown UoMs are discarded (as if none was given). +A value without any UoM may be an integer or floating point number +for any type (processes, users, etc.). + +##### Thresholds and Min/Max + +Next to the performance data value, warn, crit, min, max can optionally be provided. They must be separated +with the semi-colon `;` character. They share the same UOM with the performance data value. + +``` +$ check_ping -4 -H icinga.com -c '200,15%' -w '100,5%' + +PING OK - Packet loss = 0%, RTA = 12.44 ms|rta=12.445000ms;100.000000;200.000000;0.000000 pl=0%;5;15;0 +``` + +##### Multiple Performance Data Values + +Multiple performance data values must be joined with a space character.
The below example +is from the [check_load](10-icinga-template-library.md#plugin-check-command-load) plugin. + +``` +load1=4.680;1.000;2.000;0; load5=0.000;5.000;10.000;0; load15=0.000;10.000;20.000;0; +``` + +#### Timeout + +Icinga has a safety mechanism where it kills processes running for too +long. The timeout can be specified in [CheckCommand objects](09-object-types.md#objecttype-checkcommand) +or on the host/service object. + +Best practice is to control the timeout in the plugin itself +and provide a clear message followed by the Unknown state. + +Example in Python taken from [check_tinkerforge](https://github.com/NETWAYS/check_tinkerforge/blob/master/check_tinkerforge.py): + +```python +import argparse +import signal +import sys + +def handle_sigalrm(signum, frame, timeout=None): + output('Plugin timed out after %d seconds' % timeout, 3) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + # ... add more arguments + parser.add_argument("-t", "--timeout", help="Timeout in seconds (default 10s)", type=int, default=10) + args = parser.parse_args() + + signal.signal(signal.SIGALRM, partial(handle_sigalrm, timeout=args.timeout)) + signal.alarm(args.timeout) + + # ... perform the check and generate output/status +``` + +#### Versions + +Plugins should provide a version via `-V` or `--version` parameter +which is bumped on releases. This allows to identify problems with +too old or new versions on the community support channels. 
+ +Example in Python taken from [check_tinkerforge](https://github.com/NETWAYS/check_tinkerforge/blob/master/check_tinkerforge.py): + +```python +import argparse +import signal +import sys + +__version__ = '0.9.1' + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('-V', '--version', action='version', version='%(prog)s v' + sys.modules[__name__].__version__) +``` + +#### Verbose + +Plugins should provide a verbose mode with `-v` or `--verbose` in order +to show more detailed log messages. This helps to debug and analyse the +flow and execution steps inside the plugin. + +Ensure to add the parameter prior to implementing the check logic into +the plugin. + +Example in Python taken from [check_tinkerforge](https://github.com/NETWAYS/check_tinkerforge/blob/master/check_tinkerforge.py): + +```python +import argparse +import signal +import sys + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('-v', '--verbose', action='store_true') + + if args.verbose: + print("Verbose debug output") +``` + + +### Create a new Plugin + +Sometimes an existing plugin does not satisfy your requirements. You +can either kindly contact the original author about plans to add changes +and/or create a patch. + +If you just want to format the output and state of an existing plugin +it might also be helpful to write a wrapper script. This script +could pass all configured parameters, call the plugin script, parse +its output/exit code and return your specified output/exit code. + +On the other hand plugins for specific services and hardware might not yet +exist. + +> **Tip** +> +> Watch this presentation from Icinga Camp Berlin to learn more +> about [How to write checks that don't suck](https://www.youtube.com/watch?v=Ey_APqSCoFQ). + +Common best practices: + +* Choose the programming language wisely + * Scripting languages (Bash, Python, Perl, Ruby, PHP, etc.) 
are easier to write and setup but their check execution might take longer (invoking the script interpreter as overhead, etc.). + * Plugins written in C/C++, Go, etc. improve check execution time but may generate an overhead with installation and packaging. +* Use a modern VCS such as Git for developing the plugin, e.g. share your plugin on GitHub and let it sync to [Icinga Exchange](https://exchange.icinga.com). +* **Look into existing plugins endorsed by community members.** + +Implementation hints: + +* Add parameters with key-value pairs to your plugin. They should allow long names (e.g. `--host localhost`) and also short parameters (e.g. `-H localhost`) + * `-h|--help` should print the version and all details about parameters and runtime invocation. Note: Python's ArgParse class provides this OOTB. + * `--version` should print the plugin [version](05-service-monitoring.md#service-monitoring-plugin-api-versions). +* Add a [verbose/debug output](05-service-monitoring.md#service-monitoring-plugin-api-verbose) functionality for detailed on-demand logging. +* Respect the exit codes required by the [Plugin API](05-service-monitoring.md#service-monitoring-plugin-api). +* Always add [performance data](05-service-monitoring.md#service-monitoring-plugin-api-performance-data-metrics) to your plugin output. +* Allow to specify [warning/critical thresholds](05-service-monitoring.md#service-monitoring-plugin-api-thresholds) as parameters. + +Example skeleton: + +``` +# 1. include optional libraries +# 2. global variables +# 3. helper functions and/or classes +# 4. define timeout condition + +if () then + print "UNKNOWN - Timeout (...) reached | 'time'=30.0 +endif + +# 5. main method + + + +if () then + print "CRITICAL - ... | 'time'=0.1 'myperfdatavalue'=5.0 + exit(2) +else if () then + print "WARNING - ... | 'time'=0.1 'myperfdatavalue'=3.0 + exit(1) +else + print "OK - ... 
| 'time'=0.2 'myperfdatavalue'=1.0 +endif +``` + +There are various plugin libraries available which will help +with plugin execution and output formatting too, for example +[nagiosplugin from Python](https://pypi.python.org/pypi/nagiosplugin/). + +> **Note** +> +> Ensure to test your plugin properly with special cases before putting it +> into production! + +Once you've finished your plugin please upload/sync it to [Icinga Exchange](https://exchange.icinga.com/new). +Thanks in advance! + + +## Service Monitoring Overview + +The following examples should help you to start implementing your own ideas. +There is a variety of plugins available. This collection is not complete -- +if you have any updates, please send a documentation patch upstream. + +Please visit our [community forum](https://community.icinga.com) which +may provide an answer to your use case already. If not, do not hesitate +to create a new topic. + +### General Monitoring + +If the remote service is available (via a network protocol and port), +and if a check plugin is also available, you don't necessarily need a local client. +Instead, choose a plugin and configure its parameters and thresholds. 
The following examples are included in the [Icinga 2 Template Library](10-icinga-template-library.md#icinga-template-library): + +* [ping4](10-icinga-template-library.md#plugin-check-command-ping4), [ping6](10-icinga-template-library.md#plugin-check-command-ping6), +[fping4](10-icinga-template-library.md#plugin-check-command-fping4), [fping6](10-icinga-template-library.md#plugin-check-command-fping6), [hostalive](10-icinga-template-library.md#plugin-check-command-hostalive) +* [tcp](10-icinga-template-library.md#plugin-check-command-tcp), [udp](10-icinga-template-library.md#plugin-check-command-udp), [ssl](10-icinga-template-library.md#plugin-check-command-ssl) +* [ntp_time](10-icinga-template-library.md#plugin-check-command-ntp-time) + +### Linux Monitoring + +* [disk](10-icinga-template-library.md#plugin-check-command-disk) +* [mem](10-icinga-template-library.md#plugin-contrib-command-mem), [swap](10-icinga-template-library.md#plugin-check-command-swap) +* [procs](10-icinga-template-library.md#plugin-check-command-processes) +* [users](10-icinga-template-library.md#plugin-check-command-users) +* [running_kernel](10-icinga-template-library.md#plugin-contrib-command-running_kernel) +* package management: [apt](10-icinga-template-library.md#plugin-check-command-apt), [yum](10-icinga-template-library.md#plugin-contrib-command-yum), etc. 
+* [ssh](10-icinga-template-library.md#plugin-check-command-ssh) +* performance: [iostat](10-icinga-template-library.md#plugin-contrib-command-iostat), [check_sar_perf](https://github.com/dnsmichi/icinga-plugins/blob/master/scripts/check_sar_perf.py) + +### Windows Monitoring + +* [check_wmi_plus](https://edcint.co.nz/checkwmiplus/) +* [NSClient++](https://www.nsclient.org) (in combination with the Icinga 2 client and either [check_nscp_api](10-icinga-template-library.md#nscp-check-api) or [nscp-local](10-icinga-template-library.md#nscp-plugin-check-commands) check commands) +* [Icinga 2 Windows Plugins](10-icinga-template-library.md#windows-plugins) (disk, load, memory, network, performance counters, ping, procs, service, swap, updates, uptime, users) +* VBS and PowerShell scripts + +### Database Monitoring + +* MySQL/MariaDB: [mysql_health](10-icinga-template-library.md#plugin-contrib-command-mysql_health), [mysql](10-icinga-template-library.md#plugin-check-command-mysql), [mysql_query](10-icinga-template-library.md#plugin-check-command-mysql-query) +* PostgreSQL: [postgres](10-icinga-template-library.md#plugin-contrib-command-postgres) +* Oracle: [oracle_health](10-icinga-template-library.md#plugin-contrib-command-oracle_health) +* MSSQL: [mssql_health](10-icinga-template-library.md#plugin-contrib-command-mssql_health) +* DB2: [db2_health](10-icinga-template-library.md#plugin-contrib-command-db2_health) +* MongoDB: [mongodb](10-icinga-template-library.md#plugin-contrib-command-mongodb) +* Elasticsearch: [elasticsearch](10-icinga-template-library.md#plugin-contrib-command-elasticsearch) +* Redis: [redis](10-icinga-template-library.md#plugin-contrib-command-redis) + +### SNMP Monitoring + +* [Manubulon plugins](10-icinga-template-library.md#snmp-manubulon-plugin-check-commands) (interface, storage, load, memory, process) +* [snmp](10-icinga-template-library.md#plugin-check-command-snmp), [snmpv3](10-icinga-template-library.md#plugin-check-command-snmpv3) + +###
Network Monitoring + +* [nwc_health](10-icinga-template-library.md#plugin-contrib-command-nwc_health) +* [interfaces](10-icinga-template-library.md#plugin-contrib-command-interfaces) +* [interfacetable](10-icinga-template-library.md#plugin-contrib-command-interfacetable) +* [iftraffic](10-icinga-template-library.md#plugin-contrib-command-iftraffic), [iftraffic64](10-icinga-template-library.md#plugin-contrib-command-iftraffic64) + +### Web Monitoring + +* [http](10-icinga-template-library.md#plugin-check-command-http) +* [ftp](10-icinga-template-library.md#plugin-check-command-ftp) +* [webinject](10-icinga-template-library.md#plugin-contrib-command-webinject) +* [squid](10-icinga-template-library.md#plugin-contrib-command-squid) +* [apache-status](10-icinga-template-library.md#plugin-contrib-command-apache-status) +* [nginx_status](10-icinga-template-library.md#plugin-contrib-command-nginx_status) +* [kdc](10-icinga-template-library.md#plugin-contrib-command-kdc) +* [rbl](10-icinga-template-library.md#plugin-contrib-command-rbl) + +* [Icinga Certificate Monitoring](https://icinga.com/products/icinga-certificate-monitoring/) + +### Java Monitoring + +* [jmx4perl](10-icinga-template-library.md#plugin-contrib-command-jmx4perl) + +### DNS Monitoring + +* [dns](10-icinga-template-library.md#plugin-check-command-dns) +* [dig](10-icinga-template-library.md#plugin-check-command-dig) +* [dhcp](10-icinga-template-library.md#plugin-check-command-dhcp) + +### Backup Monitoring + +* [check_bareos](https://github.com/widhalmt/check_bareos) + +### Log Monitoring + +* [check_logfiles](https://labs.consol.de/nagios/check_logfiles/) +* [check_logstash](https://github.com/NETWAYS/check_logstash) +* [check_graylog2_stream](https://github.com/Graylog2/check-graylog2-stream) + +### Virtualization Monitoring + +### VMware Monitoring + +* [Icinga Module for vSphere](https://icinga.com/products/icinga-module-for-vsphere/) +* 
[esxi_hardware](10-icinga-template-library.md#plugin-contrib-command-esxi-hardware) +* [VMware](10-icinga-template-library.md#plugin-contrib-vmware) + +**Tip**: If you are encountering timeouts using the VMware Perl SDK, +check [this blog entry](https://www.claudiokuenzler.com/blog/650/slow-vmware-perl-sdk-soap-request-error-libwww-version). +Ubuntu 16.04 LTS can have troubles with random entropy in Perl asked [here](https://monitoring-portal.org/t/check-vmware-api-slow-when-run-multiple-times/2868). +In that case, [haveged](https://issihosts.com/haveged/) may help. + +### SAP Monitoring + +* [check_sap_health](https://labs.consol.de/nagios/check_sap_health/index.html) +* [SAP CCMS](https://sourceforge.net/projects/nagios-sap-ccms/) + +### Mail Monitoring + +* [smtp](10-icinga-template-library.md#plugin-check-command-smtp), [ssmtp](10-icinga-template-library.md#plugin-check-command-ssmtp) +* [imap](10-icinga-template-library.md#plugin-check-command-imap), [simap](10-icinga-template-library.md#plugin-check-command-simap) +* [pop](10-icinga-template-library.md#plugin-check-command-pop), [spop](10-icinga-template-library.md#plugin-check-command-spop) +* [mailq](10-icinga-template-library.md#plugin-check-command-mailq) + +### Hardware Monitoring + +* [hpasm](10-icinga-template-library.md#plugin-contrib-command-hpasm) +* [ipmi-sensor](10-icinga-template-library.md#plugin-contrib-command-ipmi-sensor) + +### Metrics Monitoring + +* [graphite](10-icinga-template-library.md#plugin-contrib-command-graphite) diff --git a/doc/06-distributed-monitoring.md b/doc/06-distributed-monitoring.md new file mode 100644 index 0000000..7d28a9b --- /dev/null +++ b/doc/06-distributed-monitoring.md @@ -0,0 +1,3555 @@ +# Distributed Monitoring with Master, Satellites and Agents + +This chapter will guide you through the setup of a distributed monitoring +environment, including high-availability clustering and setup details +for Icinga masters, satellites and agents. 
+ +## Roles: Master, Satellites and Agents + +Icinga 2 nodes can be given names for easier understanding: + +* A `master` node which is on top of the hierarchy. +* A `satellite` node which is a child of a `satellite` or `master` node. +* An `agent` node which is connected to `master` and/or `satellite` nodes. + +![Icinga 2 Distributed Roles](images/distributed-monitoring/icinga2_distributed_monitoring_roles.png) + +Rephrasing this picture in more detail: + +* A `master` node has no parent node. + * A `master` node is where you usually install Icinga Web 2. + * A `master` node can combine executed checks from child nodes into backends and notifications. +* A `satellite` node has a parent and a child node. + * A `satellite` node may execute checks on its own or delegate check execution to child nodes. + * A `satellite` node can receive configuration for hosts/services, etc. from the parent node. + * A `satellite` node continues to run even if the master node is temporarily unavailable. +* An `agent` node only has a parent node. + * An `agent` node will either run its own configured checks or receive command execution events from the parent node. + +A client can be a secondary master, a satellite or an agent. It +typically requests something from the primary master or parent node. + +The following sections will refer to these roles and explain the +differences and the possibilities this kind of setup offers. + +> **Note** +> +> Previous versions of this documentation used the term `Icinga client`. +> This has been refined into `Icinga agent` and is visible in the docs, +> backends and web interfaces. + +**Tip**: If you just want to install a single master node that monitors several hosts +(i.e. Icinga agents), continue reading -- we'll start with +simple examples. +In case you are planning a huge cluster setup with multiple levels and +lots of satellites and agents, read on -- we'll deal with these cases later on.
+ +The installation on each system is the same: Follow the [installation instructions](02-installation.md) +for the Icinga 2 package and the required check plugins. + +The required configuration steps are mostly happening +on the command line. You can also [automate the setup](06-distributed-monitoring.md#distributed-monitoring-automation). + +The first thing you need to learn about a distributed setup is the hierarchy of the single components. + +## Zones + +The Icinga 2 hierarchy consists of so-called [zone](09-object-types.md#objecttype-zone) objects. +Zones depend on a parent-child relationship in order to trust each other. + +![Icinga 2 Distributed Zones](images/distributed-monitoring/icinga2_distributed_monitoring_zones.png) + +Have a look at this example for the `satellite` zones which have the `master` zone as a parent zone: + +``` +object Zone "master" { + //... +} + +object Zone "satellite region 1" { + parent = "master" + //... +} + +object Zone "satellite region 2" { + parent = "master" + //... +} +``` + +There are certain limitations for child zones, e.g. their members are not allowed +to send configuration commands to the parent zone members. Vice versa, the +trust hierarchy allows for example the `master` zone to send +configuration files to the `satellite` zone. Read more about this +in the [security section](06-distributed-monitoring.md#distributed-monitoring-security). + +`agent` nodes also have their own unique zone. By convention you +must use the FQDN for the zone name. + +## Endpoints + +Nodes which are a member of a zone are so-called [Endpoint](09-object-types.md#objecttype-endpoint) objects.
+ +![Icinga 2 Distributed Endpoints](images/distributed-monitoring/icinga2_distributed_monitoring_endpoints.png) + +Here is an example configuration for two endpoints in different zones: + +``` +object Endpoint "icinga2-master1.localdomain" { + host = "192.168.56.101" +} + +object Endpoint "icinga2-satellite1.localdomain" { + host = "192.168.56.105" +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain" ] +} + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain" ] + parent = "master" +} +``` + +All endpoints in the same zone work as high-availability setup. For +example, if you have two nodes in the `master` zone, they will load-balance the check execution. + +Endpoint objects are important for specifying the connection +information, e.g. if the master should actively try to connect to an agent. + +The zone membership is defined inside the `Zone` object definition using +the `endpoints` attribute with an array of `Endpoint` names. + +> **Note** +> +> There is a known [problem](https://github.com/Icinga/icinga2/issues/3533) +> with >2 endpoints in a zone and a message routing loop. +> The config validation will log a warning to let you know about this too. + +If you want to check the availability (e.g. ping checks) of the node +you still need a [Host](09-object-types.md#objecttype-host) object. + +## ApiListener + +In case you are using the CLI commands later, you don't have to write +this configuration from scratch in a text editor. +The [ApiListener](09-object-types.md#objecttype-apilistener) object is +used to load the TLS certificates and specify restrictions, e.g. +for accepting configuration commands. + +It is also used for the [Icinga 2 REST API](12-icinga2-api.md#icinga2-api) which shares +the same host and port with the Icinga 2 Cluster protocol. + +The object configuration is stored in the `/etc/icinga2/features-enabled/api.conf` +file. 
Depending on the configuration mode the attributes `accept_commands` +and `accept_config` can be configured here. + +In order to use the `api` feature you need to enable it and restart Icinga 2. + +```bash +icinga2 feature enable api +``` + +## Conventions + +By convention all nodes should be configured using their FQDN. + +Furthermore, you must ensure that the following names +are exactly the same in all configuration files: + +* Host certificate common name (CN). +* Endpoint configuration object for the host. +* NodeName constant for the local host. + +Setting this up on the command line will help you to minimize the effort. +Just keep in mind that you need to use the FQDN for endpoints and for +common names when asked. + +## Security + +While there are certain mechanisms to ensure a secure communication between all +nodes (firewalls, policies, software hardening, etc.), Icinga 2 also provides +additional security: + +* TLS v1.2+ is required. +* TLS cipher lists are hardened [by default](09-object-types.md#objecttype-apilistener). +* TLS certificates are mandatory for communication between nodes. The CLI command wizards +help you create these certificates. +* Child zones only receive updates (check results, commands, etc.) for their configured objects. +* Child zones are not allowed to push configuration updates to parent zones. +* Zones cannot interfere with other zones and influence each other. Each checkable host or service object is assigned to **one zone** only. +* All nodes in a zone trust each other. +* [Config sync](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync) and [remote command endpoint execution](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) are disabled by default. + +The underlying protocol uses JSON-RPC event notifications exchanged by nodes. +The connection is secured by TLS.
The message protocol uses an internal API, +and as such message types and names may change internally and are not documented. + +Zones build the trust relationship in a distributed environment. If you do not specify +a zone for an agent/satellite and specify the parent zone, its zone members e.g. the master instance +won't trust the agent/satellite. + +Building this trust is key in your distributed environment. That way the parent node +knows that it is able to send messages to the child zone, e.g. configuration objects, +configuration in global zones, commands to be executed in this zone/for this endpoint. +It also receives check results from the child zone for checkable objects (host/service). + +Vice versa, the agent/satellite trusts the master and accepts configuration and commands if enabled +in the api feature. If the agent/satellite would send configuration to the parent zone, the parent nodes +will deny it. The parent zone is the configuration entity, and does not trust agents/satellites in this matter. +An agent/satellite could attempt to modify a different agent/satellite for example, or inject a check command +with malicious code. + +While it may sound complicated for agent/satellite setups, it removes the problem with different roles +and configurations for a master and child nodes. Both of them work the same way, are configured +in the same way (Zone, Endpoint, ApiListener), and you can troubleshoot and debug them in just one go. + +## Versions and Upgrade + +It generally is advised to use the newest releases with the same version on all instances. +Prior to upgrading, make sure to plan a maintenance window. + +The Icinga project aims to allow the following compatibility: + +``` +master (2.11) >= satellite (2.10) >= agent (2.9) +``` + +Older agent versions may work, but there's no guarantee. Always keep in mind that +older versions are out of support and can contain bugs. 
+ +In terms of an upgrade, ensure that the master is upgraded first, then +involved satellites, and last the Icinga agents. If you are on v2.10 +currently, first upgrade the master instance(s) to 2.11, and then proceed +with the satellites. Things are getting easier with any sort of automation +tool (Puppet, Ansible, etc.). + +Releases and new features may require you to upgrade master/satellite instances at once, +this is highlighted in the [upgrading docs](16-upgrading-icinga-2.md#upgrading-icinga-2) if needed. +One example is the CA Proxy and on-demand signing feature +available since v2.8 where all involved instances need this version +to function properly. + +## Master Setup + +This section explains how to install a central single master node using +the `node wizard` command. If you prefer to do an automated installation, please +refer to the [automated setup](06-distributed-monitoring.md#distributed-monitoring-automation) section. + +Follow the [installation instructions](02-installation.md) for the Icinga 2 package and the required +check plugins if you haven't done so already. + +**Note**: Windows is not supported for a master node setup. + +The next step is to run the `node wizard` CLI command. Prior to that +ensure to collect the required information: + + Parameter | Description + --------------------|-------------------- + Common name (CN) | **Required.** By convention this should be the host's FQDN. Defaults to the FQDN. + Master zone name | **Optional.** Allows to specify the master zone name. Defaults to `master`. + Global zones | **Optional.** Allows to specify more global zones in addition to `global-templates` and `director-global`. Defaults to `n`. + API bind host | **Optional.** Allows to specify the address the ApiListener is bound to. For advanced usage only. + API bind port | **Optional.** Allows to specify the port the ApiListener is bound to. For advanced usage only (requires changing the default port 5665 everywhere). 
+ Disable conf.d | **Optional.** Allows to disable the `include_recursive "conf.d"` directive except for the `api-users.conf` file in the `icinga2.conf` file. Defaults to `y`. Configuration on the master is discussed below. + +The setup wizard will ensure that the following steps are taken: + +* Enable the `api` feature. +* Generate a new certificate authority (CA) in `/var/lib/icinga2/ca` if it doesn't exist. +* Create a certificate for this node signed by the CA key. +* Update the [zones.conf](04-configuration.md#zones-conf) file with the new zone hierarchy. +* Update the [ApiListener](06-distributed-monitoring.md#distributed-monitoring-apilistener) and [constants](04-configuration.md#constants-conf) configuration. +* Update the [icinga2.conf](04-configuration.md#icinga2-conf) to disable the `conf.d` inclusion, and add the `api-users.conf` file inclusion. + +Here is an example of a master setup for the `icinga2-master1.localdomain` node on CentOS 7: + +``` +[root@icinga2-master1.localdomain /]# icinga2 node wizard + +Welcome to the Icinga 2 Setup Wizard! + +We will guide you through all required configuration details. + +Please specify if this is a satellite/agent setup ('n' installs a master setup) [Y/n]: n + +Starting the Master setup routine... + +Please specify the common name (CN) [icinga2-master1.localdomain]: icinga2-master1.localdomain +Reconfiguring Icinga... +Checking for existing certificates for common name 'icinga2-master1.localdomain'... +Certificates not yet generated. Running 'api setup' now. +Generating master configuration for Icinga 2. +Enabling feature api. Make sure to restart Icinga 2 for these changes to take effect. + +Master zone name [master]: + +Default global zones: global-templates director-global +Do you want to specify additional global zones? 
[y/N]: N + +Please specify the API bind host/port (optional): +Bind Host []: +Bind Port []: + +Do you want to disable the inclusion of the conf.d directory [Y/n]: +Disabling the inclusion of the conf.d directory... +Checking if the api-users.conf file exists... + +Done. + +Now restart your Icinga 2 daemon to finish the installation! +``` + +You can verify that the CA public and private keys are stored in the `/var/lib/icinga2/ca` directory. +Keep this path secure and include it in your backups. + +In case you lose the CA private key you have to generate a new CA for signing new agent/satellite +certificate requests. You then have to also re-create new signed certificates for all +existing nodes. + +Once the master setup is complete, you can also use this node as primary [CSR auto-signing](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing) +master. The following section will explain how to use the CLI commands in order to fetch their +signed certificate from this master node. + +## Signing Certificates on the Master + +All certificates must be signed by the same certificate authority (CA). This ensures +that all nodes trust each other in a distributed monitoring environment. + +This CA is generated during the [master setup](06-distributed-monitoring.md#distributed-monitoring-setup-master) +and should be the same on all master instances. + +You can avoid signing and deploying certificates [manually](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-certificates-manual) +by using built-in methods for auto-signing certificate signing requests (CSR): + +* [CSR Auto-Signing](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing) which uses a client (an agent or a satellite) ticket generated on the master as trust identifier. +* [On-Demand CSR Signing](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing) which allows to sign pending certificate requests on the master. 
+ +Both methods are described in detail below. + +> **Note** +> +> [On-Demand CSR Signing](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing) is available in Icinga 2 v2.8+. + +### CSR Auto-Signing + +A client can be a secondary master, a satellite or an agent. It sends a certificate signing request (CSR) +and must authenticate itself in a trusted way. The master generates a client ticket which is included in this request. +That way the master can verify that the request matches the previously trusted ticket +and sign the request. + +> **Note** +> +> Icinga 2 v2.8 added the possibility to forward signing requests on a satellite +> to the master node. This is called `CA Proxy` in blog posts and design drafts. +> This functionality helps with the setup of [three level clusters](06-distributed-monitoring.md#distributed-monitoring-scenarios-master-satellite-agents) +> and more. + +Advantages: + +* Nodes (secondary master, satellites, agents) can be installed by different users who have received the client ticket. +* No manual interaction necessary on the master node. +* Automation tools like Puppet, Ansible, etc. can retrieve the pre-generated ticket in their client catalog +and run the node setup directly. + +Disadvantages: + +* Tickets need to be generated on the master and copied to client setup wizards. +* No central signing management. + +#### CSR Auto-Signing: Preparation + +Prior to using this mode, ensure that the following steps are taken on +the signing master: + +* The [master setup](06-distributed-monitoring.md#distributed-monitoring-setup-master) was run successfully. This includes: + * Generated a CA key pair + * Generated a private ticket salt stored in the `TicketSalt` constant, set as `ticket_salt` attribute inside the [api](09-object-types.md#objecttype-apilistener) feature. +* Restart of the master instance. 
+ +#### CSR Auto-Signing: On the master + +Setup wizards for agent/satellite nodes will ask you for this specific client ticket. + +There are two possible ways to retrieve the ticket: + +* [CLI command](11-cli-commands.md#cli-command-pki) executed on the master node. +* [REST API](12-icinga2-api.md#icinga2-api) request against the master node. + + +Required information: + + Parameter | Description + --------------------|-------------------- + Common name (CN) | **Required.** The common name for the agent/satellite. By convention this should be the FQDN. + +The following example shows how to generate a ticket on the master node `icinga2-master1.localdomain` for the agent `icinga2-agent1.localdomain`: + +``` +[root@icinga2-master1.localdomain /]# icinga2 pki ticket --cn icinga2-agent1.localdomain +``` + +Querying the [Icinga 2 API](12-icinga2-api.md#icinga2-api) on the master requires an [ApiUser](12-icinga2-api.md#icinga2-api-authentication) +object with at least the `actions/generate-ticket` permission. + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/conf.d/api-users.conf + +object ApiUser "client-pki-ticket" { + password = "bea11beb7b810ea9ce6ea" //change this + permissions = [ "actions/generate-ticket" ] +} + +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` +Retrieve the ticket on the master node `icinga2-master1.localdomain` with `curl`, for example: +``` + [root@icinga2-master1.localdomain /]# curl -k -s -u client-pki-ticket:bea11beb7b810ea9ce6ea -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/generate-ticket' -d '{ "cn": "icinga2-agent1.localdomain" }' +``` + +Store that ticket number for the [agent/satellite setup](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite) below. + +> **Note** +> +> Never expose the ticket salt and/or ApiUser credentials to your client nodes. 
+> Example: Retrieve the ticket on the Puppet master node and send the compiled catalog +> to the authorized Puppet agent node which will invoke the +> [automated setup steps](06-distributed-monitoring.md#distributed-monitoring-automation-cli-node-setup). + + +### On-Demand CSR Signing + +The client can be a secondary master, satellite or agent. +It sends a certificate signing request to specified parent node without any +ticket. The admin on the primary master is responsible for reviewing and signing the requests +with the private CA key. + +This could either be directly the master, or a satellite which forwards the request +to the signing master. + +Advantages: + +* Central certificate request signing management. +* No pre-generated ticket is required for client setups. + +Disadvantages: + +* Asynchronous step for automated deployments. +* Needs client verification on the master. + +#### On-Demand CSR Signing: Preparation + +Prior to using this mode, ensure that the following steps are taken on +the signing master: + +* The [master setup](06-distributed-monitoring.md#distributed-monitoring-setup-master) was run successfully. This includes: + * Generated a CA key pair +* Restart of the master instance. + +#### On-Demand CSR Signing: On the master + +You can list pending certificate signing requests with the `ca list` CLI command. + +``` +[root@icinga2-master1.localdomain /]# icinga2 ca list +Fingerprint | Timestamp | Signed | Subject +-----------------------------------------------------------------|---------------------|--------|-------- +71700c28445109416dd7102038962ac3fd421fbb349a6e7303b6033ec1772850 | 2017/09/06 17:20:02 | | CN = icinga2-agent2.localdomain +``` + +In order to show all requests, use the `--all` parameter. 
+ +``` +[root@icinga2-master1.localdomain /]# icinga2 ca list --all +Fingerprint | Timestamp | Signed | Subject +-----------------------------------------------------------------|---------------------|--------|-------- +403da5b228df384f07f980f45ba50202529cded7c8182abf96740660caa09727 | 2017/09/06 17:02:40 | * | CN = icinga2-agent1.localdomain +71700c28445109416dd7102038962ac3fd421fbb349a6e7303b6033ec1772850 | 2017/09/06 17:20:02 | | CN = icinga2-agent2.localdomain +``` + +**Tip**: Add `--json` to the CLI command to retrieve the details in JSON format. + +If you want to sign a specific request, you need to use the `ca sign` CLI command +and pass its fingerprint as argument. + +``` +[root@icinga2-master1.localdomain /]# icinga2 ca sign 71700c28445109416dd7102038962ac3fd421fbb349a6e7303b6033ec1772850 +information/cli: Signed certificate for 'CN = icinga2-agent2.localdomain'. +``` + +> **Note** +> +> `ca list` cannot be used as historical inventory. Certificate +> signing requests older than 1 week are automatically deleted. + +You can also remove an undesired CSR using the `ca remove` command using the +same syntax as the `ca sign` command. + +``` +[root@pym ~]# icinga2 ca remove 5c31ca0e2269c10363a97e40e3f2b2cd56493f9194d5b1852541b835970da46e +information/cli: Certificate 5c31ca0e2269c10363a97e40e3f2b2cd56493f9194d5b1852541b835970da46e removed. +``` +If you want to restore a certificate you have removed, you can use `ca restore`. + + + + +## Agent/Satellite Setup + +This section describes the setup of an agent or satellite connected to an +existing master node setup. If you haven't done so already, please [run the master setup](06-distributed-monitoring.md#distributed-monitoring-setup-master). + +Icinga 2 on the master node must be running and accepting connections on port `5665`. 
+ + + + +### Agent/Satellite Setup on Linux + +Please ensure that you've run all the steps mentioned in the [agent/satellite section](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite). + +Follow the [installation instructions](02-installation.md) for the Icinga 2 package and the required +check plugins if you haven't done so already. + +The next step is to run the `node wizard` CLI command. + +In this example we're generating a ticket on the master node `icinga2-master1.localdomain` for the agent `icinga2-agent1.localdomain`: + +``` +[root@icinga2-master1.localdomain /]# icinga2 pki ticket --cn icinga2-agent1.localdomain +4f75d2ecd253575fe9180938ebff7cbca262f96e +``` + +Note: You don't need this step if you have chosen to use [On-Demand CSR Signing](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing). + +Start the wizard on the agent `icinga2-agent1.localdomain`: + +``` +[root@icinga2-agent1.localdomain /]# icinga2 node wizard + +Welcome to the Icinga 2 Setup Wizard! + +We will guide you through all required configuration details. +``` + +Press `Enter` or add `y` to start a satellite or agent setup. + +``` +Please specify if this is an agent/satellite setup ('n' installs a master setup) [Y/n]: +``` + +Press `Enter` to use the proposed name in brackets, or add a specific common name (CN). By convention +this should be the FQDN. + +``` +Starting the Agent/Satellite setup routine... + +Please specify the common name (CN) [icinga2-agent1.localdomain]: icinga2-agent1.localdomain +``` + +Specify the direct parent for this node. This could be your primary master `icinga2-master1.localdomain` +or a satellite node in a multi level cluster scenario. 
+ +``` +Please specify the parent endpoint(s) (master or satellite) where this node should connect to: +Master/Satellite Common Name (CN from your master/satellite node): icinga2-master1.localdomain +``` + +Press `Enter` or choose `y` to establish a connection to the parent node. + +``` +Do you want to establish a connection to the parent node from this node? [Y/n]: +``` + +> **Note:** +> +> If this node cannot connect to the parent node, choose `n`. The setup +> wizard will provide instructions for this scenario -- signing questions are disabled then. + +Add the connection details for `icinga2-master1.localdomain`. + +``` +Please specify the master/satellite connection information: +Master/Satellite endpoint host (IP address or FQDN): 192.168.56.101 +Master/Satellite endpoint port [5665]: 5665 +``` + +You can add more parent nodes if necessary. Press `Enter` or choose `n` +if you don't want to add any. This comes in handy if you have more than one +parent node, e.g. two masters or two satellites. + +``` +Add more master/satellite endpoints? [y/N]: +``` + +Verify the parent node's certificate: + +``` +Parent certificate information: + + Subject: CN = icinga2-master1.localdomain + Issuer: CN = Icinga CA + Valid From: Sep 7 13:41:24 2017 GMT + Valid Until: Sep 3 13:41:24 2032 GMT + Fingerprint: AC 99 8B 2B 3D B0 01 00 E5 21 FA 05 2E EC D5 A9 EF 9E AA E3 + +Is this information correct? [y/N]: y +``` + +The setup wizard fetches the parent node's certificate and asks +you to verify this information. This is to prevent MITM attacks or +any kind of untrusted parent relationship. + +You can verify the fingerprint by running the following command on the node to connect to: + +```bash +openssl x509 -noout -fingerprint -sha256 -in \ + "/var/lib/icinga2/certs/$(hostname --fqdn).crt" +``` + +Note: The certificate is not fetched if you have chosen not to connect +to the parent node. 
+ +Proceed with adding the optional client ticket for [CSR auto-signing](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing): + +``` +Please specify the request ticket generated on your Icinga 2 master (optional). + (Hint: # icinga2 pki ticket --cn 'icinga2-agent1.localdomain'): +4f75d2ecd253575fe9180938ebff7cbca262f96e +``` + +In case you've chosen to use [On-Demand CSR Signing](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing) +you can leave the ticket question blank. + +Instead, Icinga 2 tells you to approve the request later on the master node. + +``` +No ticket was specified. Please approve the certificate signing request manually +on the master (see 'icinga2 ca list' and 'icinga2 ca sign --help' for details). +``` + +You can optionally specify a different bind host and/or port. + +``` +Please specify the API bind host/port (optional): +Bind Host []: +Bind Port []: +``` + +The next step asks you to accept configuration (required for [config sync mode](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync)) +and commands (required for [command endpoint mode](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint)). + +``` +Accept config from parent node? [y/N]: y +Accept commands from parent node? [y/N]: y +``` + +Next you can optionally specify the local and parent zone names. This will be reflected +in the generated zone configuration file. + +Set the local zone name to something else, if you are installing a satellite or secondary master instance. + +``` +Local zone name [icinga2-agent1.localdomain]: +``` + +Set the parent zone name to something other than `master` if this agent connects to a satellite instance instead of the master. + +``` +Parent zone name [master]: +``` + +You can add more global zones in addition to `global-templates` and `director-global` if necessary. +Press `Enter` or choose `n`, if you don't want to add any additional. 
+ +``` +Reconfiguring Icinga... + +Default global zones: global-templates director-global +Do you want to specify additional global zones? [y/N]: N +``` + +Last but not least the wizard asks you whether you want to disable the inclusion of the local configuration +directory in `conf.d`, or not. Defaults to disabled, as agents either are checked via command endpoint, or +they receive configuration synced from the parent zone. + +``` +Do you want to disable the inclusion of the conf.d directory [Y/n]: Y +Disabling the inclusion of the conf.d directory... +``` + + +The wizard proceeds and you are good to go. + +``` +Done. + +Now restart your Icinga 2 daemon to finish the installation! +``` + +> **Note** +> +> If you have chosen not to connect to the parent node, you cannot start +> Icinga 2 yet. The wizard asked you to manually copy the master's public +> CA certificate file into `/var/lib/icinga2/certs/ca.crt`. +> +> You need to [manually sign the CSR on the master node](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing-master). + +Restart Icinga 2 as requested. + +``` +[root@icinga2-agent1.localdomain /]# systemctl restart icinga2 +``` + +Here is an overview of all parameters in detail: + + Parameter | Description + --------------------|-------------------- + Common name (CN) | **Required.** By convention this should be the host's FQDN. Defaults to the FQDN. + Master common name | **Required.** Use the common name you've specified for your master node before. + Establish connection to the parent node | **Optional.** Whether the node should attempt to connect to the parent node or not. Defaults to `y`. + Master/Satellite endpoint host | **Required if the agent needs to connect to the master/satellite.** The parent endpoint's IP address or FQDN. This information is included in the `Endpoint` object configuration in the `zones.conf` file. 
+ Master/Satellite endpoint port | **Optional if the agent needs to connect to the master/satellite.** The parent endpoint's listening port. This information is included in the `Endpoint` object configuration. + Add more master/satellite endpoints | **Optional.** If you have multiple master/satellite nodes configured, add them here. + Parent Certificate information | **Required.** Verify that the connecting host really is the requested master node. + Request ticket | **Optional.** Add the [ticket](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing) generated on the master. + API bind host | **Optional.** Allows to specify the address the ApiListener is bound to. For advanced usage only. + API bind port | **Optional.** Allows to specify the port the ApiListener is bound to. For advanced usage only (requires changing the default port 5665 everywhere). + Accept config | **Optional.** Whether this node accepts configuration sync from the master node (required for [config sync mode](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync)). For [security reasons](06-distributed-monitoring.md#distributed-monitoring-security) this defaults to `n`. + Accept commands | **Optional.** Whether this node accepts command execution messages from the master node (required for [command endpoint mode](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint)). For [security reasons](06-distributed-monitoring.md#distributed-monitoring-security) this defaults to `n`. + Local zone name | **Optional.** Allows to specify the name for the local zone. This comes in handy when this instance is a satellite, not an agent. Defaults to the FQDN. + Parent zone name | **Optional.** Allows to specify the name for the parent zone. This is important if the agent has a satellite instance as parent, not the master. Defaults to `master`. 
+ Global zones | **Optional.** Allows to specify more global zones in addition to `global-templates` and `director-global`. Defaults to `n`. + Disable conf.d | **Optional.** Allows to disable the inclusion of the `conf.d` directory which holds local example configuration. Clients should retrieve their configuration from the parent node, or act as command endpoint execution bridge. Defaults to `y`. + +The setup wizard will ensure that the following steps are taken: + +* Enable the `api` feature. +* Create a certificate signing request (CSR) for the local node. +* Request a signed certificate (optional with the provided ticket number) on the master node. +* Allow to verify the parent node's certificate. +* Store the signed agent/satellite certificate and ca.crt in `/var/lib/icinga2/certs`. +* Update the `zones.conf` file with the new zone hierarchy. +* Update `/etc/icinga2/features-enabled/api.conf` (`accept_config`, `accept_commands`) and `constants.conf`. +* Update `/etc/icinga2/icinga2.conf` and comment out `include_recursive "conf.d"`. + +You can verify that the certificate files are stored in the `/var/lib/icinga2/certs` directory. + +> **Note** +> +> If the agent is not directly connected to the certificate signing master, +> signing requests and responses might need some minutes to fully update the agent certificates. +> +> If you have chosen to use [On-Demand CSR Signing](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing) +> certificates need to be signed on the master first. Ticket-less setups require at least Icinga 2 v2.8+ on all involved instances. + +Now that you've successfully installed a Linux/Unix agent/satellite instance, please proceed to +the [configuration modes](06-distributed-monitoring.md#distributed-monitoring-configuration-modes). + + + + + +### Agent Setup on Windows + +The supported Windows agent versions are listed [here](https://icinga.com/subscription/support-details/). 
+ +Requirements: + +* [Microsoft .NET Framework 4.6](https://www.microsoft.com/en-US/download/details.aspx?id=53344) or higher. This is the default on Windows Server 2016 or later. +* [Universal C Runtime for Windows](https://support.microsoft.com/en-us/help/2999226/update-for-universal-c-runtime-in-windows) for Windows Server 2012 and older. + +#### Agent Setup on Windows: Installer + +Download the MSI-Installer package from [https://packages.icinga.com/windows/](https://packages.icinga.com/windows/). +The preferred flavor is `x86_64` for modern Windows systems. + +The Windows package provides native [monitoring plugin binaries](06-distributed-monitoring.md#distributed-monitoring-windows-plugins) +to get you started more easily. +The installer package also includes the [NSClient++](https://www.nsclient.org/) package +to allow using its built-in plugins. You can find more details in +[this chapter](06-distributed-monitoring.md#distributed-monitoring-windows-nscp). + +> **Note** +> +> Please note that Icinga 2 was designed to run as light-weight agent on Windows. +> There is no support for satellite instances. + +Run the MSI-Installer package and follow the instructions shown in the screenshots. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_installer_01.png) +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_installer_02.png) +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_installer_03.png) +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_installer_04.png) +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_installer_05.png) + +The graphical installer offers to run the [Icinga Agent setup wizard](06-distributed-monitoring.md#distributed-monitoring-setup-agent-windows-configuration-wizard) +after the installation. Select the check box to proceed. 
+ +> **Tip** +> +> You can also run the Icinga agent setup wizard from the Start menu later. + +#### Agent Setup on Windows: Configuration Wizard + +On a fresh installation the setup wizard guides you through the initial configuration. +It also provides a mechanism to send a certificate request to the [CSR signing master](06-distributed-monitoring.md#distributed-monitoring-setup-sign-certificates-master). + +The following configuration details are required: + + Parameter | Description + --------------------|-------------------- + Instance name | **Required.** By convention this should be the host's FQDN. Defaults to the FQDN. + Setup ticket | **Optional.** Paste the previously generated [ticket number](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing). If left blank, the certificate request must be [signed on the master node](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing). + +Fill in the required information and click `Add` to add a new master connection. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_01.png) + +Add the following details: + + Parameter | Description + -------------------------------|------------------------------- + Instance name | **Required.** The master/satellite endpoint name where this agent is a direct child of. + Master/Satellite endpoint host | **Required.** The master or satellite's IP address or FQDN. This information is included in the `Endpoint` object configuration in the `zones.conf` file. + Master/Satellite endpoint port | **Optional.** The master or satellite's listening port. This information is included in the `Endpoint` object configuration. 
+ +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_02.png) + +When needed you can add an additional global zone (the zones `global-templates` and `director-global` are added by default): + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_02_global_zone.png) + +Optionally enable the following settings: + + Parameter | Description + --------------------------------------------------------|---------------------------------- + Accept commands from master/satellite instance(s) | **Optional.** Whether this node accepts command execution messages from the master node (required for [command endpoint mode](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint)). For [security reasons](06-distributed-monitoring.md#distributed-monitoring-security) this is disabled by default. + Accept config updates from master/satellite instance(s) | **Optional.** Whether this node accepts configuration sync from the master node (required for [config sync mode](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync)). For [security reasons](06-distributed-monitoring.md#distributed-monitoring-security) this is disabled by default. + Run Icinga 2 service as this user | **Optional.** Specify a different Windows user. This defaults to `NT AUTHORITY\Network Service` and is required for more privileged service checks. + Install/Update bundled NSClient++ | **Optional.** The Windows installer bundles the NSClient++ installer for additional [plugin checks](06-distributed-monitoring.md#distributed-monitoring-windows-nscp). + Disable including local 'conf.d' directory | **Optional.** Allows to disable the `include_recursive "conf.d"` directive except for the `api-users.conf` file in the `icinga2.conf` file. Defaults to `true`. 
+ +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_03.png) + +Verify the certificate from the master/satellite instance where this node should connect to. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_04.png) + + +#### Bundled NSClient++ Setup + +If you have chosen to install/update the NSClient++ package, the Icinga 2 setup wizard asks +you to do so. + +![Icinga 2 Windows Setup NSClient++](images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_01.png) + +Choose the `Generic` setup. + +![Icinga 2 Windows Setup NSClient++](images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_02.png) + +Choose the `Custom` setup type. + +![Icinga 2 Windows Setup NSClient++](images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_03.png) + +NSClient++ does not install a sample configuration by default. Change this as shown in the screenshot. + +![Icinga 2 Windows Setup NSClient++](images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_04.png) + +Generate a secure password and enable the web server module. **Note**: The webserver module is +available starting with NSClient++ 0.5.0. Icinga 2 v2.6+ is required which includes this version. + +![Icinga 2 Windows Setup NSClient++](images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_05.png) + +Finish the installation. + +![Icinga 2 Windows Setup NSClient++](images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_06.png) + +Open a web browser and navigate to `https://localhost:8443`. Enter the password you've configured +during the setup. In case you lost it, look into the `C:\Program Files\NSClient++\nsclient.ini` +configuration file. + +![Icinga 2 Windows Setup NSClient++](images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_07.png) + +The NSClient++ REST API can be used to query metrics. 
[check_nscp_api](06-distributed-monitoring.md#distributed-monitoring-windows-nscp-check-api) +uses this transport method. + + +#### Finish Windows Agent Setup + +Finish the Windows setup wizard. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_with_ticket.png) + +If you did not provide a setup ticket, you need to sign the certificate request on the master. +The setup wizard tells you to do so. The Icinga 2 service is running at this point already +and will automatically receive and update a signed client certificate. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_no_ticket.png) + +Icinga 2 is automatically started as a Windows service. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_running_service.png) + +The Icinga 2 configuration is stored inside the `C:\ProgramData\icinga2` directory. +Click `Examine Config` in the setup wizard to open a new Explorer window. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_examine_config.png) + +The configuration files can be modified with your favorite editor e.g. Notepad++ or vim in Powershell (via chocolatey). + +In order to use the [top down](06-distributed-monitoring.md#distributed-monitoring-top-down) agent +configuration prepare the following steps. + +You don't need any local configuration on the agent except for +CheckCommand definitions which can be synced using the global zone +above. Therefore disable the inclusion of the `conf.d` directory +in the `icinga2.conf` file. + +Navigate to `C:\ProgramData\icinga2\etc\icinga2` and open +the `icinga2.conf` file in your preferred editor. Remove or comment (`//`) +the following line: + +``` +// Commented out, not required on an agent with top down mode +//include_recursive "conf.d" +``` + +> **Note** +> +> Packages >= 2.9 provide an option in the setup wizard to disable this. +> Defaults to disabled. 
+ +To validate the configuration on Windows, open an administrative Powershell +and run the following command: + +``` +C:\> cd C:\Program Files\ICINGA2\sbin + +C:\Program Files\ICINGA2\sbin> .\icinga2.exe daemon -C +``` + +**Note**: You have to run this command in a shell with `administrator` privileges. + +Now you need to restart the Icinga 2 service. Run `services.msc` from the start menu and restart the `icinga2` service. +Alternatively open an administrative Powershell and run the following commands: + +``` +C:\> Restart-Service icinga2 + +C:\> Get-Service icinga2 +``` + + +Now that you've successfully installed a Windows agent, please proceed to +the [detailed configuration modes](06-distributed-monitoring.md#distributed-monitoring-configuration-modes). + + +## Configuration Modes + +There are different ways to ensure that the Icinga 2 cluster nodes execute +checks, send notifications, etc. + +The preferred method is to configure monitoring objects on the master +and distribute the configuration to satellites and agents. + +The following chapters explain this in detail with hands-on manual configuration +examples. You should test and implement this once to fully understand how it works. + +Once you are familiar with Icinga 2 and distributed monitoring, you +can start with additional integrations to manage and deploy your +configuration: + +* [Icinga Director](https://icinga.com/docs/director/latest/) provides a web interface to manage configuration and also allows to sync imported resources (CMDB, PuppetDB, etc.) +* [Ansible Roles](https://icinga.com/products/integrations/) +* [Puppet Module](https://icinga.com/products/integrations/puppet/) +* [Chef Cookbook](https://icinga.com/products/integrations/chef/) + +More details can be found [here](13-addons.md#configuration-tools). + +### Top Down + +There are two different behaviors with check execution: + +* Send a command execution event remotely: The scheduler still runs on the parent node. 
+* Sync the host/service objects directly to the child node: Checks are executed locally. + +Again, technically it does not matter whether this is an `agent` or a `satellite` +which is receiving configuration or command execution events. + +### Top Down Command Endpoint + +This mode forces the Icinga 2 node to execute commands remotely on a specified endpoint. +The host/service object configuration is located on the master/satellite and the agent only +needs the CheckCommand object definitions available. + +Every endpoint has its own remote check queue. The amount of checks executed simultaneously +can be limited on the endpoint with the `MaxConcurrentChecks` constant defined in [constants.conf](04-configuration.md#constants-conf). Icinga 2 may discard check requests, +if the remote check queue is full. + +![Icinga 2 Distributed Top Down Command Endpoint](images/distributed-monitoring/icinga2_distributed_monitoring_agent_checks_command_endpoint.png) + +Advantages: + +* No local checks need to be defined on the child node (agent). +* Light-weight remote check execution (asynchronous events). +* No [replay log](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-command-endpoint-log-duration) is necessary for the child node. +* Pin checks to specific endpoints (if the child zone consists of 2 endpoints). + +Disadvantages: + +* If the child node is not connected, no more checks are executed. +* Requires additional configuration attribute specified in host/service objects. +* Requires local `CheckCommand` object configuration. Best practice is to use a [global config zone](06-distributed-monitoring.md#distributed-monitoring-global-zone-config-sync). + +To make sure that all nodes involved will accept configuration and/or +commands, you need to configure the `Zone` and `Endpoint` hierarchy +on all nodes. + +* `icinga2-master1.localdomain` is the configuration master in this scenario. 
+* `icinga2-agent1.localdomain` acts as agent which receives command execution messages via command endpoint from the master. In addition, it receives the global check command configuration from the master. + +Include the endpoint and zone configuration on **both** nodes in the file `/etc/icinga2/zones.conf`. + +The endpoint configuration could look like this, for example: + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + host = "192.168.56.101" +} + +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.56.111" + log_duration = 0 // Disable the replay log for command endpoint agents +} +``` + +Next, you need to define two zones. There is no naming convention, best practice is to either use `master`, `satellite`/`agent-fqdn` or to choose region names for example `Europe`, `USA` and `Asia`, though. + +**Note**: Each agent requires its own zone and endpoint configuration. Best practice +is to use the agent's FQDN for all object names. + +The `master` zone is a parent of the `icinga2-agent1.localdomain` zone: + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/zones.conf + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain" ] //array with endpoint names +} + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + + parent = "master" //establish zone hierarchy +} +``` + +You don't need any local configuration on the agent except for +CheckCommand definitions which can be synced using the global zone +above. Therefore disable the inclusion of the `conf.d` directory +in `/etc/icinga2/icinga2.conf`. + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/icinga2.conf + +// Commented out, not required on an agent as command endpoint +//include_recursive "conf.d" +``` + +> **Note** +> +> Packages >= 2.9 provide an option in the setup wizard to disable this. +> Defaults to disabled. 
+ +Now it is time to validate the configuration and to restart the Icinga 2 daemon +on both nodes. + +Example on CentOS 7: + +``` +[root@icinga2-agent1.localdomain /]# icinga2 daemon -C +[root@icinga2-agent1.localdomain /]# systemctl restart icinga2 + +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +Once the agents have successfully connected, you are ready for the next step: **execute +a remote check on the agent using the command endpoint**. + +Include the host and service object configuration in the `master` zone +-- this will help adding a secondary master for high-availability later. + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/master +``` + +Add the host and service objects you want to monitor. There is +no limitation for files and directories -- best practice is to +sort things by type. + +By convention a master/satellite/agent host object should use the same name as the endpoint object. +You can also add multiple hosts which execute checks against remote services/agents. + +The following example adds the `agent_endpoint` custom variable to the +host and stores its name (FQDN). _Versions older than 2.11 +used the `client_endpoint` custom variable._ + +This custom variable serves two purposes: 1) Service apply rules can match against it. +2) Apply rules can retrieve its value and assign it to the `command_endpoint` attribute. + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim hosts.conf + +object Host "icinga2-agent1.localdomain" { + check_command = "hostalive" //check is executed on the master + address = "192.168.56.111" + + vars.agent_endpoint = name //follows the convention that host name == endpoint name +} +``` + +Given that you are monitoring a Linux agent, add a remote [disk](10-icinga-template-library.md#plugin-check-command-disk) +check. 
+ +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim services.conf + +apply Service "disk" { + check_command = "disk" + + // Specify the remote agent as command execution endpoint, fetch the host custom variable + command_endpoint = host.vars.agent_endpoint + + // Only assign where a host is marked as agent endpoint + assign where host.vars.agent_endpoint +} +``` + +If you have your own custom `CheckCommand` definition, add it to the global zone: + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/global-templates +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/global-templates/commands.conf + +object CheckCommand "my-cmd" { + //... +} +``` + +Save the changes and validate the configuration on the master node: + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +``` +Restart the Icinga 2 daemon (example for CentOS 7): + +``` +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +The following steps will happen: + +* Icinga 2 validates the configuration on `icinga2-master1.localdomain` and restarts. +* The `icinga2-master1.localdomain` node schedules and executes the checks. +* The `icinga2-agent1.localdomain` node receives the execute command event with additional command parameters. +* The `icinga2-agent1.localdomain` node maps the command parameters to the local check command, executes the check locally, and sends back the check result message. + +As you can see, no interaction from your side is required on the agent itself, and it's not necessary to reload the Icinga 2 service on the agent. + +You have learned the basics about command endpoint checks. Proceed with +the [scenarios](06-distributed-monitoring.md#distributed-monitoring-scenarios) +section where you can find detailed information on extending the setup. + + +### Top Down Config Sync + +This mode syncs the object configuration files within specified zones. 
+It comes in handy if you want to configure everything on the master node +and sync the satellite checks (disk, memory, etc.). The satellites run their +own local scheduler and will send the check result messages back to the master. + +![Icinga 2 Distributed Top Down Config Sync](images/distributed-monitoring/icinga2_distributed_monitoring_satellite_config_sync.png) + +Advantages: + +* Sync the configuration files from the parent zone to the child zones. +* No manual restart is required on the child nodes, as syncing, validation, and restarts happen automatically. +* Execute checks directly on the child node's scheduler. +* Replay log if the connection drops (important for keeping the check history in sync, e.g. for SLA reports). +* Use a global zone for syncing templates, groups, etc. + +Disadvantages: + +* Requires a config directory on the master node with the zone name underneath `/etc/icinga2/zones.d`. +* Additional zone and endpoint configuration needed. +* Replay log is replicated on reconnect after connection loss. This might increase the data transfer and create an overload on the connection. + +> **Note** +> +> This mode only supports **configuration text files** for Icinga. Do not abuse +> this for syncing binaries, this is not supported and may harm your production +> environment. The config sync uses checksums to detect changes, binaries may +> trigger reload loops. +> +> This is a fair warning. If you want to deploy plugin binaries, create +> packages for dependency management and use infrastructure lifecycle tools +> such as Foreman, Puppet, Ansible, etc. + +To make sure that all involved nodes accept configuration and/or +commands, you need to configure the `Zone` and `Endpoint` hierarchy +on all nodes. + +* `icinga2-master1.localdomain` is the configuration master in this scenario. +* `icinga2-satellite1.localdomain` acts as satellite which receives configuration from the master. Checks are scheduled locally. 
+ +Include the endpoint and zone configuration on **both** nodes in the file `/etc/icinga2/zones.conf`. + +The endpoint configuration could look like this: + +``` +[root@icinga2-satellite1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + host = "192.168.56.101" +} + +object Endpoint "icinga2-satellite1.localdomain" { + host = "192.168.56.105" +} +``` + +Next, you need to define two zones. There is no naming convention; best practice is to either use `master`, `satellite`/`agent-fqdn`, or to choose region names, for example `Europe`, `USA` and `Asia`. + +The `master` zone is a parent of the `satellite` zone: + +``` +[root@icinga2-satellite1.localdomain /]# vim /etc/icinga2/zones.conf + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain" ] //array with endpoint names +} + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain" ] + + parent = "master" //establish zone hierarchy +} +``` + +Edit the `api` feature on the satellite `icinga2-satellite1.localdomain` in +the `/etc/icinga2/features-enabled/api.conf` file and set +`accept_config` to `true`. + +``` +[root@icinga2-satellite1.localdomain /]# vim /etc/icinga2/features-enabled/api.conf + +object ApiListener "api" { + //... + accept_config = true +} +``` + +Now it is time to validate the configuration and to restart the Icinga 2 daemon +on both nodes. + +Example on CentOS 7: + +``` +[root@icinga2-satellite1.localdomain /]# icinga2 daemon -C +[root@icinga2-satellite1.localdomain /]# systemctl restart icinga2 + +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +**Tip**: Best practice is to use a [global zone](06-distributed-monitoring.md#distributed-monitoring-global-zone-config-sync) +for common configuration items (check commands, templates, groups, etc.).
+ +Once the satellite(s) have connected successfully, it's time for the next step: **execute +a local check on the satellite using the configuration sync**. + +Navigate to `/etc/icinga2/zones.d` on your master node +`icinga2-master1.localdomain` and create a new directory with the same +name as your satellite/agent zone name: + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/satellite +``` + +Add the host and service objects you want to monitor. There is +no limitation for files and directories -- best practice is to +sort things by type. + +By convention a master/satellite/agent host object should use the same name as the endpoint object. +You can also add multiple hosts which execute checks against remote services/agents via [command endpoint](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) +checks. + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/satellite +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim hosts.conf + +object Host "icinga2-satellite1.localdomain" { + check_command = "hostalive" + address = "192.168.56.105" + zone = "master" //optional trick: sync the required host object to the satellite, but enforce the "master" zone to execute the check +} +``` + +Given that you are monitoring a Linux satellite, add a local [disk](10-icinga-template-library.md#plugin-check-command-disk) +check.
+ +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim services.conf + +object Service "disk" { + host_name = "icinga2-satellite1.localdomain" + + check_command = "disk" +} +``` + +Save the changes and validate the configuration on the master node: + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +``` + +Restart the Icinga 2 daemon (example for CentOS 7): + +``` +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +The following steps will happen: + +* Icinga 2 validates the configuration on `icinga2-master1.localdomain`. +* Icinga 2 copies the configuration into its zone config store in `/var/lib/icinga2/api/zones`. +* The `icinga2-master1.localdomain` node sends a config update event to all endpoints in the same or direct child zones. +* The `icinga2-satellite1.localdomain` node accepts config and populates the local zone config store with the received config files. +* The `icinga2-satellite1.localdomain` node validates the configuration and automatically restarts. + +Again, there is no interaction required on the satellite itself. + +You can also use the config sync inside a high-availability zone to +ensure that all config objects are synced among zone members. + +**Note**: You can only have one so-called "config master" in a zone which stores +the configuration in the `zones.d` directory. +Multiple nodes with configuration files in the `zones.d` directory are +**not supported**. + +Now that you've learned the basics about the configuration sync, proceed with +the [scenarios](06-distributed-monitoring.md#distributed-monitoring-scenarios) +section where you can find detailed information on extending the setup. + + + +If you are eager to start fresh instead you might take a look into the +[Icinga Director](https://icinga.com/docs/director/latest/). + +## Scenarios + +The following examples should give you an idea on how to build your own +distributed monitoring environment. 
We've seen them all in production +environments and received feedback from our [community](https://community.icinga.com/) +and [partner support](https://icinga.com/support/) channels: + +* [Single master with agents](06-distributed-monitoring.md#distributed-monitoring-master-agents). +* [HA master with agents as command endpoint](06-distributed-monitoring.md#distributed-monitoring-scenarios-ha-master-agents) +* [Three level cluster](06-distributed-monitoring.md#distributed-monitoring-scenarios-master-satellite-agents) with config HA masters, satellites receiving config sync, and agents checked using command endpoint. + +You can also extend the cluster tree depth to four levels e.g. with 2 satellite levels. +Just keep in mind that multiple levels become harder to debug in case of errors. + +You can also start with a single master setup, and later add a secondary +master endpoint. This requires an extra step with the [initial sync](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-initial-sync) +for cloning the runtime state. This is described in detail [here](06-distributed-monitoring.md#distributed-monitoring-scenarios-ha-master-agents). + + + + +### Master with Agents + +In this scenario, a single master node runs the check scheduler, notifications +and IDO database backend and uses the [command endpoint mode](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) +to execute checks on the remote agents. + +![Icinga 2 Distributed Master with Agents](images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_with_agents.png) + +* `icinga2-master1.localdomain` is the primary master node. +* `icinga2-agent1.localdomain` and `icinga2-agent2.localdomain` are two child nodes as agents. + +Setup requirements: + +* Set up `icinga2-master1.localdomain` as [master](06-distributed-monitoring.md#distributed-monitoring-setup-master). 
+* Set up `icinga2-agent1.localdomain` and `icinga2-agent2.localdomain` as [agent](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite). + +Edit the `zones.conf` configuration file on the master: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // That's us +} + +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.56.111" // The master actively tries to connect to the agent + log_duration = 0 // Disable the replay log for command endpoint agents +} + +object Endpoint "icinga2-agent2.localdomain" { + host = "192.168.56.112" // The master actively tries to connect to the agent + log_duration = 0 // Disable the replay log for command endpoint agents +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain" ] +} + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + + parent = "master" +} + +object Zone "icinga2-agent2.localdomain" { + endpoints = [ "icinga2-agent2.localdomain" ] + + parent = "master" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} +object Zone "director-global" { + global = true +} +``` + +The two agent nodes do not need to know about each other. The only important thing +is that they know about the parent zone and their endpoint members (and optionally the global zone). + +If you specify the `host` attribute in the `icinga2-master1.localdomain` endpoint object, +the agent will actively try to connect to the master node. Since you've specified the agent +endpoint's attribute on the master node already, you don't want the agents to connect to the +master. 
**Choose one [connection direction](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-connection-direction).** + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute +} + +object Endpoint "icinga2-agent1.localdomain" { + // That's us +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain" ] +} + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + + parent = "master" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} +object Zone "director-global" { + global = true +} +``` +``` +[root@icinga2-agent2.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute +} + +object Endpoint "icinga2-agent2.localdomain" { + // That's us +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain" ] +} + +object Zone "icinga2-agent2.localdomain" { + endpoints = [ "icinga2-agent2.localdomain" ] + + parent = "master" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} +object Zone "director-global" { + global = true +} +``` + +Now it is time to define the two agent hosts and apply service checks using +the command endpoint execution method on them. Note: You can also use the +config sync mode here. 
+ +Create a new configuration directory on the master node: + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/master +``` + +Add the two agent nodes as host objects: + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim hosts.conf + +object Host "icinga2-agent1.localdomain" { + check_command = "hostalive" + address = "192.168.56.111" + + vars.agent_endpoint = name //follows the convention that host name == endpoint name +} + +object Host "icinga2-agent2.localdomain" { + check_command = "hostalive" + address = "192.168.56.112" + + vars.agent_endpoint = name //follows the convention that host name == endpoint name +} +``` + +Add services using command endpoint checks: + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim services.conf + +apply Service "ping4" { + check_command = "ping4" + + //check is executed on the master node + assign where host.address +} + +apply Service "disk" { + check_command = "disk" + + // Execute the check on the remote command endpoint + command_endpoint = host.vars.agent_endpoint + + // Assign the service onto an agent + assign where host.vars.agent_endpoint +} +``` + +Validate the configuration and restart Icinga 2 on the master node `icinga2-master1.localdomain`. + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +Open Icinga Web 2 and check the two newly created agent hosts with two new services +-- one executed locally (`ping4`) and one using command endpoint (`disk`). + +> **Note** +> +> You don't necessarily need to add the agent endpoint/zone configuration objects +> into the master's zones.conf file. Instead, you can put them into `/etc/icinga2/zones.d/master` +> either in `hosts.conf` shown above, or in a new file called `agents.conf`. 
+ +> **Tip**: +> +> It's a good idea to add [health checks](06-distributed-monitoring.md#distributed-monitoring-health-checks) +to make sure that your cluster notifies you in case of failure. + +In terms of health checks, consider adding the following for this scenario: + +- Master node(s) check the connection to the agents +- Optional: Add dependencies for the agent host to prevent unwanted notifications when agents are unreachable + +Proceed in [this chapter](06-distributed-monitoring.md#distributed-monitoring-health-checks-master-agents). + + + + +### High-Availability Master with Agents + +This scenario is similar to the one in the [previous section](06-distributed-monitoring.md#distributed-monitoring-master-agents). The only difference is that we will now set up two master nodes in a high-availability setup. +These nodes must be configured as zone and endpoints objects. + +![Icinga 2 Distributed High Availability Master with Agents](images/distributed-monitoring/icinga2_distributed_monitoring_scenario_ha_masters_with_agents.png) + +The setup uses the capabilities of the Icinga 2 cluster. All zone members +replicate cluster events between each other. In addition to that, several Icinga 2 +features can enable [HA functionality](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). + +Best practice is to run the database backend on a dedicated server/cluster and +only expose a virtual IP address to Icinga and the IDO feature. By default, only one +endpoint will actively write to the backend then. Typical setups for MySQL clusters +involve Master-Master-Replication (Master-Slave-Replication in both directions) or Galera, +more tips can be found on our [community forums](https://community.icinga.com/). +The IDO object must have the same `instance_name` on all master nodes. + +**Note**: All nodes in the same zone require that you enable the same features for high-availability (HA). 
+ +Overview: + +* `icinga2-master1.localdomain` is the config master node. +* `icinga2-master2.localdomain` is the secondary master node without config in `zones.d`. +* `icinga2-agent1.localdomain` and `icinga2-agent2.localdomain` are two child nodes as agents. + +Setup requirements: + +* Set up `icinga2-master1.localdomain` as [master](06-distributed-monitoring.md#distributed-monitoring-setup-master). +* Set up `icinga2-master2.localdomain` as [satellite](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite) (**we will modify the generated configuration**). +* Set up `icinga2-agent1.localdomain` and `icinga2-agent2.localdomain` as [agents](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite) (when asked for adding multiple masters, set to `y` and add the secondary master `icinga2-master2.localdomain`). + +In case you don't want to use the CLI commands, you can also manually create and sync the +required TLS certificates. We will modify and discuss all the details of the automatically generated configuration here. + +Since there are now two nodes in the same zone, we must consider the +[high-availability features](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). + +* Checks and notifications are balanced between the two master nodes. That's fine, but it requires check plugins and notification scripts to exist on both nodes. +* The IDO feature will only be active on one node by default. Since all events are replicated between both nodes, it is easier to just have one central database. + +One possibility is to use a dedicated MySQL cluster VIP (external application cluster) +and leave the IDO feature with enabled HA capabilities. Alternatively, +you can disable the HA feature and write to a local database on each node. +Both methods require that you configure Icinga Web 2 accordingly (monitoring +backend, IDO database, used transports, etc.).
+ +> **Note** +> +> You can also start with a single master shown [here](06-distributed-monitoring.md#distributed-monitoring-master-agents) and later add +> the second master. This requires an extra step with the [initial sync](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-initial-sync) +> for cloning the runtime state. Once that is done, proceed here. + +In this scenario, we are not adding the agent configuration immediately +to the `zones.conf` file but will establish the hierarchy later. + +The first master looks like this: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // That's us +} + +object Endpoint "icinga2-master2.localdomain" { + host = "192.168.56.102" // Actively connect to the secondary master +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} +object Zone "director-global" { + global = true +} +``` + +The secondary master waits for connection attempts from the first master, +and therefore does not try to connect to it again. + +``` +[root@icinga2-master2.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // The first master already connects to us +} + +object Endpoint "icinga2-master2.localdomain" { + // That's us +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} +object Zone "director-global" { + global = true +} +``` + +Restart both masters and ensure the initial connection and TLS handshake works. + +The two agent nodes do not need to know about each other. The only important thing +is that they know about the parent zone and their endpoint members (and optionally about the global zone).
+ +If you specify the `host` attribute in the `icinga2-master1.localdomain` and `icinga2-master2.localdomain` +endpoint objects, the agent will actively try to connect to the master node. Since we've specified the agent +endpoint's attribute on the master node already, we don't want the agent to connect to the +master nodes. **Choose one [connection direction](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-connection-direction).** + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute +} + +object Endpoint "icinga2-master2.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute +} + +object Endpoint "icinga2-agent1.localdomain" { + // That's us +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + + parent = "master" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} +object Zone "director-global" { + global = true +} + +``` + +``` +[root@icinga2-agent2.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute +} + +object Endpoint "icinga2-master2.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute +} + +object Endpoint "icinga2-agent2.localdomain" { + //That's us +} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} + +object Zone "icinga2-agent2.localdomain" { + endpoints = [ "icinga2-agent2.localdomain" ] + + parent = "master" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} +object Zone "director-global" { + global = true +} 
+``` + +Now it is time to define the two agent hosts and apply service checks using +the command endpoint execution method. + +Create a new configuration directory on the master node `icinga2-master1.localdomain`. +**Note**: The secondary master node `icinga2-master2.localdomain` receives the +configuration using the [config sync mode](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync). + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/master +``` + +Add the two agent nodes with their zone/endpoint and host object configuration. + +> **Note** +> +> In order to keep things in sync between the two HA masters, +> keep the `zones.conf` file as small as possible. +> +> You can create the agent zone and endpoint objects inside the +> master zone and have them synced to the secondary master. +> The cluster config sync enforces a reload allowing the secondary +> master to connect to the agents as well. + +Edit the `zones.conf` file and ensure that the agent zone/endpoint objects +are **not** specified in there. + +Then navigate into `/etc/icinga2/zones.d/master` and create a new file `agents.conf`. 
+ +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim agents.conf + +//----------------------------------------------- +// Endpoints + +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.56.111" // The master actively tries to connect to the agent + log_duration = 0 // Disable the replay log for command endpoint agents +} + +object Endpoint "icinga2-agent2.localdomain" { + host = "192.168.56.112" // The master actively tries to connect to the agent + log_duration = 0 // Disable the replay log for command endpoint agents +} + +//----------------------------------------------- +// Zones + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + + parent = "master" +} + +object Zone "icinga2-agent2.localdomain" { + endpoints = [ "icinga2-agent2.localdomain" ] + + parent = "master" +} +``` + +Whenever you need to add an agent again, edit the mentioned files. + +Next, create the corresponding host objects for the agents. Use the same names +for host and endpoint objects. 
+ +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim hosts.conf + +object Host "icinga2-agent1.localdomain" { + check_command = "hostalive" + address = "192.168.56.111" + vars.agent_endpoint = name //follows the convention that host name == endpoint name +} + +object Host "icinga2-agent2.localdomain" { + check_command = "hostalive" + address = "192.168.56.112" + vars.agent_endpoint = name //follows the convention that host name == endpoint name +} +``` + +Add services using command endpoint checks: + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim services.conf + +apply Service "ping4" { + check_command = "ping4" + + // Check is executed on the master node + assign where host.address +} + +apply Service "disk" { + check_command = "disk" + + // Check is executed on the remote command endpoint + command_endpoint = host.vars.agent_endpoint + + assign where host.vars.agent_endpoint +} +``` + +Validate the configuration and restart Icinga 2 on the master node `icinga2-master1.localdomain`. + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +Open Icinga Web 2 and check the two newly created agent hosts with two new services +-- one executed locally (`ping4`) and one using command endpoint (`disk`). + +> **Tip**: +> +> It's a good idea to add [health checks](06-distributed-monitoring.md#distributed-monitoring-health-checks) +to make sure that your cluster notifies you in case of failure. + +In terms of health checks, consider adding the following for this scenario: + +- Master node(s) check the connection to the agents +- Optional: Add dependencies for the agent host to prevent unwanted notifications when agents are unreachable + +Proceed in [this chapter](06-distributed-monitoring.md#distributed-monitoring-health-checks-master-agents). 
+ + + + +### Three Levels with Masters, Satellites and Agents + +This scenario combines everything you've learned so far: High-availability masters, +satellites receiving their configuration from the master zone, and agents checked via command +endpoint from the satellite zones. + +![Icinga 2 Distributed Master and Satellites with Agents](images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_satellites_agents.png) + +> **Tip**: +> +> It can get complicated, so grab a pen and paper and bring your thoughts to life. +> Play around with a test setup before using it in a production environment! + +There are various reasons why you might want to have satellites in your environment. The following list explains the more common ones. + +* Monitor remote locations. Besides reducing connections and traffic between different locations this setup also helps when the network connection to the remote network is lost. Satellites will keep checking and collecting data on their own and will send their check results when the connection is restored. +* Reduce connections between security zones. Satellites in a different zone (e.g. DMZ) than your masters will help reduce connections through firewalls. +* Offload resource hungry checks to other hosts. In very big setups running lots of plugins on your masters or satellites might have a significant impact on the performance during times of high load. You can introduce another level of satellites just to run these plugins and send their results to the upstream hosts. + +Best practice is to run the database backend on a dedicated server/cluster and +only expose a virtual IP address to Icinga and the IDO feature. By default, only one +endpoint will actively write to the backend then. Typical setups for MySQL clusters +involve Master-Master-Replication (Master-Slave-Replication in both directions) or Galera, +more tips can be found on our [community forums](https://community.icinga.com/). 
+ +Overview: + +* `icinga2-master1.localdomain` is the configuration master node. +* `icinga2-master2.localdomain` is the secondary master node without configuration in `zones.d`. +* `icinga2-satellite1.localdomain` and `icinga2-satellite2.localdomain` are satellite nodes in a `master` child zone. They forward CSR signing requests to the master zone. +* `icinga2-agent1.localdomain` and `icinga2-agent2.localdomain` are two child nodes as agents. + +Setup requirements: + +* Set up `icinga2-master1.localdomain` as [master](06-distributed-monitoring.md#distributed-monitoring-setup-master). +* Set up `icinga2-master2.localdomain`, `icinga2-satellite1.localdomain` and `icinga2-satellite2.localdomain` as [agents](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite) (we will modify the generated configuration). +* Set up `icinga2-agent1.localdomain` and `icinga2-agent2.localdomain` as [agents](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite). + +When being asked for the parent endpoint providing CSR auto-signing capabilities, +please add one of the satellite nodes. **Note**: This requires Icinga 2 v2.8+ +and the `CA Proxy` on all master, satellite and agent nodes. + +Example for `icinga2-agent1.localdomain`: + +``` +Please specify the parent endpoint(s) (master or satellite) where this node should connect to: +``` + +Parent endpoint is the first satellite `icinga2-satellite1.localdomain`: + +``` +Master/Satellite Common Name (CN from your master/satellite node): icinga2-satellite1.localdomain +Do you want to establish a connection to the parent node from this node? [Y/n]: y + +Please specify the master/satellite connection information: +Master/Satellite endpoint host (IP address or FQDN): 192.168.56.105 +Master/Satellite endpoint port [5665]: 5665 +``` + +Add the second satellite `icinga2-satellite2.localdomain` as parent: + +``` +Add more master/satellite endpoints?
[y/N]: y + +Master/Satellite Common Name (CN from your master/satellite node): icinga2-satellite2.localdomain +Do you want to establish a connection to the parent node from this node? [Y/n]: y + +Please specify the master/satellite connection information: +Master/Satellite endpoint host (IP address or FQDN): 192.168.56.106 +Master/Satellite endpoint port [5665]: 5665 + +Add more master/satellite endpoints? [y/N]: n +``` + +The specified parent nodes will forward the CSR signing request to the master instances. + +Proceed with adding the optional client ticket for [CSR auto-signing](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing): + +``` +Please specify the request ticket generated on your Icinga 2 master (optional). + (Hint: # icinga2 pki ticket --cn 'icinga2-agent1.localdomain'): +4f75d2ecd253575fe9180938ebff7cbca262f96e +``` + +In case you've chosen to use [On-Demand CSR Signing](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing) +you can leave the ticket question blank. + +Instead, Icinga 2 tells you to approve the request later on the master node. + +``` +No ticket was specified. Please approve the certificate signing request manually +on the master (see 'icinga2 ca list' and 'icinga2 ca sign --help' for details). +``` + +You can optionally specify a different bind host and/or port. + +``` +Please specify the API bind host/port (optional): +Bind Host []: +Bind Port []: +``` + +The next step asks you to accept configuration (required for [config sync mode](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync)) +and commands (required for [command endpoint mode](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint)). + +``` +Accept config from parent node? [y/N]: y +Accept commands from parent node? [y/N]: y +``` + +Next you can optionally specify the local and parent zone names. This will be reflected +in the generated zone configuration file. 
+ +``` +Local zone name [icinga2-agent1.localdomain]: icinga2-agent1.localdomain +``` + +Set the parent zone name to `satellite` for this agent. + +``` +Parent zone name [master]: satellite +``` + +You can add more global zones in addition to `global-templates` and `director-global` if necessary. +Press `Enter` or choose `n`, if you don't want to add any additional. + +``` +Reconfiguring Icinga... + +Default global zones: global-templates director-global +Do you want to specify additional global zones? [y/N]: N +``` + +Last but not least the wizard asks you whether you want to disable the inclusion of the local configuration +directory in `conf.d`, or not. Defaults to disabled, since agents are checked via command endpoint and the example +configuration would collide with this mode. + +``` +Do you want to disable the inclusion of the conf.d directory [Y/n]: Y +Disabling the inclusion of the conf.d directory... +``` + + +**We'll discuss the details of the required configuration below. Most of this +configuration can be rendered by the setup wizards.** + +The zone hierarchy can look like this. We'll define only the directly connected zones here. + +The master instances should actively connect to the satellite instances, therefore +the configuration on `icinga2-master1.localdomain` and `icinga2-master2.localdomain` +must include the `host` attribute for the satellite endpoints: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // That's us +} + +object Endpoint "icinga2-master2.localdomain" { + host = "192.168.56.102" // Actively connect to the second master. +} + +object Endpoint "icinga2-satellite1.localdomain" { + host = "192.168.56.105" // Actively connect to the satellites. +} + +object Endpoint "icinga2-satellite2.localdomain" { + host = "192.168.56.106" // Actively connect to the satellites. 
+} + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} +``` + +The endpoint configuration on the secondary master looks similar, +but changes the connection attributes - the first master already +tries to connect, there is no need for a secondary attempt. + +``` +[root@icinga2-master2.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // First master already connects to us +} + +object Endpoint "icinga2-master2.localdomain" { + // That's us +} + +object Endpoint "icinga2-satellite1.localdomain" { + host = "192.168.56.105" // Actively connect to the satellites. +} + +object Endpoint "icinga2-satellite2.localdomain" { + host = "192.168.56.106" // Actively connect to the satellites. +} +``` + +The zone configuration on both masters looks the same. Add this +to the corresponding `zones.conf` entries for the endpoints. + +``` +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain", "icinga2-satellite2.localdomain" ] + + parent = "master" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} + +object Zone "director-global" { + global = true +} + +``` + +In contrast to that, the satellite instances `icinga2-satellite1.localdomain` +and `icinga2-satellite2.localdomain` should not actively connect to the master +instances. + +``` +[root@icinga2-satellite1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // This endpoint will connect to us +} + +object Endpoint "icinga2-master2.localdomain" { + // This endpoint will connect to us +} + +object Endpoint "icinga2-satellite1.localdomain" { + // That's us +} + +object Endpoint "icinga2-satellite2.localdomain" { + host = "192.168.56.106" // Actively connect to the secondary satellite +} +``` + +Again, only one side is required to establish the connection inside the HA zone. 
+Since satellite1 already connects to satellite2, leave out the `host` attribute +for `icinga2-satellite1.localdomain` on satellite2. + +``` +[root@icinga2-satellite2.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { + // This endpoint will connect to us +} + +object Endpoint "icinga2-master2.localdomain" { + // This endpoint will connect to us +} + +object Endpoint "icinga2-satellite1.localdomain" { + // First satellite already connects to us +} + +object Endpoint "icinga2-satellite2.localdomain" { + // That's us +} +``` + +The zone configuration on both satellites looks the same. Add this +to the corresponding `zones.conf` entries for the endpoints. + +``` +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain", "icinga2-satellite2.localdomain" ] + + parent = "master" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} + +object Zone "director-global" { + global = true +} +``` + +Keep in mind to control the endpoint [connection direction](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-connection-direction) +using the `host` attribute, also for other endpoints in the same zone. + +Since we want to use [top down command endpoint](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) checks, +we must configure the agent endpoint and zone objects. + +In order to minimize the effort, we'll sync the agent zone and endpoint configuration to the +satellites where the connection information is needed as well. Note: This only works with satellite +and agents, since there already is a trust relationship between the master and the satellite zone. +The cluster config sync to the satellite invokes an automated reload causing the agent connection attempts. 
+ +`icinga2-master1.localdomain` is the configuration master where everything is stored: + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/{master,satellite,global-templates} +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/satellite + +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim icinga2-agent1.localdomain.conf + +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.56.111" // The satellite actively tries to connect to the agent + log_duration = 0 // Disable the replay log for command endpoint agents +} + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + + parent = "satellite" +} + +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim icinga2-agent2.localdomain.conf + +object Endpoint "icinga2-agent2.localdomain" { + host = "192.168.56.112" // The satellite actively tries to connect to the agent + log_duration = 0 // Disable the replay log for command endpoint agents +} + +object Zone "icinga2-agent2.localdomain" { + endpoints = [ "icinga2-agent2.localdomain" ] + + parent = "satellite" +} +``` + +The two agent nodes do not need to know about each other. The only important thing +is that they know about the parent zone (the satellite) and their endpoint members (and optionally the global zone). + +> **Tip** +> +> In the example above we've specified the `host` attribute in the agent endpoint configuration. In this mode, +> the satellites actively connect to the agents. This costs some resources on the satellite -- if you prefer to +> offload the connection attempts to the agent, or your DMZ requires this, you can also change the **[connection direction](06-distributed-monitoring.md#distributed-monitoring-advanced-hints-connection-direction).** +> +> 1) Don't set the `host` attribute for the agent endpoints put into `zones.d/satellite`.
+> 2) Modify each agent's zones.conf file and add the `host` attribute to all parent satellites. You can automate this with using the `node wizard/setup` CLI commands. + +The agents are waiting for the satellites to connect, therefore they don't specify +the `host` attribute in the endpoint objects locally. + +Example for `icinga2-agent1.localdomain`: + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-satellite1.localdomain" { + // Do not actively connect to the satellite by leaving out the 'host' attribute +} + +object Endpoint "icinga2-satellite2.localdomain" { + // Do not actively connect to the satellite by leaving out the 'host' attribute +} + +object Endpoint "icinga2-agent1.localdomain" { + // That's us +} + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain", "icinga2-satellite2.localdomain" ] +} + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + + parent = "satellite" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} + +object Zone "director-global" { + global = true +} +``` + +Example for `icinga2-agent2.localdomain`: + +``` +[root@icinga2-agent2.localdomain /]# vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-satellite1.localdomain" { + // Do not actively connect to the satellite by leaving out the 'host' attribute +} + +object Endpoint "icinga2-satellite2.localdomain" { + // Do not actively connect to the satellite by leaving out the 'host' attribute +} + +object Endpoint "icinga2-agent2.localdomain" { + // That's us +} + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain", "icinga2-satellite2.localdomain" ] +} + +object Zone "icinga2-agent2.localdomain" { + endpoints = [ "icinga2-agent2.localdomain" ] + + parent = "satellite" +} + +/* sync global commands */ +object Zone "global-templates" { + global = true +} + +object Zone "director-global" { + global = true +} +``` + 
+Now it is time to define the two agents hosts on the master, sync them to the satellites +and apply service checks using the command endpoint execution method to them. +Add the two agent nodes as host objects to the `satellite` zone. + +We've already created the directories in `/etc/icinga2/zones.d` including the files for the +zone and endpoint configuration for the agents. + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/satellite +``` + +Add the host object configuration for the `icinga2-agent1.localdomain` agent. You should +have created the configuration file in the previous steps and it should contain the endpoint +and zone object configuration already. + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim icinga2-agent1.localdomain.conf + +object Host "icinga2-agent1.localdomain" { + check_command = "hostalive" + address = "192.168.56.111" + + vars.agent_endpoint = name // Follows the convention that host name == endpoint name +} +``` + +Add the host object configuration for the `icinga2-agent2.localdomain` agent configuration file: + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim icinga2-agent2.localdomain.conf + +object Host "icinga2-agent2.localdomain" { + check_command = "hostalive" + address = "192.168.56.112" + + vars.agent_endpoint = name // Follows the convention that host name == endpoint name +} +``` + +Add a service object which is executed on the satellite nodes (e.g. `ping4`). Pin the apply rule to the `satellite` zone only. + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim services.conf + +apply Service "ping4" { + check_command = "ping4" + + // Check is executed on the satellite node + assign where host.zone == "satellite" && host.address +} +``` + +Add services using command endpoint checks. Pin the apply rules to the `satellite` zone only. 
+ +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim services.conf + +apply Service "disk" { + check_command = "disk" + + // Execute the check on the remote command endpoint + command_endpoint = host.vars.agent_endpoint + + assign where host.zone == "satellite" && host.vars.agent_endpoint +} +``` + +Validate the configuration and restart Icinga 2 on the master node `icinga2-master1.localdomain`. + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +Open Icinga Web 2 and check the two newly created agent hosts with two new services +-- one executed locally (`ping4`) and one using command endpoint (`disk`). + +> **Tip**: +> +> It's a good idea to add [health checks](06-distributed-monitoring.md#distributed-monitoring-health-checks) +to make sure that your cluster notifies you in case of failure. + +In terms of health checks, consider adding the following for this scenario: + +- Master nodes check whether the satellite zone is connected +- Satellite nodes check the connection to the agents +- Optional: Add dependencies for the agent host to prevent unwanted notifications when agents are unreachable + +Proceed in [this chapter](06-distributed-monitoring.md#distributed-monitoring-health-checks-master-satellite-agent). + + +## Best Practice + +We've put together a collection of configuration examples from community feedback. +If you like to share your tips and tricks with us, please join the [community channels](https://icinga.com/community/)! + +### Global Zone for Config Sync + +Global zones can be used to sync generic configuration objects +to all nodes depending on them. Common examples are: + +* Templates which are imported into zone specific objects. +* Command objects referenced by Host, Service, Notification objects. +* Apply rules for services, notifications and dependencies. +* User objects referenced in notifications. +* Group objects. 
+* TimePeriod objects. + +Plugin scripts and binaries must not be synced; this is for Icinga 2 +configuration files only. Use your preferred package repository +and/or configuration management tool (Puppet, Ansible, Chef, etc.) +for keeping packages and scripts up to date. + +**Note**: Checkable objects (hosts and services) cannot be put into a global +zone. The configuration validation will terminate with an error. Apply rules +work as they are evaluated locally on each endpoint. + +The zone object configuration must be deployed on all nodes which should receive +the global configuration files: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.conf + +object Zone "global-commands" { + global = true +} +``` + +The default global zones generated by the setup wizards are called `global-templates` and `director-global`. + +While you can and should use `global-templates` for your global configuration, `director-global` is reserved for use +by [Icinga Director](https://icinga.com/docs/director/latest/). Please don't +place any configuration in it manually. + +Similar to the zone configuration sync you'll need to create a new directory in +`/etc/icinga2/zones.d`: + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/global-commands +``` + +Next, add a new check command, for example: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/global-commands/web.conf + +object CheckCommand "webinject" { + //... +} +``` + +Restart the endpoint(s) which should receive the global zone before +restarting the parent master/satellite nodes. + +Then validate the configuration on the master node and restart Icinga 2. + +**Tip**: You can copy the example configuration files located in `/etc/icinga2/conf.d` +into the default global zone `global-templates`.
+ +Example: + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/conf.d +[root@icinga2-master1.localdomain /etc/icinga2/conf.d]# cp {commands,groups,notifications,services,templates,timeperiods,users}.conf /etc/icinga2/zones.d/global-templates +``` + +### Health Checks + +In case of network failures or other problems, your monitoring might +either have late check results or just send out mass alarms for unknown +checks. + +In order to minimize the problems caused by this, you should configure +additional health checks. + +#### cluster-zone with Masters and Agents + +The `cluster-zone` check will test whether the configured target zone is currently +connected or not. This example adds a health check for the [ha master with agents scenario](06-distributed-monitoring.md#distributed-monitoring-scenarios-ha-master-agents). + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/services.conf + +apply Service "agent-health" { + check_command = "cluster-zone" + + display_name = "cluster-health-" + host.name + + /* This follows the convention that the agent zone name is the FQDN which is the same as the host object name. */ + vars.cluster_zone = host.name + + assign where host.vars.agent_endpoint +} +``` + +In order to prevent unwanted notifications, add a service dependency which gets applied to +all services using the command endpoint mode. 
+ +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/dependencies.conf + +apply Dependency "agent-health-check" to Service { + parent_service_name = "agent-health" + + states = [ OK ] // Fail if the parent service state switches to NOT-OK + disable_notifications = true + + assign where host.vars.agent_endpoint // Automatically assigns all agent endpoint checks as child services on the matched host + ignore where service.name == "agent-health" // Avoid a self reference from child to parent +} +``` + +#### cluster-zone with Masters, Satellites and Agents + +This example adds health checks for the [master, satellites and agents scenario](06-distributed-monitoring.md#distributed-monitoring-scenarios-master-satellite-agents). + +Whenever the connection between the master and satellite zone breaks, +you may encounter late check results in Icinga Web. In order to view +this failure and also send notifications, add the following configuration: + +First, add the two masters as host objects to the master zone, if not already +existing. + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/hosts.conf + +object Host "icinga2-master1.localdomain" { + check_command = "hostalive" + + address = "192.168.56.101" +} + +object Host "icinga2-master2.localdomain" { + check_command = "hostalive" + + address = "192.168.56.102" +} +``` + +Add service health checks against the satellite zone. + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/health.conf + +apply Service "satellite-zone-health" { + check_command = "cluster-zone" + check_interval = 30s + retry_interval = 10s + + vars.cluster_zone = "satellite" + + assign where match("icinga2-master*.localdomain", host.name) +} +``` + +**Don't forget to create notification apply rules for these services.** + +Next are health checks for agents connected to the satellite zone. 
+Navigate into the satellite directory in `zones.d`: + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/satellite +``` + +You should already have configured agent host objects following [the master, satellite, agents scenario](06-distributed-monitoring.md#distributed-monitoring-scenarios-master-satellite-agents). +Add a new configuration file where all the health checks are defined. + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim health.conf + +apply Service "agent-health" { + check_command = "cluster-zone" + + display_name = "agent-health-" + host.name + + // This follows the convention that the agent zone name is the FQDN which is the same as the host object name. + vars.cluster_zone = host.name + + // Create this health check for agent hosts in the satellite zone + assign where host.zone == "satellite" && host.vars.agent_endpoint +} +``` + +In order to prevent unwanted notifications, add a service dependency which gets applied to +all services using the command endpoint mode. + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/satellite]# vim health.conf + +apply Dependency "agent-health-check" to Service { + parent_service_name = "agent-health" + + states = [ OK ] // Fail if the parent service state switches to NOT-OK + disable_notifications = true + + assign where host.zone == "satellite" && host.vars.agent_endpoint // Automatically assigns all agent endpoint checks as child services on the matched host + ignore where service.name == "agent-health" // Avoid a self reference from child to parent +} +``` + +This is all done on the configuration master, and requires the scenario to be fully up and running. + +#### Cluster Check + +The `cluster` check will check if all endpoints in the current zone and the directly +connected zones are working properly. The disadvantage of using this check is that +you cannot monitor 3 or more cluster levels with it. 
+ +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/icinga2-master1.localdomain.conf + +object Host "icinga2-master1.localdomain" { + check_command = "hostalive" + address = "192.168.56.101" +} + +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/cluster.conf + +object Service "cluster" { + check_command = "cluster" + check_interval = 5s + retry_interval = 1s + + host_name = "icinga2-master1.localdomain" +} +``` + +### Pin Checks in a Zone + +In case you want to pin specific checks to their endpoints in a given zone you'll need to use +the `command_endpoint` attribute. This is reasonable if you want to +execute a local disk check in the `master` Zone on a specific endpoint then. + +``` +[root@icinga2-master1.localdomain /]# mkdir -p /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/icinga2-master1.localdomain.conf + +object Host "icinga2-master1.localdomain" { + check_command = "hostalive" + address = "192.168.56.101" +} + +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.d/master/services.conf + +apply Service "disk" { + check_command = "disk" + + command_endpoint = host.name //requires a host object matching the endpoint object name e.g. icinga2-master1.localdomain + + assign where host.zone == "master" && match("icinga2-master*", host.name) +} +``` + +The `host.zone` attribute check inside the expression ensures that +the service object is only created for host objects inside the `master` +zone. In addition to that the [match](18-library-reference.md#global-functions-match) +function ensures to only create services for the master nodes. + +### Windows Firewall + +#### ICMP Requests + +By default ICMP requests are disabled in the Windows firewall. You can +change that by [adding a new rule](https://support.microsoft.com/en-us/kb/947709). 
+ +``` +C:\> netsh advfirewall firewall add rule name="ICMP Allow incoming V4 echo request" protocol=icmpv4:8,any dir=in action=allow +``` + +#### Icinga 2 + +If your master/satellite nodes should actively connect to the Windows agent +you'll also need to ensure that port `5665` is enabled. + +``` +C:\> netsh advfirewall firewall add rule name="Open port 5665 (Icinga 2)" dir=in action=allow protocol=TCP localport=5665 +``` + +#### NSClient++ API + +If the [check_nscp_api](06-distributed-monitoring.md#distributed-monitoring-windows-nscp-check-api) +plugin is used to query NSClient++, you need to ensure that its port is enabled. + +``` +C:\> netsh advfirewall firewall add rule name="Open port 8443 (NSClient++ API)" dir=in action=allow protocol=TCP localport=8443 +``` + +For security reasons, it is advised to enable the NSClient++ HTTP API for local +connections from the Icinga agent only. Remote connections to the HTTP API +are not recommended when using the legacy HTTP API. + +### Windows Agent and Plugins + +The Icinga 2 package on Windows already provides several plugins. +Detailed [documentation](10-icinga-template-library.md#windows-plugins) is available for all check command definitions. + +Based on the [master with agents](06-distributed-monitoring.md#distributed-monitoring-master-agents) +scenario we'll now add a local disk check.
+ +First, add the agent node as host object: + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim hosts.conf + +object Host "icinga2-agent2.localdomain" { + check_command = "hostalive" + address = "192.168.56.112" + vars.agent_endpoint = name //follows the convention that host name == endpoint name + vars.os_type = "windows" +} +``` + +Next, add the disk check using command endpoint checks (details in the +[disk-windows](10-icinga-template-library.md#windows-plugins-disk-windows) documentation): + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim services.conf + +apply Service "disk C:" { + check_command = "disk-windows" + + vars.disk_win_path = "C:" + + //specify where the check is executed + command_endpoint = host.vars.agent_endpoint + + assign where host.vars.os_type == "windows" && host.vars.agent_endpoint +} +``` + +Validate the configuration and restart Icinga 2. + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +Open Icinga Web 2 and check your newly added Windows disk check :) + +![Icinga Windows Agent](images/distributed-monitoring/icinga2_distributed_windows_client_disk_icingaweb2.png) + +If you want to add your own plugins please check [this chapter](05-service-monitoring.md#service-monitoring-requirements) +for the requirements. + +### Windows Agent and NSClient++ + +There are two methods available for querying NSClient++: + +* Query the [HTTP API](06-distributed-monitoring.md#distributed-monitoring-windows-nscp-check-api) locally from an Icinga agent (requires a running NSClient++ service) +* Run a [local CLI check](06-distributed-monitoring.md#distributed-monitoring-windows-nscp-check-local) (does not require NSClient++ as a service) + +Both methods have their advantages and disadvantages. 
One thing to +note: If you rely on performance counter delta calculations such as +CPU utilization, please use the HTTP API instead of the CLI sample call. + +#### NSClient++ with check_nscp_api + +The [Windows setup](06-distributed-monitoring.md#distributed-monitoring-setup-agent-windows) already allows +you to install the NSClient++ package. In addition to the Windows plugins you can +use the [nscp_api command](10-icinga-template-library.md#nscp-check-api) provided by the Icinga Template Library (ITL). + +The initial setup for the NSClient++ API and the required arguments +are described in the ITL chapter for the [nscp_api](10-icinga-template-library.md#nscp-check-api) CheckCommand. + +Based on the [master with agents](06-distributed-monitoring.md#distributed-monitoring-master-agents) +scenario we'll now add a local nscp check which queries the NSClient++ API to check the free disk space. + +Define a host object called `icinga2-agent1.localdomain` on the master. Add the `nscp_api_password` +custom variable and specify the drives to check.
+ +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim hosts.conf + +object Host "icinga2-agent1.localdomain" { + check_command = "hostalive" + address = "192.168.56.111" + + vars.agent_endpoint = name //follows the convention that host name == endpoint name + vars.os_type = "Windows" + vars.nscp_api_password = "icinga" + vars.drives = [ "C:", "D:" ] +} +``` + +The service checks are generated using an [apply for](03-monitoring-basics.md#using-apply-for) +rule based on `host.vars.drives`: + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim services.conf + +apply Service "nscp-api-" for (drive in host.vars.drives) { + import "generic-service" + + check_command = "nscp_api" + command_endpoint = host.vars.agent_endpoint + + //display_name = "nscp-drive-" + drive + + vars.nscp_api_host = "localhost" + vars.nscp_api_query = "check_drivesize" + vars.nscp_api_password = host.vars.nscp_api_password + vars.nscp_api_arguments = [ "drive=" + drive ] + + ignore where host.vars.os_type != "Windows" +} +``` + +Validate the configuration and restart Icinga 2. + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +Two new services ("nscp-drive-D:" and "nscp-drive-C:") will be visible in Icinga Web 2. + +![Icinga 2 Distributed Monitoring Windows Agent with NSClient++ nscp-api](images/distributed-monitoring/icinga2_distributed_windows_nscp_api_drivesize_icingaweb2.png) + +Note: You can also omit the `command_endpoint` configuration to execute +the command on the master. This also requires a different value for `nscp_api_host` +which defaults to `host.address`. + +``` + //command_endpoint = host.vars.agent_endpoint + + //vars.nscp_api_host = "localhost" +``` + +You can verify the check execution by looking at the `Check Source` attribute +in Icinga Web 2 or the REST API. 
+ +If you want to monitor specific Windows services, you could use the following example: + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim hosts.conf + +object Host "icinga2-agent1.localdomain" { + check_command = "hostalive" + address = "192.168.56.111" + + vars.agent_endpoint = name //follows the convention that host name == endpoint name + vars.os_type = "Windows" + vars.nscp_api_password = "icinga" + vars.services = [ "Windows Update", "wscsvc" ] +} + +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim services.conf + +apply Service "nscp-api-" for (svc in host.vars.services) { + import "generic-service" + + check_command = "nscp_api" + command_endpoint = host.vars.agent_endpoint + + //display_name = "nscp-service-" + svc + + vars.nscp_api_host = "localhost" + vars.nscp_api_query = "check_service" + vars.nscp_api_password = host.vars.nscp_api_password + vars.nscp_api_arguments = [ "service=" + svc ] + + ignore where host.vars.os_type != "Windows" +} +``` + +#### NSClient++ with nscp-local + +The [Windows setup](06-distributed-monitoring.md#distributed-monitoring-setup-agent-windows) allows +you to install the bundled NSClient++ package. In addition to the Windows plugins you can +use the [nscp-local commands](10-icinga-template-library.md#nscp-plugin-check-commands) +provided by the Icinga Template Library (ITL). + +Add the following `include` statement on all your nodes (master, satellite, agent): + +``` +vim /etc/icinga2/icinga2.conf + +include <nscp> +``` + +The CheckCommand definitions will automatically determine the installed path +to the `nscp.exe` binary. + +Based on the [master with agents](06-distributed-monitoring.md#distributed-monitoring-master-agents) +scenario we'll now add a local nscp check querying a given performance counter.
+ +First, add the agent node as host object: + +``` +[root@icinga2-master1.localdomain /]# cd /etc/icinga2/zones.d/master +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim hosts.conf + +object Host "icinga2-agent1.localdomain" { + check_command = "hostalive" + address = "192.168.56.111" + + vars.agent_endpoint = name //follows the convention that host name == endpoint name + vars.os_type = "windows" +} +``` + +Next, add a performance counter check using command endpoint checks (details in the +[nscp-local-counter](10-icinga-template-library.md#nscp-check-local-counter) documentation): + +``` +[root@icinga2-master1.localdomain /etc/icinga2/zones.d/master]# vim services.conf + +apply Service "nscp-local-counter-cpu" { + check_command = "nscp-local-counter" + command_endpoint = host.vars.agent_endpoint + + vars.nscp_counter_name = "\\Processor(_total)\\% Processor Time" + vars.nscp_counter_perfsyntax = "Total Processor Time" + vars.nscp_counter_warning = 1 + vars.nscp_counter_critical = 5 + + vars.nscp_counter_showall = true + + assign where host.vars.os_type == "windows" && host.vars.agent_endpoint +} +``` + +Validate the configuration and restart Icinga 2. + +``` +[root@icinga2-master1.localdomain /]# icinga2 daemon -C +[root@icinga2-master1.localdomain /]# systemctl restart icinga2 +``` + +Open Icinga Web 2 and check your newly added Windows NSClient++ check :) + +![Icinga 2 Distributed Monitoring Windows Agent with NSClient++ nscp-local](images/distributed-monitoring/icinga2_distributed_windows_nscp_counter_icingaweb2.png) + +> **Tip** +> +> In order to measure CPU load, you'll need a running NSClient++ service. +> Therefore it is advised to use a local [nscp-api](06-distributed-monitoring.md#distributed-monitoring-windows-nscp-check-api) +> check against its REST API. + +## Advanced Hints + +You can find additional hints in this section if you prefer to go your own route +with automating setups (setup, certificates, configuration). 
+ +### Certificate Auto-Renewal + +Icinga 2 v2.8+ added the possibility that nodes request certificate updates +on their own. If their expiration date is soon enough, they automatically +renew their already signed certificate by sending a signing request to the +parent node. You'll also see a message in the logs if certificate renewal +isn't necessary. + +### High-Availability for Icinga 2 Features + +All nodes in the same zone require that you enable the same features for high-availability (HA). + +By default, the following features provide advanced HA functionality: + +* [Checks](06-distributed-monitoring.md#distributed-monitoring-high-availability-checks) (load balanced, automated failover). +* [Notifications](06-distributed-monitoring.md#distributed-monitoring-high-availability-notifications) (load balanced, automated failover). +* [DB IDO](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido) (Run-Once, automated failover). +* [Elasticsearch](09-object-types.md#objecttype-elasticsearchwriter) +* [Gelf](09-object-types.md#objecttype-gelfwriter) +* [Graphite](09-object-types.md#objecttype-graphitewriter) +* [InfluxDB](09-object-types.md#objecttype-influxdb2writer) (v1 and v2) +* [OpenTsdb](09-object-types.md#objecttype-opentsdbwriter) +* [Perfdata](09-object-types.md#objecttype-perfdatawriter) (for PNP) + +#### High-Availability with Checks + +All instances within the same zone (e.g. the `master` zone as HA cluster) must +have the `checker` feature enabled. + +Example: + +```bash +icinga2 feature enable checker +``` + +All nodes in the same zone load-balance the check execution. If one instance shuts down, +the other nodes will automatically take over the remaining checks. + +#### High-Availability with Notifications + +All instances within the same zone (e.g. the `master` zone as HA cluster) must +have the `notification` feature enabled. 
+ +Example: + +```bash +icinga2 feature enable notification +``` + +Notifications are load-balanced amongst all nodes in a zone. By default this functionality +is enabled. +If your nodes should send out notifications independently from any other nodes (this will cause +duplicated notifications if not properly handled!), you can set `enable_ha = false` +in the [NotificationComponent](09-object-types.md#objecttype-notificationcomponent) feature. + +#### High-Availability with DB IDO + +All instances within the same zone (e.g. the `master` zone as HA cluster) must +have the DB IDO feature enabled. + +Example DB IDO MySQL: + +```bash +icinga2 feature enable ido-mysql +``` + +By default the DB IDO feature only runs on one node. All other nodes in the same zone disable +the active IDO database connection at runtime. The node with the active DB IDO connection is +not necessarily the zone master. + +**Note**: The DB IDO HA feature can be disabled by setting the `enable_ha` attribute to `false` +for the [IdoMysqlConnection](09-object-types.md#objecttype-idomysqlconnection) or +[IdoPgsqlConnection](09-object-types.md#objecttype-idopgsqlconnection) object on **all** nodes in the +**same** zone. + +All endpoints will enable the DB IDO feature and connect to the configured +database and dump configuration, status and historical data on their own. + +If the instance with the active DB IDO connection dies, the HA functionality will +automatically elect a new DB IDO master. + +The DB IDO feature will try to determine which cluster endpoint is currently writing +to the database and bail out if another endpoint is active. 
You can manually verify that +by running the following query command: + +``` +icinga=> SELECT status_update_time, endpoint_name FROM icinga_programstatus; + status_update_time | endpoint_name +------------------------+--------------- + 2016-08-15 15:52:26+02 | icinga2-master1.localdomain +(1 Zeile) +``` + +This is useful when the cluster connection between endpoints breaks, and prevents +data duplication in split-brain-scenarios. The failover timeout can be set for the +`failover_timeout` attribute, but not lower than 60 seconds. + +### Endpoint Connection Direction + +Endpoints attempt to connect to another endpoint when its local [Endpoint](09-object-types.md#objecttype-endpoint) object +configuration specifies a valid `host` attribute (FQDN or IP address). + +Example for the master node `icinga2-master1.localdomain` actively connecting +to the agent node `icinga2-agent1.localdomain`: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.conf + +//... + +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.56.111" // The master actively tries to connect to the agent + log_duration = 0 // Disable the replay log for command endpoint agents +} +``` + +Example for the agent node `icinga2-agent1.localdomain` not actively +connecting to the master node `icinga2-master1.localdomain`: + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/zones.conf + +//... + +object Endpoint "icinga2-master1.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute + log_duration = 0 // Disable the replay log for command endpoint agents +} +``` + +It is not necessary that both the master and the agent node establish +two connections to each other. Icinga 2 will only use one connection +and close the second connection if established. This generates useless +CPU cycles and leads to blocking resources when the connection times out. 
+ +**Tip**: Choose either to let master/satellite nodes connect to agent nodes +or vice versa. + + +### Disable Log Duration for Command Endpoints + +The replay log is a built-in mechanism to ensure that nodes in a distributed setup +keep the same history (check results, notifications, etc.) when nodes are temporarily +disconnected and then reconnect. + +This functionality is not needed when a master/satellite node is sending check +execution events to an agent which is configured as [command endpoint](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) +for check execution. + +The [Endpoint](09-object-types.md#objecttype-endpoint) object attribute `log_duration` can +be lowered or set to 0 to fully disable any log replay updates when the +agent is not connected. + +Configuration on the master node `icinga2-master1.localdomain`: + +``` +[root@icinga2-master1.localdomain /]# vim /etc/icinga2/zones.conf + +//... + +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.56.111" // The master actively tries to connect to the agent + log_duration = 0 +} + +object Endpoint "icinga2-agent2.localdomain" { + host = "192.168.56.112" // The master actively tries to connect to the agent + log_duration = 0 +} +``` + +Configuration on the agent `icinga2-agent1.localdomain`: + +``` +[root@icinga2-agent1.localdomain /]# vim /etc/icinga2/zones.conf + +//... + +object Endpoint "icinga2-master1.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute + log_duration = 0 +} + +object Endpoint "icinga2-master2.localdomain" { + // Do not actively connect to the master by leaving out the 'host' attribute + log_duration = 0 +} +``` + +### Initial Sync for new Endpoints in a Zone + +> **Note** +> +> This is required if you decide to change an already running single endpoint production +> environment into a HA-enabled cluster zone with two endpoints.
+> The [initial setup](06-distributed-monitoring.md#distributed-monitoring-scenarios-ha-master-clients) +> with 2 HA masters doesn't require this step. + +In order to make sure that all of your zone endpoints have the same state you need +to pick the authoritative running one and copy the following content: + +* State file from `/var/lib/icinga2/icinga2.state` +* Internal config package for runtime created objects (downtimes, comments, hosts, etc.) at `/var/lib/icinga2/api/packages/_api` + +If you need already deployed config packages from the Director, or synced cluster zones, +you can also sync the entire `/var/lib/icinga2/api/packages` directory. This directory should also be +included in your backup strategy. + +Do **not** sync `/var/lib/icinga2/api/zones*` manually - this is an internal directory +and handled by the Icinga cluster config sync itself. + +> **Note** +> +> Ensure that all endpoints are shut down during this procedure. Once you have +> synced the cached files, proceed with configuring the remaining endpoints +> to let them know about the new master/satellite node (zones.conf). + +### Manual Certificate Creation + +#### Create CA on the Master + +Choose the host which should store the certificate authority (one of the master nodes). 
+ +The first step is the creation of the certificate authority (CA) by running the following command +as root user: + +``` +[root@icinga2-master1.localdomain /root]# icinga2 pki new-ca +``` + +#### Create CSR and Certificate + +Create a certificate signing request (CSR) for the local instance: + +``` +[root@icinga2-master1.localdomain /root]# icinga2 pki new-cert --cn icinga2-master1.localdomain \ + --key icinga2-master1.localdomain.key \ + --csr icinga2-master1.localdomain.csr +``` + +Sign the CSR with the previously created CA: + +``` +[root@icinga2-master1.localdomain /root]# icinga2 pki sign-csr --csr icinga2-master1.localdomain.csr --cert icinga2-master1.localdomain +``` + +Repeat the steps for all instances in your setup. + +#### Copy Certificates + +Copy the host's certificate files and the public CA certificate to `/var/lib/icinga2/certs`: + +``` +[root@icinga2-master1.localdomain /root]# mkdir -p /var/lib/icinga2/certs +[root@icinga2-master1.localdomain /root]# cp icinga2-master1.localdomain.{crt,key} /var/lib/icinga2/certs +[root@icinga2-master1.localdomain /root]# cp /var/lib/icinga2/ca/ca.crt /var/lib/icinga2/certs +``` + +Ensure that proper permissions are set (replace `icinga` with the Icinga 2 daemon user): + +``` +[root@icinga2-master1.localdomain /root]# chown -R icinga:icinga /var/lib/icinga2/certs +[root@icinga2-master1.localdomain /root]# chmod 600 /var/lib/icinga2/certs/*.key +[root@icinga2-master1.localdomain /root]# chmod 644 /var/lib/icinga2/certs/*.crt +``` + +The CA public and private key are stored in the `/var/lib/icinga2/ca` directory. Keep this path secure and include +it in your backups. + +#### Create Multiple Certificates + +Use your preferred method to automate the certificate generation process. 
+ +``` +[root@icinga2-master1.localdomain /var/lib/icinga2/certs]# for node in icinga2-master1.localdomain icinga2-master2.localdomain icinga2-satellite1.localdomain; do icinga2 pki new-cert --cn $node --csr $node.csr --key $node.key; done +information/base: Writing private key to 'icinga2-master1.localdomain.key'. +information/base: Writing certificate signing request to 'icinga2-master1.localdomain.csr'. +information/base: Writing private key to 'icinga2-master2.localdomain.key'. +information/base: Writing certificate signing request to 'icinga2-master2.localdomain.csr'. +information/base: Writing private key to 'icinga2-satellite1.localdomain.key'. +information/base: Writing certificate signing request to 'icinga2-satellite1.localdomain.csr'. + +[root@icinga2-master1.localdomain /var/lib/icinga2/certs]# for node in icinga2-master1.localdomain icinga2-master2.localdomain icinga2-satellite1.localdomain; do sudo icinga2 pki sign-csr --csr $node.csr --cert $node.crt; done +information/pki: Writing certificate to file 'icinga2-master1.localdomain.crt'. +information/pki: Writing certificate to file 'icinga2-master2.localdomain.crt'. +information/pki: Writing certificate to file 'icinga2-satellite1.localdomain.crt'. +``` + +Copy and move these certificates to the respective instances e.g. with SSH/SCP. + +## Automation + +These hints should get you started with your own automation tools (Puppet, Ansible, Chef, Salt, etc.) +or custom scripts for automated setup. + +These are collected best practices from various community channels. + +* [Silent Windows setup](06-distributed-monitoring.md#distributed-monitoring-automation-windows-silent) +* [Node Setup CLI command](06-distributed-monitoring.md#distributed-monitoring-automation-cli-node-setup) with parameters + +If you prefer an alternate method, we still recommend leaving all the Icinga 2 features intact (e.g. `icinga2 feature enable api`). 
+You should also use well known and documented default configuration file locations (e.g. `zones.conf`). +This will tremendously help when someone is trying to help in the [community channels](https://icinga.com/community/). + + +### Silent Windows Setup + +If you want to install the agent silently/unattended, use the `/qn` modifier. The +installation should not trigger a restart, but if you want to be completely sure, you can use the `/norestart` modifier. + +``` +C:> msiexec /i C:\Icinga2-v2.5.0-x86.msi /qn /norestart +``` + +Once the setup is completed you can use the `node setup` cli command too. + +### Node Setup using CLI Parameters + +Instead of using the `node wizard` CLI command, there is an alternative `node setup` +command available which has some prerequisites. + +**Note**: The CLI command can be used on Linux/Unix and Windows operating systems. +The graphical Windows setup wizard actively uses these CLI commands. + +#### Node Setup on the Master Node + +In case you want to setup a master node you must add the `--master` parameter +to the `node setup` CLI command. In addition to that the `--cn` can optionally +be passed (defaults to the FQDN). + + Parameter | Description + --------------------|-------------------- + `--cn` | **Optional.** Common name (CN). By convention this should be the host's FQDN. Defaults to the FQDN. + `--zone` | **Optional.** Zone name. Defaults to `master`. + `--listen` | **Optional.** Address to listen on. Syntax is `host,port`. + `--disable-confd` | **Optional.** If provided, this disables the `include_recursive "conf.d"` directive and adds the `api-users.conf` file inclusion to `icinga2.conf`. Available since v2.9+. Not set by default for compatibility reasons with Puppet, Ansible, Chef, etc. 
+ +Example: + +``` +[root@icinga2-master1.localdomain /]# icinga2 node setup --master +``` + +In case you want to bind the `ApiListener` object to a specific +host/port you can specify it like this: + +``` +--listen 192.168.56.101,5665 +``` + +In case you don't need anything in `conf.d`, use the following command line: + +``` +[root@icinga2-master1.localdomain /]# icinga2 node setup --master --disable-confd +``` + + + + +#### Node Setup with Agents/Satellites + +##### Preparations + +Make sure that the `/var/lib/icinga2/certs` directory exists and is owned by the `icinga` +user (or the user Icinga 2 is running as). + +``` +[root@icinga2-agent1.localdomain /]# mkdir -p /var/lib/icinga2/certs +[root@icinga2-agent1.localdomain /]# chown -R icinga:icinga /var/lib/icinga2/certs +``` + +First you'll need to generate a new local self-signed certificate. +Pass the following details to the `pki new-cert` CLI command: + + Parameter | Description + --------------------|-------------------- + `--cn` | **Required.** Common name (CN). By convention this should be the host's FQDN. + `--key`, `--cert` | **Required.** Client certificate files. These generated files will be put into the specified location. By convention this should be using `/var/lib/icinga2/certs` as directory. + +Example: + +``` +[root@icinga2-agent1.localdomain /]# icinga2 pki new-cert --cn icinga2-agent1.localdomain \ +--key /var/lib/icinga2/certs/icinga2-agent1.localdomain.key \ +--cert /var/lib/icinga2/certs/icinga2-agent1.localdomain.crt +``` + +##### Verify Parent Connection + +In order to verify the parent connection and avoid man-in-the-middle attacks, +fetch the parent instance's certificate and verify that it matches the connection. +The `trusted-parent.crt` file is a temporary file passed to `node setup` in the +next step and does not need to be stored for later usage.
+ +Pass the following details to the `pki save-cert` CLI command: + + Parameter | Description + --------------------|-------------------- + `--trustedcert` | **Required.** Store the parent's certificate file. Manually verify that you're trusting it. + `--host` | **Required.** FQDN or IP address of the parent host. + +Request the master certificate from the master host (`icinga2-master1.localdomain`) +and store it as `trusted-parent.crt`. Review it and continue. + +``` +[root@icinga2-agent1.localdomain /]# icinga2 pki save-cert \ +--trustedcert /var/lib/icinga2/certs/trusted-parent.crt \ +--host icinga2-master1.localdomain + +information/cli: Retrieving TLS certificate for 'icinga2-master1.localdomain:5665'. + + Subject: CN = icinga2-master1.localdomain + Issuer: CN = icinga2-master1.localdomain + Valid From: Feb 4 08:59:05 2020 GMT + Valid Until: Jan 31 08:59:05 2035 GMT + Fingerprint: B4 90 DE 46 81 DD 2E BF EE 9D D5 47 61 43 EF C6 6D 86 A6 CC + +*** +*** You have to ensure that this certificate actually matches the parent +*** instance's certificate in order to avoid man-in-the-middle attacks. +*** + +information/pki: Writing certificate to file '/var/lib/icinga2/certs/trusted-parent.crt'. +``` + +##### Node Setup + +Continue with the additional `node setup` step. Specify a local endpoint and zone name (`icinga2-agent1.localdomain`) +and set the master host (`icinga2-master1.localdomain`) as parent zone configuration. Specify the path to +the previously stored trusted parent certificate (`trusted-parent.crt`). + +Pass the following details to the `node setup` CLI command: + + Parameter | Description + --------------------|-------------------- + `--cn` | **Optional.** Common name (CN). By convention this should be the host's FQDN. + `--ticket` | **Required.** Request ticket. Add the previously generated [ticket number](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing). 
+ `--trustedcert` | **Required.** Trusted parent certificate file as connection verification (received via 'pki save-cert'). + `--parent_host` | **Optional.** FQDN or IP address of the parent host. This is where the command connects for CSR signing. If not specified, you need to manually copy the parent's public CA certificate file into `/var/lib/icinga2/certs/ca.crt` in order to start Icinga 2. + `--endpoint` | **Required.** Specifies the parent's endpoint name. + `--zone` | **Required.** Specifies the agent/satellite zone name. + `--parent_zone` | **Optional.** Specifies the parent's zone name. + `--accept-config` | **Optional.** Whether this node accepts configuration sync from the master node (required for [config sync mode](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync)). + `--accept-commands` | **Optional.** Whether this node accepts command execution messages from the master node (required for [command endpoint mode](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint)). + `--global_zones` | **Optional.** Allows to specify more global zones in addition to `global-templates` and `director-global`. + `--disable-confd` | **Optional.** If provided, this disables the `include_recursive "conf.d"` directive in `icinga2.conf`. Available since v2.9+. Not set by default for compatibility reasons with Puppet, Ansible, Chef, etc. + +> **Note** +> +> The `master_host` parameter is deprecated and will be removed. Please use `--parent_host` instead. 
+ +Example: + +``` +[root@icinga2-agent1.localdomain /]# icinga2 node setup --ticket ead2d570e18c78abf285d6b85524970a0f69c22d \ +--cn icinga2-agent1.localdomain \ +--endpoint icinga2-master1.localdomain \ +--zone icinga2-agent1.localdomain \ +--parent_zone master \ +--parent_host icinga2-master1.localdomain \ +--trustedcert /var/lib/icinga2/certs/trusted-parent.crt \ +--accept-commands --accept-config \ +--disable-confd +``` + +In case the agent/satellite should connect to the master node, you'll +need to modify the `--endpoint` parameter using the format `cn,host,port`: + +``` +--endpoint icinga2-master1.localdomain,192.168.56.101,5665 +``` + +Specify the parent zone using the `--parent_zone` parameter. This is useful +if the agent connects to a satellite, not the master instance. + +``` +--parent_zone satellite +``` + +In case the agent should know the additional global zone `linux-templates`, you'll +need to set the `--global_zones` parameter. + +``` +--global_zones linux-templates +``` + +The `--parent_host` parameter is optional since v2.9 and allows you to perform a connection-less setup. +You cannot restart Icinga 2 yet: the CLI command asks you to manually copy the parent's public CA +certificate file to `/var/lib/icinga2/certs/ca.crt` first. Once Icinga 2 is started, it sends +a ticket signing request to the parent node. If you have provided a ticket, the master node +signs the request and sends it back to the agent/satellite which performs a certificate update in-memory. + +In case you did not provide a ticket, you need to [manually sign the CSR on the master node](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing-master) +which holds the CA's key pair. + + +**You can find additional best practices below.** + +If this agent node is configured as [remote command endpoint execution](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) +you can safely disable the `checker` feature.
The `node setup` CLI command already disabled the `notification` feature. + +``` +[root@icinga2-agent1.localdomain /]# icinga2 feature disable checker +``` + +**Optional**: Add an ApiUser object configuration for remote troubleshooting. + +``` +[root@icinga2-agent1.localdomain /]# cat <<EOF >/etc/icinga2/conf.d/api-users.conf +object ApiUser "root" { + password = "agentsupersecretpassword" + permissions = ["*"] +} +EOF +``` + +Finally restart Icinga 2. + +``` +[root@icinga2-agent1.localdomain /]# systemctl restart icinga2 +``` + +Your automation tool must then configure the master node in the meantime. + +``` +# cat <<EOF >>/etc/icinga2/zones.conf +object Endpoint "icinga2-agent1.localdomain" { + // Agent connects itself +} + +object Zone "icinga2-agent1.localdomain" { + endpoints = [ "icinga2-agent1.localdomain" ] + parent = "master" +} + +EOF +``` + +## Using Multiple Environments + +> **Note** +> +> This documentation only covers the basics. Full functionality requires a not yet released addon. + +In some cases it can be desired to run multiple Icinga instances on the same host. +Two potential scenarios include: + +* Different versions of the same monitoring configuration (e.g. production and testing) +* Disparate sets of checks for entirely unrelated monitoring environments (e.g. infrastructure and applications) + +The configuration is done with the global constants `ApiBindHost` and `ApiBindPort` +or the `bind_host` and `bind_port` attributes of the +[ApiListener](09-object-types.md#objecttype-apilistener) object. + +The environment must be set with the global constant `Environment` or as object attribute +of the [IcingaApplication](09-object-types.md#objecttype-icingaapplication) object. + +In any case the constant is the default value for the attribute, and the direct configuration in the objects +takes precedence. The constants have been added to allow the values being set from the CLI on startup.
+ +When Icinga establishes a TLS connection to another cluster instance it automatically uses the [SNI extension](https://en.wikipedia.org/wiki/Server_Name_Indication) +to signal which endpoint it is attempting to connect to. On its own this can already be used to position multiple +Icinga instances behind a load balancer. + +SNI example: `icinga2-agent1.localdomain` + +However, if the environment is configured to `production`, Icinga appends the environment name to the SNI hostname like this: + +SNI example with environment: `icinga2-agent1.localdomain:production` + +Middleware like load balancers or TLS proxies can read the SNI header and route the connection to the appropriate target. +I.e., it uses a single externally-visible TCP port (usually 5665) and forwards connections to one or more Icinga +instances which are bound to a local TCP port. It does so by inspecting the environment name that is sent as part of the +SNI extension. diff --git a/doc/07-agent-based-monitoring.md b/doc/07-agent-based-monitoring.md new file mode 100644 index 0000000..a5a466e --- /dev/null +++ b/doc/07-agent-based-monitoring.md @@ -0,0 +1,485 @@ +# Agent-based Checks + +If the remote services are not directly accessible through the network, a +local agent installation exposing the results to check queries can +come in handy. + +Prior to installing and configuring an agent service, evaluate possible +options based on these requirements: + +* Security (authentication, TLS certificates, secure connection handling, etc.)
+* Connection direction + * Master/satellite can execute commands directly or + * Agent sends back passive/external check results +* Availability on specific OS types and versions + * Packages available +* Configuration and initial setup +* Updates and maintenance, compatibility + +Available agent types: + +* [Icinga Agent](07-agent-based-monitoring.md#agent-based-checks-icinga) on Linux/Unix and Windows +* [SSH](07-agent-based-monitoring.md#agent-based-checks-ssh) on Linux/Unix +* [SNMP](07-agent-based-monitoring.md#agent-based-checks-snmp) on Linux/Unix and hardware +* [SNMP Traps](07-agent-based-monitoring.md#agent-based-checks-snmp-traps) as passive check results +* [REST API](07-agent-based-monitoring.md#agent-based-checks-rest-api) for passive external check results +* [NSClient++](07-agent-based-monitoring.md#agent-based-checks-nsclient) and [WMI](07-agent-based-monitoring.md#agent-based-checks-wmi) on Windows + + +## Icinga Agent + +For the most common setups on Linux/Unix and Windows, we recommend +to setup the Icinga agent in a [distributed environment](06-distributed-monitoring.md#distributed-monitoring). + +![Icinga 2 Distributed Master with Agents](images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_with_agents.png) + +Key benefits: + +* Directly integrated into the distributed monitoring stack of Icinga +* Works on Linux/Unix and Windows +* Secure communication with TLS +* Connection can be established from both sides. Once connected, command execution and check results are exchanged. + * Master/satellite connects to agent + * Agent connects to parent satellite/master +* Same configuration language and binaries +* Troubleshooting docs and community best practices + +Follow the setup and configuration instructions [here](06-distributed-monitoring.md#distributed-monitoring-setup-agent-satellite). 
+ +On Windows hosts, the Icinga agent can query a local NSClient++ service +for additional checks in case there are no plugins available. The NSCP +installer is bundled with Icinga and can be installed with the setup wizard. + +![Icinga 2 Windows Setup](images/distributed-monitoring/icinga2_windows_setup_wizard_01.png) + +## SSH + +> **Tip** +> +> This is the recommended way for systems where the Icinga agent is not available, +> be it specific hardware architectures, old systems or environments where installing additional software is forbidden. + +This method uses the SSH service on the remote host to execute +an arbitrary plugin command line. The output and exit code are +returned and used by the core. + +The `check_by_ssh` plugin takes care of this. It is available in the +[Monitoring Plugins](https://www.monitoring-plugins.org/) package. +For your convenience, the Icinga template library provides the [by_ssh](10-icinga-template-library.md#plugin-check-command-by-ssh) +CheckCommand already. + +### SSH: Preparations + +Generate an SSH key pair for the Icinga daemon user. In case the user has no shell, temporarily enable this. +When asked for a passphrase, **do not set it** and press enter. + +```bash +sudo su - icinga + +ssh-keygen -b 4096 -t rsa -C "icinga@$(hostname) user for check_by_ssh" -f $HOME/.ssh/id_rsa +``` + +On the remote agent, create the icinga user and generate a temporary password. + +```bash +useradd -m icinga +passwd icinga +``` + +Copy the public key from the Icinga server to the remote agent, e.g. with `ssh-copy-id` +or manually into `/home/icinga/.ssh/authorized_keys`. +This will ask for the password once. + +```bash +sudo su - icinga + +ssh-copy-id -i $HOME/.ssh/id_rsa icinga@ssh-agent1.localdomain +``` + +After the SSH key is copied, test the connection **at least once** and +accept the host key verification. If you forget about this step, checks will +become UNKNOWN later.
+ +```bash +ssh -i $HOME/.ssh/id_rsa icinga@ssh-agent1.localdomain +``` + +After the SSH key login works, disable the previously enabled logins. + +* Remote agent user's password with `passwd -l icinga` +* Local icinga user terminal + +Also, ensure that the permissions are correct for the `.ssh` directory +as otherwise logins will fail. + +* `.ssh` directory: 700 +* `.ssh/id_rsa.pub` public key file: 644 +* `.ssh/id_rsa` private key file: 600 + + +### SSH: Configuration + +First, create a host object which has SSH configured and enabled. +Mark this e.g. with the custom variable `agent_type` to later +use this for service apply rule matches. Best practice is to +store that in a specific template, either in the static configuration +or inside the Director. + +``` +template Host "ssh-agent" { + check_command = "hostalive" + + vars.agent_type = "ssh" + vars.os_type = "linux" +} + +object Host "ssh-agent1.localdomain" { + import "ssh-agent" + + address = "192.168.56.115" +} +``` + +Example for monitoring the remote users: + +``` +apply Service "users" { + check_command = "by_ssh" + + vars.by_ssh_command = [ "/usr/lib/nagios/plugins/check_users" ] + + // Follows the same principle as with command arguments, e.g. for ordering + vars.by_ssh_arguments = { + "-w" = { + value = "$users_wgreater$" // Can reference an existing custom variable defined on the host or service, evaluated at runtime + } + "-c" = { + value = "$users_cgreater$" + } + } + + vars.users_wgreater = 3 + vars.users_cgreater = 5 + + assign where host.vars.os_type == "linux" && host.vars.agent_type == "ssh" +} +``` + +A more advanced example with better arguments is shown in [this blogpost](https://www.netways.de/blog/2016/03/21/check_by_ssh-mit-icinga-2/). + + +## SNMP + +The SNMP daemon runs on the remote system and answers SNMP queries by plugin scripts. 
+The [Monitoring Plugins](https://www.monitoring-plugins.org/) package provides +the `check_snmp` plugin binary, but there are plenty of [existing plugins](05-service-monitoring.md#service-monitoring-plugins) +for specific use cases already around, for example monitoring Cisco routers. + +The following example uses the [SNMP ITL](10-icinga-template-library.md#plugin-check-command-snmp) +CheckCommand and sets the `snmp_oid` custom variable. A service is created for all hosts which +have `agent_type` set to `snmp` and a non-empty `snmp_community` custom variable. + +``` +template Host "snmp-agent" { + check_command = "hostalive" + + vars.agent_type = "snmp" + + vars.snmp_community = "public-icinga" +} + +object Host "snmp-agent1.localdomain" { + import "snmp-agent" +} +``` + +``` +apply Service "uptime" { + import "generic-service" + + check_command = "snmp" + vars.snmp_oid = "1.3.6.1.2.1.1.3.0" + vars.snmp_miblist = "DISMAN-EVENT-MIB" + + assign where host.vars.agent_type == "snmp" && host.vars.snmp_community != "" +} +``` + +If no `snmp_miblist` is specified, the plugin will default to `ALL`. As the number of available MIB files +on the system increases, so will the load generated by this plugin if no `MIB` is specified. +As such, it is recommended to always specify at least one `MIB`. + +Additional SNMP plugins are available using the [Manubulon SNMP Plugins](10-icinga-template-library.md#snmp-manubulon-plugin-check-commands). + +For network monitoring, community members advise using [nwc_health](05-service-monitoring.md#service-monitoring-network) +for example. + + +## SNMP Traps and Passive Check Results + +SNMP Traps can be received and filtered by using [SNMPTT](http://snmptt.sourceforge.net/) +and specific trap handlers passing the check results to Icinga 2.
+ +Following the SNMPTT [Format](http://snmptt.sourceforge.net/docs/snmptt.shtml#SNMPTT.CONF-FORMAT) +documentation and the Icinga external command syntax found [here](24-appendix.md#external-commands-list-detail) +we can create generic services that can accommodate any number of hosts for a given scenario. + +### Simple SNMP Traps + +A simple example might be monitoring host reboots indicated by an SNMP agent reset. +Building the event to auto reset after dispatching a notification is important. +Setup the manual check parameters to reset the event from an initial unhandled +state or from a missed reset event. + +Add a directive in `snmptt.conf` + +``` +EVENT coldStart .1.3.6.1.6.3.1.1.5.1 "Status Events" Normal +FORMAT Device reinitialized (coldStart) +EXEC echo "[$@] PROCESS_SERVICE_CHECK_RESULT;$A;Coldstart;2;The snmp agent has reinitialized." >> /var/run/icinga2/cmd/icinga2.cmd +SDESC +A coldStart trap signifies that the SNMPv2 entity, acting +in an agent role, is reinitializing itself and that its +configuration may have been altered. +EDESC +``` + +1. Define the `EVENT` as per your need. +2. Construct the `EXEC` statement with the service name matching your template +applied to your _n_ hosts. The host address inferred by SNMPTT will be the +correlating factor. You can have snmptt provide host names or ip addresses to +match your Icinga convention. + +> **Note** +> +> Replace the deprecated command pipe EXEC statement with a curl call +> to the REST API action [process-check-result](12-icinga2-api.md#icinga2-api-actions-process-check-result). + +Add an `EventCommand` configuration object for the passive service auto reset event. + +``` +object EventCommand "coldstart-reset-event" { + command = [ ConfigDir + "/conf.d/custom/scripts/coldstart_reset_event.sh" ] + + arguments = { + "-i" = "$service.state_id$" + "-n" = "$host.name$" + "-s" = "$service.name$" + } +} +``` + +Create the `coldstart_reset_event.sh` shell script to pass the expanded variable +data in. 
The `$service.state_id$` is important in order to prevent an endless loop +of event firing after the service has been reset. + +```bash +#!/bin/bash + +SERVICE_STATE_ID="" +HOST_NAME="" +SERVICE_NAME="" + +show_help() +{ +cat <<-EOF + Usage: ${0##*/} [-h] -n HOST_NAME -s SERVICE_NAME + Writes a coldstart reset event to the Icinga command pipe. + + -h Display this help and exit. + -i SERVICE_STATE_ID The associated service state id. + -n HOST_NAME The associated host name. + -s SERVICE_NAME The associated service name. +EOF +} + +while getopts "hi:n:s:" opt; do + case "$opt" in + h) + show_help + exit 0 + ;; + i) + SERVICE_STATE_ID=$OPTARG + ;; + n) + HOST_NAME=$OPTARG + ;; + s) + SERVICE_NAME=$OPTARG + ;; + '?') + show_help + exit 0 + ;; + esac +done + +if [ -z "$SERVICE_STATE_ID" ]; then + show_help + printf "\n Error: -i required.\n" + exit 1 +fi + +if [ -z "$HOST_NAME" ]; then + show_help + printf "\n Error: -n required.\n" + exit 1 +fi + +if [ -z "$SERVICE_NAME" ]; then + show_help + printf "\n Error: -s required.\n" + exit 1 +fi + +if [ "$SERVICE_STATE_ID" -gt 0 ]; then + echo "[`date +%s`] PROCESS_SERVICE_CHECK_RESULT;$HOST_NAME;$SERVICE_NAME;0;Auto-reset (`date +"%m-%d-%Y %T"`)." >> /var/run/icinga2/cmd/icinga2.cmd +fi +``` + +> **Note** +> +> Replace the deprecated command pipe EXEC statement with a curl call +> to the REST API action [process-check-result](12-icinga2-api.md#icinga2-api-actions-process-check-result). + +Finally create the `Service` and assign it: + +``` +apply Service "Coldstart" { + import "generic-service-custom" + + check_command = "dummy" + event_command = "coldstart-reset-event" + + enable_notifications = 1 + enable_active_checks = 0 + enable_passive_checks = 1 + enable_flapping = 0 + volatile = 1 + enable_perfdata = 0 + + vars.dummy_state = 0 + vars.dummy_text = "Manual reset." 
+ + vars.sla = "24x7" + + assign where (host.vars.os == "Linux" || host.vars.os == "Windows") +} +``` + +### Complex SNMP Traps + +A more complex example might be passing dynamic data from a traps varbind list +for a backup scenario where the backup software dispatches status updates. By +utilizing active and passive checks, the older freshness concept can be leveraged. + +By defining the active check as a hard failed state, a missed backup can be reported. +As long as the most recent passive update has occurred, the active check is bypassed. + +Add a directive in `snmptt.conf` + +``` +EVENT enterpriseSpecific "Status Events" Normal +FORMAT Enterprise specific trap +EXEC echo "[$@] PROCESS_SERVICE_CHECK_RESULT;$A;$1;$2;$3" >> /var/run/icinga2/cmd/icinga2.cmd +SDESC +An enterprise specific trap. +The varbinds in order denote the Icinga service name, state and text. +EDESC +``` + +1. Define the `EVENT` as per your need using your actual oid. +2. The service name, state and text are extracted from the first three varbinds. +This has the advantage of accommodating an unlimited set of use cases. + +> **Note** +> +> Replace the deprecated command pipe EXEC statement with a curl call +> to the REST API action [process-check-result](12-icinga2-api.md#icinga2-api-actions-process-check-result). + +Create a `Service` for the specific use case associated to the host. If the host +matches and the first varbind value is `Backup`, SNMPTT will submit the corresponding +passive update with the state and text from the second and third varbind: + +``` +object Service "Backup" { + import "generic-service-custom" + + host_name = "host.domain.com" + check_command = "dummy" + + enable_notifications = 1 + enable_active_checks = 1 + enable_passive_checks = 1 + enable_flapping = 0 + volatile = 1 + max_check_attempts = 1 + check_interval = 87000 + enable_perfdata = 0 + + vars.sla = "24x7" + vars.dummy_state = 2 + vars.dummy_text = "No passive check result received." 
+} +``` + + +## Agents sending Check Results via REST API + +Whenever the remote agent cannot run the Icinga agent, or a backup script +should just send its current state after finishing, you can use the [REST API](12-icinga2-api.md#icinga2-api) +as secure transport and send [passive external check results](08-advanced-topics.md#external-check-results). + +Use the [process-check-result](12-icinga2-api.md#icinga2-api-actions-process-check-result) API action to send the external passive check result. +You can either use `curl` or implement the HTTP requests in your preferred programming +language. Examples for API clients are available in [this chapter](12-icinga2-api.md#icinga2-api-clients). + +Feeding check results from remote hosts requires the host/service +objects configured on the master/satellite instance. + +## NSClient++ on Windows + +[NSClient++](https://nsclient.org/) works on both Windows and Linux platforms and is well +known for its magnificent Windows support. There are alternatives like the WMI interface, +but using `NSClient++` will allow you to run local scripts similar to check plugins fetching +the required output and performance counters. + +> **Tip** +> +> Best practice is to use the Icinga agent as secure execution +> bridge (`check_nt` and `check_nrpe` are considered insecure) +> and query the NSClient++ service [locally](06-distributed-monitoring.md#distributed-monitoring-windows-nscp). + +You can use the `check_nt` plugin from the Monitoring Plugins project to query NSClient++. 
+Icinga 2 provides the [nscp check command](10-icinga-template-library.md#plugin-check-command-nscp) for this: + +Example: + +``` +object Service "disk" { + import "generic-service" + + host_name = "remote-windows-host" + + check_command = "nscp" + + vars.nscp_variable = "USEDDISKSPACE" + vars.nscp_params = "c" + vars.nscp_warn = 70 + vars.nscp_crit = 80 +} +``` + +For details on the `NSClient++` configuration please refer to the [official documentation](https://docs.nsclient.org/). + +## WMI on Windows + +The most popular plugin is [check_wmi_plus](https://edcint.co.nz/checkwmiplus/). + +> Check WMI Plus uses the Windows Management Interface (WMI) to check for common services (cpu, disk, services, eventlog…) on Windows machines. It requires the open source wmi client for Linux. + +Community examples: + +* [Icinga 2 check_wmi_plus example by 18pct](https://18pct.com/icinga2-check_wmi_plus-example/) +* [Agent-less monitoring with WMI](https://www.devlink.de/linux/icinga2-nagios-agentless-monitoring-von-windows/) diff --git a/doc/08-advanced-topics.md b/doc/08-advanced-topics.md new file mode 100644 index 0000000..34330ed --- /dev/null +++ b/doc/08-advanced-topics.md @@ -0,0 +1,1208 @@ +# Advanced Topics + +This chapter covers a number of advanced topics. If you're new to Icinga, you +can safely skip over things you're not interested in. + +## Downtimes + +Downtimes can be scheduled for planned server maintenance or +any other targeted service outage you are aware of in advance. + +Downtimes suppress notifications and can trigger other +downtimes too. If the downtime was set by accident, or the duration +exceeds the maintenance window, you can manually cancel the downtime. + +### Scheduling a downtime + +The most convenient way to schedule planned downtimes is to create +them in Icinga Web 2 inside the host/service detail view. Select +multiple hosts/services from the listing with the shift key to +schedule multiple downtimes.
+ +![Downtime in Icinga Web 2](images/advanced-topics/icingaweb2_downtime_handled.png) + +In addition to that you can schedule a downtime by using the Icinga 2 API action +[schedule-downtime](12-icinga2-api.md#icinga2-api-actions-schedule-downtime). +This is especially useful to schedule a downtime on-demand inside a (remote) backup +script, or create maintenance downtimes from a cron job for specific dates and intervals. + +Multiple downtimes for a single object may overlap. This is useful +when you want to extend your maintenance window taking longer than expected. +If there are multiple downtimes triggered for one object, the overall downtime depth +will be greater than `1`. + +If the downtime was scheduled after the problem changed to a critical hard +state triggering a problem notification, and the service recovers during +the downtime window, the recovery notification won't be suppressed. + +Planned downtimes are also taken into account for SLA reporting +tools calculating the SLAs based on the state and downtime history. + +### Fixed and Flexible Downtimes + +A `fixed` downtime will be activated at the defined start time, and +removed at the end time. During this time window the service state +will change to `NOT-OK` and then actually trigger the downtime. +Notifications are suppressed and the downtime depth is incremented. + +Common scenarios are a planned distribution upgrade on your linux +servers, or database updates in your warehouse. The customer knows +about a fixed downtime window between 23:00 and 24:00. After 24:00 +all problems should be alerted again. Solution is simple - +schedule a `fixed` downtime starting at 23:00 and ending at 24:00. + +Unlike a `fixed` downtime, a `flexible` downtime will be triggered +by the state change in the time span defined by start and end time, +and then last for the specified duration in minutes. 
+ +Imagine the following scenario: Your service is frequently polled +by users trying to grab free deleted domains for immediate registration. +Between 07:30 and 08:00 the impact will hit for 15 minutes and generate +a network outage visible to the monitoring. The service is still alive, +but answering too slow to Icinga 2 service checks. +For that reason, you may want to schedule a downtime between 07:30 and +08:00 with a duration of 15 minutes. The downtime will then last from +its trigger time until the duration is over. After that, the downtime +is removed (may happen before or after the actual end time!). + +#### Fixed Downtime + +If the host/service changes into a NOT-OK state between the start and +end time window, the downtime will be marked as `in effect` and +increases the downtime depth counter. + +``` + | | | +start | end + trigger time +``` + +#### Flexible Downtime + +A flexible downtime defines a time window where the downtime may be +triggered from a host/service NOT-OK state change. It will then last +until the specified time duration is reached. That way it can happen +that the downtime end time is already gone, but the downtime ends +at `trigger time + duration`. + + +``` + | | | +start | end actual end time + |--------------duration--------| + trigger time +``` + + +### Triggered Downtimes + +This is optional when scheduling a downtime. If there is already a downtime +scheduled for a future maintenance, the current downtime can be triggered by +that downtime. This renders useful if you have scheduled a host downtime and +are now scheduling a child host's downtime getting triggered by the parent +downtime on `NOT-OK` state change. + +### Recurring Downtimes + +[ScheduledDowntime objects](09-object-types.md#objecttype-scheduleddowntime) can be used to set up +recurring downtimes for services. 
+ +Example: + +``` +apply ScheduledDowntime "backup-downtime" to Service { + author = "icingaadmin" + comment = "Scheduled downtime for backup" + + ranges = { + monday = "02:00-03:00" + tuesday = "02:00-03:00" + wednesday = "02:00-03:00" + thursday = "02:00-03:00" + friday = "02:00-03:00" + saturday = "02:00-03:00" + sunday = "02:00-03:00" + } + + assign where "backup" in service.groups +} +``` + +Icinga 2 attempts to find the next possible segment from a ScheduledDowntime object's +`ranges` attribute, and won't create multiple downtimes in the future. In case you need +all these downtimes planned and visible for the next days, weeks or months, schedule them +manually via the [REST API](12-icinga2-api.md#icinga2-api-actions-schedule-downtime) using +a script or cron job. + +> **Note** +> +> If ScheduledDowntime objects are synced in a distributed high-availability setup, +> both instances will create the next possible downtime on their own. These runtime generated +> downtimes are synced among both zone instances, and you may see sort-of duplicate downtimes +> in Icinga Web 2. + + +## Comments + +Comments can be added at runtime and are persistent over restarts. You can +add useful information for others on repeating incidents (for example +"last time syslog at 100% cpu on 17.10.2013 due to stale nfs mount") which +is primarily accessible using web interfaces. + +You can add a comment either by using the Icinga 2 API action +[add-comment](12-icinga2-api.md#icinga2-api-actions-add-comment) or +by sending an [external command](14-features.md#external-commands). + +## Acknowledgements + +If a problem persists and notifications have been sent, you can +acknowledge the problem. That way other users will get +a notification that you're aware of the issue and probably are +already working on a fix. + +Note: Acknowledgements also add a new [comment](08-advanced-topics.md#comments-intro) +which contains the author and text fields.
+ +You can send an acknowledgement either by using the Icinga 2 API action +[acknowledge-problem](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) or +by sending an [external command](14-features.md#external-commands). + + +### Sticky Acknowledgements + +The acknowledgement is removed if a state change occurs or if the host/service +recovers (OK/Up state). + +If you acknowledge a problem once you've received a `Critical` notification, +the acknowledgement will be removed if there is a state transition to `Warning`. +``` +OK -> WARNING -> CRITICAL -> WARNING -> OK +``` + +If you prefer to keep the acknowledgement until the problem is resolved (`OK` +recovery) you need to enable the `sticky` parameter. + + +### Expiring Acknowledgements + +Once a problem is acknowledged it may disappear from your `handled problems` +dashboard and no-one ever looks at it again since it will suppress +notifications too. + +This `fire-and-forget` action is quite common. If you're sure that a +current problem should be resolved in the future at a defined time, +you can define an expiration time when acknowledging the problem. + +Icinga 2 will clear the acknowledgement when expired and start to +re-notify, if the problem persists. + + +## Time Periods + +[Time Periods](09-object-types.md#objecttype-timeperiod) define +time ranges in Icinga where event actions are triggered, for +example whether a service check is executed or not within +the `check_period` attribute. Or a notification should be sent to +users or not, filtered by the `period` and `notification_period` +configuration attributes for `Notification` and `User` objects. + +The `TimePeriod` attribute `ranges` may contain multiple directives, +including weekdays, days of the month, and calendar dates. +These types may overlap/override other types in your ranges dictionary. 
+ +The descending order of precedence is as follows: + +* Calendar date (2008-01-01) +* Specific month date (January 1st) +* Generic month date (Day 15) +* Offset weekday of specific month (2nd Tuesday in December) +* Offset weekday (3rd Monday) +* Normal weekday (Tuesday) + +If you don't set any `check_period` or `notification_period` attribute +on your configuration objects, Icinga 2 assumes `24x7` as time period +as shown below. + +``` +object TimePeriod "24x7" { + display_name = "Icinga 2 24x7 TimePeriod" + ranges = { + "monday" = "00:00-24:00" + "tuesday" = "00:00-24:00" + "wednesday" = "00:00-24:00" + "thursday" = "00:00-24:00" + "friday" = "00:00-24:00" + "saturday" = "00:00-24:00" + "sunday" = "00:00-24:00" + } +} +``` + +If your operation staff should only be notified during workhours, +create a new timeperiod named `workhours` defining a work day from +09:00 to 17:00. + +``` +object TimePeriod "workhours" { + display_name = "Icinga 2 8x5 TimePeriod" + ranges = { + "monday" = "09:00-17:00" + "tuesday" = "09:00-17:00" + "wednesday" = "09:00-17:00" + "thursday" = "09:00-17:00" + "friday" = "09:00-17:00" + } +} +``` + +### Across midnight + +If you want to specify a notification period across midnight, +you can define it the following way: + +``` +object TimePeriod "across-midnight" { + display_name = "Nightly Notification" + ranges = { + "saturday" = "22:00-24:00" + "sunday" = "00:00-03:00" + } +} +``` + +Starting with v2.11 this can be shortened to using +the first day as start with an overlapping range into +the next day: + +``` +object TimePeriod "do-not-disturb" { + display_name = "Weekend DND" + ranges = { + "saturday" = "22:00-06:00" + } +} +``` + +### Across several days, weeks or months + +Below you can see another example for configuring timeperiods across several +days, weeks or months. This can be useful when taking components offline +for a distinct period of time. 
+ +``` +object TimePeriod "standby" { + display_name = "Standby" + ranges = { + "2016-09-30 - 2016-10-30" = "00:00-24:00" + } +} +``` + +Please note that the spaces before and after the dash are mandatory. + +Once your time period is configured, you can use the `period` attribute +to assign time periods to `Notification` and `Dependency` objects: + +``` +apply Notification "mail-icingaadmin" to Service { + import "mail-service-notification" + user_groups = host.vars.notification.mail.groups + users = host.vars.notification.mail.users + + period = "workhours" + + assign where host.vars.notification.mail +} +``` + +### Time Periods Inclusion and Exclusion + +Sometimes it is necessary to exclude certain time ranges from +your default time period definitions, for example, if you don't +want to send out any notification during the holiday season, +or if you only want to allow small time windows for executed checks. + +The [TimePeriod object](09-object-types.md#objecttype-timeperiod) +provides the `includes` and `excludes` attributes to solve this issue. +`prefer_includes` defines whether included or excluded time periods are +preferred.
+ +The following example defines a time period called `holidays` where +notifications should be suppressed: + +``` +object TimePeriod "holidays" { + ranges = { + "january 1" = "00:00-24:00" //new year's day + "july 4" = "00:00-24:00" //independence day + "december 25" = "00:00-24:00" //christmas + "december 31" = "18:00-24:00" //new year's eve (6pm+) + "2017-04-16" = "00:00-24:00" //easter 2017 + "monday -1 may" = "00:00-24:00" //memorial day (last monday in may) + "monday 1 september" = "00:00-24:00" //labor day (1st monday in september) + "thursday 4 november" = "00:00-24:00" //thanksgiving (4th thursday in november) + } +} +``` + +In addition to that, the time period `weekends-excluded` defines an additional +time window which should be excluded from notifications: + +``` +object TimePeriod "weekends-excluded" { + ranges = { + "saturday" = "00:00-09:00,18:00-24:00" + "sunday" = "00:00-09:00,18:00-24:00" + } +} +``` + +The time period `prod-notification` defines the default time ranges +and adds the excluded time period names as an array. + +``` +object TimePeriod "prod-notification" { + excludes = [ "holidays", "weekends-excluded" ] + + ranges = { + "monday" = "00:00-24:00" + "tuesday" = "00:00-24:00" + "wednesday" = "00:00-24:00" + "thursday" = "00:00-24:00" + "friday" = "00:00-24:00" + "saturday" = "00:00-24:00" + "sunday" = "00:00-24:00" + } +} +``` + +### Time zone handling + +Icinga 2 takes the OS' time zone including DST changes into account. + +Times inside DST changes are interpreted as before the DST changes. +I.e. for the time zone Europe/Berlin: + +* On 2020-10-25 03:00 CEST the time jumps back to 02:00 CET. + For Icinga 02:30 means 02:30 CEST. +* On 2021-03-28 02:00 CET the time jumps forward to 03:00 CEST. + For Icinga (the actually not existing) 02:30 refers to CET + and effectively means 03:30 CEST.
+ +## External Passive Check Results + +Hosts or services which do not actively execute a check plugin to receive +the state and output are called "passive checks" or "external check results". +In this scenario an external client or script is sending in check results. + +You can feed check results into Icinga 2 with the following transport methods: + +* [process-check-result action](12-icinga2-api.md#icinga2-api-actions-process-check-result) available with the [REST API](12-icinga2-api.md#icinga2-api) (remote and local) +* External command sent via command pipe (local only) + +Each time a new check result is received, the next expected check time +is updated. This means that if no check results are received from +the external source, Icinga 2 will execute [freshness checks](08-advanced-topics.md#check-result-freshness). + +> **Note** +> +> The REST API action allows you to specify the `check_source` attribute +> which helps to identify the external sender. This is also visible +> in Icinga Web 2 and the REST API queries. + +## Check Result Freshness + +In Icinga 2 active check freshness is enabled by default. It is determined by the +`check_interval` attribute and no incoming check results in that period of time. + +The threshold is calculated based on the last check execution time for actively executed checks: + +``` +(last check execution time + check interval) > current time +``` + +If this host/service receives check results from an [external source](08-advanced-topics.md#external-check-results), +the threshold is based on the last time a check result was received: + +``` +(last check result time + check interval) > current time +``` + +> **Tip** +> +> The [process-check-result](12-icinga2-api.md#icinga2-api-actions-process-check-result) REST API +> action allows you to overrule the pre-defined check interval with a specified TTL in Icinga 2 v2.9+. + +If the freshness checks fail, Icinga 2 will execute the defined check command unless active checks are disabled.
+ +Best practice is to define a [dummy](10-icinga-template-library.md#itl-dummy) `check_command` which gets +executed when freshness checks fail. + +``` +apply Service "external-check" { + check_command = "dummy" + check_interval = 1m + + /* Set the state to UNKNOWN (3) if freshness checks fail. */ + vars.dummy_state = 3 + + /* Use a runtime function to retrieve the last check time and more details. */ + vars.dummy_text = {{ + var service = get_service(macro("$host.name$"), macro("$service.name$")) + var lastCheck = DateTime(service.last_check).to_string() + + return "No check results received. Last result time: " + lastCheck + }} + + assign where "external" in host.vars.services +} +``` + +References: [get_service](18-library-reference.md#objref-get_service), [macro](18-library-reference.md#scoped-functions-macro), [DateTime](18-library-reference.md#datetime-type). + +Example output in Icinga Web 2: + +![Icinga 2 Freshness Checks](images/advanced-topics/icinga2_external_checks_freshness_icingaweb2.png) + + +## Check Flapping + +Icinga 2 supports optional detection of hosts and services that are "flapping". + +Flapping occurs when a service or host changes state too frequently, which would result in a storm of problem and +recovery notifications. With flapping detection enabled a flapping notification will be sent while other notifications are +suppressed until it calms down after receiving the same status from checks a few times. Flapping detection can help detect +configuration problems (wrong thresholds), troublesome services or network problems. + +Flapping detection can be enabled or disabled using the `enable_flapping` attribute. +The `flapping_threshold_high` and `flapping_threshold_low` attributes allows to specify the thresholds that control +when a [host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) is considered to be flapping. + +The default thresholds are 30% for high and 25% for low. 
If the computed flapping value exceeds the high threshold, a +host or service is considered flapping until it drops below the low flapping threshold. + +The attribute `flapping_ignore_states` allows you to ignore state changes to specified states during the flapping calculation. + +`FlappingStart` and `FlappingEnd` notifications will be sent out accordingly, if configured. See the chapter on +[notifications](alert-notifications) for details. + +> Note: There is no distinction between hard and soft states with flapping. All state changes count and notifications +> will be sent out regardless of the object's state. + +### How it works + +Icinga 2 saves the last 20 state changes for every host and service. See the graphic below: + +![Icinga 2 Flapping State Timeline](images/advanced-topics/flapping-state-graph.png) + +All the states are weighted, with the most recent one being worth the most (1.15) and the 20th the least (0.8). The +states in between are fairly distributed. The final flapping value is the sum of the weighted state changes divided by the total +count of 20. + +In the example above, the added states would have a total value of 7.82 (`0.84 + 0.86 + 0.88 + 0.9 + 0.98 + 1.06 + 1.12 + 1.18`). +This yields a flapping percentage of 39.1% (`7.82 / 20 * 100`). As the default upper flapping threshold is 30%, it would be +considered flapping. + +If the next seven check results then would not be state changes, the flapping percentage would fall below the lower threshold +of 25% and therefore the host or service would recover from flapping. + +## Volatile Services and Hosts + +The `volatile` option, if enabled for a host or service, makes it treat every [state change](03-monitoring-basics.md#hard-soft-states) +as a `HARD` state change. It is comparable to `max_check_attempts = 1`. With this any `NOT-OK` result will +ignore `max_check_attempts` and trigger notifications etc. It will further cause any additional `NOT-OK` +result to re-send notifications.
+ +It may be reasonable to have a volatile service which stays in a `HARD` state if the service stays in a `NOT-OK` +state. That way each service recheck will automatically trigger a notification unless the service is acknowledged or +in a scheduled downtime. + +A common example are security checks where each `NOT-OK` check result should immediately trigger a notification. + +The default for this option is `false` and should only be enabled when required. + + +## Monitoring Icinga 2 + +Why should you do that? Icinga and its components run like any other +service application on your server. There are predictable issues +such as "disk space is running low" and your monitoring suffers from just +that. + +You would also like to ensure that features and backends are running +and storing required data. Be it the database backend where Icinga Web 2 +presents fancy dashboards, forwarded metrics to Graphite or InfluxDB or +the entire distributed setup. + +This list isn't complete but should help with your own setup. +Windows client specific checks are highlighted. 
+ +Type | Description | Plugins and CheckCommands +----------------|-------------------------------|----------------------------------------------------- +System | Filesystem | [disk](10-icinga-template-library.md#plugin-check-command-disk), [disk-windows](10-icinga-template-library.md#windows-plugins) (Windows Client) +System | Memory, Swap | [mem](10-icinga-template-library.md#plugin-contrib-command-mem), [swap](10-icinga-template-library.md#plugin-check-command-swap), [memory](10-icinga-template-library.md#windows-plugins) (Windows Client) +System | Hardware | [hpasm](10-icinga-template-library.md#plugin-contrib-command-hpasm), [ipmi-sensor](10-icinga-template-library.md#plugin-contrib-command-ipmi-sensor) +System | Virtualization | [VMware](10-icinga-template-library.md#plugin-contrib-vmware), [esxi_hardware](10-icinga-template-library.md#plugin-contrib-command-esxi-hardware) +System | Processes | [procs](10-icinga-template-library.md#plugin-check-command-processes), [service-windows](10-icinga-template-library.md#windows-plugins) (Windows Client) +System | System Activity Reports | [sar-perf](10-icinga-template-library.md#plugin-contrib-command-sar-perf) +System | I/O | [iostat](10-icinga-template-library.md#plugin-contrib-command-iostat) +System | Network interfaces | [nwc_health](10-icinga-template-library.md#plugin-contrib-command-nwc_health), [interfaces](10-icinga-template-library.md#plugin-contrib-command-interfaces) +System | Users | [users](10-icinga-template-library.md#plugin-check-command-users), [users-windows](10-icinga-template-library.md#windows-plugins) (Windows Client) +System | Logs | Forward them to [Elastic Stack](14-features.md#elastic-stack-integration) or [Graylog](14-features.md#graylog-integration) and add your own alerts. 
+System | NTP | [ntp_time](10-icinga-template-library.md#plugin-check-command-ntp-time) +System | Updates | [apt](10-icinga-template-library.md#plugin-check-command-apt), [yum](10-icinga-template-library.md#plugin-contrib-command-yum) +Icinga | Status & Stats | [icinga](10-icinga-template-library.md#itl-icinga) (more below) +Icinga | Cluster & Clients | [health checks](06-distributed-monitoring.md#distributed-monitoring-health-checks) +Database | MySQL | [mysql_health](10-icinga-template-library.md#plugin-contrib-command-mysql_health) +Database | PostgreSQL | [postgres](10-icinga-template-library.md#plugin-contrib-command-postgres) +Database | Housekeeping | Check the database size and growth and analyse metrics to examine trends. +Database | DB IDO | [ido](10-icinga-template-library.md#itl-icinga-ido) (more below) +Webserver | Apache2, Nginx, etc. | [http](10-icinga-template-library.md#plugin-check-command-http), [apache-status](10-icinga-template-library.md#plugin-contrib-command-apache-status), [nginx_status](10-icinga-template-library.md#plugin-contrib-command-nginx_status) +Webserver | Certificates | [http](10-icinga-template-library.md#plugin-check-command-http), [Icinga certificate monitoring](https://icinga.com/products/icinga-certificate-monitoring/) +Webserver | Authorization | [http](10-icinga-template-library.md#plugin-check-command-http) +Notifications | Mail (queue) | [smtp](10-icinga-template-library.md#plugin-check-command-smtp), [mailq](10-icinga-template-library.md#plugin-check-command-mailq) +Notifications | SMS (GSM modem) | [check_sms3_status](https://exchange.icinga.com/netways/check_sms3status) +Notifications | Messengers, Cloud services | XMPP, Twitter, IRC, Telegram, PagerDuty, VictorOps, etc. +Metrics | PNP, RRDTool | [check_pnp_rrds](https://github.com/lingej/pnp4nagios/tree/master/scripts) checks for stale RRD files. 
+Metrics | Graphite | [graphite](10-icinga-template-library.md#plugin-contrib-command-graphite) +Metrics | InfluxDB | [check_influxdb](https://exchange.icinga.com/Mikanoshi/InfluxDB+data+monitoring+plugin) +Metrics | Elastic Stack | [elasticsearch](10-icinga-template-library.md#plugin-contrib-command-elasticsearch), [Elastic Stack integration](14-features.md#elastic-stack-integration) +Metrics | Graylog | [Graylog integration](14-features.md#graylog-integration) + + +The [icinga](10-icinga-template-library.md#itl-icinga) CheckCommand provides metrics for the runtime stats of +Icinga 2. You can forward them to your preferred graphing solution. +If you require more metrics you can also query the [REST API](12-icinga2-api.md#icinga2-api) and write +your own custom check plugin. Or you keep using the built-in [object accessor functions](08-advanced-topics.md#access-object-attributes-at-runtime) +to calculate stats in-memory. + +There is a built-in [ido](10-icinga-template-library.md#itl-icinga-ido) check available for DB IDO MySQL/PostgreSQL +which provides additional metrics for the IDO database. + +``` +apply Service "ido-mysql" { + check_command = "ido" + + vars.ido_type = "IdoMysqlConnection" + vars.ido_name = "ido-mysql" //the name defined in /etc/icinga2/features-enabled/ido-mysql.conf + + assign where match("master*.localdomain", host.name) +} +``` + +More specific database queries can be found in the [DB IDO](14-features.md#db-ido) chapter. + +Distributed setups should include specific [health checks](06-distributed-monitoring.md#distributed-monitoring-health-checks). + +You might also want to add additional checks for TLS certificate expiration. +This can be done using the [Icinga certificate monitoring](https://icinga.com/products/icinga-certificate-monitoring/) module. 
+ + + +## Advanced Configuration Hints + +### Advanced Use of Apply Rules + +[Apply rules](03-monitoring-basics.md#using-apply) can be used to create a rule set which is +entirely based on host objects and their attributes. +In addition to that [apply for and custom variable override](03-monitoring-basics.md#using-apply-for) +extend the possibilities. + +The following example defines a dictionary on the host object which contains +configuration attributes for multiple web servers. This is then used to add three checks: + +* A `ping4` check using the local IP `address` of the web server. +* A `tcp` check querying the TCP port the HTTP service is running on. +* If the `url` key is defined, the third apply for rule will create service objects using the `http` CheckCommand. +In addition to that you can optionally define the `ssl` attribute which enables HTTPS checks. + +Host definition: + +``` +object Host "webserver01" { + import "generic-host" + address = "192.168.56.200" + vars.os = "Linux" + + vars.webserver = { + instance["status"] = { + address = "192.168.56.201" + port = "80" + url = "/status" + } + instance["tomcat"] = { + address = "192.168.56.202" + port = "8080" + } + instance["icingaweb2"] = { + address = "192.168.56.210" + port = "443" + url = "/icingaweb2" + ssl = true + } + } +} +``` + +Service apply for definitions: + +``` +apply Service "webserver_ping" for (instance => config in host.vars.webserver.instance) { + display_name = "webserver_" + instance + check_command = "ping4" + + vars.ping_address = config.address + + assign where host.vars.webserver.instance +} + +apply Service "webserver_port" for (instance => config in host.vars.webserver.instance) { + display_name = "webserver_" + instance + "_" + config.port + check_command = "tcp" + + vars.tcp_address = config.address + vars.tcp_port = config.port + + assign where host.vars.webserver.instance +} + +apply Service "webserver_url" for (instance => config in host.vars.webserver.instance) { +
display_name = "webserver_" + instance + "_" + config.url + check_command = "http" + + vars.http_address = config.address + vars.http_port = config.port + vars.http_uri = config.url + + if (config.ssl) { + vars.http_ssl = config.ssl + } + + assign where config.url != "" +} +``` + +The variables defined in the host dictionary are not using the typical custom variable +prefix recommended for CheckCommand parameters. Instead they are re-used for multiple +service checks in this example. +In addition to defining check parameters this way, you can also enrich the `display_name` +attribute with more details. This will be shown in Icinga Web 2 for example. + +### Use Functions in Object Configuration + +There is a limited scope where functions can be used as object attributes such as: + +* As value for [Custom Variables](03-monitoring-basics.md#custom-variables-functions) +* Returning boolean expressions for [set_if](08-advanced-topics.md#use-functions-command-arguments-setif) inside command arguments +* Returning a [command](08-advanced-topics.md#use-functions-command-attribute) array inside command objects + +The other way around you can create objects dynamically using your own global functions. + +> **Note** +> +> Functions called inside command objects share the same global scope as runtime macros. +> Therefore you can access host custom variables like `host.vars.os`, or any other +> object attribute from inside the function definition used for [set_if](08-advanced-topics.md#use-functions-command-arguments-setif) or [command](08-advanced-topics.md#use-functions-command-attribute). + +Tips when implementing functions: + +* Use [log()](18-library-reference.md#global-functions-log) to dump variables. You can see the output +inside the `icinga2.log` file depending on your log severity. +* Use the `icinga2 console` to test basic functionality (e.g. iterating over a dictionary) +* Build them step-by-step. You can always refactor your code later on.
+ +#### Register and Use Global Functions + +[Functions](17-language-reference.md#functions) can be registered into the global scope. This allows custom functions being available +in objects and other functions. Keep in mind that these functions are not marked +as side-effect-free and as such are not available via the REST API. + +Add a new configuration file `functions.conf` and include it into the [icinga2.conf](04-configuration.md#icinga2-conf) +configuration file in the very beginning, e.g. after `constants.conf`. You can also manage global +functions inside `constants.conf` if you prefer. + +The following function converts a given state parameter into a returned string value. The important +bits for registering it into the global scope are: + +* `globals.` adds a new globals entry. +* `function()` specifies that a call to `state_to_string()` executes a function. +* Function parameters are defined inside the `function()` definition. + +``` +globals.state_to_string = function(state) { + if (state == 2) { + return "Critical" + } else if (state == 1) { + return "Warning" + } else if (state == 0) { + return "OK" + } else if (state == 3) { + return "Unknown" + } else { + log(LogWarning, "state_to_string", "Unknown state " + state + " provided.") + } +} +``` + +The else-condition allows for better error handling. This warning will be shown in the Icinga 2 +log file once the function is called. + +> **Note** +> +> If these functions are used in a distributed environment, you must ensure to deploy them +> everywhere needed. + +In order to test-drive the newly created function, restart Icinga 2 and use the [debug console](11-cli-commands.md#cli-command-console) +to connect to the REST API. 
+ +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' +Icinga 2 (version: v2.11.0) +<1> => globals.state_to_string(1) +"Warning" +<2> => state_to_string(2) +"Critical" +``` + +You can see that this function is now registered into the [global scope](17-language-reference.md#variable-scopes). The function call +`state_to_string()` can be used in any object at static config compile time or inside runtime +lambda functions. + +The following service object example uses the service state and converts it to string output. +The function definition is not optimized and is enrolled for better readability including a log message. + +``` +object Service "state-test" { + check_command = "dummy" + host_name = NodeName + + vars.dummy_state = 2 + + vars.dummy_text = {{ + var h = macro("$host.name$") + var s = macro("$service.name$") + + var state = get_service(h, s).state + + log(LogInformation, "dummy_state", "Host: " + h + " Service: " + s + " State: " + state) + + return state_to_string(state) + }} +} +``` + + +#### Use Custom Functions as Attribute + +To use custom functions as attributes, the function must be defined in a +slightly unexpected way. The following example shows how to assign values +depending on group membership. All hosts in the `slow-lan` host group use 300 +as value for `ping_wrta`, all other hosts use 100. 
+ +``` +globals.group_specific_value = function(group, group_value, non_group_value) { + return function() use (group, group_value, non_group_value) { + if (group in host.groups) { + return group_value + } else { + return non_group_value + } + } +} + +apply Service "ping4" { + import "generic-service" + check_command = "ping4" + + vars.ping_wrta = group_specific_value("slow-lan", 300, 100) + vars.ping_crta = group_specific_value("slow-lan", 500, 200) + + assign where true +} +``` + +#### Use Functions in Assign Where Expressions + +If a simple expression for matching a name or checking if an item +exists in an array or dictionary does not fit, you should consider +writing your own global [functions](17-language-reference.md#functions). +You can call them inside `assign where` and `ignore where` expressions +for [apply rules](03-monitoring-basics.md#using-apply-expressions) or +[group assignments](03-monitoring-basics.md#group-assign-intro) just like +any other global functions for example [match](18-library-reference.md#global-functions-match). + +The following example requires the host `myprinter` being added +to the host group `printers-lexmark` but only if the host uses +a template matching the name `lexmark*`. 
+ +``` +template Host "lexmark-printer-host" { + vars.printer_type = "Lexmark" +} + +object Host "myprinter" { + import "generic-host" + import "lexmark-printer-host" + + address = "192.168.1.1" +} + +/* register a global function for the assign where call */ +globals.check_host_templates = function(host, search) { + /* iterate over all host templates and check if the search matches */ + for (tmpl in host.templates) { + if (match(search, tmpl)) { + return true + } + } + + /* nothing matched */ + return false +} + +object HostGroup "printers-lexmark" { + display_name = "Lexmark Printers" + /* call the global function and pass the arguments */ + assign where check_host_templates(host, "lexmark*") +} +``` + +Take a different more complex example: All hosts with the +custom variable `vars_app` as nested dictionary should be +added to the host group `ABAP-app-server`. But only if the +`app_type` for all entries is set to `ABAP`. + +It could read as wildcard match for nested dictionaries: + +``` + where host.vars.vars_app["*"].app_type == "ABAP" +``` + +The solution for this problem is to register a global +function which checks the `app_type` for all hosts +with the `vars_app` dictionary. 
+ +``` +object Host "appserver01" { + check_command = "dummy" + vars.vars_app["ABC"] = { app_type = "ABAP" } +} +object Host "appserver02" { + check_command = "dummy" + vars.vars_app["DEF"] = { app_type = "ABAP" } +} + +globals.check_app_type = function(host, type) { + /* ensure that other hosts without the custom variable do not match */ + if (typeof(host.vars.vars_app) != Dictionary) { + return false + } + + /* iterate over the vars_app dictionary */ + for (key => val in host.vars.vars_app) { + /* if the value is a dictionary and if contains the app_type being the requested type */ + if (typeof(val) == Dictionary && val.app_type == type) { + return true + } + } + + /* nothing matched */ + return false +} + +object HostGroup "ABAP-app-server" { + assign where check_app_type(host, "ABAP") +} +``` + +#### Use Functions in Command Arguments set_if + +The `set_if` attribute inside the command arguments definition in the +[CheckCommand object definition](09-object-types.md#objecttype-checkcommand) is primarily used to +evaluate whether the command parameter should be set or not. + +By default you can evaluate runtime macros for their existence. If the result is not an empty +string, the command parameter is passed. This becomes fairly complicated when you want to evaluate +multiple conditions and attributes. + +The following example was found on the community support channels. The user had defined a host +dictionary named `compellent` with the key `disks`. This was then used inside service apply for rules. + +``` +object Host "dict-host" { + check_command = "check_compellent" + vars.compellent["disks"] = { + file = "/var/lib/check_compellent/san_disks.0.json", + checks = ["disks"] + } +} +``` + +The more significant problem was to only add the command parameter `--disks` to the plugin call +when the dictionary `compellent` contains the key `disks`, and omit it if not found.
+ +By defining `set_if` as [abbreviated lambda function](17-language-reference.md#nullary-lambdas) +and evaluating the host custom variable `compellent` containing the `disks` this problem was +solved like this: + +``` +object CheckCommand "check_compellent" { + command = [ "/usr/bin/check_compellent" ] + arguments = { + "--disks" = { + set_if = {{ + var host_vars = host.vars + log(host_vars) + var compel = host_vars.compellent + log(compel) + compel.contains("disks") + }} + } + } +} +``` + +This implementation uses the dictionary type method [contains](18-library-reference.md#dictionary-contains) +and will fail if `host.vars.compellent` is not of the type `Dictionary`. +Therefore you can extend the checks using the [typeof](17-language-reference.md#types) function. + +You can test the types using the `icinga2 console`: + +``` +# icinga2 console +Icinga (version: v2.3.0-193-g3eb55ad) +<1> => srv_vars.compellent["check_a"] = { file="outfile_a.json", checks = [ "disks", "fans" ] } +null +<2> => srv_vars.compellent["check_b"] = { file="outfile_b.json", checks = [ "power", "voltages" ] } +null +<3> => typeof(srv_vars.compellent) +type 'Dictionary' +<4> => +``` + +The more programmatic approach for `set_if` could look like this: + +``` + "--disks" = { + set_if = {{ + var srv_vars = service.vars + if(len(srv_vars) > 0) { + if (typeof(srv_vars.compellent) == Dictionary) { + return srv_vars.compellent.contains("disks") + } else { + log(LogInformation, "checkcommand set_if", "custom variable compellent_checks is not a dictionary, ignoring it.") + return false + } + } else { + log(LogWarning, "checkcommand set_if", "empty custom variables") + return false + } + }} + } +``` + +#### Use Functions as Command Attribute + +This comes in handy for [NotificationCommands](09-object-types.md#objecttype-notificationcommand) +or [EventCommands](09-object-types.md#objecttype-eventcommand) which does not require +a returned checkresult including state/output. 
+ +The following example was taken from the community support channels. The requirement was to +specify a custom variable inside the notification apply rule and decide which notification +script to call based on that. + +``` +object User "short-dummy" { +} + +object UserGroup "short-dummy-group" { + assign where user.name == "short-dummy" +} + +apply Notification "mail-admins-short" to Host { + import "mail-host-notification" + command = "mail-host-notification-test" + user_groups = [ "short-dummy-group" ] + vars.short = true + assign where host.vars.notification.mail +} +``` + +The solution is fairly simple: The `command` attribute is implemented as a function returning +an array required by the caller, Icinga 2. +The local variable `mailscript` sets the default value for the notification script location. +If the notification custom variable `short` is set, it will override the local variable `mailscript` +with a new value. +The `mailscript` variable is then used to compute the final notification command array being +returned. + +You can omit the `log()` calls; they only help with debugging. + +``` +object NotificationCommand "mail-host-notification-test" { + command = {{ + log("command as function") + var mailscript = "mail-host-notification-long.sh" + if (notification.vars.short) { + mailscript = "mail-host-notification-short.sh" + } + log("Running command") + log(mailscript) + + var cmd = [ ConfigDir + "/scripts/" + mailscript ] + log(LogCritical, "me", cmd) + return cmd + }} + + env = { + } +} +``` + +### Access Object Attributes at Runtime + +The [Object Accessor Functions](18-library-reference.md#object-accessor-functions) +can be used to retrieve references to other objects by name. + +This allows you to access configuration and runtime object attributes. A detailed +list can be found [here](09-object-types.md#object-types).
+ +#### Access Object Attributes at Runtime: Cluster Check + +This is a simple cluster example for accessing two host object states and calculating a virtual +cluster state and output: + +``` +object Host "cluster-host-01" { + check_command = "dummy" + vars.dummy_state = 2 + vars.dummy_text = "This host is down." +} + +object Host "cluster-host-02" { + check_command = "dummy" + vars.dummy_state = 0 + vars.dummy_text = "This host is up." +} + +object Host "cluster" { + check_command = "dummy" + vars.cluster_nodes = [ "cluster-host-01", "cluster-host-02" ] + + vars.dummy_state = {{ + var up_count = 0 + var down_count = 0 + var cluster_nodes = macro("$cluster_nodes$") + + for (node in cluster_nodes) { + if (get_host(node).state > 0) { + down_count += 1 + } else { + up_count += 1 + } + } + + if (up_count >= down_count) { + return 0 //same up as down -> UP + } else { + return 2 //something is broken + } + }} + + vars.dummy_text = {{ + var output = "Cluster hosts:\n" + var cluster_nodes = macro("$cluster_nodes$") + + for (node in cluster_nodes) { + output += node + ": " + get_host(node).last_check_result.output + "\n" + } + + return output + }} +} +``` + +#### Time Dependent Thresholds + +The following example sets time dependent thresholds for the load check based on the current +time of the day compared to the defined time period. 
+ +``` +object TimePeriod "backup" { + ranges = { + monday = "02:00-03:00" + tuesday = "02:00-03:00" + wednesday = "02:00-03:00" + thursday = "02:00-03:00" + friday = "02:00-03:00" + saturday = "02:00-03:00" + sunday = "02:00-03:00" + } +} + +object Host "webserver-with-backup" { + check_command = "hostalive" + address = "127.0.0.1" +} + +object Service "webserver-backup-load" { + check_command = "load" + host_name = "webserver-with-backup" + + vars.load_wload1 = {{ + if (get_time_period("backup").is_inside) { + return 20 + } else { + return 5 + } + }} + vars.load_cload1 = {{ + if (get_time_period("backup").is_inside) { + return 40 + } else { + return 10 + } + }} +} +``` + + +## Advanced Value Types + +In addition to the default value types Icinga 2 also uses a few other types +to represent its internal state. The following types are exposed via the [API](12-icinga2-api.md#icinga2-api). + +### CheckResult + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + exit\_status | Number | The exit status returned by the check execution. + output | String | The check output. + performance\_data | Array | Array of [performance data values](08-advanced-topics.md#advanced-value-types-perfdatavalue). + check\_source | String | Name of the node executing the check. + scheduling\_source | String | Name of the node scheduling the check. + state | Number | The current state (0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN). + command | Value | Array of command with shell-escaped arguments or command line string. + execution\_start | Timestamp | Check execution start time (as a UNIX timestamp). + execution\_end | Timestamp | Check execution end time (as a UNIX timestamp). + schedule\_start | Timestamp | Scheduled check execution start time (as a UNIX timestamp). + schedule\_end | Timestamp | Scheduled check execution end time (as a UNIX timestamp). 
+ active | Boolean | Whether the result is from an active or passive check. + vars\_before | Dictionary | Internal attribute used for calculations. + vars\_after | Dictionary | Internal attribute used for calculations. + ttl | Number | Time-to-live duration in seconds for this check result. The next expected check result is `now + ttl` where freshness checks are executed. + +### PerfdataValue + +Icinga 2 parses performance data strings returned by check plugins and makes the information available to external interfaces (e.g. [GraphiteWriter](09-object-types.md#objecttype-graphitewriter) or the [Icinga 2 API](12-icinga2-api.md#icinga2-api)). + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + label | String | Performance data label. + value | Number | Normalized performance data value without unit. + counter | Boolean | Enabled if the original value contains `c` as unit. Defaults to `false`. + unit | String | Unit of measurement (`seconds`, `bytes`, `percent`) according to the [plugin API](05-service-monitoring.md#service-monitoring-plugin-api). + crit | Value | Critical threshold value. + warn | Value | Warning threshold value. + min | Value | Minimum value returned by the check. + max | Value | Maximum value returned by the check. diff --git a/doc/09-object-types.md b/doc/09-object-types.md new file mode 100644 index 0000000..535f1c1 --- /dev/null +++ b/doc/09-object-types.md @@ -0,0 +1,1972 @@ +# Object Types + +This chapter provides an overview of all available config object types which can be +instantiated using the `object` keyword. + +Additional details on configuration and runtime attributes and their +description are explained here too. + +The attributes need to have a specific type value. Many of them are +explained in [this chapter](03-monitoring-basics.md#attribute-value-types) already. +You should note that the `Timestamp` type is a `Number`.
+In addition to that `Object name` is an object reference to +an existing object name as `String` type. + +## Overview + +* [Monitoring Objects](09-object-types.md#object-types-monitoring) such as host, service, etc. +* [Runtime Objects](09-object-types.md#object-types-runtime) generated by Icinga itself. +* [Features](09-object-types.md#object-types-features) available via `icinga2 feature` CLI command. + +## Common Runtime Attributes + +Configuration objects share these runtime attributes which cannot be +modified by the user. You can access these attributes using +the [Icinga 2 API](12-icinga2-api.md#icinga2-api-config-objects). + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + version | Number | Timestamp when the object was created or modified. Synced throughout cluster nodes. + type | String | Object type. + original\_attributes | Dictionary | Original values of object attributes modified at runtime. + active | Boolean | Object is active (e.g. a service being checked). + paused | Boolean | Object has been paused at runtime (e.g. [IdoMysqlConnection](09-object-types.md#objecttype-idomysqlconnection)). Defaults to `false`. + templates | Array | Templates imported on object compilation. + package | String | [Configuration package name](12-icinga2-api.md#icinga2-api-config-management) this object belongs to. Local configuration is set to `_etc`, runtime created objects use `_api`. + source\_location | Dictionary | Location information where the configuration files are stored. + +## Monitoring Objects + +### ApiUser + +ApiUser objects are used for authentication against the [Icinga 2 API](12-icinga2-api.md#icinga2-api-authentication).
+ +Example: + +``` +object ApiUser "root" { + password = "mysecretapipassword" + permissions = [ "*" ] +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + password | String | **Optional.** Password string. Note: This attribute is hidden in API responses. + client\_cn | String | **Optional.** Client Common Name (CN). + permissions | Array | **Required.** Array of permissions. Either as string or dictionary with the keys `permission` and `filter`. The latter must be specified as function. + +Available permissions are explained in the [API permissions](12-icinga2-api.md#icinga2-api-permissions) +chapter. + +### CheckCommand + +A check command definition. Additional default command custom variables can be +defined here. + +Example: + +``` +object CheckCommand "http" { + command = [ PluginDir + "/check_http" ] + + arguments = { + "-H" = "$http_vhost$" + "-I" = "$http_address$" + "-u" = "$http_uri$" + "-p" = "$http_port$" + "-S" = { + set_if = "$http_ssl$" + } + "--sni" = { + set_if = "$http_sni$" + } + "-a" = { + value = "$http_auth_pair$" + description = "Username:password on sites with basic authentication" + } + "--no-body" = { + set_if = "$http_ignore_body$" + } + "-r" = "$http_expect_body_regex$" + "-w" = "$http_warn_time$" + "-c" = "$http_critical_time$" + "-e" = "$http_expect$" + } + + vars.http_address = "$address$" + vars.http_ssl = false + vars.http_sni = false +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + command | Array | **Required.** The command. This can either be an array of individual command arguments. Alternatively a string can be specified in which case the shell interpreter (usually /bin/sh) takes care of parsing the command. When using the "arguments" attribute this must be an array. 
Can be specified as function for advanced implementations. + env | Dictionary | **Optional.** A dictionary of macros which should be exported as environment variables prior to executing the command. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are specific to this command. + timeout | Duration | **Optional.** The command timeout in seconds. Defaults to `1m`. + arguments | Dictionary | **Optional.** A dictionary of command arguments. + + +#### CheckCommand Arguments + +Command arguments can be defined as key-value-pairs in the `arguments` +dictionary. Best practice is to assign a dictionary as value which +provides additional details such as the `description` next to the `value`. + +``` + arguments = { + "--parameter" = { + description = "..." + value = "..." + } + } +``` + +All available argument value entries are shown below: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + value | String/Function | Optional argument value set by a [runtime macro string](03-monitoring-basics.md#runtime-macros) or a [function call](17-language-reference.md#functions). [More details](03-monitoring-basics.md#command-arguments-value). + description | String | Optional argument description. [More details](03-monitoring-basics.md#command-arguments-description). + required | Boolean | Required argument. Execution error if not set. Defaults to false (optional). [More details](03-monitoring-basics.md#command-arguments-required). + skip\_key | Boolean | Use the value as argument and skip the key. [More details](03-monitoring-basics.md#command-arguments-skip-key). + set\_if | String/Function | Argument is added if the [runtime macro string](03-monitoring-basics.md#runtime-macros) resolves to a defined numeric or boolean value. String values are not supported. [Function calls](17-language-reference.md#functions) returning a value are supported too. 
[More details](03-monitoring-basics.md#command-arguments-set-if). + order | Number | Set if multiple arguments require a defined argument order. The syntax is `..., -3, -2, -1, , 1, 2, 3, ...`. [More details](03-monitoring-basics.md#command-arguments-order). + repeat\_key | Boolean | If the argument value is an array, repeat the argument key, or not. Defaults to true (repeat). [More details](03-monitoring-basics.md#command-arguments-repeat-key). + key | String | Optional argument key overriding the key identifier. [More details](03-monitoring-basics.md#command-arguments-key). + separator | String | Key-value separator. If given, e.g. `=`, appears between key and value like `--key=value` instead of the regular `--key` `value`. + +`value` and `description` are commonly used, the other entries allow +to build more advanced CheckCommand objects and arguments. + +Please continue reading [here](03-monitoring-basics.md#command-arguments) for advanced usage and examples +for command arguments. + + +### Dependency + +Dependency objects are used to specify dependencies between hosts and services. Dependencies +can be defined as Host-to-Host, Service-to-Service, Service-to-Host, or Host-to-Service +relations. + +> **Best Practice** +> +> Rather than creating a `Dependency` object for a specific host or service it is usually easier +> to just create a `Dependency` template and use the `apply` keyword to assign the +> dependency to a number of hosts or services. Use the `to` keyword to set the specific target +> type for `Host` or `Service`. +> Check the [dependencies](03-monitoring-basics.md#dependencies) chapter for detailed examples. 
+ +Service-to-Service Example: + +``` +object Dependency "webserver-internet" { + parent_host_name = "internet" + parent_service_name = "ping4" + + child_host_name = "webserver" + child_service_name = "ping4" + + states = [ OK, Warning ] + + disable_checks = true +} +``` + +Host-to-Host Example: + +``` +object Dependency "webserver-internet" { + parent_host_name = "internet" + + child_host_name = "webserver" + + states = [ Up ] + + disable_checks = true +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + parent\_host\_name | Object name | **Required.** The parent host. + parent\_service\_name | Object name | **Optional.** The parent service. If omitted, this dependency object is treated as host dependency. + child\_host\_name | Object name | **Required.** The child host. + child\_service\_name | Object name | **Optional.** The child service. If omitted, this dependency object is treated as host dependency. + disable\_checks | Boolean | **Optional.** Whether to disable checks (i.e., don't schedule active checks and drop passive results) when this dependency fails. Defaults to false. + disable\_notifications | Boolean | **Optional.** Whether to disable notifications when this dependency fails. Defaults to true. + ignore\_soft\_states | Boolean | **Optional.** Whether to ignore soft states for the reachability calculation. Defaults to true. + period | Object name | **Optional.** Time period object during which this dependency is enabled. + states | Array | **Optional.** A list of state filters when this dependency should be OK. Defaults to [ OK, Warning ] for services and [ Up ] for hosts. + +Available state filters: + +``` +OK +Warning +Critical +Unknown +Up +Down +``` + +When using [apply rules](03-monitoring-basics.md#using-apply) for dependencies, you can leave out certain attributes which will be +automatically determined by Icinga 2. 
+ +Service-to-Host Dependency Example: + +``` +apply Dependency "internet" to Service { + parent_host_name = "dsl-router" + disable_checks = true + + assign where host.name != "dsl-router" +} +``` + +This example sets all service objects matching the assign condition into a dependency relation to +the parent host object `dsl-router` as implicit child services. + +Service-to-Service-on-the-same-Host Dependency Example: + +``` +apply Dependency "disable-agent-checks" to Service { + parent_service_name = "agent-health" + + assign where service.check_command == "ssh" + ignore where service.name == "agent-health" +} +``` + +This example omits the `parent_host_name` attribute and Icinga 2 automatically sets its value to the name of the +host object matched by the apply rule condition. All services where apply matches are made implicit child services +in this dependency relation. + + +Dependency objects have composite names, i.e. their names are based on the `child_host_name` and `child_service_name` attributes and the +name you specified. This means you can define more than one object with the same (short) name as long as one of the `child_host_name` and +`child_service_name` attributes has a different value. + +### Endpoint + +Endpoint objects are used to specify connection information for remote +Icinga 2 instances. More details can be found in the [distributed monitoring chapter](06-distributed-monitoring.md#distributed-monitoring). + +Example: + +``` +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.56.111" + port = 5665 + log_duration = 1d +} +``` + +Example (disable replay log): + +``` +object Endpoint "icinga2-agent1.localdomain" { + host = "192.168.5.111" + port = 5665 + log_duration = 0 +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Optional.** The hostname/IP address of the remote Icinga 2 instance. 
+ port | Number | **Optional.** The service name/port of the remote Icinga 2 instance. Defaults to `5665`. + log\_duration | Duration | **Optional.** Duration for keeping replay logs on connection loss. Defaults to `1d` (86400 seconds). Attribute is specified in seconds. If log_duration is set to 0, replaying logs is disabled. You could also specify the value in human readable format like `10m` for 10 minutes or `1h` for one hour. + +Endpoint objects cannot currently be created with the API. + +### EventCommand + +An event command definition. + +Example: + +``` +object EventCommand "restart-httpd-event" { + command = "/opt/bin/restart-httpd.sh" +} +``` + + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + command | Array | **Required.** The command. This can either be an array of individual command arguments. Alternatively a string can be specified in which case the shell interpreter (usually /bin/sh) takes care of parsing the command. When using the "arguments" attribute this must be an array. Can be specified as function for advanced implementations. + env | Dictionary | **Optional.** A dictionary of macros which should be exported as environment variables prior to executing the command. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are specific to this command. + timeout | Duration | **Optional.** The command timeout in seconds. Defaults to `1m`. + arguments | Dictionary | **Optional.** A dictionary of command arguments. + +Command arguments can be used the same way as for [CheckCommand objects](09-object-types.md#objecttype-checkcommand-arguments). + +More advanced examples for event command usage can be found [here](03-monitoring-basics.md#event-commands). + + +### Host + +A host. 
+ +Example: + +``` +object Host "icinga2-agent1.localdomain" { + display_name = "Linux Client 1" + address = "192.168.56.111" + address6 = "2a00:1450:4001:815::2003" + + groups = [ "linux-servers" ] + + check_command = "hostalive" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + display\_name | String | **Optional.** A short description of the host (e.g. displayed by external interfaces instead of the name if set). + address | String | **Optional.** The host's IPv4 address. Available as command runtime macro `$address$` if set. + address6 | String | **Optional.** The host's IPv6 address. Available as command runtime macro `$address6$` if set. + groups | Array of object names | **Optional.** A list of host groups this host belongs to. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are specific to this host. + check\_command | Object name | **Required.** The name of the check command. + max\_check\_attempts | Number | **Optional.** The number of times a host is re-checked before changing into a hard state. Defaults to 3. + check\_period | Object name | **Optional.** The name of a time period which determines when this host should be checked. Not set by default (effectively 24x7). + check\_timeout | Duration | **Optional.** Check command timeout in seconds. Overrides the CheckCommand's `timeout` attribute. + check\_interval | Duration | **Optional.** The check interval (in seconds). This interval is used for checks when the host is in a `HARD` state. Defaults to `5m`. + retry\_interval | Duration | **Optional.** The retry interval (in seconds). This interval is used for checks when the host is in a `SOFT` state. Defaults to `1m`. Note: This does not affect the scheduling [after a passive check result](08-advanced-topics.md#check-result-freshness). 
+ enable\_notifications | Boolean | **Optional.** Whether notifications are enabled. Defaults to true. + enable\_active\_checks | Boolean | **Optional.** Whether active checks are enabled. Defaults to true. + enable\_passive\_checks | Boolean | **Optional.** Whether passive checks are enabled. Defaults to true. + enable\_event\_handler | Boolean | **Optional.** Enables event handlers for this host. Defaults to true. + enable\_flapping | Boolean | **Optional.** Whether flap detection is enabled. Defaults to false. + enable\_perfdata | Boolean | **Optional.** Whether performance data processing is enabled. Defaults to true. + event\_command | Object name | **Optional.** The name of an event command that should be executed every time the host's state changes or the host is in a `SOFT` state. + flapping\_threshold\_high | Number | **Optional.** Flapping upper bound in percent for a host to be considered flapping. Default `30.0` + flapping\_threshold\_low | Number | **Optional.** Flapping lower bound in percent for a host to be considered not flapping. Default `25.0` + flapping\_ignore\_states | Array | **Optional.** A list of states that should be ignored during flapping calculation. By default no state is ignored. + volatile | Boolean | **Optional.** Treat all state changes as HARD changes. See [here](08-advanced-topics.md#volatile-services-hosts) for details. Defaults to `false`. + zone | Object name | **Optional.** The zone this object is a member of. Please read the [distributed monitoring](06-distributed-monitoring.md#distributed-monitoring) chapter for details. + command\_endpoint | Object name | **Optional.** The endpoint where commands are executed on. + notes | String | **Optional.** Notes for the host. + notes\_url | String | **Optional.** URL for notes for the host (for example, in notification commands). + action\_url | String | **Optional.** URL for actions for the host (for example, an external graphing tool). 
+ icon\_image | String | **Optional.** Icon image for the host. Used by external interfaces only. + icon\_image\_alt | String | **Optional.** Icon image description for the host. Used by external interface only. + +The actual check interval might deviate slightly from the configured values due to the fact that Icinga tries +to evenly distribute all checks over a certain period of time, i.e. to avoid load spikes. + +> **Best Practice** +> +> The `address` and `address6` attributes are required for running commands using +> the `$address$` and `$address6$` runtime macros. + +Runtime Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + next\_check | Timestamp | When the next check occurs (as a UNIX timestamp). + last\_check | Timestamp | When the last check occurred (as a UNIX timestamp). + check\_attempt | Number | The current check attempt number. + state\_type | Number | The current state type (0 = SOFT, 1 = HARD). + last\_state\_type | Number | The previous state type (0 = SOFT, 1 = HARD). + last\_reachable | Boolean | Whether the host was reachable when the last check occurred. + last\_check\_result | CheckResult | The current [check result](08-advanced-topics.md#advanced-value-types-checkresult). + last\_state\_change | Timestamp | When the last state change occurred (as a UNIX timestamp). + last\_hard\_state\_change | Timestamp | When the last hard state change occurred (as a UNIX timestamp). + last\_in\_downtime | Boolean | Whether the host was in a downtime when the last check occurred. + acknowledgement | Number | The acknowledgement type (0 = NONE, 1 = NORMAL, 2 = STICKY). + acknowledgement\_expiry | Timestamp | When the acknowledgement expires (as a UNIX timestamp; 0 = no expiry). + downtime\_depth | Number | Whether the host has one or more active downtimes. + flapping\_last\_change | Timestamp | When the last flapping change occurred (as a UNIX timestamp). 
+ flapping | Boolean | Whether the host is flapping between states. + flapping\_current | Number | Current flapping value in percent (see flapping\_thresholds) + state | Number | The current state (0 = UP, 1 = DOWN). + last\_state | Number | The previous state (0 = UP, 1 = DOWN). + last\_hard\_state | Number | The last hard state (0 = UP, 1 = DOWN). + last\_state\_up | Timestamp | When the last UP state occurred (as a UNIX timestamp). + last\_state\_down | Timestamp | When the last DOWN state occurred (as a UNIX timestamp). + last\_state\_unreachable | Timestamp | When the host was unreachable the last time (as a UNIX timestamp). + previous\_state\_change | Timestamp | Previous timestamp of `last_state_change` before processing a new check result. + severity | Number | [Severity](19-technical-concepts.md#technical-concepts-checks-severity) calculated value. + problem | Boolean | Whether the host is considered in a problem state type (NOT-UP). + handled | Boolean | Whether the host problem is handled (downtime or acknowledgement). + next\_update | Timestamp | When the next check update is to be expected. + + + +### HostGroup + +A group of hosts. + +> **Best Practice** +> +> Assign host group members using the [group assign](17-language-reference.md#group-assign) rules. + +Example: + +``` +object HostGroup "linux-servers" { + display_name = "Linux Servers" + + assign where host.vars.os == "Linux" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + display\_name | String | **Optional.** A short description of the host group. + groups | Array of object names | **Optional.** An array of nested group names. + + + +### Notification + +Notification objects are used to specify how users should be notified in case +of host and service state changes and other events. 
+ +> **Best Practice** +> +> Rather than creating a `Notification` object for a specific host or service it is +> usually easier to just create a `Notification` template and use the `apply` keyword +> to assign the notification to a number of hosts or services. Use the `to` keyword +> to set the specific target type for `Host` or `Service`. +> Check the [notifications](03-monitoring-basics.md#alert-notifications) chapter for detailed examples. + +Example: + +``` +object Notification "localhost-ping-notification" { + host_name = "localhost" + service_name = "ping4" + + command = "mail-notification" + + users = [ "user1", "user2" ] // reference to User objects + + types = [ Problem, Recovery ] + states = [ Critical, Warning, OK ] +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host\_name | Object name | **Required.** The name of the host this notification belongs to. + service\_name | Object name | **Optional.** The short name of the service this notification belongs to. If omitted, this notification object is treated as host notification. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are specific to this notification object. + users | Array of object names | **Required.** A list of user names who should be notified. **Optional.** if the `user_groups` attribute is set. + user\_groups | Array of object names | **Required.** A list of user group names who should be notified. **Optional.** if the `users` attribute is set. + times | Dictionary | **Optional.** A dictionary containing `begin` and `end` attributes for the notification. + command | Object name | **Required.** The name of the notification command which should be executed when the notification is triggered. + interval | Duration | **Optional.** The notification interval (in seconds). This interval is used for active notifications. Defaults to 30 minutes. 
If set to 0, [re-notifications](03-monitoring-basics.md#disable-renotification) are disabled. + period | Object name | **Optional.** The name of a time period which determines when this notification should be triggered. Not set by default (effectively 24x7). + zone | Object name | **Optional.** The zone this object is a member of. Please read the [distributed monitoring](06-distributed-monitoring.md#distributed-monitoring) chapter for details. + types | Array | **Optional.** A list of type filters when this notification should be triggered. By default everything is matched. + states | Array | **Optional.** A list of state filters when this notification should be triggered. By default everything is matched. Note that the states filter is ignored for notifications of type Acknowledgement! + +Available notification state filters for Service: + +``` +OK +Warning +Critical +Unknown +``` + +Available notification state filters for Host: + +``` +Up +Down +``` + +Available notification type filters: + +``` +DowntimeStart +DowntimeEnd +DowntimeRemoved +Custom +Acknowledgement +Problem +Recovery +FlappingStart +FlappingEnd +``` + +Runtime Attributes: + + Name | Type | Description + ----------------------------|-----------------------|----------------- + last\_notification | Timestamp | When the last notification was sent for this Notification object (as a UNIX timestamp). + next\_notification | Timestamp | When the next notification is going to be sent for this Notification object, assuming the associated host/service is still in a non-OK state (as a UNIX timestamp). + notification\_number | Number | The notification number. + last\_problem\_notification | Timestamp | When the last notification was sent for a problem (as a UNIX timestamp). + + +### NotificationCommand + +A notification command definition.
+ +Example: + +``` +object NotificationCommand "mail-service-notification" { + command = [ ConfigDir + "/scripts/mail-service-notification.sh" ] + + arguments += { + "-4" = { + required = true + value = "$notification_address$" + } + "-6" = "$notification_address6$" + "-b" = "$notification_author$" + "-c" = "$notification_comment$" + "-d" = { + required = true + value = "$notification_date$" + } + "-e" = { + required = true + value = "$notification_servicename$" + } + "-f" = { + value = "$notification_from$" + description = "Set from address. Requires GNU mailutils (Debian/Ubuntu) or mailx (RHEL/SUSE)" + } + "-i" = "$notification_icingaweb2url$" + "-l" = { + required = true + value = "$notification_hostname$" + } + "-n" = { + required = true + value = "$notification_hostdisplayname$" + } + "-o" = { + required = true + value = "$notification_serviceoutput$" + } + "-r" = { + required = true + value = "$notification_useremail$" + } + "-s" = { + required = true + value = "$notification_servicestate$" + } + "-t" = { + required = true + value = "$notification_type$" + } + "-u" = { + required = true + value = "$notification_servicedisplayname$" + } + "-v" = "$notification_logtosyslog$" + } + + vars += { + notification_address = "$address$" + notification_address6 = "$address6$" + notification_author = "$notification.author$" + notification_comment = "$notification.comment$" + notification_type = "$notification.type$" + notification_date = "$icinga.long_date_time$" + notification_hostname = "$host.name$" + notification_hostdisplayname = "$host.display_name$" + notification_servicename = "$service.name$" + notification_serviceoutput = "$service.output$" + notification_servicestate = "$service.state$" + notification_useremail = "$user.email$" + notification_servicedisplayname = "$service.display_name$" + } +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + command | 
Array | **Required.** The command. This can either be an array of individual command arguments. Alternatively a string can be specified in which case the shell interpreter (usually /bin/sh) takes care of parsing the command. When using the "arguments" attribute this must be an array. Can be specified as function for advanced implementations. + env | Dictionary | **Optional.** A dictionary of macros which should be exported as environment variables prior to executing the command. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are specific to this command. + timeout | Duration | **Optional.** The command timeout in seconds. Defaults to `1m`. + arguments | Dictionary | **Optional.** A dictionary of command arguments. + +Command arguments can be used the same way as for [CheckCommand objects](09-object-types.md#objecttype-checkcommand-arguments). + +More details on specific attributes can be found in [this chapter](03-monitoring-basics.md#notification-commands). + +### ScheduledDowntime + +ScheduledDowntime objects can be used to set up recurring downtimes for hosts/services. + +> **Best Practice** +> +> Rather than creating a `ScheduledDowntime` object for a specific host or service it is usually easier +> to just create a `ScheduledDowntime` template and use the `apply` keyword to assign the +> scheduled downtime to a number of hosts or services. Use the `to` keyword to set the specific target +> type for `Host` or `Service`. +> Check the [recurring downtimes](08-advanced-topics.md#recurring-downtimes) example for details. 
+ +Example: + +``` +object ScheduledDowntime "some-downtime" { + host_name = "localhost" + service_name = "ping4" + + author = "icingaadmin" + comment = "Some comment" + + fixed = false + duration = 30m + + ranges = { + "sunday" = "02:00-03:00" + } +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host\_name | Object name | **Required.** The name of the host this scheduled downtime belongs to. + service\_name | Object name | **Optional.** The short name of the service this scheduled downtime belongs to. If omitted, this downtime object is treated as host downtime. + author | String | **Required.** The author of the downtime. + comment | String | **Required.** A comment for the downtime. + fixed | Boolean | **Optional.** Whether this is a fixed downtime. Defaults to `true`. + duration | Duration | **Optional.** How long the downtime lasts. Only has an effect for flexible (non-fixed) downtimes. + ranges | Dictionary | **Required.** A dictionary containing information which days and durations apply to this timeperiod. + child\_options | String | **Optional.** Schedule child downtimes. `DowntimeNoChildren` does not do anything, `DowntimeTriggeredChildren` schedules child downtimes triggered by this downtime, `DowntimeNonTriggeredChildren` schedules non-triggered downtimes. Defaults to `DowntimeNoChildren`. + +ScheduledDowntime objects have composite names, i.e. their names are based +on the `host_name` and `service_name` attributes and the +name you specified. This means you can define more than one object +with the same (short) name as long as one of the `host_name` and +`service_name` attributes has a different value. + +See also [time zone handling](08-advanced-topics.md#timeperiods-timezones). + + +### Service + +Service objects describe network services and how they should be checked +by Icinga 2. 
+ +> **Best Practice** +> +> Rather than creating a `Service` object for a specific host it is usually easier +> to just create a `Service` template and use the `apply` keyword to assign the +> service to a number of hosts. +> Check the [apply](03-monitoring-basics.md#using-apply) chapter for details. + +Example: + +``` +object Service "uptime" { + host_name = "localhost" + + display_name = "localhost Uptime" + + check_command = "snmp" + + vars.snmp_community = "public" + vars.snmp_oid = "DISMAN-EVENT-MIB::sysUpTimeInstance" + + check_interval = 60s + retry_interval = 15s + + groups = [ "all-services", "snmp" ] +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + display\_name | String | **Optional.** A short description of the service. + host\_name | Object name | **Required.** The host this service belongs to. There must be a `Host` object with that name. + groups | Array of object names | **Optional.** The service groups this service belongs to. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are specific to this service. + check\_command | Object name | **Required.** The name of the check command. + max\_check\_attempts | Number | **Optional.** The number of times a service is re-checked before changing into a hard state. Defaults to 3. + check\_period | Object name | **Optional.** The name of a time period which determines when this service should be checked. Not set by default (effectively 24x7). + check\_timeout | Duration | **Optional.** Check command timeout in seconds. Overrides the CheckCommand's `timeout` attribute. + check\_interval | Duration | **Optional.** The check interval (in seconds). This interval is used for checks when the service is in a `HARD` state. Defaults to `5m`. + retry\_interval | Duration | **Optional.** The retry interval (in seconds). 
This interval is used for checks when the service is in a `SOFT` state. Defaults to `1m`. Note: This does not affect the scheduling [after a passive check result](08-advanced-topics.md#check-result-freshness). + enable\_notifications | Boolean | **Optional.** Whether notifications are enabled. Defaults to `true`. + enable\_active\_checks | Boolean | **Optional.** Whether active checks are enabled. Defaults to `true`. + enable\_passive\_checks | Boolean | **Optional.** Whether passive checks are enabled. Defaults to `true`. + enable\_event\_handler | Boolean | **Optional.** Enables event handlers for this service. Defaults to `true`. + enable\_flapping | Boolean | **Optional.** Whether flap detection is enabled. Defaults to `false`. + flapping\_threshold\_high | Number | **Optional.** Flapping upper bound in percent for a service to be considered flapping. Defaults to `30.0`. + flapping\_threshold\_low | Number | **Optional.** Flapping lower bound in percent for a service to be considered not flapping. Defaults to `25.0`. + flapping\_ignore\_states | Array | **Optional.** A list of states that should be ignored during flapping calculation. By default no state is ignored. + enable\_perfdata | Boolean | **Optional.** Whether performance data processing is enabled. Defaults to `true`. + event\_command | Object name | **Optional.** The name of an event command that should be executed every time the service's state changes or the service is in a `SOFT` state. + volatile | Boolean | **Optional.** Treat all state changes as HARD changes. See [here](08-advanced-topics.md#volatile-services-hosts) for details. Defaults to `false`. + zone | Object name | **Optional.** The zone this object is a member of. Please read the [distributed monitoring](06-distributed-monitoring.md#distributed-monitoring) chapter for details. + name | String | **Required.** The service name. Must be unique on a per-host basis. For advanced usage in [apply rules](03-monitoring-basics.md#using-apply) only.
+ command\_endpoint | Object name | **Optional.** The endpoint where commands are executed on. + notes | String | **Optional.** Notes for the service. + notes\_url | String | **Optional.** URL for notes for the service (for example, in notification commands). + action\_url | String | **Optional.** URL for actions for the service (for example, an external graphing tool). + icon\_image | String | **Optional.** Icon image for the service. Used by external interfaces only. + icon\_image\_alt | String | **Optional.** Icon image description for the service. Used by external interface only. + +Service objects have composite names, i.e. their names are based on the host\_name attribute and the name you specified. This means +you can define more than one object with the same (short) name as long as the `host_name` attribute has a different value. + +The actual check interval might deviate slightly from the configured values due to the fact that Icinga tries +to evenly distribute all checks over a certain period of time, i.e. to avoid load spikes. + +Runtime Attributes: + + Name | Type | Description + ------------------------------|-------------------|---------------------------------- + next\_check | Timestamp | When the next check occurs (as a UNIX timestamp). + last\_check | Timestamp | When the last check occurred (as a UNIX timestamp). + check\_attempt | Number | The current check attempt number. + state\_type | Number | The current state type (0 = SOFT, 1 = HARD). + last\_state\_type | Number | The previous state type (0 = SOFT, 1 = HARD). + last\_reachable | Boolean | Whether the service was reachable when the last check occurred. + last\_check\_result | CheckResult | The current [check result](08-advanced-topics.md#advanced-value-types-checkresult). + last\_state\_change | Timestamp | When the last state change occurred (as a UNIX timestamp). + last\_hard\_state\_change | Timestamp | When the last hard state change occurred (as a UNIX timestamp). 
+ last\_in\_downtime | Boolean | Whether the service was in a downtime when the last check occurred. + acknowledgement | Number | The acknowledgement type (0 = NONE, 1 = NORMAL, 2 = STICKY). + acknowledgement\_expiry | Timestamp | When the acknowledgement expires (as a UNIX timestamp; 0 = no expiry). + acknowledgement\_last\_change | Timestamp | When the acknowledgement was last set or cleared (as a UNIX timestamp). + downtime\_depth | Number | Whether the service has one or more active downtimes. + flapping\_last\_change | Timestamp | When the last flapping change occurred (as a UNIX timestamp). + flapping\_current | Number | Current flapping value in percent (see flapping\_thresholds). + flapping | Boolean | Whether the service is flapping between states. + state | Number | The current state (0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN). + last\_state | Number | The previous state (0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN). + last\_hard\_state | Number | The last hard state (0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN). + last\_state\_ok | Timestamp | When the last OK state occurred (as a UNIX timestamp). + last\_state\_warning | Timestamp | When the last WARNING state occurred (as a UNIX timestamp). + last\_state\_critical | Timestamp | When the last CRITICAL state occurred (as a UNIX timestamp). + last\_state\_unknown | Timestamp | When the last UNKNOWN state occurred (as a UNIX timestamp). + last\_state\_unreachable | Timestamp | When the service was unreachable the last time (as a UNIX timestamp). + previous\_state\_change | Timestamp | Previous timestamp of `last_state_change` before processing a new check result. + severity | Number | [Severity](19-technical-concepts.md#technical-concepts-checks-severity) calculated value. + problem | Boolean | Whether the service is considered in a problem state type (NOT-OK). + handled | Boolean | Whether the service problem is handled (downtime or acknowledgement).
+ next\_update | Timestamp | When the next check update is to be expected. + + +### ServiceGroup + +A group of services. + +> **Best Practice** +> +> Assign service group members using the [group assign](17-language-reference.md#group-assign) rules. + +Example: + +``` +object ServiceGroup "snmp" { + display_name = "SNMP services" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + display\_name | String | **Optional.** A short description of the service group. + groups | Array of object names | **Optional.** An array of nested group names. + + + +### TimePeriod + +Time periods can be used to specify when hosts/services should be checked or to limit +when notifications should be sent out. + +Examples: + +``` +object TimePeriod "nonworkhours" { + display_name = "Icinga 2 TimePeriod for non working hours" + + ranges = { + monday = "00:00-8:00,17:00-24:00" + tuesday = "00:00-8:00,17:00-24:00" + wednesday = "00:00-8:00,17:00-24:00" + thursday = "00:00-8:00,17:00-24:00" + friday = "00:00-8:00,16:00-24:00" + saturday = "00:00-24:00" + sunday = "00:00-24:00" + } +} + +object TimePeriod "exampledays" { + display_name = "Icinga 2 TimePeriod for random example days" + + ranges = { + //We still believe in Santa, no peeking! + //Applies every 25th of December every year + "december 25" = "00:00-24:00" + + //Any point in time can be specified, + //but you still have to use a range + "2038-01-19" = "03:13-03:15" + + //Every 3rd day from the second monday of February + //to 8th of November + "monday 2 february - november 8 / 3" = "00:00-24:00" + } +} +``` + +Additional examples can be found [here](08-advanced-topics.md#timeperiods). + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + display\_name | String | **Optional.** A short description of the time period.
+ ranges | Dictionary | **Required.** A dictionary specifying which days and durations apply to this timeperiod. + prefer\_includes | Boolean | **Optional.** Whether to prefer timeperiods `includes` or `excludes`. Defaults to true. + excludes | Array of object names | **Optional.** An array of timeperiods which should be excluded from your timerange. + includes | Array of object names | **Optional.** An array of timeperiods which should be included in your timerange. + + +Runtime Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + is\_inside | Boolean | Whether we're currently inside this timeperiod. + +See also [time zone handling](08-advanced-topics.md#timeperiods-timezones). + + +### User + +A user. + +Example: + +``` +object User "icingaadmin" { + display_name = "Icinga 2 Admin" + groups = [ "icingaadmins" ] + email = "icinga@localhost" + pager = "icingaadmin@localhost.localdomain" + + period = "24x7" + + states = [ OK, Warning, Critical, Unknown ] + types = [ Problem, Recovery ] + + vars.additional_notes = "This is the Icinga 2 Admin account." +} +``` + +Available notification state filters: + +``` +OK +Warning +Critical +Unknown +Up +Down +``` + +Available notification type filters: + +``` +DowntimeStart +DowntimeEnd +DowntimeRemoved +Custom +Acknowledgement +Problem +Recovery +FlappingStart +FlappingEnd +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + display\_name | String | **Optional.** A short description of the user. + email | String | **Optional.** An email string for this user. Useful for notification commands. + pager | String | **Optional.** A pager string for this user. Useful for notification commands. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are specific to this user.
+ groups | Array of object names | **Optional.** An array of group names. + enable\_notifications | Boolean | **Optional.** Whether notifications are enabled for this user. Defaults to true. + period | Object name | **Optional.** The name of a time period which determines when a notification for this user should be triggered. Not set by default (effectively 24x7). + types | Array | **Optional.** A set of type filters when a notification for this user should be triggered. By default everything is matched. + states | Array | **Optional.** A set of state filters when a notification for this user should be triggered. By default everything is matched. + +Runtime Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + last\_notification | Timestamp | When the last notification was sent for this user (as a UNIX timestamp). + +### UserGroup + +A user group. + +> **Best Practice** +> +> Assign user group members using the [group assign](17-language-reference.md#group-assign) rules. + +Example: + +``` +object UserGroup "icingaadmins" { + display_name = "Icinga 2 Admin Group" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + display\_name | String | **Optional.** A short description of the user group. + groups | Array of object names | **Optional.** An array of nested group names. + + +### Zone + +Zone objects are used to specify which Icinga 2 instances are located in a zone. +Please read the [distributed monitoring chapter](06-distributed-monitoring.md#distributed-monitoring) for additional details.
+Example: + +``` +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] + +} + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain" ] + parent = "master" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + endpoints | Array of object names | **Optional.** Array of endpoint names located in this zone. + parent | Object name | **Optional.** The name of the parent zone. (Do not specify a global zone) + global | Boolean | **Optional.** Whether configuration files for this zone should be [synced](06-distributed-monitoring.md#distributed-monitoring-global-zone-config-sync) to all endpoints. Defaults to `false`. + +Zone objects cannot currently be created with the API. + + +## Runtime Objects + +These objects are generated at runtime by the daemon +from API actions. Downtime objects are also created +by ScheduledDowntime objects. + +### Comment + +Comments created at runtime are represented as objects. +Note: This is for reference only. You can create comments +with the [add-comment](12-icinga2-api.md#icinga2-api-actions-add-comment) API action. + +Example: + +``` +object Comment "my-comment" { + host_name = "localhost" + author = "icingaadmin" + text = "This is a comment." + entry_time = 1234567890 +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host\_name | Object name | **Required.** The name of the host this comment belongs to. + service\_name | Object name | **Optional.** The short name of the service this comment belongs to. If omitted, this comment object is treated as host comment. + author | String | **Required.** The author's name. + text | String | **Required.** The comment text. + entry\_time | Timestamp | **Optional.** The UNIX timestamp when this comment was added. 
If omitted, the entry time is volatile! + entry\_type | Number | **Optional.** The comment type (`User` = 1, `Downtime` = 2, `Flapping` = 3, `Acknowledgement` = 4). + expire\_time | Timestamp | **Optional.** The comment's expire time as UNIX timestamp. + persistent | Boolean | **Optional.** Only evaluated for `entry_type` Acknowledgement. `true` does not remove the comment when the acknowledgement is removed. + +### Downtime + +Downtimes created at runtime are represented as objects. +You can create downtimes with the [schedule-downtime](12-icinga2-api.md#icinga2-api-actions-schedule-downtime) API action. + +Example: + +``` +object Downtime "my-downtime" { + host_name = "localhost" + author = "icingaadmin" + comment = "This is a downtime." + start_time = 1505312869 + end_time = 1505312924 +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host\_name | Object name | **Required.** The name of the host this downtime belongs to. + service\_name | Object name | **Optional.** The short name of the service this downtime belongs to. If omitted, this downtime object is treated as host downtime. + author | String | **Required.** The author's name. + comment | String | **Required.** The comment text. + start\_time | Timestamp | **Required.** The start time as UNIX timestamp. + end\_time | Timestamp | **Required.** The end time as UNIX timestamp. + duration | Number | **Optional.** The duration as number. + entry\_time | Timestamp | **Optional.** The UNIX timestamp when this downtime was added. + fixed | Boolean | **Optional.** Whether the downtime is fixed (true) or flexible (false). Defaults to flexible. Details in the [advanced topics chapter](08-advanced-topics.md#fixed-flexible-downtimes). + triggers | Array of object names | **Optional.** List of downtimes which should be triggered by this downtime.
+ +Runtime Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + trigger\_time | Timestamp | The UNIX timestamp when this downtime was triggered. + triggered\_by | Object name | The name of the downtime this downtime was triggered by. + + + +## Features + +### ApiListener + +ApiListener objects are used for distributed monitoring setups +and API usage specifying the certificate files used for ssl +authorization and additional restrictions. +This configuration object is available as [api feature](11-cli-commands.md#cli-command-feature). + +The `TicketSalt` constant must be defined in [constants.conf](04-configuration.md#constants-conf). + +Example: + +``` +object ApiListener "api" { + accept_commands = true + accept_config = true + + ticket_salt = TicketSalt +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------------------|-----------------------|---------------------------------- + cert\_path | String | **Deprecated.** Path to the public key. + key\_path | String | **Deprecated.** Path to the private key. + ca\_path | String | **Deprecated.** Path to the CA certificate file. + ticket\_salt | String | **Optional.** Private key for [CSR auto-signing](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing). **Required** for a signing master instance. + crl\_path | String | **Optional.** Path to the CRL file. + bind\_host | String | **Optional.** The IP address the api listener should be bound to. If not specified, the ApiListener is bound to `::` and listens for both IPv4 and IPv6 connections or to `0.0.0.0` if IPv6 is not supported by the operating system. + bind\_port | Number | **Optional.** The port the api listener should be bound to. Defaults to `5665`. + accept\_config | Boolean | **Optional.** Accept zone configuration. Defaults to `false`. + accept\_commands | Boolean | **Optional.** Accept remote commands. 
Defaults to `false`. + max\_anonymous\_clients | Number | **Optional.** Limit the number of anonymous client connections (not configured endpoints and signing requests). + cipher\_list | String | **Optional.** Cipher list that is allowed. For a list of available ciphers run `openssl ciphers`. Defaults to `ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384:AES128-GCM-SHA256`. + tls\_protocolmin | String | **Optional.** Minimum TLS protocol version. Since v2.11, only `TLSv1.2` is supported. Defaults to `TLSv1.2`. + tls\_handshake\_timeout | Number | **Deprecated.** TLS Handshake timeout. Defaults to `10s`. + connect\_timeout | Number | **Optional.** Timeout for establishing new connections. Affects both incoming and outgoing connections. Within this time, the TCP and TLS handshakes must complete and either a HTTP request or an Icinga cluster connection must be initiated. Defaults to `15s`. + access\_control\_allow\_origin | Array | **Optional.** Specifies an array of origin URLs that may access the API. [(MDN docs)](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS#Access-Control-Allow-Origin) + access\_control\_allow\_credentials | Boolean | **Deprecated.** Indicates whether or not the actual request can be made using credentials. Defaults to `true`. [(MDN docs)](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS#Access-Control-Allow-Credentials) + access\_control\_allow\_headers | String | **Deprecated.** Used in response to a preflight request to indicate which HTTP headers can be used when making the actual request. Defaults to `Authorization`. 
[(MDN docs)](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS#Access-Control-Allow-Headers) + access\_control\_allow\_methods | String | **Deprecated.** Used in response to a preflight request to indicate which HTTP methods can be used when making the actual request. Defaults to `GET, POST, PUT, DELETE`. [(MDN docs)](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS#Access-Control-Allow-Methods) + environment | String | **Optional.** Used as suffix in TLS SNI extension name; default from constant `ApiEnvironment`, which is empty. + +The attributes `access_control_allow_credentials`, `access_control_allow_headers` and `access_control_allow_methods` +are controlled by Icinga 2 and are not changeable by config any more. + + +The ApiListener type expects its certificate files to be in the following locations: + + Type | Location + ---------------------|------------------------------------- + Private key | `DataDir + "/certs/" + NodeName + ".key"` + Certificate file | `DataDir + "/certs/" + NodeName + ".crt"` + CA certificate file | `DataDir + "/certs/ca.crt"` + +If the deprecated attributes `cert_path`, `key_path` and/or `ca_path` are specified, Icinga 2 +copies those files to the new location in `DataDir + "/certs"` unless the +file(s) there are newer. + +Please check the [upgrading chapter](16-upgrading-icinga-2.md#upgrading-to-2-8-certificate-paths) for more details. + +While Icinga 2 and the underlying OpenSSL library use sane and secure defaults, the attributes +`cipher_list` and `tls_protocolmin` can be used to increase communication security. A good source +for a more secure configuration is provided by the [Mozilla Wiki](https://wiki.mozilla.org/Security/Server_Side_TLS). +Make sure to use the same configuration for both attributes on **all** endpoints to avoid communication problems. This +requires a `cipher_list` compatible with the endpoint using the oldest version of the OpenSSL library.
If using +other tools to connect to the API, also ensure compatibility with them, as this setting affects not only inter-cluster +communication but also the REST API. + +### CheckerComponent + +The checker component is responsible for scheduling active checks. +This configuration object is available as [checker feature](11-cli-commands.md#cli-command-feature). + +Example: + +``` +object CheckerComponent "checker" { } +``` + +In order to limit the concurrent checks on a master/satellite endpoint, +use the [MaxConcurrentChecks](17-language-reference.md#icinga-constants-global-config) constant. +This also applies to an agent as command endpoint where the checker +feature is disabled. + +### CheckResultReader + +Reads Icinga 1.x check result files from a directory. This functionality is provided +to help existing Icinga 1.x users and might be useful for migration scenarios. + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +Example: + +``` +object CheckResultReader "reader" { + spool_dir = "/data/check-results" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + spool\_dir | String | **Optional.** The directory which contains the check result files. Defaults to DataDir + "/spool/checkresults/". + +### CompatLogger + +Writes log files in a format that's compatible with Icinga 1.x. +This configuration object is available as [compatlog feature](14-features.md#compat-logging). + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones).
+ +Example: + +``` +object CompatLogger "compatlog" { + log_dir = "/var/log/icinga2/compat" + rotation_method = "DAILY" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + log\_dir | String | **Optional.** Path to the compat log directory. Defaults to LogDir + "/compat". + rotation\_method | String | **Optional.** Specifies when to rotate log files. Can be one of "HOURLY", "DAILY", "WEEKLY" or "MONTHLY". Defaults to "HOURLY". + + +### ElasticsearchWriter + +Writes check result metrics and performance data to an Elasticsearch instance. +This configuration object is available as [elasticsearch feature](14-features.md#elasticsearch-writer). + +Example: + +``` +object ElasticsearchWriter "elasticsearch" { + host = "127.0.0.1" + port = 9200 + index = "icinga2" + + enable_send_perfdata = true + + flush_threshold = 1024 + flush_interval = 10 +} +``` + +The index is rotated daily, as is recommended by Elastic, meaning the index will be renamed to `$index-$d.$M.$y`. + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Required.** Elasticsearch host address. Defaults to `127.0.0.1`. + port | Number | **Required.** Elasticsearch port. Defaults to `9200`. + index | String | **Required.** Elasticsearch index name. Defaults to `icinga2`. + enable\_send\_perfdata | Boolean | **Optional.** Send parsed performance data metrics for check results. Defaults to `false`. + flush\_interval | Duration | **Optional.** How long to buffer data points before transferring to Elasticsearch. Defaults to `10s`. + flush\_threshold | Number | **Optional.** How many data points to buffer before forcing a transfer to Elasticsearch. Defaults to `1024`. + username | String | **Optional.** Basic auth username if Elasticsearch is hidden behind an HTTP proxy. 
+ password | String | **Optional.** Basic auth password if Elasticsearch is hidden behind an HTTP proxy. + enable\_tls | Boolean | **Optional.** Whether to use a TLS stream. Defaults to `false`. Requires an HTTP proxy. + insecure\_noverify | Boolean | **Optional.** Disable TLS peer verification. + ca\_path | String | **Optional.** Path to CA certificate to validate the remote host. Requires `enable_tls` set to `true`. + cert\_path | String | **Optional.** Path to host certificate to present to the remote host for mutual verification. Requires `enable_tls` set to `true`. + key\_path | String | **Optional.** Path to host key to accompany the cert\_path. Requires `enable_tls` set to `true`. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). Defaults to `false`. + +Note: If `flush_threshold` is set too low, this will force the feature to flush all data to Elasticsearch too often. +Experiment with the setting, if you are processing more than 1024 metrics per second or similar. + +Basic auth is supported with the `username` and `password` attributes. This requires an +HTTP proxy (Nginx, etc.) in front of the Elasticsearch instance. Check [this blogpost](https://blog.netways.de/2017/09/14/secure-elasticsearch-and-kibana-with-an-nginx-http-proxy/) +for an example. + +TLS for the HTTP proxy can be enabled with `enable_tls`. In addition to that +you can specify the certificates with the `ca_path`, `cert_path` and `key_path` attributes. + +### ExternalCommandListener + +Implements the Icinga 1.x command pipe which can be used to send commands to Icinga. +This configuration object is available as [command feature](14-features.md#external-commands). + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones).
+ +Example: + +``` +object ExternalCommandListener "command" { + command_path = "/var/run/icinga2/cmd/icinga2.cmd" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + command\_path | String | **Optional.** Path to the command pipe. Defaults to RunDir + "/icinga2/cmd/icinga2.cmd". + + + +### FileLogger + +Specifies Icinga 2 logging to a file. +This configuration object is available as `mainlog` and `debuglog` [logging feature](14-features.md#logging). + +Example: + +``` +object FileLogger "debug-file" { + severity = "debug" + path = "/var/log/icinga2/debug.log" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + path | String | **Required.** The log path. + severity | String | **Optional.** The minimum severity for this log. Can be "debug", "notice", "information", "warning" or "critical". Defaults to "information". + + +### GelfWriter + +Writes event log entries to a defined GELF receiver host (Graylog, Logstash). +This configuration object is available as [gelf feature](14-features.md#gelfwriter). + +Example: + +``` +object GelfWriter "gelf" { + host = "127.0.0.1" + port = 12201 +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Optional.** GELF receiver host address. Defaults to `127.0.0.1`. + port | Number | **Optional.** GELF receiver port. Defaults to `12201`. + source | String | **Optional.** Source name for this instance. Defaults to `icinga2`. + enable\_send\_perfdata | Boolean | **Optional.** Enable performance data for 'CHECK RESULT' events. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. 
Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). Defaults to `false`. + enable\_tls | Boolean | **Optional.** Whether to use a TLS stream. Defaults to `false`. + insecure\_noverify | Boolean | **Optional.** Disable TLS peer verification. + ca\_path | String | **Optional.** Path to CA certificate to validate the remote host. Requires `enable_tls` set to `true`. + cert\_path | String | **Optional.** Path to host certificate to present to the remote host for mutual verification. Requires `enable_tls` set to `true`. + key\_path | String | **Optional.** Path to host key to accompany the cert\_path. Requires `enable_tls` set to `true`. + +### GraphiteWriter + +Writes check result metrics and performance data to a defined +Graphite Carbon host. +This configuration object is available as [graphite feature](14-features.md#graphite-carbon-cache-writer). + +Example: + +``` +object GraphiteWriter "graphite" { + host = "127.0.0.1" + port = 2003 +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Optional.** Graphite Carbon host address. Defaults to `127.0.0.1`. + port | Number | **Optional.** Graphite Carbon port. Defaults to `2003`. + host\_name\_template | String | **Optional.** Metric prefix for host name. Defaults to `icinga2.$host.name$.host.$host.check_command$`. + service\_name\_template | String | **Optional.** Metric prefix for service name. Defaults to `icinga2.$host.name$.services.$service.name$.$service.check_command$`. + enable\_send\_thresholds | Boolean | **Optional.** Send additional threshold metrics. Defaults to `false`. + enable\_send\_metadata | Boolean | **Optional.** Send additional metadata metrics. Defaults to `false`. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. 
Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). Defaults to `false`. + +Additional usage examples can be found [here](14-features.md#graphite-carbon-cache-writer). + + +### IcingaApplication + +The IcingaApplication object is required to start Icinga 2. +The object name must be `app`. If the object configuration +is missing, Icinga 2 will automatically create an IcingaApplication +object. + +Example: + +``` +object IcingaApplication "app" { + enable_perfdata = false +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + enable\_notifications | Boolean | **Optional.** Whether notifications are globally enabled. Defaults to true. + enable\_event\_handlers | Boolean | **Optional.** Whether event handlers are globally enabled. Defaults to true. + enable\_flapping | Boolean | **Optional.** Whether flap detection is globally enabled. Defaults to true. + enable\_host\_checks | Boolean | **Optional.** Whether active host checks are globally enabled. Defaults to true. + enable\_service\_checks | Boolean | **Optional.** Whether active service checks are globally enabled. Defaults to true. + enable\_perfdata | Boolean | **Optional.** Whether performance data processing is globally enabled. Defaults to true. + vars | Dictionary | **Optional.** A dictionary containing custom variables that are available globally. + environment | String | **Optional.** Specify the Icinga environment. This overrides the `Environment` constant specified in the configuration or on the CLI with `--define`. Defaults to empty. + + +### IcingaDB + +The `IcingaDB` object implements the [Icinga DB feature](14-features.md#icinga-db). 
+ +Example: + +``` +object IcingaDB "icingadb" { + //host = "127.0.0.1" + //port = 6380 + //password = "xxx" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Optional.** Redis host. Defaults to `127.0.0.1`. + port | Number | **Optional.** Redis port. Defaults to `6380` since the Redis server provided by the `icingadb-redis` package listens on that port. + path | String | **Optional.** Redis unix socket path. Can be used instead of `host` and `port` attributes. + password | String | **Optional.** Redis auth password. + enable\_tls | Boolean | **Optional.** Whether to use TLS. + cert\_path | String | **Optional.** Path to the certificate. + key\_path | String | **Optional.** Path to the private key. + ca\_path | String | **Optional.** Path to the CA certificate to use instead of the system's root CAs. + crl\_path | String | **Optional.** Path to the CRL file. + cipher\_list | String | **Optional.** Cipher list that is allowed. For a list of available ciphers run `openssl ciphers`. Defaults to `ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384:AES128-GCM-SHA256`. + tls\_protocolmin | String | **Optional.** Minimum TLS protocol version. Defaults to `TLSv1.2`. + insecure\_noverify | Boolean | **Optional.** Whether not to verify the peer. + connect\_timeout | Number | **Optional.** Timeout for establishing new connections. Within this time, the TCP, TLS (if enabled) and Redis handshakes must complete. Defaults to `15s`. + +### IdoMysqlConnection + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases.
+> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +IDO database adapter for MySQL. +This configuration object is available as [ido-mysql feature](14-features.md#db-ido). + +Example: + +``` +object IdoMysqlConnection "mysql-ido" { + host = "127.0.0.1" + port = 3306 + user = "icinga" + password = "icinga" + database = "icinga" + + cleanup = { + downtimehistory_age = 48h + contactnotifications_age = 31d + } +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Optional.** MySQL database host address. Defaults to `localhost`. + port | Number | **Optional.** MySQL database port. Defaults to `3306`. + socket\_path | String | **Optional.** MySQL socket path. + user | String | **Optional.** MySQL database user with read/write permission to the icinga database. Defaults to `icinga`. + password | String | **Optional.** MySQL database user's password. Defaults to `icinga`. + database | String | **Optional.** MySQL database name. Defaults to `icinga`. + enable\_ssl | Boolean | **Optional.** Use SSL. Defaults to false. Change to `true` in case you want to use any of the SSL options. + ssl\_key | String | **Optional.** MySQL SSL client key file path. + ssl\_cert | String | **Optional.** MySQL SSL certificate file path. + ssl\_ca | String | **Optional.** MySQL SSL certificate authority certificate file path. + ssl\_capath | String | **Optional.** Path to the directory that contains trusted MySQL SSL CA certificates in PEM format. + ssl\_cipher | String | **Optional.** MySQL SSL list of allowed ciphers. + table\_prefix | String | **Optional.** MySQL database table prefix. Defaults to `icinga_`. + instance\_name | String | **Optional.** Unique identifier for the local Icinga 2 instance, used for multiple Icinga 2 clusters writing to the same database. Defaults to `default`.
+ instance\_description | String | **Optional.** Description for the Icinga 2 instance. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Defaults to `true`. + failover\_timeout | Duration | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 30s. Defaults to `30s`. + cleanup | Dictionary | **Optional.** Dictionary with items for historical table cleanup. + categories | Array | **Optional.** Array of information types that should be written to the database. + +Cleanup Items: + + Name | Type | Description + --------------------------------|-----------------------|---------------------------------- + acknowledgements\_age | Duration | **Optional.** Max age for acknowledgements table rows (entry\_time). Defaults to 0 (never). + commenthistory\_age | Duration | **Optional.** Max age for commenthistory table rows (entry\_time). Defaults to 0 (never). + contactnotifications\_age | Duration | **Optional.** Max age for contactnotifications table rows (start\_time). Defaults to 0 (never). + contactnotificationmethods\_age | Duration | **Optional.** Max age for contactnotificationmethods table rows (start\_time). Defaults to 0 (never). + downtimehistory\_age | Duration | **Optional.** Max age for downtimehistory table rows (entry\_time). Defaults to 0 (never). + eventhandlers\_age | Duration | **Optional.** Max age for eventhandlers table rows (start\_time). Defaults to 0 (never). + externalcommands\_age | Duration | **Optional.** Max age for externalcommands table rows (entry\_time). Defaults to 0 (never). + flappinghistory\_age | Duration | **Optional.** Max age for flappinghistory table rows (event\_time). Defaults to 0 (never). + hostchecks\_age | Duration | **Optional.** Max age for hostchecks table rows (start\_time). 
Defaults to 0 (never). + logentries\_age | Duration | **Optional.** Max age for logentries table rows (logentry\_time). Defaults to 0 (never). + notifications\_age | Duration | **Optional.** Max age for notifications table rows (start\_time). Defaults to 0 (never). + processevents\_age | Duration | **Optional.** Max age for processevents table rows (event\_time). Defaults to 0 (never). + statehistory\_age | Duration | **Optional.** Max age for statehistory table rows (state\_time). Defaults to 0 (never). + servicechecks\_age | Duration | **Optional.** Max age for servicechecks table rows (start\_time). Defaults to 0 (never). + systemcommands\_age | Duration | **Optional.** Max age for systemcommands table rows (start\_time). Defaults to 0 (never). + +Data Categories: + + Name | Description | Required by + ---------------------|------------------------|-------------------- + DbCatConfig | Configuration data | Icinga Web 2 + DbCatState | Current state data | Icinga Web 2 + DbCatAcknowledgement | Acknowledgements | Icinga Web 2 + DbCatComment | Comments | Icinga Web 2 + DbCatDowntime | Downtimes | Icinga Web 2 + DbCatEventHandler | Event handler data | Icinga Web 2 + DbCatExternalCommand | External commands | -- + DbCatFlapping | Flap detection data | Icinga Web 2 + DbCatCheck | Check results | -- + DbCatLog | Log messages | -- + DbCatNotification | Notifications | Icinga Web 2 + DbCatProgramStatus | Program status data | Icinga Web 2 + DbCatRetention | Retention data | Icinga Web 2 + DbCatStateHistory | Historical state data | Icinga Web 2 + +The default value for `categories` includes everything required +by Icinga Web 2 in the table above. + +In addition to the category flags listed above the `DbCatEverything` +flag may be used as a shortcut for listing all flags. 
+ +Runtime Attributes: + + Name | Type | Description + ----------------------------|-----------------------|----------------- + last\_failover | Timestamp | When the last failover happened for this connection (only available with `enable_ha = true`). + +### IdoPgsqlConnection + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +IDO database adapter for PostgreSQL. +This configuration object is available as [ido-pgsql feature](14-features.md#db-ido). + +Example: + +``` +object IdoPgsqlConnection "pgsql-ido" { + host = "127.0.0.1" + port = 5432 + user = "icinga" + password = "icinga" + database = "icinga" + + cleanup = { + downtimehistory_age = 48h + contactnotifications_age = 31d + } +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Optional.** PostgreSQL database host address. Defaults to `localhost`. + port | Number | **Optional.** PostgreSQL database port. Defaults to `5432`. + user | String | **Optional.** PostgreSQL database user with read/write permission to the icinga database. Defaults to `icinga`. + password | String | **Optional.** PostgreSQL database user's password. Defaults to `icinga`. + database | String | **Optional.** PostgreSQL database name. Defaults to `icinga`. + ssl\_mode | String | **Optional.** Enable SSL connection mode. Value must be set according to the [sslmode setting](https://www.postgresql.org/docs/9.3/static/libpq-connect.html#LIBPQ-CONNSTRING): `prefer`, `require`, `verify-ca`, `verify-full`, `allow`, `disable`. + ssl\_key | String | **Optional.** PostgreSQL SSL client key file path. + ssl\_cert | String | **Optional.** PostgreSQL SSL certificate file path. + ssl\_ca | String | **Optional.** PostgreSQL SSL certificate authority certificate file path.
+ table\_prefix | String | **Optional.** PostgreSQL database table prefix. Defaults to `icinga_`. + instance\_name | String | **Optional.** Unique identifier for the local Icinga 2 instance, used for multiple Icinga 2 clusters writing to the same database. Defaults to `default`. + instance\_description | String | **Optional.** Description for the Icinga 2 instance. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Defaults to `true`. + failover\_timeout | Duration | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 30s. Defaults to `30s`. + cleanup | Dictionary | **Optional.** Dictionary with items for historical table cleanup. + categories | Array | **Optional.** Array of information types that should be written to the database. + +Cleanup Items: + + Name | Type | Description + --------------------------------|-----------------------|---------------------------------- + acknowledgements\_age | Duration | **Optional.** Max age for acknowledgements table rows (entry\_time). Defaults to 0 (never). + commenthistory\_age | Duration | **Optional.** Max age for commenthistory table rows (entry\_time). Defaults to 0 (never). + contactnotifications\_age | Duration | **Optional.** Max age for contactnotifications table rows (start\_time). Defaults to 0 (never). + contactnotificationmethods\_age | Duration | **Optional.** Max age for contactnotificationmethods table rows (start\_time). Defaults to 0 (never). + downtimehistory\_age | Duration | **Optional.** Max age for downtimehistory table rows (entry\_time). Defaults to 0 (never). + eventhandlers\_age | Duration | **Optional.** Max age for eventhandlers table rows (start\_time). Defaults to 0 (never). 
+ externalcommands\_age | Duration | **Optional.** Max age for externalcommands table rows (entry\_time). Defaults to 0 (never). + flappinghistory\_age | Duration | **Optional.** Max age for flappinghistory table rows (event\_time). Defaults to 0 (never). + hostchecks\_age | Duration | **Optional.** Max age for hostchecks table rows (start\_time). Defaults to 0 (never). + logentries\_age | Duration | **Optional.** Max age for logentries table rows (logentry\_time). Defaults to 0 (never). + notifications\_age | Duration | **Optional.** Max age for notifications table rows (start\_time). Defaults to 0 (never). + processevents\_age | Duration | **Optional.** Max age for processevents table rows (event\_time). Defaults to 0 (never). + statehistory\_age | Duration | **Optional.** Max age for statehistory table rows (state\_time). Defaults to 0 (never). + servicechecks\_age | Duration | **Optional.** Max age for servicechecks table rows (start\_time). Defaults to 0 (never). + systemcommands\_age | Duration | **Optional.** Max age for systemcommands table rows (start\_time). Defaults to 0 (never). 
+ +Data Categories: + + Name | Description | Required by + ---------------------|------------------------|-------------------- + DbCatConfig | Configuration data | Icinga Web 2 + DbCatState | Current state data | Icinga Web 2 + DbCatAcknowledgement | Acknowledgements | Icinga Web 2 + DbCatComment | Comments | Icinga Web 2 + DbCatDowntime | Downtimes | Icinga Web 2 + DbCatEventHandler | Event handler data | Icinga Web 2 + DbCatExternalCommand | External commands | -- + DbCatFlapping | Flap detection data | Icinga Web 2 + DbCatCheck | Check results | -- + DbCatLog | Log messages | -- + DbCatNotification | Notifications | Icinga Web 2 + DbCatProgramStatus | Program status data | Icinga Web 2 + DbCatRetention | Retention data | Icinga Web 2 + DbCatStateHistory | Historical state data | Icinga Web 2 + +The default value for `categories` includes everything required +by Icinga Web 2 in the table above. + +In addition to the category flags listed above the `DbCatEverything` +flag may be used as a shortcut for listing all flags. + +Runtime Attributes: + + Name | Type | Description + ----------------------------|-----------------------|----------------- + last\_failover | Timestamp | When the last failover happened for this connection (only available with `enable_ha = true`). + +### InfluxdbWriter + +Writes check result metrics and performance data to a defined InfluxDB v1 host. +This configuration object is available as [influxdb feature](14-features.md#influxdb-writer). +For InfluxDB v2 support see the [Influxdb2Writer](#objecttype-influxdb2writer) below.
+ +Example: + +``` +object InfluxdbWriter "influxdb" { + host = "127.0.0.1" + port = 8086 + database = "icinga2" + username = "icinga2" + password = "icinga2" + + basic_auth = { + username = "icinga" + password = "icinga" + } + + flush_threshold = 1024 + flush_interval = 10s + + host_template = { + measurement = "$host.check_command$" + tags = { + hostname = "$host.name$" + } + } + service_template = { + measurement = "$service.check_command$" + tags = { + hostname = "$host.name$" + service = "$service.name$" + } + } +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Required.** InfluxDB host address. Defaults to `127.0.0.1`. + port | Number | **Required.** InfluxDB HTTP port. Defaults to `8086`. + database | String | **Required.** InfluxDB database name. Defaults to `icinga2`. + username | String | **Optional.** InfluxDB user name. Defaults to `none`. + password | String | **Optional.** InfluxDB user password. Defaults to `none`. + basic\_auth | Dictionary | **Optional.** Username and password for HTTP basic authentication. + ssl\_enable | Boolean | **Optional.** Whether to use a TLS stream. Defaults to `false`. + ssl\_insecure\_noverify | Boolean | **Optional.** Disable TLS peer verification. + ssl\_ca\_cert | String | **Optional.** Path to CA certificate to validate the remote host. + ssl\_cert | String | **Optional.** Path to host certificate to present to the remote host for mutual verification. + ssl\_key | String | **Optional.** Path to host key to accompany the ssl\_cert. + host\_template | Dictionary | **Required.** Host template to define the InfluxDB line protocol. + service\_template | Dictionary | **Required.** Service template to define the influxDB line protocol. + enable\_send\_thresholds | Boolean | **Optional.** Whether to send warn, crit, min & max tagged data. 
+ enable\_send\_metadata | Boolean | **Optional.** Whether to send check metadata e.g. states, execution time, latency etc. + flush\_interval | Duration | **Optional.** How long to buffer data points before transferring to InfluxDB. Defaults to `10s`. + flush\_threshold | Number | **Optional.** How many data points to buffer before forcing a transfer to InfluxDB. Defaults to `1024`. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). Defaults to `false`. + +Note: If `flush_threshold` is set too low, this will always force the feature to flush all data +to InfluxDB. Experiment with the setting, if you are processing more than 1024 metrics per second +or similar. + + + +### Influxdb2Writer + +Writes check result metrics and performance data to a defined InfluxDB v2 host. +This configuration object is available as [influxdb feature](14-features.md#influxdb-writer). +For InfluxDB v1 support see the [InfluxdbWriter](#objecttype-influxdbwriter) above. + +Example: + +``` +object Influxdb2Writer "influxdb2" { + host = "127.0.0.1" + port = 8086 + organization = "monitoring" + bucket = "icinga2" + auth_token = "ABCDEvwxyz0189-_" + + flush_threshold = 1024 + flush_interval = 10s + + host_template = { + measurement = "$host.check_command$" + tags = { + hostname = "$host.name$" + } + } + service_template = { + measurement = "$service.check_command$" + tags = { + hostname = "$host.name$" + service = "$service.name$" + } + } +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Required.** InfluxDB host address. Defaults to `127.0.0.1`. + port | Number | **Required.** InfluxDB HTTP port. Defaults to `8086`. + organization | String | **Required.** InfluxDB organization name. 
+ bucket | String | **Required.** InfluxDB bucket name. + auth\_token | String | **Required.** InfluxDB authentication token. + ssl\_enable | Boolean | **Optional.** Whether to use a TLS stream. Defaults to `false`. + ssl\_insecure\_noverify | Boolean | **Optional.** Disable TLS peer verification. + ssl\_ca\_cert | String | **Optional.** Path to CA certificate to validate the remote host. + ssl\_cert | String | **Optional.** Path to host certificate to present to the remote host for mutual verification. + ssl\_key | String | **Optional.** Path to host key to accompany the ssl\_cert. + host\_template | Dictionary | **Required.** Host template to define the InfluxDB line protocol. + service\_template | Dictionary | **Required.** Service template to define the influxDB line protocol. + enable\_send\_thresholds | Boolean | **Optional.** Whether to send warn, crit, min & max tagged data. + enable\_send\_metadata | Boolean | **Optional.** Whether to send check metadata e.g. states, execution time, latency etc. + flush\_interval | Duration | **Optional.** How long to buffer data points before transferring to InfluxDB. Defaults to `10s`. + flush\_threshold | Number | **Optional.** How many data points to buffer before forcing a transfer to InfluxDB. Defaults to `1024`. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). Defaults to `false`. + +Note: If `flush_threshold` is set too low, this will always force the feature to flush all data +to InfluxDB. Experiment with the setting, if you are processing more than 1024 metrics per second +or similar. + + + +### LiveStatusListener + +Livestatus API interface available as TCP or UNIX socket. Historical table queries +require the [CompatLogger](09-object-types.md#objecttype-compatlogger) feature enabled +pointing to the log files using the `compat_log_path` configuration attribute. 
+This configuration object is available as [livestatus feature](14-features.md#setting-up-livestatus). + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +Examples: + +``` +object LivestatusListener "livestatus-tcp" { + socket_type = "tcp" + bind_host = "127.0.0.1" + bind_port = "6558" +} + +object LivestatusListener "livestatus-unix" { + socket_type = "unix" + socket_path = "/var/run/icinga2/cmd/livestatus" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + socket\_type | String | **Optional.** Specifies the socket type. Can be either `tcp` or `unix`. Defaults to `unix`. + bind\_host | String | **Optional.** Only valid when `socket_type` is set to `tcp`. Host address to listen on for connections. Defaults to `127.0.0.1`. + bind\_port | Number | **Optional.** Only valid when `socket_type` is set to `tcp`. Port to listen on for connections. Defaults to `6558`. + socket\_path | String | **Optional.** Only valid when `socket_type` is set to `unix`. Specifies the path to the UNIX socket file. Defaults to RunDir + "/icinga2/cmd/livestatus". + compat\_log\_path | String | **Optional.** Path to Icinga 1.x log files. Required for historical table queries. Requires `CompatLogger` feature enabled. Defaults to LogDir + "/compat" + +> **Note** +> +> UNIX sockets are not supported on Windows. + +### NotificationComponent + +The notification component is responsible for sending notifications. +This configuration object is available as [notification feature](11-cli-commands.md#cli-command-feature). 
+ +Example: + +``` +object NotificationComponent "notification" { } +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-notifications). Disabling this currently only affects reminder notifications. Defaults to "true". + +### OpenTsdbWriter + +Writes check result metrics and performance data to [OpenTSDB](http://opentsdb.net). +This configuration object is available as [opentsdb feature](14-features.md#opentsdb-writer). + +Example: + +``` +object OpenTsdbWriter "opentsdb" { + host = "127.0.0.1" + port = 4242 +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host | String | **Optional.** OpenTSDB host address. Defaults to `127.0.0.1`. + port | Number | **Optional.** OpenTSDB port. Defaults to `4242`. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). Defaults to `false`. + enable_generic_metrics | Boolean | **Optional.** Re-use metric names to store different perfdata values for a particular check. Use tags to distinguish perfdata instead of metric name. Defaults to `false`. + host_template | Dictionary | **Optional.** Specify additional tags to be included with host metrics. This requires a sub-dictionary named `tags`. Also specify a naming prefix by setting `metric`. More information can be found in [OpenTSDB custom tags](14-features.md#opentsdb-custom-tags) and [OpenTSDB Metric Prefix](14-features.md#opentsdb-metric-prefix).
Defaults to an `empty Dictionary`. + service_template | Dictionary | **Optional.** Specify additional tags to be included with service metrics. This requires a sub-dictionary named `tags`. Also specify a naming prefix by setting `metric`. More information can be found in [OpenTSDB custom tags](14-features.md#opentsdb-custom-tags) and [OpenTSDB Metric Prefix](14-features.md#opentsdb-metric-prefix). Defaults to an `empty Dictionary`. + + +### PerfdataWriter + +Writes check result performance data to a defined path using macro +pattern consisting of custom variables and runtime macros. +This configuration object is available as [perfdata feature](14-features.md#writing-performance-data-files). + +Example: + +``` +object PerfdataWriter "perfdata" { + host_perfdata_path = "/var/spool/icinga2/perfdata/host-perfdata" + + service_perfdata_path = "/var/spool/icinga2/perfdata/service-perfdata" + + host_format_template = "DATATYPE::HOSTPERFDATA\tTIMET::$icinga.timet$\tHOSTNAME::$host.name$\tHOSTPERFDATA::$host.perfdata$\tHOSTCHECKCOMMAND::$host.check_command$\tHOSTSTATE::$host.state$\tHOSTSTATETYPE::$host.state_type$" + service_format_template = "DATATYPE::SERVICEPERFDATA\tTIMET::$icinga.timet$\tHOSTNAME::$host.name$\tSERVICEDESC::$service.name$\tSERVICEPERFDATA::$service.perfdata$\tSERVICECHECKCOMMAND::$service.check_command$\tHOSTSTATE::$host.state$\tHOSTSTATETYPE::$host.state_type$\tSERVICESTATE::$service.state$\tSERVICESTATETYPE::$service.state_type$" + + rotation_interval = 15s +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + host\_perfdata\_path | String | **Optional.** Path to the host performance data file. Defaults to SpoolDir + "/perfdata/host-perfdata". + service\_perfdata\_path | String | **Optional.** Path to the service performance data file. Defaults to SpoolDir + "/perfdata/service-perfdata". 
+ host\_temp\_path | String | **Optional.** Path to the temporary host file. Defaults to SpoolDir + "/tmp/host-perfdata". + service\_temp\_path | String | **Optional.** Path to the temporary service file. Defaults to SpoolDir + "/tmp/service-perfdata". + host\_format\_template | String | **Optional.** Host Format template for the performance data file. Defaults to a template that's suitable for use with PNP4Nagios. + service\_format\_template | String | **Optional.** Service Format template for the performance data file. Defaults to a template that's suitable for use with PNP4Nagios. + rotation\_interval | Duration | **Optional.** Rotation interval for the files specified in `{host,service}_perfdata_path`. Defaults to `30s`. + enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-features). Defaults to `false`. + +When rotating the performance data file the current UNIX timestamp is appended to the path specified +in `host_perfdata_path` and `service_perfdata_path` to generate a unique filename. + + +### StatusDataWriter + +Periodically writes status and configuration data files which are used by third-party tools. +This configuration object is available as [statusdata feature](14-features.md#status-data). + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +Example: + +``` +object StatusDataWriter "status" { + status_path = "/var/cache/icinga2/status.dat" + objects_path = "/var/cache/icinga2/objects.cache" + update_interval = 30s +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + status\_path | String | **Optional.** Path to the `status.dat` file. Defaults to CacheDir + "/status.dat". 
+ objects\_path | String | **Optional.** Path to the `objects.cache` file. Defaults to CacheDir + "/objects.cache". + update\_interval | Duration | **Optional.** The interval in which the status files are updated. Defaults to `15s`. + +### SyslogLogger + +Specifies Icinga 2 logging to syslog. +This configuration object is available as `syslog` [logging feature](14-features.md#logging). + +Example: + +``` +object SyslogLogger "syslog" { + severity = "warning" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + severity | String | **Optional.** The minimum severity for this log. Can be "debug", "notice", "information", "warning" or "critical". Defaults to "information". + facility | String | **Optional.** Defines the facility to use for syslog entries. This can be a facility constant like `FacilityDaemon`. Defaults to `FacilityUser`. + +Facility Constants: + + Name | Facility | Description + ---------------------|---------------|---------------- + FacilityAuth | LOG\_AUTH | The authorization system. + FacilityAuthPriv | LOG\_AUTHPRIV | The same as `FacilityAuth`, but logged to a file readable only by selected individuals. + FacilityCron | LOG\_CRON | The cron daemon. + FacilityDaemon | LOG\_DAEMON | System daemons that are not provided for explicitly by other facilities. + FacilityFtp | LOG\_FTP | The file transfer protocol daemons. + FacilityKern | LOG\_KERN | Messages generated by the kernel. These cannot be generated by any user processes. + FacilityLocal0 | LOG\_LOCAL0 | Reserved for local use. + FacilityLocal1 | LOG\_LOCAL1 | Reserved for local use. + FacilityLocal2 | LOG\_LOCAL2 | Reserved for local use. + FacilityLocal3 | LOG\_LOCAL3 | Reserved for local use. + FacilityLocal4 | LOG\_LOCAL4 | Reserved for local use. + FacilityLocal5 | LOG\_LOCAL5 | Reserved for local use. + FacilityLocal6 | LOG\_LOCAL6 | Reserved for local use. 
+ FacilityLocal7 | LOG\_LOCAL7 | Reserved for local use. + FacilityLpr | LOG\_LPR | The line printer spooling system. + FacilityMail | LOG\_MAIL | The mail system. + FacilityNews | LOG\_NEWS | The network news system. + FacilitySyslog | LOG\_SYSLOG | Messages generated internally by syslogd. + FacilityUser | LOG\_USER | Messages generated by user processes. This is the default facility identifier if none is specified. + FacilityUucp | LOG\_UUCP | The UUCP system. + + +### WindowsEventLogLogger + +Specifies Icinga 2 logging to the Windows Event Log. +This configuration object is available as `windowseventlog` [logging feature](14-features.md#logging). + +Example: + +``` +object WindowsEventLogLogger "windowseventlog" { + severity = "information" +} +``` + +Configuration Attributes: + + Name | Type | Description + --------------------------|-----------------------|---------------------------------- + severity | String | **Optional.** The minimum severity for this log. Can be "debug", "notice", "information", "warning" or "critical". Defaults to "information". diff --git a/doc/10-icinga-template-library.md b/doc/10-icinga-template-library.md new file mode 100644 index 0000000..0cc2749 --- /dev/null +++ b/doc/10-icinga-template-library.md @@ -0,0 +1,6022 @@ +# Icinga Template Library + +The Icinga Template Library (ITL) implements standard templates +and object definitions. + +There is a subset of templates and object definitions available: + +* [Generic ITL templates](10-icinga-template-library.md#itl-generic-templates) +* [CheckCommand definitions for Icinga 2](10-icinga-template-library.md#itl-check-commands) (this includes [icinga](10-icinga-template-library.md#itl-icinga), +[cluster](10-icinga-template-library.md#itl-icinga-cluster), [cluster-zone](10-icinga-template-library.md#itl-icinga-cluster-zone), [ido](10-icinga-template-library.md#itl-icinga-ido), etc.) 
+* [CheckCommand definitions for Monitoring Plugins](10-icinga-template-library.md#plugin-check-commands-monitoring-plugins) +* [CheckCommand definitions for Icinga 2 Windows Plugins](10-icinga-template-library.md#windows-plugins) +* [CheckCommand definitions for NSClient++](10-icinga-template-library.md#nscp-plugin-check-commands) +* [CheckCommand definitions for Manubulon SNMP](10-icinga-template-library.md#snmp-manubulon-plugin-check-commands) +* [Contributed CheckCommand definitions](10-icinga-template-library.md#plugin-contrib) + +The ITL content is updated with new releases. Please do not modify +templates and/or objects as changes will be overridden without +further notice. + +You are advised to create your own CheckCommand definitions in +`/etc/icinga2`. + +## Generic Templates + +By default the generic templates are included in the [icinga2.conf](04-configuration.md#icinga2-conf) configuration file: + +``` +include <itl> +``` + +These templates are imported by the provided example configuration. + +> **Note**: +> +> These templates are built into the binaries. By convention +> all command and timeperiod objects should import these templates. + +### plugin-check-command + +Command template for check plugins executed by Icinga 2. + +The `plugin-check-command` command does not support any vars. + +By default this template is automatically imported into all [CheckCommand](09-object-types.md#objecttype-checkcommand) definitions. + +### plugin-notification-command + +Command template for notification scripts executed by Icinga 2. + +The `plugin-notification-command` command does not support any vars. + +By default this template is automatically imported into all [NotificationCommand](09-object-types.md#objecttype-notificationcommand) definitions. + +### plugin-event-command + +Command template for event handler scripts executed by Icinga 2. + +The `plugin-event-command` command does not support any vars.
+ +By default this template is automatically imported into all [EventCommand](09-object-types.md#objecttype-eventcommand) definitions. + +### legacy-timeperiod + +Timeperiod template for [TimePeriod objects](09-object-types.md#objecttype-timeperiod). + +The `legacy-timeperiod` timeperiod does not support any vars. + +By default this template is automatically imported into all [TimePeriod](09-object-types.md#objecttype-timeperiod) definitions. + +## Check Commands + +These check commands are embedded into Icinga 2 and do not require any external +plugin scripts. + +### icinga + +Check command for the built-in `icinga` check. This check returns performance +data for the current Icinga instance, reports as warning if the last reload failed and optionally allows for minimum version checks. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------|--------------- +icinga\_min\_version | **Optional.** Required minimum Icinga 2 version, e.g. `2.8.0`. If not satisfied, the state changes to `Critical`. Release packages only. + +### cluster + +Check command for the built-in `cluster` check. This check returns performance +data for the current Icinga instance and connected endpoints. + +The `cluster` check command does not support any vars. + +### cluster-zone + +Check command for the built-in `cluster-zone` check. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------|--------------- +cluster\_zone | **Required.** The zone name. Defaults to `$host.name$`. +cluster\_lag\_warning | **Optional.** Warning threshold for log lag in seconds. Applies if the log lag is greater than the threshold. +cluster\_lag\_critical | **Optional.** Critical threshold for log lag in seconds. Applies if the log lag is greater than the threshold. + +### icingadb + +Check command for the built-in `icingadb` check. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------------------------|----------------------------- +icingadb\_name | **Required.** The name of the Icinga DB connection object. Defaults to `icingadb`. +icingadb\_full\_dump\_duration\_warning | **Optional.** Warning threshold for ongoing Redis dump duration. Applies if the value is higher than the threshold. Defaults to 5 minutes. +icingadb\_full\_dump\_duration\_critical | **Optional.** Critical threshold for ongoing Redis dump duration. Applies if the value is higher than the threshold. Defaults to 10 minutes. +icingadb\_full\_sync\_duration\_warning | **Optional.** Warning threshold for ongoing database sync duration. Applies if the value is higher than the threshold. Defaults to 5 minutes. +icingadb\_full\_sync\_duration\_critical | **Optional.** Critical threshold for ongoing database sync duration. Applies if the value is higher than the threshold. Defaults to 10 minutes. +icingadb\_redis\_backlog\_warning | **Optional.** Warning threshold for Redis write backlog. Applies if the value is higher than the threshold. Defaults to 5 minutes. +icingadb\_redis\_backlog\_critical | **Optional.** Critical threshold for Redis write backlog. Applies if the value is higher than the threshold. Defaults to 15 minutes. +icingadb\_database\_backlog\_warning | **Optional.** Warning threshold for database sync backlog. Applies if the value is higher than the threshold. Defaults to 5 minutes. +icingadb\_database\_backlog\_critical | **Optional.** Critical threshold for database sync backlog. Applies if the value is higher than the threshold. Defaults to 15 minutes. + +### ido + +Check command for the built-in `ido` check. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|----------------------------- +ido\_type | **Required.** The type of the IDO connection object. Can be either "IdoMysqlConnection" or "IdoPgsqlConnection". +ido\_name | **Required.** The name of the IDO connection object. +ido\_queries\_warning | **Optional.** Warning threshold for queries/s. Applies if the rate is lower than the threshold. +ido\_queries\_critical | **Optional.** Critical threshold for queries/s. Applies if the rate is lower than the threshold. +ido\_pending\_queries\_warning | **Optional.** Warning threshold for pending queries. Applies if pending queries are higher than the threshold. Supersedes the `ido_queries` thresholds above. +ido\_pending\_queries\_critical | **Optional.** Critical threshold for pending queries. Applies if pending queries are higher than the threshold. Supersedes the `ido_queries` thresholds above. + + +### dummy + +Check command for the built-in `dummy` check. This allows you to set +a check result state and output and can be used in [freshness checks](08-advanced-topics.md#check-result-freshness) +or [runtime object checks](08-advanced-topics.md#access-object-attributes-at-runtime). +In contrast to the [check_dummy](https://www.monitoring-plugins.org/doc/man/check_dummy.html) +plugin, Icinga 2 implements a lightweight in-memory check as of version 2.9. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +dummy\_state | **Optional.** The state. Can be one of 0 (ok), 1 (warning), 2 (critical) and 3 (unknown). Defaults to 0. +dummy\_text | **Optional.** Plugin output. Defaults to "Check was successful.". + +### passive + +Specialised check command object for passive checks which uses the functionality of the "dummy" check command with appropriate default values.
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +dummy_state | **Optional.** The state. Can be one of 0 (ok), 1 (warning), 2 (critical) and 3 (unknown). Defaults to 3. +dummy_text | **Optional.** Plugin output. Defaults to "No Passive Check Result Received.". + +### random + +Check command for the built-in `random` check. This check returns random states +and adds the check source to the check output. + +For test and demo purposes only. The `random` check command does not support +any vars. + +### exception + +Check command for the built-in `exception` check. This check throws an exception. + +For test and demo purposes only. The `exception` check command does not support +any vars. + +### sleep + +Check command for the built-in `sleep` check. This allows you to use sleep for testing +and debugging only. + +Name | Description +----------------|-------------- +sleep\_time | **Optional.** The duration of the sleep in seconds. Defaults to 1s. + + + + +## Plugin Check Commands for Monitoring Plugins + +The Plugin Check Commands provide example configuration for plugin check commands +provided by the [Monitoring Plugins](https://www.monitoring-plugins.org) project. + +By default the Plugin Check Commands are included in the [icinga2.conf](04-configuration.md#icinga2-conf) configuration +file: + + include <plugins> + +The plugin check commands assume that there's a global constant named `PluginDir` +which contains the path of the plugins from the Monitoring Plugins project. + +> **Note**: +> +> Please be aware that the CheckCommand definitions are based on the [Monitoring Plugins](https://www.monitoring-plugins.org), other Plugin collections might not support +> all parameters. If there are command parameters missing for the provided CheckCommand definitions please kindly send a patch upstream.
+> This should include an update for the ITL CheckCommand itself and this documentation section. + +### apt + +The plugin [apt](https://www.monitoring-plugins.org/doc/man/check_apt.html) checks for software updates on systems that use +package management systems based on the apt-get(8) command found in Debian based systems. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +apt_extra_opts | **Optional.** Read options from an ini file. +apt_upgrade | **Optional.** [Default] Perform an upgrade. If an optional OPTS argument is provided, apt-get will be run with these command line options instead of the default. +apt_dist_upgrade | **Optional.** Perform a dist-upgrade instead of normal upgrade. Like with -U OPTS can be provided to override the default options. +apt_include | **Optional.** Include only packages matching REGEXP. Can be specified multiple times the values will be combined together. +apt_exclude | **Optional.** Exclude packages matching REGEXP from the list of packages that would otherwise be included. Can be specified multiple times. +apt_critical | **Optional.** If the full package information of any of the upgradable packages match this REGEXP, the plugin will return CRITICAL status. Can be specified multiple times. +apt_timeout | **Optional.** Seconds before plugin times out (default: 10). +apt_only_critical | **Optional.** Only warn about critical upgrades. +apt_list | **Optional.** List packages available for upgrade. + + +### breeze + +The [check_breeze](https://www.monitoring-plugins.org/doc/man/check_breeze.html) plugin reports the signal +strength of a Breezecom wireless equipment. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------|--------------------------------- +breeze_hostname | **Required.** Name or IP address of host to check. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +breeze_community | **Optional.** SNMPv1 community. Defaults to "public". +breeze_warning | **Required.** Percentage strength below which a WARNING status will result. Defaults to 50. +breeze_critical | **Required.** Percentage strength below which a CRITICAL status will result. Defaults to 20. + + +### by_ssh + +The [check_by_ssh](https://www.monitoring-plugins.org/doc/man/check_by_ssh.html) plugin uses SSH to execute +commands on a remote host. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------- | -------------- +by_ssh_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +by_ssh_port | **Optional.** The SSH port. Defaults to 22. +by_ssh_command | **Required.** The command that should be executed. Can be an array if multiple arguments should be passed to `check_by_ssh`. +by_ssh_arguments | **Optional.** A dictionary with arguments for the command. This works exactly like the 'arguments' dictionary for ordinary CheckCommands. +by_ssh_logname | **Optional.** The SSH username. +by_ssh_identity | **Optional.** The SSH identity. +by_ssh_quiet | **Optional.** Whether to suppress SSH warnings. Defaults to false. +by_ssh_warn | **Optional.** The warning threshold. +by_ssh_crit | **Optional.** The critical threshold. +by_ssh_timeout | **Optional.** The timeout in seconds. +by_ssh_options | **Optional.** Call ssh with '-o OPTION' (multiple options may be specified as an array). +by_ssh_ipv4 | **Optional.** Use IPv4 connection. Defaults to false.
+by_ssh_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. +by_ssh_skip_stderr | **Optional.** Ignore all or (if specified) first n lines on STDERR. + + +### clamd + +The [check_clamd](https://www.monitoring-plugins.org/doc/man/check_clamd.html) plugin tests CLAMD +connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------|-------------- +clamd_address | **Required.** The host's address or unix socket (must be an absolute path). +clamd_port | **Optional.** Port number (default: none). +clamd_expect | **Optional.** String to expect in server response. Multiple strings must be defined as array. +clamd_all | **Optional.** All expect strings need to occur in server response. Defaults to false. +clamd_escape_send | **Optional.** Enable usage of \\n, \\r, \\t or \\\\ in send string. +clamd_send | **Optional.** String to send to the server. +clamd_escape_quit | **Optional.** Enable usage of \\n, \\r, \\t or \\\\ in quit string. +clamd_quit | **Optional.** String to send server to initiate a clean close of the connection. +clamd_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit. Defaults to crit. +clamd_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit. Defaults to warn. +clamd_jail | **Optional.** Hide output from TCP socket. +clamd_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +clamd_delay | **Optional.** Seconds to wait between sending string and polling for response. +clamd_certificate | **Optional.** Minimum number of days a certificate has to be valid. 1st value is number of days for warning, 2nd is critical (if not specified: 0) -- separated by comma. +clamd_ssl | **Optional.** Use SSL for the connection. Defaults to false. +clamd_wtime | **Optional.** Response time to result in warning status (seconds). 
+clamd_ctime | **Optional.** Response time to result in critical status (seconds). +clamd_timeout | **Optional.** Seconds before connection times out. Defaults to 10. +clamd_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +clamd_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### dhcp + +The [check_dhcp](https://www.monitoring-plugins.org/doc/man/check_dhcp.html) plugin +tests the availability of DHCP servers on a network. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +dhcp_serverip | **Optional.** The IP address of the DHCP server which we should get a response from. +dhcp_requestedip| **Optional.** The IP address which we should be offered by a DHCP server. +dhcp_timeout | **Optional.** The timeout in seconds. +dhcp_interface | **Optional.** The interface to use. +dhcp_mac | **Optional.** The MAC address to use in the DHCP request. +dhcp_unicast | **Optional.** Whether to use unicast requests. Defaults to false. + + +### dig + +The [check_dig](https://www.monitoring-plugins.org/doc/man/check_dig.html) plugin +tests the DNS service on the specified host using dig. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------|-------------- +dig_server | **Optional.** The DNS server to query. Defaults to "127.0.0.1". +dig_port | **Optional.** Port number (default: 53). +dig_lookup | **Required.** The address that should be looked up. +dig_record_type | **Optional.** Record type to lookup (default: A). +dig_expected_address | **Optional.** An address expected to be in the answer section. If not set, uses whatever was in -l. +dig_arguments | **Optional.** Pass STRING as argument(s) to dig. +dig_retries | **Optional.** Number of retries passed to dig, timeout is divided by this value (Default: 3).
+dig_warning | **Optional.** Response time to result in warning status (seconds). +dig_critical | **Optional.** Response time to result in critical status (seconds). +dig_timeout | **Optional.** Seconds before connection times out (default: 10). +dig_ipv4 | **Optional.** Force dig to only use IPv4 query transport. Defaults to false. +dig_ipv6 | **Optional.** Force dig to only use IPv6 query transport. Defaults to false. + + +### disk + +The [check_disk](https://www.monitoring-plugins.org/doc/man/check_disk.html) plugin +checks the amount of used disk space on a mounted file system and generates an alert +if free space is less than one of the threshold values. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------|------------------------ +disk\_wfree | **Optional.** The free space warning threshold. Defaults to "20%". If the percent sign is omitted, units from `disk_units` are used. +disk\_cfree | **Optional.** The free space critical threshold. Defaults to "10%". If the percent sign is omitted, units from `disk_units` are used. +disk\_inode\_wfree | **Optional.** The free inode warning threshold. +disk\_inode\_cfree | **Optional.** The free inode critical threshold. +disk\_partition | **Optional.** The partition. **Deprecated in 2.3.** +disk\_partition\_excluded | **Optional.** The excluded partition. **Deprecated in 2.3.** +disk\_partitions | **Optional.** The partition(s). Multiple partitions must be defined as array. +disk\_partitions\_excluded | **Optional.** The excluded partition(s). Multiple partitions must be defined as array. +disk\_clear | **Optional.** Clear thresholds. May be true or false. +disk\_exact\_match | **Optional.** For paths or partitions specified with -p, only check for exact paths. May be true or false. +disk\_errors\_only | **Optional.** Display only devices/mountpoints with errors. May be true or false. 
+disk\_ignore\_reserved | **Optional.** If set, account root-reserved blocks are not accounted for freespace in perfdata. May be true or false. +disk\_group | **Optional.** Group paths. Thresholds apply to (free-)space of all partitions together. +disk\_kilobytes | **Optional.** Same as --units kB. May be true or false. +disk\_local | **Optional.** Only check local filesystems. May be true or false. +disk\_stat\_remote\_fs | **Optional.** Only check local filesystems against thresholds. Yet call stat on remote filesystems to test if they are accessible (e.g. to detect Stale NFS Handles). May be true or false. +disk\_mountpoint | **Optional.** Display the mountpoint instead of the partition. May be true or false. +disk\_megabytes | **Optional.** Same as --units MB. May be true or false. +disk\_all | **Optional.** Explicitly select all paths. This is equivalent to -R '.\*'. May be true or false. +disk\_eregi\_path | **Optional.** Case insensitive regular expression for path/partition. Multiple regular expression strings must be defined as array. +disk\_ereg\_path | **Optional.** Regular expression for path or partition. Multiple regular expression strings must be defined as array. +disk\_ignore\_eregi\_path | **Optional.** Regular expression to ignore selected path/partition (case insensitive). Multiple regular expression strings must be defined as array. +disk\_ignore\_ereg\_path | **Optional.** Regular expression to ignore selected path or partition. Multiple regular expression strings must be defined as array. +disk\_timeout | **Optional.** Seconds before connection times out (default: 10). +disk\_units | **Optional.** Choose bytes, kB, MB, GB, TB (default: MB). +disk\_exclude\_type | **Optional.** Ignore all filesystems of indicated type. Multiple regular expression strings must be defined as array. 
Defaults to "none", "tmpfs", "sysfs", "proc", "configfs", "devtmpfs", "devfs", "mtmfs", "tracefs", "cgroup", "fuse.gvfsd-fuse", "fuse.gvfs-fuse-daemon", "fdescfs", "overlay", "nsfs", "squashfs". +disk\_include\_type | **Optional.** Check only filesystems of indicated type. Multiple regular expression strings must be defined as array. + +### disk_smb + +The [check_disk_smb](https://www.monitoring-plugins.org/doc/man/check_disk_smb.html) plugin +uses the `smbclient` binary to check SMB shares. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|------------------------ +disk_smb_hostname | **Required.** NetBIOS name of the server. +disk_smb_share | **Required.** Share name being queried. +disk_smb_workgroup | **Optional.** Workgroup or Domain used (defaults to 'WORKGROUP' if omitted). +disk_smb_address | **Optional.** IP address of the host (only necessary if host belongs to another network). +disk_smb_username | **Optional.** Username for server log-in (defaults to 'guest' if omitted). +disk_smb_password | **Optional.** Password for server log-in (defaults to an empty password if omitted). +disk_smb_wused | **Optional.** The used space warning threshold. Defaults to "85%". If the percent sign is omitted, use optional disk units. +disk_smb_cused | **Optional.** The used space critical threshold. Defaults to "95%". If the percent sign is omitted, use optional disk units. +disk_smb_port | **Optional.** Connection port, e.g. `139` or `445`. Defaults to `smbclient` default if omitted. + +### dns + +The [check_dns](https://www.monitoring-plugins.org/doc/man/check_dns.html) plugin +uses the nslookup program to obtain the IP address for the given host/domain query. +An optional DNS server to use may be specified. If no DNS server is specified, the +default server(s) specified in `/etc/resolv.conf` will be used. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------|-------------- +dns_lookup | **Optional.** The hostname or IP to query the DNS for. Defaults to "$host_name$". +dns_server | **Optional.** The DNS server to query. Defaults to the server configured in the OS. +dns_query_type | **Optional.** The DNS record query type where TYPE =(A, AAAA, SRV, TXT, MX, ANY). The default query type is 'A' (IPv4 host entry). **Only supported by the Nagios plugins version of check\_dns, not by the monitoring plugins one.** +dns_expected_answers | **Optional.** The answer(s) to look for. A hostname must end with a dot. Format depends on the monitoring-plugins version: In version 2.2 and before, a single string with the values alphabetically ordered and joined by commas. In version 2.3 and later, multiple answers must be defined as array. +dns_all_expected | **Optional.** Denotes whether to require all values passed in `dns_expected_answers` to pass, or at least one. Only supported in newer versions of monitoring-plugins (2.3 and later), and is needed in such versions to replicate behaviour of previous versions of the plugins. +dns_authoritative | **Optional.** Expect the server to send an authoritative answer. +dns_accept_cname | **Optional.** Accept cname responses as a valid result to a query. +dns_wtime | **Optional.** Return warning if elapsed time exceeds value. +dns_ctime | **Optional.** Return critical if elapsed time exceeds value. +dns_timeout | **Optional.** Seconds before connection times out. Defaults to 10. + + + +### file_age + +The [check_file_age](https://www.monitoring-plugins.org/doc/man/check_file_age.html) plugin +checks a file's size and modification time to make sure it's not empty and that it's sufficiently recent. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------|-------------------------------------------------------------------------------------------------------- +file_age_file | **Required.** File to monitor. +file_age_warning_time | **Optional.** File must be no more than this many seconds old as warning threshold. Defaults to "240s". +file_age_critical_time | **Optional.** File must be no more than this many seconds old as critical threshold. Defaults to "600s". +file_age_warning_size | **Optional.** File must be at least this many bytes long as warning threshold. No default given. +file_age_critical_size | **Optional.** File must be at least this many bytes long as critical threshold. Defaults to "0B". +file_age_ignoremissing | **Optional.** Return OK if the file does not exist. Defaults to false. + + +### flexlm + +The [check_flexlm](https://www.monitoring-plugins.org/doc/man/check_flexlm.html) plugin +checks available flexlm license managers. Requires the `lmstat` command. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------|---------------------------------------------------------- +flexlm_licensefile | **Required.** Name of license file (usually license.dat). +flexlm_timeout | **Optional.** Plugin time out in seconds. Defaults to 15. + + +### fping4 + +The [check_fping](https://www.monitoring-plugins.org/doc/man/check_fping.html) plugin +uses the `fping` command to ping the specified host for a fast check. Note that it is +necessary to set the `suid` flag on `fping`. + +This CheckCommand expects an IPv4 address. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +fping_address | **Optional.** The host's IPv4 address. Defaults to "$address$". 
+fping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 100. +fping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 5. +fping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 200. +fping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 15. +fping_number | **Optional.** The number of packets to send. Defaults to 5. +fping_interval | **Optional.** The interval between packets in milli-seconds. Defaults to 500. +fping_bytes | **Optional.** The size of ICMP packet. +fping_target_timeout | **Optional.** The target timeout in milli-seconds. +fping_source_ip | **Optional.** The name or ip address of the source ip. +fping_source_interface | **Optional.** The source interface name. + + +### fping6 + +The [check_fping](https://www.monitoring-plugins.org/doc/man/check_fping.html) plugin +will use the `fping` command to ping the specified host for a fast check. Note that it is +necessary to set the `suid` flag on `fping`. + +This CheckCommand expects an IPv6 address. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +fping_address | **Optional.** The host's IPv6 address. Defaults to "$address6$". +fping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 100. +fping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 5. +fping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 200. +fping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 15. +fping_number | **Optional.** The number of packets to send. Defaults to 5. +fping_interval | **Optional.** The interval between packets in milli-seconds. Defaults to 500. +fping_bytes | **Optional.** The size of ICMP packet. +fping_target_timeout | **Optional.** The target timeout in milli-seconds. 
+fping_source_ip | **Optional.** The name or ip address of the source ip. +fping_source_interface | **Optional.** The source interface name. + + +### ftp + +The [check_ftp](https://www.monitoring-plugins.org/doc/man/check_ftp.html) plugin +tests FTP connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------|-------------- +ftp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ftp_port | **Optional.** The FTP port number. +ftp_expect | **Optional.** String to expect in server response. Multiple strings must be defined as array. +ftp_all | **Optional.** All expect strings need to occur in server response. Defaults to false. +ftp_escape_send | **Optional.** Enable usage of \\n, \\r, \\t or \\\\ in send string. +ftp_send | **Optional.** String to send to the server. +ftp_escape_quit | **Optional.** Enable usage of \\n, \\r, \\t or \\\\ in quit string. +ftp_quit | **Optional.** String to send server to initiate a clean close of the connection. +ftp_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit. Defaults to crit. +ftp_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit. Defaults to warn. +ftp_jail | **Optional.** Hide output from TCP socket. +ftp_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +ftp_delay | **Optional.** Seconds to wait between sending string and polling for response. +ftp_certificate | **Optional.** Minimum number of days a certificate has to be valid. 1st value is number of days for warning, 2nd is critical (if not specified: 0) -- separated by comma. +ftp_ssl | **Optional.** Use SSL for the connection. Defaults to false. +ftp_wtime | **Optional.** Response time to result in warning status (seconds). 
+ftp_ctime | **Optional.** Response time to result in critical status (seconds). +ftp_timeout | **Optional.** Seconds before connection times out. Defaults to 10. +ftp_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +ftp_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### game + +The [check_game](https://www.monitoring-plugins.org/doc/man/check_game.html) plugin +tests game server connections with the specified host. +This plugin uses the 'qstat' command, the popular game server status query tool. +If you don't have the package installed, you will need to [download](http://www.activesw.com/people/steve/qstat.html) +or install the package `quakestat` before you can use this plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------|------------------- +game_game | **Required.** Name of the game. +game_ipaddress | **Required.** Ipaddress of the game server to query. +game_timeout | **Optional.** Seconds before connection times out. Defaults to 10. +game_port | **Optional.** Port to connect to. +game_gamefield | **Optional.** Field number in raw qstat output that contains game name. +game_mapfield | **Optional.** Field number in raw qstat output that contains map name. +game_pingfield | **Optional.** Field number in raw qstat output that contains ping time. +game_gametime | **Optional.** Field number in raw qstat output that contains game time. +game_hostname | **Optional.** Name of the host running the game. + + +### hostalive + +Check command object for the [check_ping](https://www.monitoring-plugins.org/doc/man/check_ping.html) +plugin with host check default values. This variant uses the host's `address` attribute +if available and falls back to using the `address6` attribute if the `address` attribute is not set. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ping_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 3000. +ping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 80. +ping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 5000. +ping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 100. +ping_packets | **Optional.** The number of packets to send. Defaults to 5. +ping_timeout | **Optional.** The plugin timeout in seconds. Defaults to 0 (no timeout). + + +### hostalive4 + +Check command object for the [check_ping](https://www.monitoring-plugins.org/doc/man/check_ping.html) +plugin with host check default values. This variant uses the host's `address` attribute. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ping_address | **Optional.** The host's IPv4 address. Defaults to "$address$". +ping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 3000. +ping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 80. +ping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 5000. +ping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 100. +ping_packets | **Optional.** The number of packets to send. Defaults to 5. +ping_timeout | **Optional.** The plugin timeout in seconds. Defaults to 0 (no timeout). + + +### hostalive6 + +Check command object for the [check_ping](https://www.monitoring-plugins.org/doc/man/check_ping.html) +plugin with host check default values. This variant uses the host's `address6` attribute. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ping_address | **Optional.** The host's IPv6 address. Defaults to "$address6$". +ping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 3000. +ping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 80. +ping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 5000. +ping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 100. +ping_packets | **Optional.** The number of packets to send. Defaults to 5. +ping_timeout | **Optional.** The plugin timeout in seconds. Defaults to 0 (no timeout). + + +### hpjd + +The [check_hpjd](https://www.monitoring-plugins.org/doc/man/check_hpjd.html) plugin +tests the state of an HP printer with a JetDirect card. Net-snmp must be installed +on the computer running the plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +hpjd_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +hpjd_port | **Optional.** The host's SNMP port. Defaults to 161. +hpjd_community | **Optional.** The SNMP community. Defaults to "public". + + +### http + +The [check_http](https://www.monitoring-plugins.org/doc/man/check_http.html) plugin +tests the HTTP service on the specified host. It can test normal (http) and secure +(https) servers, follow redirects, search for strings and regular expressions, +check connection times, and report on certificate expiration times. + +The plugin can either test the HTTP response of a server, or if `http_certificate` is set to a non-empty value, the TLS certificate age for a HTTPS host. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|--------------------------------- +http_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +http_vhost | **Optional.** The virtual host that should be sent in the "Host" header. +http_uri | **Optional.** The request URI for GET or POST. Defaults to `/`. +http_port | **Optional.** The TCP port. Defaults to 80 when not using SSL, 443 otherwise. +http_ssl | **Optional.** Whether to use SSL. Defaults to false. +http_ssl_force_tlsv1 | **Optional.** Whether to force TLSv1. +http_ssl_force_tlsv1_1 | **Optional.** Whether to force TLSv1.1. +http_ssl_force_tlsv1_2 | **Optional.** Whether to force TLSv1.2. +http_ssl_force_sslv2 | **Optional.** Whether to force SSLv2. +http_ssl_force_sslv3 | **Optional.** Whether to force SSLv3. +http_ssl_force_tlsv1_or_higher | **Optional.** Whether to force TLSv1 or higher. +http_ssl_force_tlsv1_1_or_higher | **Optional.** Whether to force TLSv1.1 or higher. +http_ssl_force_tlsv1_2_or_higher | **Optional.** Whether to force TLSv1.2 or higher. +http_ssl_force_sslv2_or_higher | **Optional.** Whether to force SSLv2 or higher. +http_ssl_force_sslv3_or_higher | **Optional.** Whether to force SSLv3 or higher. +http_sni | **Optional.** Whether to use SNI. Defaults to false. +http_auth_pair | **Optional.** Add 'username:password' authorization pair. +http_proxy_auth_pair | **Optional.** Add 'username:password' authorization pair for proxy. +http_ignore_body | **Optional.** Don't download the body, just the headers. +http_linespan | **Optional.** Allow regex to span newline. +http_expect_body_regex | **Optional.** A regular expression which the body must match against. Incompatible with http_ignore_body. +http_expect_body_eregi | **Optional.** A case-insensitive expression which the body must match against. 
Incompatible with http_ignore_body. +http_invertregex | **Optional.** Changes behavior of http_expect_body_regex and http_expect_body_eregi to return CRITICAL if found, OK if not. +http_warn_time | **Optional.** The warning threshold. +http_critical_time | **Optional.** The critical threshold. +http_expect | **Optional.** Comma-delimited list of strings, at least one of them is expected in the first (status) line of the server response. Default: HTTP/1. +http_certificate | **Optional.** Minimum number of days a certificate has to be valid. Port defaults to 443. When this option is used the URL is not checked. The first parameter defines the warning threshold (in days), the second parameter the critical threshold (in days). (Example `http_certificate = "30,20"`). +http_clientcert | **Optional.** Name of the file that contains the client certificate (PEM format). +http_privatekey | **Optional.** Name of the file that contains the private key (PEM format). +http_headerstring | **Optional.** String to expect in the response headers. +http_string | **Optional.** String to expect in the content. +http_post | **Optional.** URL encoded http POST data. +http_method | **Optional.** Set http method (for example: HEAD, OPTIONS, TRACE, PUT, DELETE). +http_maxage | **Optional.** Warn if document is more than the given number of seconds old. +http_contenttype | **Optional.** Specify Content-Type header when POSTing. +http_useragent | **Optional.** String to be sent in http header as User Agent. +http_header | **Optional.** Any other tags to be sent in http header. +http_extendedperfdata | **Optional.** Print additional perfdata. Defaults to false. +http_onredirect | **Optional.** How to handle redirect pages. Possible values: "ok" (default), "warning", "critical", "follow", "sticky" (like follow but stick to address), "stickyport" (like sticky but also to port). +http_pagesize | **Optional.** Minimum page size required:Maximum page size required. +http_timeout | **Optional.** Seconds before connection times out.
+http_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +http_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. +http_link | **Optional.** Wrap output in HTML link. Defaults to false. +http_verbose | **Optional.** Show details for command-line debugging. Defaults to false. + + +### icmp + +The [check_icmp](https://www.monitoring-plugins.org/doc/man/check_icmp.html) plugin +allows for checking multiple hosts at once, unlike `check_ping`. +The main difference is that `check_ping` executes the system's ping(1) command and +parses its output while `check_icmp` talks ICMP itself. `check_icmp` must be installed with +`setuid` root. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +icmp_address | **Optional.** The host's address. This can either be a single address or an array of addresses. Defaults to "$address$". +icmp_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 100. +icmp_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 5. +icmp_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 200. +icmp_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 15. +icmp_source | **Optional.** The source IP address to send packets from. +icmp_packets | **Optional.** The number of packets to send. Defaults to 5. +icmp_packet_interval | **Optional.** The maximum packet interval. Defaults to 80 (milliseconds). +icmp_target_interval | **Optional.** The maximum target interval. +icmp_hosts_alive | **Optional.** The number of hosts which have to be alive for the check to succeed. +icmp_data_bytes | **Optional.** Payload size for each ICMP request. Defaults to 8. +icmp_timeout | **Optional.** The plugin timeout in seconds. Defaults to 10 (seconds). +icmp_ttl | **Optional.** The TTL on outgoing packets.
+ + +### imap + +The [check_imap](https://www.monitoring-plugins.org/doc/man/check_imap.html) plugin +tests IMAP connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------------|-------------- +imap_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +imap_port | **Optional.** The port that should be checked. Defaults to 143. +imap_escape | **Optional.** Can use \\n, \\r, \\t or \\ in send or quit string. Must come before send or quit option. Default: nothing added to send, \\r\\n added to end of quit. +imap_send | **Optional.** String to send to the server. +imap_expect | **Optional.** String to expect in server response. Multiple strings must be defined as array. +imap_all | **Optional.** All expect strings need to occur in server response. Default is any. +imap_quit | **Optional.** String to send server to initiate a clean close of the connection. +imap_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit (default: crit). +imap_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit (default: warn). +imap_jail | **Optional.** Hide output from TCP socket. +imap_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +imap_delay | **Optional.** Seconds to wait between sending string and polling for response. +imap_certificate_age | **Optional.** Minimum number of days a certificate has to be valid. +imap_ssl | **Optional.** Use SSL for the connection. +imap_warning | **Optional.** Response time to result in warning status (seconds). +imap_critical | **Optional.** Response time to result in critical status (seconds). +imap_timeout | **Optional.** Seconds before connection times out (default: 10). +imap_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. 
+imap_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### ldap + +The [check_ldap](https://www.monitoring-plugins.org/doc/man/check_ldap.html) plugin +can be used to check LDAP servers. + +The plugin can also be used for monitoring ldaps connections instead of the deprecated `check_ldaps`. +This can be ensured by enabling `ldap_starttls` or `ldap_ssl`. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +ldap_address | **Optional.** Host name, IP Address, or unix socket (must be an absolute path). Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ldap_port | **Optional.** Port number. Defaults to 389. +ldap_attr | **Optional.** LDAP attribute to search for (default: "(objectclass=*)"). +ldap_base | **Required.** LDAP base (e.g. ou=myunit,o=myorg,c=at). +ldap_bind | **Optional.** LDAP bind DN (if required). +ldap_pass | **Optional.** LDAP password (if required). +ldap_starttls | **Optional.** Use STARTTLS mechanism introduced in protocol version 3. +ldap_ssl | **Optional.** Use LDAPS (LDAP v2 SSL method). This also sets the default port to 636. +ldap_v2 | **Optional.** Use LDAP protocol version 2 (enabled by default). +ldap_v3 | **Optional.** Use LDAP protocol version 3 (disabled by default). +ldap_warning | **Optional.** Response time to result in warning status (seconds). +ldap_critical | **Optional.** Response time to result in critical status (seconds). +ldap_warning_entries | **Optional.** Number of found entries to result in warning status. +ldap_critical_entries | **Optional.** Number of found entries to result in critical status. +ldap_timeout | **Optional.** Seconds before connection times out (default: 10).
+ldap_verbose | **Optional.** Show details for command-line debugging (disabled by default) + +### load + +The [check_load](https://www.monitoring-plugins.org/doc/man/check_load.html) plugin +tests the current system load average. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +load_wload1 | **Optional.** The 1-minute warning threshold. Defaults to 5. +load_wload5 | **Optional.** The 5-minute warning threshold. Defaults to 4. +load_wload15 | **Optional.** The 15-minute warning threshold. Defaults to 3. +load_cload1 | **Optional.** The 1-minute critical threshold. Defaults to 10. +load_cload5 | **Optional.** The 5-minute critical threshold. Defaults to 6. +load_cload15 | **Optional.** The 15-minute critical threshold. Defaults to 4. +load_percpu | **Optional.** Divide the load averages by the number of CPUs (when possible). Defaults to false. + +### mailq + +The [check_mailq](https://www.monitoring-plugins.org/doc/man/check_mailq.html) plugin +checks the number of messages in the mail queue (supports multiple sendmail queues, qmail). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +mailq_warning | **Required.** Min. number of messages in queue to generate warning. +mailq_critical | **Required.** Min. number of messages in queue to generate critical alert ( w < c ). +mailq_domain_warning | **Optional.** Min. number of messages for same domain in queue to generate warning +mailq_domain_critical | **Optional.** Min. number of messages for same domain in queue to generate critical alert ( W < C ). +mailq_timeout | **Optional.** Plugin timeout in seconds (default = 15). +mailq_servertype | **Optional.** [ sendmail \| qmail \| postfix \| exim \| nullmailer ] (default = autodetect). 
+mailq_sudo | **Optional.** Use sudo to execute the mailq command. + +### mysql + +The [check_mysql](https://www.monitoring-plugins.org/doc/man/check_mysql.html) plugin +tests connections to a MySQL server. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|--------------------------------------------------------------- +mysql_hostname | **Optional.** Host name, IP Address, or unix socket (must be an absolute path). +mysql_port | **Optional.** Port number (default: 3306). +mysql_socket | **Optional.** Use the specified socket (has no effect if `mysql_hostname` is used). +mysql_ignore_auth | **Optional.** Ignore authentication failure and check for mysql connectivity only. +mysql_database | **Optional.** Check database with indicated name. +mysql_file | **Optional.** Read from the specified client options file. +mysql_group | **Optional.** Use a client options group. +mysql_username | **Optional.** Connect using the indicated username. +mysql_password | **Optional.** Use the indicated password to authenticate the connection. +mysql_check_slave | **Optional.** Check if the slave thread is running properly. +mysql_warning | **Optional.** Exit with WARNING status if slave server is more than INTEGER seconds behind master. +mysql_critical | **Optional.** Exit with CRITICAL status if slave server is more than INTEGER seconds behind master. +mysql_ssl | **Optional.** Use ssl encryption. +mysql_cacert | **Optional.** Path to CA signing the cert. +mysql_cert | **Optional.** Path to SSL certificate. +mysql_key | **Optional.** Path to private SSL key. +mysql_cadir | **Optional.** Path to CA directory. +mysql_ciphers | **Optional.** List of valid SSL ciphers. + + +### mysql_query + +The [check_mysql_query](https://www.monitoring-plugins.org/doc/man/check_mysql_query.html) plugin +checks a query result against threshold levels. +The result from the query should be numeric.
For extra security, create a user with minimal access. + +**Note**: You must specify `mysql_query_password` with an empty string to force an empty password, +overriding any my.cnf settings. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|--------------------------------------------------------------- +mysql_query_hostname | **Optional.** Host name, IP Address, or unix socket (must be an absolute path). +mysql_query_port | **Optional.** Port number (default: 3306). +mysql_query_database | **Optional.** Check database with indicated name. +mysql_query_file | **Optional.** Read from the specified client options file. +mysql_query_group | **Optional.** Use a client options group. +mysql_query_username | **Optional.** Connect using the indicated username. +mysql_query_password | **Optional.** Use the indicated password to authenticate the connection. +mysql_query_execute | **Required.** SQL Query to run on the MySQL Server. +mysql_query_warning | **Optional.** Exit with WARNING status if query is outside of the range (format: start:end). +mysql_query_critical | **Optional.** Exit with CRITICAL status if query is outside of the range. + + +### negate + +The [negate](https://www.monitoring-plugins.org/doc/man/negate.html) plugin +negates the status of a plugin (returns OK for CRITICAL and vice-versa). +Additional switches can be used to control which state becomes what. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------------|--------------------------------------------------------------- +negate_timeout | **Optional.** Seconds before plugin times out (default: 11). +negate_timeout_result | **Optional.** Custom result on Negate timeouts, default to UNKNOWN. +negate_ok | **Optional.** OK, WARNING, CRITICAL or UNKNOWN. +negate_warning | Numeric values are accepted. 
+negate_critical | If nothing is specified, +negate_unknown | permutes OK and CRITICAL. +negate_substitute | **Optional.** Substitute output text as well. Will only substitute text in CAPITALS. +negate_command | **Required.** Command to be negated. +negate_arguments | **Optional.** Arguments for the negated command. + +### nrpe + +The `check_nrpe` plugin can be used to query an [NRPE](https://icinga.com/docs/icinga1/latest/en/nrpe.html) +server or [NSClient++](https://www.nsclient.org). **Note**: This plugin +is considered insecure/deprecated. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +nrpe_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +nrpe_port | **Optional.** The NRPE port. Defaults to 5666. +nrpe_command | **Optional.** The command that should be executed. +nrpe_no_ssl | **Optional.** Whether to disable SSL or not. Defaults to `false`. +nrpe_timeout_unknown | **Optional.** Whether to set timeouts to unknown instead of critical state. Defaults to `false`. +nrpe_timeout | **Optional.** The timeout in seconds. +nrpe_arguments | **Optional.** Arguments that should be passed to the command. Multiple arguments must be defined as array. +nrpe_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +nrpe_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. +nrpe_version_2 | **Optional.** Use this if you want to connect using NRPE v2 protocol. Defaults to false. +nrpe_ca | **Optional.** The CA file to use for PKI. Defaults to none. +nrpe_cert | **Optional.** The client cert file to use for PKI. Defaults to none. +nrpe_key | **Optional.** The client key file to use for PKI. Defaults to none. +nrpe_ssl_version | **Optional.** The SSL/TLS version to use. Defaults to TLSv1+. +nrpe_cipher_list | **Optional.** The list of SSL ciphers to use. 
Default depends on check_nrpe version. +nrpe_dh_opt | **Optional.** Anonymous Diffie Hellman use: 0 = deny, 1 = allow, 2 = force. Default depends on check_nrpe version. + + +### nscp + +The [check_nt](https://www.monitoring-plugins.org/doc/man/check_nt.html) plugin +collects data from the [NSClient++](https://www.nsclient.org) service. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +nscp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +nscp_port | **Optional.** The NSClient++ port. Defaults to 12489. +nscp_password | **Optional.** The NSClient++ password. +nscp_variable | **Required.** The variable that should be checked. +nscp_params | **Optional.** Parameters for the query. Multiple parameters must be defined as array. +nscp_warn | **Optional.** The warning threshold. +nscp_crit | **Optional.** The critical threshold. +nscp_timeout | **Optional.** The query timeout in seconds. +nscp_showall | **Optional.** Use with SERVICESTATE to see working services or PROCSTATE for running processes. Defaults to false. + + +### ntp_time + +The [check_ntp_time](https://www.monitoring-plugins.org/doc/man/check_ntp_time.html) plugin +checks the clock offset between the local host and a remote NTP server. + +**Note**: If you want to monitor an NTP server, please use `ntp_peer`. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ntp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ntp_port | **Optional.** Port number (default: 123). +ntp_quiet | **Optional.** Returns UNKNOWN instead of CRITICAL if offset cannot be found. +ntp_warning | **Optional.** Offset to result in warning status (seconds). 
+ntp_critical | **Optional.** Offset to result in critical status (seconds). +ntp_timeoffset | **Optional.** Expected offset of the ntp server relative to local server (seconds). +ntp_timeout | **Optional.** Seconds before connection times out (default: 10). +ntp_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +ntp_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### ntp_peer + +The [check_ntp_peer](https://www.monitoring-plugins.org/doc/man/check_ntp_peer.html) plugin +checks the health of an NTP server. It supports checking the offset with the sync peer, the +jitter and stratum. This plugin will not check the clock offset between the local host and NTP + server; please use `ntp_time` for that purpose. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ntp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ntp_port | **Optional.** The port to use. Defaults to 123. +ntp_quiet | **Optional.** Returns UNKNOWN instead of CRITICAL or WARNING if server isn't synchronized. +ntp_warning | **Optional.** Offset to result in warning status (seconds). +ntp_critical | **Optional.** Offset to result in critical status (seconds). +ntp_wstratum | **Optional.** Warning threshold for stratum of server's synchronization peer. +ntp_cstratum | **Optional.** Critical threshold for stratum of server's synchronization peer. +ntp_wjitter | **Optional.** Warning threshold for jitter. +ntp_cjitter | **Optional.** Critical threshold for jitter. +ntp_wsource | **Optional.** Warning threshold for number of usable time sources. +ntp_csource | **Optional.** Critical threshold for number of usable time sources. +ntp_timeout | **Optional.** Seconds before connection times out (default: 10). +ntp_ipv4 | **Optional.** Use IPv4 connection. Defaults to false.
+ntp_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### pgsql + +The [check_pgsql](https://www.monitoring-plugins.org/doc/man/check_pgsql.html) plugin +tests a PostgreSQL DBMS to determine whether it is active and accepting queries. +If a query is specified using the `pgsql_query` attribute, it will be executed after +connecting to the server. The result from the query has to be numeric in order +to compare it against the query thresholds if set. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|--------------------------------------------------------------- +pgsql_hostname | **Optional.** Host name, IP Address, or unix socket (must be an absolute path). +pgsql_port | **Optional.** Port number (default: 5432). +pgsql_database | **Optional.** Database to check (default: template1). +pgsql_username | **Optional.** Login name of user. +pgsql_password | **Optional.** Password (BIG SECURITY ISSUE). +pgsql_options | **Optional.** Connection parameters (keyword = value), see below. +pgsql_warning | **Optional.** Response time to result in warning status (seconds). +pgsql_critical | **Optional.** Response time to result in critical status (seconds). +pgsql_timeout | **Optional.** Seconds before connection times out (default: 10). +pgsql_query | **Optional.** SQL query to run. Only first column in first row will be read. +pgsql_query_warning | **Optional.** SQL query value to result in warning status (double). +pgsql_query_critical | **Optional.** SQL query value to result in critical status (double). + +### ping + +The [check_ping](https://www.monitoring-plugins.org/doc/man/check_ping.html) plugin +uses the ping command to probe the specified host for packet loss (percentage) and +round trip average (milliseconds). 
+ +This command uses the host's `address` attribute if available and falls back to using +the `address6` attribute if the `address` attribute is not set. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ping_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 100. +ping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 5. +ping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 200. +ping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 15. +ping_packets | **Optional.** The number of packets to send. Defaults to 5. +ping_timeout | **Optional.** The plugin timeout in seconds. Defaults to 0 (no timeout). + + +### ping4 + +The [check_ping](https://www.monitoring-plugins.org/doc/man/check_ping.html) plugin +uses the ping command to probe the specified host for packet loss (percentage) and +round trip average (milliseconds). + +This command uses the host's `address` attribute if not explicitly specified using +the `ping_address` attribute. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ping_address | **Optional.** The host's IPv4 address. Defaults to "$address$". +ping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 100. +ping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 5. +ping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 200. +ping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 15. +ping_packets | **Optional.** The number of packets to send. Defaults to 5. 
+ping_timeout | **Optional.** The plugin timeout in seconds. Defaults to 0 (no timeout). + +### ping6 + +The [check_ping](https://www.monitoring-plugins.org/doc/man/check_ping.html) plugin +uses the ping command to probe the specified host for packet loss (percentage) and +round trip average (milliseconds). + +This command uses the host's `address6` attribute if not explicitly specified using +the `ping_address` attribute. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ping_address | **Optional.** The host's IPv6 address. Defaults to "$address6$". +ping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 100. +ping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 5. +ping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 200. +ping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 15. +ping_packets | **Optional.** The number of packets to send. Defaults to 5. +ping_timeout | **Optional.** The plugin timeout in seconds. Defaults to 0 (no timeout). + + +### pop + +The [check_pop](https://www.monitoring-plugins.org/doc/man/check_pop.html) plugin +tests POP connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------|-------------- +pop_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +pop_port | **Optional.** The port that should be checked. Defaults to 110. +pop_escape | **Optional.** Can use \\n, \\r, \\t or \\ in send or quit string. Must come before send or quit option. Default: nothing added to send, \\r\\n added to end of quit. +pop_send | **Optional.** String to send to the server. 
+pop_expect | **Optional.** String to expect in server response. Multiple strings must be defined as array. +pop_all | **Optional.** All expect strings need to occur in server response. Default is any. +pop_quit | **Optional.** String to send server to initiate a clean close of the connection. +pop_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit (default: crit). +pop_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit (default: warn). +pop_jail | **Optional.** Hide output from TCP socket. +pop_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +pop_delay | **Optional.** Seconds to wait between sending string and polling for response. +pop_certificate_age | **Optional.** Minimum number of days a certificate has to be valid. +pop_ssl | **Optional.** Use SSL for the connection. +pop_warning | **Optional.** Response time to result in warning status (seconds). +pop_critical | **Optional.** Response time to result in critical status (seconds). +pop_timeout | **Optional.** Seconds before connection times out (default: 10). +pop_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +pop_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### procs + +The [check_procs](https://www.monitoring-plugins.org/doc/man/check_procs.html) plugin +checks all processes and generates WARNING or CRITICAL states if the specified +metric is outside the required threshold ranges. The metric defaults to number +of processes. Search filters can be applied to limit the processes to check. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------|-------------- +procs_warning | **Optional.** The process count warning threshold. Defaults to 250. +procs_critical | **Optional.** The process count critical threshold. Defaults to 400. 
+procs_metric | **Optional.** Check thresholds against metric. +procs_timeout | **Optional.** Seconds before plugin times out. +procs_traditional | **Optional.** Filter own process the traditional way by PID instead of /proc/pid/exe. Defaults to false. +procs_state | **Optional.** Only scan for processes that have one or more of the status flags you specify. +procs_ppid | **Optional.** Only scan for children of the parent process ID indicated. +procs_vsz | **Optional.** Only scan for processes with VSZ higher than indicated. +procs_rss | **Optional.** Only scan for processes with RSS higher than indicated. +procs_pcpu | **Optional.** Only scan for processes with PCPU higher than indicated. +procs_user | **Optional.** Only scan for processes with user name or ID indicated. +procs_argument | **Optional.** Only scan for processes with args that contain STRING. +procs_argument_regex | **Optional.** Only scan for processes with args that contain the regex STRING. +procs_command | **Optional.** Only scan for exact matches of COMMAND (without path). +procs_nokthreads | **Optional.** Only scan for non kernel threads. Defaults to false. + + +### radius + +The [check_radius](https://www.monitoring-plugins.org/doc/man/check_radius.html) plugin +checks a RADIUS server to see if it is accepting connections. The server to test +must be specified in the invocation, as well as a user name and password. A configuration +file may also be present. The format of the configuration file is described in the +radiusclient library sources. The password option presents a substantial security +issue because the password can possibly be determined by careful watching of the +command line in a process listing. This risk is exacerbated because the plugin will +typically be executed at regular predictable intervals. Please be sure that the +password used does not allow access to sensitive system resources. 
+ + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------|-------------- +radius_address | **Optional.** The radius server's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +radius_config_file | **Required.** The radius configuration file. +radius_username | **Required.** The radius username to test. +radius_password | **Required.** The radius password to test. +radius_port | **Optional.** The radius port number (default 1645). +radius_nas_id | **Optional.** The NAS identifier. +radius_nas_address | **Optional.** The NAS IP address. +radius_expect | **Optional.** The response string to expect from the server. +radius_retries | **Optional.** The number of times to retry a failed connection. +radius_timeout | **Optional.** The number of seconds before connection times out (default: 10). + +### rpc + +The [check_rpc](https://www.monitoring-plugins.org/doc/man/check_rpc.html) +plugin tests if a service is registered and running using `rpcinfo -H host -C rpc_command`. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--- | --- +rpc_address | **Optional.** The rpc host address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +rpc_command | **Required.** The program name (or number). +rpc_port | **Optional.** The port that should be checked. +rpc_version | **Optional.** The version you want to check for (one or more). +rpc_udp | **Optional.** Use UDP test. Defaults to false. +rpc_tcp | **Optional.** Use TCP test. Defaults to false. +rpc_verbose | **Optional.** Show verbose output. Defaults to false. + +### simap + +The [check_simap](https://www.monitoring-plugins.org/doc/man/check_simap.html) plugin +tests SIMAP connections with the specified host (or unix socket).
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------|-------------- +simap_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +simap_port | **Optional.** The port that should be checked. Defaults to 993. +simap_escape | **Optional.** Can use \\n, \\r, \\t or \\ in send or quit string. Must come before send or quit option. Default: nothing added to send, \\r\\n added to end of quit. +simap_send | **Optional.** String to send to the server. +simap_expect | **Optional.** String to expect in server response. Multiple strings must be defined as array. +simap_all | **Optional.** All expect strings need to occur in server response. Default is any. +simap_quit | **Optional.** String to send server to initiate a clean close of the connection. +simap_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit (default: crit). +simap_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit (default: warn). +simap_jail | **Optional.** Hide output from TCP socket. +simap_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +simap_delay | **Optional.** Seconds to wait between sending string and polling for response. +simap_certificate_age | **Optional.** Minimum number of days a certificate has to be valid. +simap_ssl | **Optional.** Use SSL for the connection. +simap_warning | **Optional.** Response time to result in warning status (seconds). +simap_critical | **Optional.** Response time to result in critical status (seconds). +simap_timeout | **Optional.** Seconds before connection times out (default: 10). +simap_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +simap_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. 
+ +### smart + +The [check_ide_smart](https://www.monitoring-plugins.org/doc/man/check_ide_smart.html) plugin +checks a local hard drive with the (Linux specific) SMART interface. Requires installation of `smartctl`. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +smart_device | **Required.** The name of a local hard drive to monitor. + + +### smtp + +The [check_smtp](https://www.monitoring-plugins.org/doc/man/check_smtp.html) plugin +will attempt to open an SMTP connection with the host. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------------|-------------- +smtp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +smtp_port | **Optional.** The port that should be checked. Defaults to 25. +smtp_mail_from | **Optional.** Test a MAIL FROM command with the given email address. +smtp_expect | **Optional.** String to expect in first line of server response (default: '220'). +smtp_command | **Optional.** SMTP command (may be used repeatedly). +smtp_response | **Optional.** Expected response to command (may be used repeatedly). +smtp_helo_fqdn | **Optional.** FQDN used for HELO +smtp_certificate_age | **Optional.** Minimum number of days a certificate has to be valid. +smtp_starttls | **Optional.** Use STARTTLS for the connection. +smtp_authtype | **Optional.** SMTP AUTH type to check (default none, only LOGIN supported). +smtp_authuser | **Optional.** SMTP AUTH username. +smtp_authpass | **Optional.** SMTP AUTH password. +smtp_ignore_quit | **Optional.** Ignore failure when sending QUIT command to server. +smtp_warning | **Optional.** Response time to result in warning status (seconds). +smtp_critical | **Optional.** Response time to result in critical status (seconds). 
+smtp_timeout | **Optional.** Seconds before connection times out (default: 10). +smtp_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +smtp_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### snmp + +The [check_snmp](https://www.monitoring-plugins.org/doc/man/check_snmp.html) plugin +checks the status of remote machines and obtains system information via SNMP. + +**Note**: This plugin uses the `snmpget` command included with the NET-SNMP package. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_oid | **Required.** The SNMP OID. +snmp_community | **Optional.** The SNMP community. Defaults to "public". +snmp_port | **Optional.** The SNMP port. Defaults to "161". +snmp_retries | **Optional.** Number of retries to be used in the SNMP requests. +snmp_warn | **Optional.** The warning threshold. +snmp_crit | **Optional.** The critical threshold. +snmp_string | **Optional.** Return OK state if the string matches exactly with the output value +snmp_ereg | **Optional.** Return OK state if extended regular expression REGEX matches with the output value +snmp_eregi | **Optional.** Return OK state if case-insensitive extended REGEX matches with the output value +snmp_label | **Optional.** Prefix label for output value +snmp_invert_search | **Optional.** Invert search result and return CRITICAL state if found +snmp_units | **Optional.** Units label(s) for output value (e.g., 'sec.'). +snmp_version | **Optional.** Version to use. E.g. 1, 2, 2c or 3. +snmp_miblist | **Optional.** MIB's to use, comma separated. Defaults to "ALL". +snmp_rate_multiplier | **Optional.** Converts rate per second. For example, set to 60 to convert to per minute. +snmp_rate | **Optional.** Boolean. 
Enable rate calculation. +snmp_getnext | **Optional.** Boolean. Use SNMP GETNEXT. Defaults to false. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 10 seconds. +snmp_offset | **Optional.** Add/subtract the specified OFFSET to numeric sensor data. +snmp_output_delimiter | **Optional.** Separates output on multiple OID requests. +snmp_perf_oids | **Optional.** Label performance data with OIDs instead of --label's. + +### snmpv3 + +Check command object for the [check_snmp](https://www.monitoring-plugins.org/doc/man/check_snmp.html) +plugin, using SNMPv3 authentication and encryption options. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------|-------------- +snmpv3_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmpv3_getnext | **Optional.** Use SNMP GETNEXT instead of SNMP GET. +snmpv3_seclevel | **Optional.** The security level. Defaults to authPriv. +snmpv3_auth_alg | **Optional.** The authentication algorithm. Defaults to SHA. +snmpv3_user | **Required.** The username to log in with. +snmpv3_context | **Optional.** The SNMPv3 context. +snmpv3_auth_key | **Required.** The authentication key. Required if `snmpv3_seclevel` is set to `authPriv`, otherwise optional. +snmpv3_priv_key | **Required.** The encryption key. +snmpv3_oid | **Required.** The SNMP OID. +snmpv3_priv_alg | **Optional.** The encryption algorithm. Defaults to AES. +snmpv3_warn | **Optional.** The warning threshold. +snmpv3_crit | **Optional.** The critical threshold. +snmpv3_string | **Optional.** Return OK state (for that OID) if STRING is an exact match. +snmpv3_ereg | **Optional.** Return OK state (for that OID) if extended regular expression REGEX matches. +snmpv3_eregi | **Optional.** Return OK state (for that OID) if case-insensitive extended REGEX matches. 
+snmpv3_invert_search | **Optional.** Invert search result and return CRITICAL if found +snmpv3_label | **Optional.** Prefix label for output value. +snmpv3_units | **Optional.** Units label(s) for output value (e.g., 'sec.'). +snmpv3_rate_multiplier | **Optional.** Converts rate per second. For example, set to 60 to convert to per minute. +snmpv3_rate | **Optional.** Boolean. Enable rate calculation. +snmpv3_timeout | **Optional.** The command timeout in seconds. Defaults to 10 seconds. + +### snmp-uptime + +Check command object for the [check_snmp](https://www.monitoring-plugins.org/doc/man/check_snmp.html) +plugin, using the uptime OID by default. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_oid | **Optional.** The SNMP OID. Defaults to "1.3.6.1.2.1.1.3.0". +snmp_community | **Optional.** The SNMP community. Defaults to "public". + + +### spop + +The [check_spop](https://www.monitoring-plugins.org/doc/man/check_spop.html) plugin +tests SPOP connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------------|-------------- +spop_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +spop_port | **Optional.** The port that should be checked. Defaults to 995. +spop_escape | **Optional.** Can use \\n, \\r, \\t or \\ in send or quit string. Must come before send or quit option. Default: nothing added to send, \\r\\n added to end of quit. +spop_send | **Optional.** String to send to the server. +spop_expect | **Optional.** String to expect in server response. Multiple strings must be defined as array. 
+spop_all | **Optional.** All expect strings need to occur in server response. Default is any. +spop_quit | **Optional.** String to send server to initiate a clean close of the connection. +spop_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit (default: crit). +spop_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit (default: warn). +spop_jail | **Optional.** Hide output from TCP socket. +spop_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +spop_delay | **Optional.** Seconds to wait between sending string and polling for response. +spop_certificate_age | **Optional.** Minimum number of days a certificate has to be valid. +spop_ssl | **Optional.** Use SSL for the connection. +spop_warning | **Optional.** Response time to result in warning status (seconds). +spop_critical | **Optional.** Response time to result in critical status (seconds). +spop_timeout | **Optional.** Seconds before connection times out (default: 10). +spop_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +spop_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### ssh + +The [check_ssh](https://www.monitoring-plugins.org/doc/man/check_ssh.html) plugin +connects to an SSH server at a specified host and port. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ssh_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ssh_port | **Optional.** The port that should be checked. Defaults to 22. +ssh_timeout | **Optional.** Seconds before connection times out. Defaults to 10. +ssh_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +ssh_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. 
+ + +### ssl + +Check command object for the [check_tcp](https://www.monitoring-plugins.org/doc/man/check_tcp.html) plugin, +using ssl-related options. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------------|-------------- +ssl_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ssl_port | **Optional.** The port that should be checked. Defaults to 443. +ssl_timeout | **Optional.** Timeout in seconds for the connect and handshake. The plugin default is 10 seconds. +ssl_cert_valid_days_warn | **Optional.** Warning threshold for days before the certificate will expire. When used, the default for ssl_cert_valid_days_critical is 0. +ssl_cert_valid_days_critical | **Optional.** Critical threshold for days before the certificate will expire. When used, ssl_cert_valid_days_warn must also be set. +ssl_sni | **Optional.** The `server_name` that is sent to select the SSL certificate to check. Important if SNI is used. + + +### ssmtp + +The [check_ssmtp](https://www.monitoring-plugins.org/doc/man/check_ssmtp.html) plugin +tests SSMTP connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------|-------------- +ssmtp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ssmtp_port | **Optional.** The port that should be checked. Defaults to 465. +ssmtp_escape | **Optional.** Can use \\n, \\r, \\t or \\ in send or quit string. Must come before send or quit option. Default: nothing added to send, \\r\\n added to end of quit. +ssmtp_send | **Optional.** String to send to the server. +ssmtp_expect | **Optional.** String to expect in server response. 
Multiple strings must be defined as array. +ssmtp_all | **Optional.** All expect strings need to occur in server response. Default is any. +ssmtp_quit | **Optional.** String to send server to initiate a clean close of the connection. +ssmtp_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit (default: crit). +ssmtp_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit (default: warn). +ssmtp_jail | **Optional.** Hide output from TCP socket. +ssmtp_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +ssmtp_delay | **Optional.** Seconds to wait between sending string and polling for response. +ssmtp_certificate_age | **Optional.** Minimum number of days a certificate has to be valid. +ssmtp_ssl | **Optional.** Use SSL for the connection. +ssmtp_warning | **Optional.** Response time to result in warning status (seconds). +ssmtp_critical | **Optional.** Response time to result in critical status (seconds). +ssmtp_timeout | **Optional.** Seconds before connection times out (default: 10). +ssmtp_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +ssmtp_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### swap + +The [check_swap](https://www.monitoring-plugins.org/doc/man/check_swap.html) plugin +checks the swap space on a local machine. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +swap_wfree | **Optional.** The free swap space warning threshold in % (enable `swap_integer` for number values). Defaults to `50%`. +swap_cfree | **Optional.** The free swap space critical threshold in % (enable `swap_integer` for number values). Defaults to `25%`. +swap_integer | **Optional.** Specifies whether the thresholds are passed as number or percent value. Defaults to false (percent values). 
+swap_allswaps | **Optional.** Conduct comparisons for all swap partitions, one by one. Defaults to false. +swap_noswap | **Optional.** Resulting state when there is no swap regardless of thresholds. Possible values are "ok", "warning", "critical", "unknown". Defaults to "critical". + + +### tcp + +The [check_tcp](https://www.monitoring-plugins.org/doc/man/check_tcp.html) plugin +tests TCP connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +tcp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +tcp_port | **Required.** The port that should be checked. +tcp_expect | **Optional.** String to expect in server response. Multiple strings must be defined as array. +tcp_all | **Optional.** All expect strings need to occur in server response. Defaults to false. +tcp_escape_send | **Optional.** Enable usage of \\n, \\r, \\t or \\\\ in send string. +tcp_send | **Optional.** String to send to the server. +tcp_escape_quit | **Optional.** Enable usage of \\n, \\r, \\t or \\\\ in quit string. +tcp_quit | **Optional.** String to send server to initiate a clean close of the connection. +tcp_refuse | **Optional.** Accept TCP refusals with states ok, warn, crit. Defaults to crit. +tcp_mismatch | **Optional.** Accept expected string mismatches with states ok, warn, crit. Defaults to warn. +tcp_jail | **Optional.** Hide output from TCP socket. +tcp_maxbytes | **Optional.** Close connection once more than this number of bytes are received. +tcp_delay | **Optional.** Seconds to wait between sending string and polling for response. +tcp_certificate | **Optional.** Minimum number of days a certificate has to be valid. 1st value is number of days for warning, 2nd is critical (if not specified: 0) -- separated by comma. 
+tcp_ssl | **Optional.** Use SSL for the connection. Defaults to false. +tcp_wtime | **Optional.** Response time to result in warning status (seconds). +tcp_ctime | **Optional.** Response time to result in critical status (seconds). +tcp_timeout | **Optional.** Seconds before connection times out. Defaults to 10. +tcp_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +tcp_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### udp + +The [check_udp](https://www.monitoring-plugins.org/doc/man/check_udp.html) plugin +tests UDP connections with the specified host (or unix socket). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +udp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +udp_port | **Required.** The port that should be checked. +udp_send | **Required.** The payload to send in the UDP datagram. +udp_expect | **Required.** The payload to expect in the response datagram. +udp_quit | **Optional.** The payload to send to 'close' the session. +udp_ipv4 | **Optional.** Use IPv4 connection. Defaults to false. +udp_ipv6 | **Optional.** Use IPv6 connection. Defaults to false. + + +### ups + +The [check_ups](https://www.monitoring-plugins.org/doc/man/check_ups.html) plugin +tests the UPS service on the specified host. [Network UPS Tools](http://www.networkupstools.org) + must be running for this plugin to work. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +ups_address | **Required.** The address of the host running upsd. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ups_name | **Required.** The UPS name. Defaults to `ups`. +ups_port | **Optional.** The port to which to connect. Defaults to 3493. 
+ups_variable | **Optional.** The variable to monitor. Must be one of LINE, TEMP, BATTPCT or LOADPCT. If this is not set, the check only relies on the value of `ups.status`. +ups_warning | **Optional.** The warning threshold for the selected variable. +ups_critical | **Optional.** The critical threshold for the selected variable. +ups_celsius | **Optional.** Display the temperature in degrees Celsius instead of Fahrenheit. Defaults to `false`. +ups_timeout | **Optional.** The number of seconds before the connection times out. Defaults to 10. + + +### users + +The [check_users](https://www.monitoring-plugins.org/doc/man/check_users.html) plugin +checks the number of users currently logged in on the local system and generates an +error if the number exceeds the thresholds specified. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +users_wgreater | **Optional.** The user count warning threshold. Defaults to 20. +users_cgreater | **Optional.** The user count critical threshold. Defaults to 50. + + +### uptime + +The [check_uptime](https://www.monitoring-plugins.org/doc/man/check_uptime.html) plugin +checks the uptime of the system using /proc/uptime. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +uptime_warning | **Required.** Min. number of uptime to generate warning (-w 30m). Defaults to 30m. +uptime_critical | **Required.** Min. number of uptime to generate critical alert (-c 15m). Defaults to 15m. +uptime_for | **Optional.** Show uptime in a pretty format (Running for x weeks, x days, ...). Defaults to false. +uptime_since | **Optional.** Show last boot in yyyy-mm-dd HH:MM:SS format (output from 'uptime -s'). Defaults to false. 
+ + + +## Windows Plugins for Icinga 2 + +> **Note** +> +> These plugins are DEPRECATED in favor of our +> [PowerShell Plugins](https://github.com/Icinga/icinga-powershell-plugins) +> and may be removed in a future release. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +To allow a basic monitoring of Windows clients Icinga 2 comes with a set of Windows only plugins. While trying to mirror the functionalities of their Linux cousins from the monitoring-plugins package, the differences between Windows and Linux are too big to be able to use the same CheckCommands for both systems. + +A check-commands-windows.conf comes with Icinga 2, it assumes that the Windows Plugins are installed in the PluginDir set in your constants.conf. To enable them, the following include directive is needed in your icinga2.conf: + + include <windows-plugins> + +One of the differences between the Windows plugins and their Linux counterparts is that they consistently do not require thresholds to run, functioning like dummies without. + + +### Threshold syntax + +If not specified differently, the thresholds for the plugins all follow the same pattern: + +Threshold | Meaning +:------------|:---------- +"29" | The threshold is 29. +"!29" | The threshold is 29, but the negative of the result is returned. +"[10-40]" | The threshold is a range from (including) 10 to 40, a value inside means the threshold has been exceeded. +"![10-40]" | Same as above, but the result is inverted. + + +### disk-windows + +Check command object for the `check_disk.exe` plugin. +Aggregates the disk space of all volumes and mount points it can find, or the ones defined in `disk_win_path`. Ignores removable storage like flash drives and discs (CD, DVD etc.). +The data collection is instant and free disk space (default, see `disk_win_show_used`) is used for threshold computation. + +> **Note** +> +> Percentage based thresholds can be used by adding a '%' to the threshold +> value. 
+ +Custom variables: + +Name | Description +:---------------------|:------------ +disk\_win\_warn | **Optional**. The warning threshold. Defaults to "20%". +disk\_win\_crit | **Optional**. The critical threshold. Defaults to "10%". +disk\_win\_path | **Optional**. Check only these paths, default checks all. +disk\_win\_unit | **Optional**. Use this unit to display disk space, thresholds are interpreted in this unit. Defaults to "mb", possible values are: b, kb, mb, gb and tb. +disk\_win\_exclude | **Optional**. Exclude these drives from check. +disk\_win\_show\_used | **Optional**. Use used instead of free space. + +### load-windows + +Check command object for the `check_load.exe` plugin. +This plugin collects the inverse of the performance counter `\Processor(_Total)\% Idle Time` two times, with a wait time of one second between the collection. To change this wait time use [`perfmon-windows`](10-icinga-template-library.md#windows-plugins-load-windows). + +Custom variables: + +Name | Description +:---------------|:------------ +load\_win\_warn | **Optional**. The warning threshold. +load\_win\_crit | **Optional**. The critical threshold. + + +### memory-windows + +Check command object for the `check_memory.exe` plugin. +The memory collection is instant and free memory is used for threshold computation. + +> **Note** +> +> Percentage based thresholds can be used by adding a '%' to the threshold +> value. Keep in mind that memory\_win\_unit is applied before the +> value is calculated. + +Custom variables: + +Name | Description +:-----------------|:------------ +memory\_win\_warn | **Optional**. The warning threshold. Defaults to "10%". +memory\_win\_crit | **Optional**. The critical threshold. Defaults to "5%". +memory\_win\_unit | **Optional**. The unit to display the received value in, thresholds are interpreted in this unit. Defaults to "mb" (megabyte), possible values are: b, kb, mb, gb and tb. +memory\_win\_show\_used | **Optional**. 
Show used memory instead of the free memory. + + +### network-windows + +Check command object for the `check_network.exe` plugin. +Collects the total Bytes inbound and outbound for all interfaces in one second, to itemise interfaces or use a different collection interval use [`perfmon-windows`](10-icinga-template-library.md#windows-plugins-load-windows). + +Custom variables: + +Name | Description +:-------------------|:------------ +network\_win\_warn | **Optional**. The warning threshold. +network\_win\_crit | **Optional**. The critical threshold. +network\_no\_isatap | **Optional**. Do not print ISATAP interfaces. + + +### perfmon-windows + +Check command object for the `check_perfmon.exe` plugin. +This plugin allows collecting data from a Performance Counter. After the first data collection a second one is done after `perfmon_win_wait` milliseconds. When you know `perfmon_win_counter` only requires one set of data to provide valid data you can set `perfmon_win_wait` to `0`. + +To receive a list of possible Performance Counter Objects run `check_perfmon.exe --print-objects` and to view an object's instances and counters run `check_perfmon.exe --print-object-info -P "name of object"` + +Custom variables: + +Name | Description +:---------------------|:------------ +perfmon\_win\_warn | **Optional**. The warning threshold. +perfmon\_win\_crit | **Optional**. The critical threshold. +perfmon\_win\_counter | **Required**. The Performance Counter to use. Ex. `\Processor(_Total)\% Idle Time`. +perfmon\_win\_wait | **Optional**. Time in milliseconds to wait between data collection (default: 1000). +perfmon\_win\_type | **Optional**. Format in which to expect performance values. Possible are: long, int64 and double (default). +perfmon\_win\_syntax | **Optional**. Use this in the performance output instead of `perfmon_win_counter`. Exists for graphics compatibility reasons. + + +### ping-windows + +Check command object for the `check_ping.exe` plugin. 
+ping-windows should automatically detect whether `ping_win_address` is an IPv4 or IPv6 address. If not, use ping4-windows and ping6-windows. Also note that check\_ping.exe waits at least `ping_win_timeout` milliseconds between the pings. + +Custom variables: + +Name | Description +:------------------|:------------ +ping\_win\_warn | **Optional**. The warning threshold. RTA and packet loss separated by comma. +ping\_win\_crit | **Optional**. The critical threshold. RTA and packet loss separated by comma. +ping\_win\_address | **Required**. An IPv4 or IPv6 address. +ping\_win\_packets | **Optional**. Number of packets to send. Default: 5. +ping\_win\_timeout | **Optional**. The timeout in milliseconds. Default: 1000. + + +### procs-windows + +Check command object for `check_procs.exe` plugin. +When using `procs_win_user` this plugin needs administrative privileges to access the processes of other users, to just enumerate them no additional privileges are required. + +Custom variables: + +Name | Description +:----------------|:------------ +procs\_win\_warn | **Optional**. The warning threshold. +procs\_win\_crit | **Optional**. The critical threshold. +procs\_win\_user | **Optional**. Count this user's processes. + + +### service-windows + +Check command object for `check_service.exe` plugin. +This check's thresholds work differently since the binary decision whether a service is running or not does not allow for three states. As a default `check_service.exe` will return CRITICAL when `service_win_service` is not running, the `service_win_warn` flag changes this to WARNING. + +Custom variables: + +Name | Description +:-------------------------|:------------ +service\_win\_warn | **Optional**. Warn when service is not running. +service\_win\_description | **Optional**. If this is set, `service_win_service` looks at the service description. +service\_win\_service | **Required**. Name of the service to check. 
+ + +### swap-windows + +Check command object for `check_swap.exe` plugin. +The data collection is instant. + +Custom variables: + +Name | Description +:--------------- | :------------ +swap\_win\_warn | **Optional**. The warning threshold. Defaults to "10%". +swap\_win\_crit | **Optional**. The critical threshold. Defaults to "5%". +swap\_win\_unit | **Optional**. The unit to display the received value in, thresholds are interpreted in this unit. Defaults to "mb" (megabyte). +swap\_win\_show\_used | **Optional**. Show used swap instead of the free swap. + +### update-windows + +Check command object for `check_update.exe` plugin. +Querying Microsoft for Windows updates can take multiple seconds to minutes. An update is treated as important when it has the WSUS flag for SecurityUpdates or CriticalUpdates. + +> **Note** +> +> The Network Services Account which runs Icinga 2 by default does not have the required +> permissions to run this check. + +Custom variables: + +Name | Description +:-------------------|:------------ +update\_win\_warn | **Optional**. The warning threshold. +update\_win\_crit | **Optional**. The critical threshold. +update\_win\_reboot | **Optional**. Set to treat 'may need update' as 'definitely needs update'. Please Note that this is true for almost every update and is therefore not recommended. +ignore\_reboot | **Optional**. Set to disable behavior of returning critical if any updates require a reboot. + + +If a warning threshold is set but not a critical threshold, the critical threshold will be set to one greater than the set warning threshold. +Unless the `ignore_reboot` flag is set, if any updates require a reboot the plugin will return critical. + +> **Note** +> +> If they are enabled, performance data will be shown in the web interface. +> If run without the optional parameters, the plugin will output critical if any important updates are available. + + +### uptime-windows + +Check command object for `check_uptime.exe` plugin. 
+Uses GetTickCount64 to get the uptime, so boot time is not included. + +Custom variables: + +Name | Description +:-----------------|:------------ +uptime\_win\_warn | **Optional**. The warning threshold. +uptime\_win\_crit | **Optional**. The critical threshold. +uptime\_win\_unit | **Optional**. The unit to display the received value in, thresholds are interpreted in this unit. Defaults to "s"(seconds), possible values are ms (milliseconds), s, m (minutes), h (hours). + + +### users-windows + +Check command object for `check_users.exe` plugin. + +Custom variables: + +Name | Description +:----------------|:------------ +users\_win\_warn | **Optional**. The warning threshold. +users\_win\_crit | **Optional**. The critical threshold. + +### file-age-windows + +Check command object for `check_file_age.cmd` command file and `check_file_age.cmd.ps1` plugin. + +Custom variables: + +Name | Description +:---------------------|:------------ +file_age_win_file | **Required**. File name and location +file_age_win_warning | **Required**. The warning threshold of file age in seconds. +file_age_win_critical | **Required**. The critical threshold of file age in seconds. + +All variables are required and all variables are positional. The variable order is: file warning critical. + +The check_file_age.cmd and the check_file_age.cmd.ps1 files are available for [download](https://github.com/KAMI911/icinga2-basic/tree/master/plugins). + +## Plugin Check Commands for NSClient++ + +There are two methods available for querying NSClient++: + +* Query the [HTTP API](06-distributed-monitoring.md#distributed-monitoring-windows-nscp-check-api) locally from an Icinga 2 client (requires a running NSClient++ service) +* Run a [local CLI check](10-icinga-template-library.md#nscp-check-local) (does not require NSClient++ as a service) + +Both methods have their advantages and disadvantages. 
One thing to +note: If you rely on performance counter delta calculations such as +CPU utilization, please use the HTTP API instead of the CLI sample call. + +For security reasons, it is advised to enable the NSClient++ HTTP API for local +connection from the Icinga 2 client only. Remote connections to the HTTP API +are not recommended when using the legacy HTTP API. + +### nscp_api + +`check_nscp_api` is part of the Icinga 2 plugins. This plugin is available for +both Windows and Linux/Unix. + +Verify that the ITL CheckCommand is included in the [icinga2.conf](04-configuration.md#icinga2-conf) configuration file: + + vim /etc/icinga2/icinga2.conf + + include <plugins> + +`check_nscp_api` runs queries against the NSClient++ API. Therefore NSClient++ needs to have +the `webserver` module enabled, configured and loaded. + +You can install the webserver using the following CLI commands: + + ./nscp.exe web install + ./nscp.exe web password -- --set icinga + +Now you can define specific [queries](https://docs.nsclient.org/reference/check/CheckHelpers.html#queries) +and integrate them into Icinga 2. + +The check plugin `check_nscp_api` can be integrated with the `nscp_api` CheckCommand object: + +Custom variables: + +Name | Description +:----------------------|:---------------------- +nscp\_api\_host | **Required**. NSCP API host address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +nscp\_api\_port | **Optional**. NSCP API port. Defaults to `8443`. +nscp\_api\_password | **Required**. NSCP API password. Please check the NSCP documentation for setup details. +nscp\_api\_query | **Required**. NSCP API query endpoint. Refer to the NSCP documentation for possible values. +nscp\_api\_arguments | **Optional**. NSCP API arguments dictionary either as single strings or key-value pairs using `=`. Refer to the NSCP documentation. + +`nscp_api_arguments` can be used to pass required thresholds to the executed check. 
The example below +checks the CPU utilization and specifies warning and critical thresholds. + +``` +check_nscp_api --host 10.0.10.148 --password icinga --query check_cpu --arguments show-all warning='load>40' critical='load>30' +check_cpu CRITICAL: critical(5m: 48%, 1m: 36%), 5s: 0% | 'total 5m'=48%;40;30 'total 1m'=36%;40;30 'total 5s'=0%;40;30 +``` + + +### nscp-local + +Icinga 2 can use the `nscp client` command to run arbitrary NSClient++ checks locally on the client. + +You can enable these check commands by adding the following include directive in your +[icinga2.conf](04-configuration.md#icinga2-conf) configuration file: + + include <nscp> + +You can also optionally specify an alternative installation directory for NSClient++ by adding +the NscpPath constant in your [constants.conf](04-configuration.md#constants-conf) configuration +file: + + const NscpPath = "C:\\Program Files (x86)\\NSClient++" + +By default Icinga 2 uses the Microsoft Installer API to determine where NSClient++ is installed. It should +not be necessary to manually set this constant. + +Note that it is not necessary to run NSClient++ as a Windows service for these commands to work. + +The check command object for NSClient++ is available as `nscp-local`. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------- +nscp_log_level | **Optional.** The log level. Defaults to "critical". +nscp_load_all | **Optional.** Whether to load all modules. Defaults to false. +nscp_modules | **Optional.** An array of NSClient++ modules to load. Defaults to `[ "CheckSystem" ]`. +nscp_boot | **Optional.** Whether to use the --boot option. Defaults to true. +nscp_query | **Required.** The NSClient++ query. Try `nscp client -q x` for a list. +nscp_arguments | **Optional.** An array of query arguments. +nscp_showall | **Optional.** Shows more details in plugin output, default to false. 
+ +> **Tip** +> +> In order to measure CPU load, you'll need a running NSClient++ service. +> Therefore it is advised to use a local [nscp-api](06-distributed-monitoring.md#distributed-monitoring-windows-nscp-check-api) +> check against its REST API. + +### nscp-local-cpu + +Check command object for the `check_cpu` NSClient++ plugin. + +Name | Description +--------------------|------------------ +nscp_cpu_time | **Optional.** Calculate average usage for the given time intervals. Value has to be an array, default to [ "1m", "5m", "15m" ]. +nscp_cpu_warning | **Optional.** Threshold for WARNING state in percent, default to 80. +nscp_cpu_critical | **Optional.** Threshold for CRITICAL state in percent, default to 90. +nscp_cpu_arguments | **Optional.** Additional arguments. +nscp_cpu_showall | **Optional.** Shows more details in plugin output, default to false. + +### nscp-local-memory + +Check command object for the `check_memory` NSClient++ plugin. + +Name | Description +----------------------|------------------ +nscp_memory_committed | **Optional.** Check for committed memory, default to false. +nscp_memory_physical | **Optional.** Check for physical memory, default to true. +nscp_memory_free | **Optional.** Switch between checking free (true) or used memory (false), default to false. +nscp_memory_warning | **Optional.** Threshold for WARNING state in percent or absolute (use MB, GB, ...), default to 80 (free=false) or 20 (free=true). +nscp_memory_critical | **Optional.** Threshold for CRITICAL state in percent or absolute (use MB, GB, ...), default to 90 (free=false) or 10 (free=true). +nscp_memory_arguments | **Optional.** Additional arguments. +nscp_memory_showall | **Optional.** Shows more details in plugin output, default to false. + +### nscp-local-os-version + +Check command object for the `check_os_version` NSClient++ plugin. + +This command has the same custom variables like the `nscp-local` check command. 
+ +### nscp-local-pagefile + +Check command object for the `check_pagefile` NSClient++ plugin. + +This command has the same custom variables like the `nscp-local` check command. + +### nscp-local-process + +Check command object for the `check_process` NSClient++ plugin. + +This command has the same custom variables like the `nscp-local` check command. + +### nscp-local-service + +Check command object for the `check_service` NSClient++ plugin. + +Name | Description +-----------------------|------------------ +nscp_service_name | **Required.** Name of service to check. +nscp_service_type | **Optional.** Type to check, default to state. +nscp_service_ok | **Optional.** State for return an OK, i.e. for type=state running, stopped, ... +nscp_service_otype | **Optional.** Dedicate type for nscp_service_ok, default to nscp_service_state. +nscp_service_warning | **Optional.** State for return an WARNING. +nscp_service_wtype | **Optional.** Dedicate type for nscp_service_warning, default to nscp_service_state. +nscp_service_critical | **Optional.** State for return an CRITICAL. +nscp_service_ctype | **Optional.** Dedicate type for nscp_service_critical, default to nscp_service_state. +nscp_service_arguments | **Optional.** Additional arguments. +nscp_service_showall | **Optional.** Shows more details in plugin output, default to true. + +### nscp-local-uptime + +Check command object for the `check_uptime` NSClient++ plugin. + +This command has the same custom variables like the `nscp-local` check command. + +### nscp-local-version + +Check command object for the `check_version` NSClient++ plugin. + +This command has the same custom variables like the `nscp-local` check command. +In addition to that the default value for `nscp_modules` is set to `[ "CheckHelpers" ]`. + +### nscp-local-disk + +Check command object for the `check_drivesize` NSClient++ plugin. 
+ +Name | Description +-----------------------|------------------ +nscp_disk_drive | **Optional.** Drive character, default to all drives. Can be an array if multiple drives should be monitored. +nscp_disk_exclude | **Optional.** Drive character, default to none. Can be an array of drive characters if multiple drives should be excluded. +nscp_disk_free | **Optional.** Switch between checking free space (free=true) or used space (free=false), default to false. +nscp_disk_warning | **Optional.** Threshold for WARNING in percent or absolute (use MB, GB, ...), default to 80 (used) or 20 percent (free). +nscp_disk_critical | **Optional.** Threshold for CRITICAL in percent or absolute (use MB, GB, ...), default to 90 (used) or 10 percent (free). +nscp_disk_arguments | **Optional.** Additional arguments. +nscp_disk_showall | **Optional.** Shows more details in plugin output, default to true. +nscp_modules | **Optional.** An array of NSClient++ modules to load. Defaults to `[ "CheckDisk" ]`. + +### nscp-local-counter + +Check command object for the `check_pdh` NSClient++ plugin. + +Name | Description +-----------------------|------------------ +nscp_counter_name | **Required.** Performance counter name. +nscp_counter_warning | **Optional.** WARNING Threshold. +nscp_counter_critical | **Optional.** CRITICAL Threshold. +nscp_counter_arguments | **Optional.** Additional arguments. +nscp_counter_showall | **Optional.** Shows more details in plugin output, default to false. +nscp_counter_perfsyntax | **Optional.** Apply performance data label, e.g. `Total Processor Time` to avoid special character problems. Defaults to `nscp_counter_name`. + +### nscp-local-tasksched + +Check Command object for the `check_tasksched` NSClient++ plugin. +You can check for a single task or for a complete folder (and sub folders) of tasks. + +Name | Description +-----------------------|------------------ +nscp_tasksched_name | **Optional.** Name of the task to check. 
+nscp_tasksched_folder | **Optional.** The folder in which the tasks to check reside. +nscp_tasksched_recursive | **Optional.** Recurse sub folder, defaults to true. +nscp_tasksched_hidden | **Optional.** Look for hidden tasks, defaults to false. +nscp_tasksched_warning | **Optional.** Filter which marks items which generate a warning state, defaults to `exit_code != 0`. +nscp_tasksched_critical | **Optional.** Filter which marks items which generate a critical state, defaults to `exit_code < 0`. +nscp_tasksched_emptystate | **Optional.** Return status to use when nothing matched filter, defaults to warning. +nscp_tasksched_perfsyntax | **Optional.** Performance alias syntax, defaults to `%(title)` +nscp_tasksched_detailsyntax | **Optional.** Detail level syntax, defaults to `%(folder)/%(title): %(exit_code) != 0` +nscp_tasksched_arguments | **Optional.** Additional arguments. +nscp_tasksched_showall | **Optional.** Shows more details in plugin output, default to false. +nscp_modules | **Optional.** An array of NSClient++ modules to load. Defaults to `[ "CheckTaskSched" ]`. + + +## Plugin Check Commands for Manubulon SNMP + +The `SNMP Manubulon Plugin Check Commands` provide configuration for plugin check +commands provided by the [SNMP Manubulon project](http://nagios.manubulon.com/index_snmp.html). + +**Note:** Some plugin parameters are only available in Debian packages or in a +[forked repository](https://github.com/dnsmichi/manubulon-snmp) with patches applied. + +The SNMP manubulon plugin check commands assume that the global constant named `ManubulonPluginDir` +is set to the path where the Manubulon SNMP plugins are installed. + +You can enable these plugin check commands by adding the following include directive in your +[icinga2.conf](04-configuration.md#icinga2-conf) configuration file: + + include <manubulon> + +### Checks by Host Type + +**N/A** : Not available for this type. + +**SNMP** : Available for simple SNMP query. + +**??** : Untested.
+ +**Specific** : Script name for platform specific checks. + + + Host type | Interface | storage | load/cpu | mem | process | env | specific + ------------------------|------------|----------|-----------|-----|----------|-----|------------------------- + Linux | Yes | Yes | Yes | Yes | Yes | No | + Windows | Yes | Yes | Yes | Yes | Yes | No | check_snmp_win.pl + Cisco router/switch | Yes | N/A | Yes | Yes | N/A | Yes | + HP router/switch | Yes | N/A | Yes | Yes | N/A | No | + Bluecoat proxy | Yes | SNMP | Yes | SNMP| No | Yes | + CheckPoint on SPLAT | Yes | Yes | Yes | Yes | Yes | No | check_snmp_cpfw.pl + CheckPoint on Nokia IP | Yes | Yes | Yes | No | ?? | No | check_snmp_vrrp.pl + Boostedge | Yes | Yes | Yes | Yes | ?? | No | check_snmp_boostedge.pl + AS400 | Yes | Yes | Yes | Yes | No | No | + NetsecureOne Netbox | Yes | Yes | Yes | ?? | Yes | No | + Radware Linkproof | Yes | N/A | SNMP | SNMP| No | No | check_snmp_linkproof_nhr
check_snmp_vrrp.pl + IronPort | Yes | SNMP | SNMP | SNMP| No | Yes | + Cisco CSS | Yes | ?? | Yes | Yes | No | ?? | check_snmp_css.pl + + +### snmp-env + +Check command object for the [check_snmp_env.pl](http://nagios.manubulon.com/snmp_env.html) plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + + +Name | Description +------------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_nocrypt | **Optional.** Define SNMP encryption. If set to `false`, `snmp_v3` needs to be enabled. Defaults to `true` (no encryption). +snmp_community | **Optional.** The SNMP community. Defaults to "public". +snmp_port | **Optional.** The SNMP port connection. +snmp_v2 | **Optional.** SNMP version to 2c. Defaults to false. +snmp_v3 | **Optional.** SNMP version to 3. Defaults to false. +snmp_login | **Optional.** SNMP version 3 username. Defaults to "snmpuser". +snmp_password | **Required.** SNMP version 3 password. No value defined as default. +snmp_v3_use_privpass | **Optional.** Define to use SNMP version 3 priv password. Defaults to false. +snmp_v3_use_authprotocol| **Optional.** Define to use SNMP version 3 authentication protocol. Defaults to false. +snmp_authprotocol | **Optional.** SNMP version 3 authentication protocol. Defaults to "md5,des". +snmp_privpass | **Required.** SNMP version 3 priv password. No value defined as default. +snmp_env_type | **Optional.** Environment Type [cisco|nokia|bc|iron|foundry|linux]. Defaults to "cisco". +snmp_env_fan | **Optional.** Minimum fan rpm value (only needed for 'iron' & 'linux') +snmp_env_celsius | **Optional.** Maximum temp in degrees celsius (only needed for 'iron' & 'linux') +snmp_perf | **Optional.** Enable perfdata values. Defaults to true. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 5 seconds. 
+ +### snmp-load + +Check command object for the [check_snmp_load.pl](http://nagios.manubulon.com/snmp_load.html) plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + + +Name | Description +------------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_nocrypt | **Optional.** Define SNMP encryption. If set to `false`, `snmp_v3` needs to be enabled. Defaults to `true` (no encryption). +snmp_community | **Optional.** The SNMP community. Defaults to "public". +snmp_port | **Optional.** The SNMP port connection. +snmp_v2 | **Optional.** SNMP version to 2c. Defaults to false. +snmp_v3 | **Optional.** SNMP version to 3. Defaults to false. +snmp_login | **Optional.** SNMP version 3 username. Defaults to "snmpuser". +snmp_password | **Required.** SNMP version 3 password. No value defined as default. +snmp_v3_use_privpass | **Optional.** Define to use SNMP version 3 priv password. Defaults to false. +snmp_v3_use_authprotocol| **Optional.** Define to use SNMP version 3 authentication protocol. Defaults to false. +snmp_authprotocol | **Optional.** SNMP version 3 authentication protocol. Defaults to "md5,des". +snmp_privpass | **Required.** SNMP version 3 priv password. No value defined as default. +snmp_warn | **Optional.** The warning threshold. Change the `snmp_load_type` var to "netsl" for using 3 values. +snmp_crit | **Optional.** The critical threshold. Change the `snmp_load_type` var to "netsl" for using 3 values. +snmp_load_type | **Optional.** Load type. Defaults to "stand". Check all available types in the [snmp load](http://nagios.manubulon.com/snmp_load.html) documentation. +snmp_perf | **Optional.** Enable perfdata values. Defaults to true. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 5 seconds. 
+ +### snmp-memory + +Check command object for the [check_snmp_mem.pl](http://nagios.manubulon.com/snmp_mem.html) plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_nocrypt | **Optional.** Define SNMP encryption. If set to `false`, `snmp_v3` needs to be enabled. Defaults to `true` (no encryption). +snmp_community | **Optional.** The SNMP community. Defaults to "public". +snmp_port | **Optional.** The SNMP port connection. +snmp_v2 | **Optional.** SNMP version to 2c. Defaults to false. +snmp_v3 | **Optional.** SNMP version to 3. Defaults to false. +snmp_login | **Optional.** SNMP version 3 username. Defaults to "snmpuser". +snmp_password | **Required.** SNMP version 3 password. No value defined as default. +snmp_v3_use_privpass | **Optional.** Define to use SNMP version 3 priv password. Defaults to false. +snmp_v3_use_authprotocol| **Optional.** Define to use SNMP version 3 authentication protocol. Defaults to false. +snmp_authprotocol | **Optional.** SNMP version 3 authentication protocol. Defaults to "md5,des". +snmp_privpass | **Required.** SNMP version 3 priv password. No value defined as default. +snmp_warn | **Optional.** The warning threshold. +snmp_crit | **Optional.** The critical threshold. +snmp_is_cisco | **Optional.** Change OIDs for Cisco switches. Defaults to false. +snmp_is_hp | **Optional.** Change OIDs for HP/Procurve switches. Defaults to false. +snmp_perf | **Optional.** Enable perfdata values. Defaults to true. +snmp_memcached | **Optional.** Include cached memory in used memory, Defaults to false. +snmp_membuffer | **Optional.** Exclude buffered memory in used memory, Defaults to false. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 5 seconds. 
+ +### snmp-storage + +Check command object for the [check_snmp_storage.pl](http://nagios.manubulon.com/snmp_storage.html) plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_nocrypt | **Optional.** Define SNMP encryption. If set to `false`, `snmp_v3` needs to be enabled. Defaults to `true` (no encryption). +snmp_community | **Optional.** The SNMP community. Defaults to "public". +snmp_port | **Optional.** The SNMP port connection. +snmp_v2 | **Optional.** SNMP version to 2c. Defaults to false. +snmp_v3 | **Optional.** SNMP version to 3. Defaults to false. +snmp_login | **Optional.** SNMP version 3 username. Defaults to "snmpuser". +snmp_password | **Required.** SNMP version 3 password. No value defined as default. +snmp_v3_use_privpass | **Optional.** Define to use SNMP version 3 priv password. Defaults to false. +snmp_v3_use_authprotocol| **Optional.** Define to use SNMP version 3 authentication protocol. Defaults to false. +snmp_authprotocol | **Optional.** SNMP version 3 authentication protocol. Defaults to "md5,des". +snmp_privpass | **Required.** SNMP version 3 priv password. No value defined as default. +snmp_warn | **Optional.** The warning threshold. +snmp_crit | **Optional.** The critical threshold. +snmp_storage_name | **Optional.** Storage name. Default to regex "^/$$". More options available in the [snmp storage](http://nagios.manubulon.com/snmp_storage.html) documentation. +snmp_storage_type | **Optional.** Filter by storage type. Valid options are Other, Ram, VirtualMemory, FixedDisk, RemovableDisk, FloppyDisk, CompactDisk, RamDisk, FlashMemory, or NetworkDisk. No value defined as default. +snmp_perf | **Optional.** Enable perfdata values. Defaults to true. 
+snmp_exclude | **Optional.** Select all storages except the one(s) selected by -m. No action on storage type selection. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 5 seconds. +snmp_storage_olength | **Optional.** Max-size of the SNMP message, useful in case of Too Long responses. + +### snmp-interface + +Check command object for the [check_snmp_int.pl](http://nagios.manubulon.com/snmp_int.html) plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_nocrypt | **Optional.** Define SNMP encryption. If set to `false`, `snmp_v3` needs to be enabled. Defaults to `true` (no encryption). +snmp_community | **Optional.** The SNMP community. Defaults to "public". +snmp_port | **Optional.** The SNMP port connection. +snmp_v2 | **Optional.** SNMP version to 2c. Defaults to false. +snmp_v3 | **Optional.** SNMP version to 3. Defaults to false. +snmp_login | **Optional.** SNMP version 3 username. Defaults to "snmpuser". +snmp_password | **Required.** SNMP version 3 password. No value defined as default. +snmp_v3_use_privpass | **Optional.** Define to use SNMP version 3 priv password. Defaults to false. +snmp_v3_use_authprotocol | **Optional.** Define to use SNMP version 3 authentication protocol. Defaults to false. +snmp_authprotocol | **Optional.** SNMP version 3 authentication protocol. Defaults to "md5,des". +snmp_privpass | **Required.** SNMP version 3 priv password. No value defined as default. +snmp_warn | **Optional.** The warning threshold. +snmp_crit | **Optional.** The critical threshold. +snmp_interface | **Optional.** Network interface name. Defaults to regex "eth0". +snmp_interface_inverse | **Optional.** Inverse Interface check, down is ok.
Defaults to false as it is missing. +snmp_interface_perf | **Optional.** Check the input/output bandwidth of the interface. Defaults to true. +snmp_interface_label | **Optional.** Add label before speed in output: in=, out=, errors-out=, etc. +snmp_interface_bits_bytes | **Optional.** Output performance data in bits/s or Bytes/s. **Depends** on snmp_interface_kbits set to true. Defaults to true. +snmp_interface_percent | **Optional.** Output performance data in % of max speed. Defaults to false. +snmp_interface_kbits | **Optional.** Make the warning and critical levels in KBits/s. Defaults to true. +snmp_interface_megabytes | **Optional.** Make the warning and critical levels in Mbps or MBps. **Depends** on snmp_interface_kbits set to true. Defaults to true. +snmp_interface_64bit | **Optional.** Use 64 bits counters instead of the standard counters when checking bandwidth & performance data for interface >= 1Gbps. Defaults to false. +snmp_interface_errors | **Optional.** Add error & discard to Perfparse output. Defaults to true. +snmp_interface_extended_checks | **Optional.** Also check the error and discard input/output. When enabled, the format of `snmp_warn` and `snmp_crit` changes to a comma-separated list of six values (input and output bandwidth plus input/output error and discard levels). More options available in the [snmp interface](http://nagios.manubulon.com/snmp_int.html) documentation. Defaults to false. +snmp_interface_noregexp | **Optional.** Do not use regexp to match interface name in description OID. Defaults to false. +snmp_interface_delta | **Optional.** Delta time of perfcheck. Defaults to "300" (5 min). +snmp_interface_warncrit_percent | **Optional.** Make the warning and critical levels in % of reported interface speed. If set, **snmp_interface_megabytes** needs to be set to false. Defaults to false. +snmp_interface_ifname | **Optional.** Switch from IF-MIB::ifDescr to IF-MIB::ifName when looking up the interface's name. +snmp_interface_ifalias | **Optional.** Switch from IF-MIB::ifDescr to IF-MIB::ifAlias when looking up the interface's name.
+snmp_interface_weathermap | **Optional.** Output data for ["weathermap" lines](http://docs.nagvis.org/1.9/en_US/lines_weathermap_style.html) in NagVis. **Depends** on `snmp_interface_perf` set to true. Defaults to `false`. **Note**: Available in `check_snmp_int.pl v2.1.0`. +snmp_perf | **Optional.** Enable perfdata values. Defaults to true. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 5 seconds. +snmp_interface_admin | **Optional.** Use administrative status instead of operational. Defaults to false. + +### snmp-process + +Check command object for the [check_snmp_process.pl](http://nagios.manubulon.com/snmp_process.html) plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_nocrypt | **Optional.** Define SNMP encryption. If set to `false`, `snmp_v3` needs to be enabled. Defaults to `true` (no encryption). +snmp_community | **Optional.** The SNMP community. Defaults to "public". +snmp_port | **Optional.** The SNMP port connection. +snmp_v2 | **Optional.** SNMP version to 2c. Defaults to false. +snmp_v3 | **Optional.** SNMP version to 3. Defaults to false. +snmp_login | **Optional.** SNMP version 3 username. Defaults to "snmpuser". +snmp_password | **Required.** SNMP version 3 password. No value defined as default. +snmp_v3_use_privpass | **Optional.** Define to use SNMP version 3 priv password. Defaults to false. +snmp_v3_use_authprotocol | **Optional.** Define to use SNMP version 3 authentication protocol. Defaults to false. +snmp_authprotocol | **Optional.** SNMP version 3 authentication protocol. Defaults to "md5,des". +snmp_privpass | **Required.** SNMP version 3 priv password. No value defined as default. +snmp_warn | **Optional.** The warning threshold.
+snmp_crit | **Optional.** The critical threshold. +snmp_process_name | **Optional.** Name of the process (regexp). No trailing slash! Defaults to ".*". +snmp_perf | **Optional.** Enable perfdata values. Defaults to true. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 5 seconds. +snmp_process_use_params | **Optional.** Add process parameters to process name for regexp matching. Example: "named.*-t /var/named/chroot" will only select named process with this parameter. Defaults to false. +snmp_process_use_fullpath | **Optional.** Use full path name instead of process name to select processes. Example: "/opt/app1/app1bin" will only select named process with this full path. Defaults to false. +snmp_process_mem_usage | **Optional.** Define to check memory usage for the process. Defaults to false. +snmp_process_mem_threshold | **Optional.** Defines the warning and critical thresholds in Mb when snmp_process_mem_usage set to true. Example "512,1024". Defaults to "0,0". +snmp_process_cpu_usage | **Optional.** Define to check CPU usage for the process. Defaults to false. +snmp_process_cpu_threshold | **Optional.** Defines the warning and critical thresholds in % when snmp_process_cpu_usage set to true. If more than one CPU, value can be > 100% : 100%=1 CPU. Example "15,50". Defaults to "0,0". + +### snmp-service + +Check command object for the [check_snmp_win.pl](http://nagios.manubulon.com/snmp_windows.html) plugin. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------|-------------- +snmp_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +snmp_nocrypt | **Optional.** Define SNMP encryption. If set to `false`, `snmp_v3` needs to be enabled. Defaults to `true` (no encryption). +snmp_community | **Optional.** The SNMP community. Defaults to "public".
+snmp_port | **Optional.** The SNMP port connection. +snmp_v2 | **Optional.** SNMP version to 2c. Defaults to false. +snmp_v3 | **Optional.** SNMP version to 3. Defaults to false. +snmp_login | **Optional.** SNMP version 3 username. Defaults to "snmpuser". +snmp_password | **Required.** SNMP version 3 password. No value defined as default. +snmp_v3_use_privpass | **Optional.** Define to use SNMP version 3 priv password. Defaults to false. +snmp_v3_use_authprotocol | **Optional.** Define to use SNMP version 3 authentication protocol. Defaults to false. +snmp_authprotocol | **Optional.** SNMP version 3 authentication protocol. Defaults to "md5,des". +snmp_privpass | **Required.** SNMP version 3 priv password. No value defined as default. +snmp_timeout | **Optional.** The command timeout in seconds. Defaults to 5 seconds. +snmp_service_name | **Optional.** Comma separated names of services (perl regular expressions can be used for every one). By default, it is not case sensitive. e.g. ^dns$. Defaults to ".*". +snmp_service_count | **Optional.** Compare matching services with a specified number instead of the number of names provided. +snmp_service_showall | **Optional.** Show all services in the output, instead of only the non-active ones. Defaults to false. +snmp_service_noregexp | **Optional.** Do not use regexp to match NAME in service description. Defaults to false. + + +## Contributed Plugin Check Commands + +The contributed Plugin Check Commands provide various additional command definitions +contributed by community members. + +These check commands assume that the global constant named `PluginContribDir` +is set to the path where the user installs custom plugins and can be enabled by +uncommenting the corresponding line in [icinga2.conf](04-configuration.md#icinga2-conf): + +``` +vim /etc/icinga2/icinga2.conf + +include <plugins-contrib> +``` + +This is enabled by default since Icinga 2 2.5.0. + +### Big Data + +This category contains plugins for various Big Data systems.
+ +#### cloudera_service_status + +The [cloudera_service_status](https://github.com/miso231/icinga2-cloudera-plugin) plugin +uses Cloudera Manager API to monitor cluster services + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------------|----------------------------------------------------------------- +cloudera_host | **Required.** Hostname of cloudera server. +cloudera_port | **Optional.** Port where cloudera is listening. Defaults to 443. +cloudera_user | **Required.** The username for the API connection. +cloudera_pass | **Required.** The password for the API connection. +cloudera_api_version | **Required.** API version of cloudera. +cloudera_cluster | **Required.** The cluster name in cloudera manager. +cloudera_service | **Required.** Name of cluster service to be checked. +cloudera_verify_ssl | **Optional.** Verify SSL. Defaults to true. + +#### cloudera_hdfs_space + +The [cloudera_hdfs_space](https://github.com/miso231/icinga2-cloudera-plugin) plugin +connects to Hadoop Namenode and gets used capacity of selected disk + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------|----------------------------------------------------------------- +cloudera_hdfs_space_host | **Required.** Namenode host to connect to. +cloudera_hdfs_space_port | **Optional.** Namenode port (default 50070). +cloudera_hdfs_space_disk | **Required.** HDFS disk to check. +cloudera_hdfs_space_warn | **Required.** Warning threshold in percent. +cloudera_hdfs_space_crit | **Required.** Critical threshold in percent. 
+ +#### cloudera_hdfs_files + +The [cloudera_hdfs_files](https://github.com/miso231/icinga2-cloudera-plugin) plugin +connects to Hadoop Namenode and gets total number of files on HDFS + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------|----------------------------------------------------------------- +cloudera_hdfs_files_host | **Required.** Namenode host to connect to. +cloudera_hdfs_files_port | **Optional.** Namenode port (default 50070). +cloudera_hdfs_files_warn | **Required.** Warning threshold. +cloudera_hdfs_files_crit | **Required.** Critical threshold. +cloudera_hdfs_files_max | **Required.** Max files count that causes problems (default 140,000,000). + +### Databases + +This category contains plugins for various database servers. + +#### db2_health + +The [check_db2_health](https://labs.consol.de/nagios/check_db2_health/) plugin +uses the `DBD::DB2` Perl library to monitor a [DB2](https://www.ibm.com/support/knowledgecenter/SSEPGG_11.1.0/) +database. + +The Git repository is located on [GitHub](https://github.com/lausser/check_db2_health). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|------------------------------------------------------------------------------------------------------------------------------ +db2_health_database | **Required.** The name of the database. (If it was catalogued locally, this parameter and `db2_health_not_catalogued = false` are the only you need. Otherwise you must specify database, hostname and port) +db2_health_username | **Optional.** The username for the database connection. +db2_health_password | **Optional.** The password for the database connection. +db2_health_port | **Optional.** The port where DB2 is listening. +db2_health_warning | **Optional.** The warning threshold depending on the mode. 
+db2_health_critical | **Optional.** The critical threshold depending on the mode. +db2_health_mode | **Required.** The mode uses predefined keywords for the different checks. For example "connection-time", "database-usage" or "sql". +db2_health_method | **Optional.** This tells the plugin how to connect to the database. The only method implemented yet is “dbi” which is the default. (It means, the plugin uses the perl module DBD::DB2). +db2_health_name | **Optional.** The tablespace, datafile, wait event, latch, enqueue depending on the mode or SQL statement to be executed with "db2_health_mode" sql. +db2_health_name2 | **Optional.** If "db2_health_name" is a sql statement, "db2_health_name2" can be used to appear in the output and the performance data. +db2_health_regexp | **Optional.** If set to true, "db2_health_name" will be interpreted as a regular expression. Defaults to false. +db2_health_units | **Optional.** This is used for a better output of mode=sql and for specifying thresholds for mode=tablespace-free. Possible values are "%", "KB", "MB" and "GB". +db2_health_maxinactivity | **Optional.** Used for the maximum amount of time a certain event has not happened. +db2_health_mitigation | **Optional.** Classifies the severity of an offline tablespace. +db2_health_lookback | **Optional.** How many days in the past db2_health check should look back to calculate exitcode. +db2_health_report | **Optional.** Report can be used to output only the bad news. Possible values are "short", "long", "html". Defaults to `short`. +db2_health_not_catalogued | **Optional.** Set this variable to false if you want to use a catalogued locally database. Defaults to `true`. +db2_health_env_db2_home | **Required.** Specifies the location of the db2 client libraries as environment variable `DB2_HOME`. Defaults to "/opt/ibm/db2/V10.5". +db2_health_env_db2_version | **Optional.** Specifies the DB2 version as environment variable `DB2_VERSION`. 
+ +#### mssql_health + +The [check_mssql_health](https://labs.consol.de/nagios/check_mssql_health/index.html) plugin +uses the `DBD::Sybase` Perl library based on [FreeTDS](https://www.freetds.org/) to monitor a +[MS SQL](https://www.microsoft.com/en-us/sql-server/) server. + +The Git repository is located on [GitHub](https://github.com/lausser/check_mssql_health). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|------------------------------------------------------------------------------------------------------------------------------ +mssql_health_hostname | **Optional.** Specifies the database hostname or address. No default because you typically use "mssql_health_server". +mssql_health_username | **Optional.** The username for the database connection. +mssql_health_password | **Optional.** The password for the database connection. +mssql_health_port | **Optional.** Specifies the database port. No default because you typically use "mssql_health_server". +mssql_health_server | **Optional.** The name of a predefined connection (in freetds.conf). +mssql_health_currentdb | **Optional.** The name of a database which is used as the current database for the connection. +mssql_health_offlineok | **Optional.** Set this to true if offline databases are perfectly ok for you. Defaults to false. +mssql_health_nooffline | **Optional.** Set this to true to ignore offline databases. Defaults to false. +mssql_health_dbthresholds | **Optional.** With this parameter thresholds are read from the database table check_mssql_health_thresholds. +mssql_health_notemp | **Optional.** Set this to true to ignore temporary databases/tablespaces. Defaults to false. +mssql_health_commit | **Optional.** Set this to true to turn on autocommit for the dbd::sybase module. Defaults to false. 
+mssql_health_method | **Optional.** How the plugin should connect to the database (dbi for the perl module `DBD::Sybase` (default) and `sqlrelay` for the SQLRelay proxy). +mssql_health_mode | **Required.** The mode uses predefined keywords for the different checks. For example "connection-time", "database-free" or "sql". +mssql_health_regexp | **Optional.** If set to true, "mssql_health_name" will be interpreted as a regular expression. Defaults to false. +mssql_health_warning | **Optional.** The warning threshold depending on the mode. +mssql_health_critical | **Optional.** The critical threshold depending on the mode. +mssql_health_warningx | **Optional.** A possible override for the warning threshold. +mssql_health_criticalx | **Optional.** A possible override for the critical threshold. +mssql_health_units | **Optional.** This is used for a better output of mode=sql and for specifying thresholds for mode=tablespace-free. Possible values are "%", "KB", "MB" and "GB". +mssql_health_name | **Optional.** Depending on the mode this could be the database name or a SQL statement. +mssql_health_name2 | **Optional.** If "mssql_health_name" is a sql statement, "mssql_health_name2" can be used to appear in the output and the performance data. +mssql_health_name3 | **Optional.** Additional argument used for 'database-file-free' mode for example. +mssql_health_extraopts | **Optional.** Read command line arguments from an external file. +mssql_health_blacklist | **Optional.** Blacklist some (missing/failed) components +mssql_health_mitigation | **Optional.** The parameter allows you to change a critical error to a warning. +mssql_health_lookback | **Optional.** The amount of time you want to look back when calculating average rates. +mssql_health_environment | **Optional.** Add a variable to the plugin's environment. +mssql_health_negate | **Optional.** Emulate the negate plugin. --negate warning=critical --negate unknown=critical. 
+mssql_health_morphmessage | **Optional.** Modify the final output message. +mssql_health_morphperfdata | **Optional.** The parameter allows you to change performance data labels. +mssql_health_selectedperfdata | **Optional.** The parameter allows you to limit the list of performance data. +mssql_health_report | **Optional.** Report can be used to output only the bad news. Possible values are "short", "long", "html". Defaults to `short`. +mssql_health_multiline | **Optional.** Multiline output. +mssql_health_withmymodulesdyndir | **Optional.** Add-on modules for the my-modes will be searched in this directory. +mssql_health_statefilesdir | **Optional.** An alternate directory where the plugin can save files. +mssql_health_isvalidtime | **Optional.** Signals the plugin to return OK if now is not a valid check time. +mssql_health_timeout | **Optional.** Plugin timeout. Defaults to 15s. + +#### mysql_health + +The [check_mysql_health](https://labs.consol.de/nagios/check_mysql_health/index.html) plugin +uses the `DBD::MySQL` Perl library to monitor a +[MySQL](https://dev.mysql.com/downloads/mysql/) or [MariaDB](https://mariadb.org/about/) database. + +The Git repository is located on [GitHub](https://github.com/lausser/check_mysql_health). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|------------------------------------------------------------------------------------------------------------------------------ +mysql_health_hostname | **Required.** Specifies the database hostname or address. Defaults to "$address$" or "$address6$" if the `address` attribute is not set. +mysql_health_port | **Optional.** Specifies the database port. Defaults to 3306 (or 1186 for "mysql_health_mode" cluster). +mysql_health_socket | **Optional.** Specifies the database unix socket. No default. +mysql_health_username | **Optional.** The username for the database connection. 
+mysql_health_password | **Optional.** The password for the database connection. +mysql_health_database | **Optional.** The database to connect to. Defaults to information_schema. +mysql_health_warning | **Optional.** The warning threshold depending on the mode. +mysql_health_critical | **Optional.** The critical threshold depending on the mode. +mysql_health_warningx | **Optional.** The extended warning thresholds depending on the mode. +mysql_health_criticalx | **Optional.** The extended critical thresholds depending on the mode. +mysql_health_mode | **Required.** The mode uses predefined keywords for the different checks. For example "connection-time", "slave-lag" or "sql". +mysql_health_method | **Optional.** How the plugin should connect to the database (`dbi` for using DBD::Mysql (default), `mysql` for using the mysql-Tool). +mysql_health_commit | **Optional.** Turns on autocommit for the dbd::\* module. +mysql_health_notemp | **Optional.** Ignore temporary databases/tablespaces. +mysql_health_nooffline | **Optional.** Skip the offline databases. +mysql_health_regexp | **Optional.** Parameter name/name2/name3 will be interpreted as (perl) regular expression. +mysql_health_name | **Optional.** The name of a specific component to check. +mysql_health_name2 | **Optional.** The secondary name of a component. +mysql_health_name3 | **Optional.** The tertiary name of a component. +mysql_health_units | **Optional.** This is used for a better output of mode=sql and for specifying thresholds for mode=tablespace-free. Possible values are "%", "KB", "MB" and "GB". +mysql_health_labelformat | **Optional.** One of those formats pnp4nagios or groundwork. Defaults to pnp4nagios. +mysql_health_extraopts | **Optional.** Read command line arguments from an external file. +mysql_health_blacklist | **Optional.** Blacklist some (missing/failed) components +mysql_health_mitigation | **Optional.** The parameter allows you to change a critical error to a warning. 
+mysql_health_lookback | **Optional.** The amount of time you want to look back when calculating average rates. +mysql_health_environment | **Optional.** Add a variable to the plugin's environment. +mysql_health_morphmessage | **Optional.** Modify the final output message. +mysql_health_morphperfdata | **Optional.** The parameter allows you to change performance data labels. +mysql_health_selectedperfdata | **Optional.** The parameter allows you to limit the list of performance data. +mysql_health_report | **Optional.** Can be used to shorten the output. +mysql_health_multiline | **Optional.** Multiline output. +mysql_health_negate | **Optional.** Emulate the negate plugin. --negate warning=critical --negate unknown=critical. +mysql_health_withmymodulesdyndir | **Optional.** Add-on modules for the my-modes will be searched in this directory. +mysql_health_statefilesdir | **Optional.** An alternate directory where the plugin can save files. +mysql_health_isvalidtime | **Optional.** Signals the plugin to return OK if now is not a valid check time. +mysql_health_timeout | **Optional.** Plugin timeout. Defaults to 60s. + +#### oracle_health + +The [check_oracle_health](https://labs.consol.de/nagios/check_oracle_health/index.html) plugin +uses the `DBD::Oracle` Perl library to monitor an [Oracle](https://www.oracle.com/database/) database. + +The Git repository is located on [GitHub](https://github.com/lausser/check_oracle_health). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|------------------------------------------------------------------------------------------------------------------------------ +oracle_health_connect | **Required.** Specifies the database connection string (from tnsnames.ora). +oracle_health_username | **Optional.** The username for the database connection. 
+oracle_health_password | **Optional.** The password for the database connection. +oracle_health_warning | **Optional.** The warning threshold depending on the mode. +oracle_health_critical | **Optional.** The critical threshold depending on the mode. +oracle_health_mode | **Required.** The mode uses predefined keywords for the different checks. For example "connection-time", "flash-recovery-area-usage" or "sql". +oracle_health_method | **Optional.** How the plugin should connect to the database (`dbi` for using DBD::Oracle (default), `sqlplus` for using the sqlplus-Tool). +oracle_health_name | **Optional.** The tablespace, datafile, wait event, latch, enqueue depending on the mode or SQL statement to be executed with "oracle_health_mode" sql. +oracle_health_name2 | **Optional.** If "oracle_health_name" is a sql statement, "oracle_health_name2" can be used to appear in the output and the performance data. +oracle_health_regexp | **Optional.** If set to true, "oracle_health_name" will be interpreted as a regular expression. Defaults to false. +oracle_health_units | **Optional.** This is used for a better output of mode=sql and for specifying thresholds for mode=tablespace-free. Possible values are "%", "KB", "MB" and "GB". +oracle_health_ident | **Optional.** If set to true, outputs instance and database names. Defaults to false. +oracle_health_commit | **Optional.** Set this to true to turn on autocommit for the dbd::oracle module. Defaults to false. +oracle_health_noperfdata | **Optional.** Set this to true if you want to disable perfdata. Defaults to false. +oracle_health_timeout | **Optional.** Plugin timeout. Defaults to 60s. +oracle_health_report | **Optional.** Select the plugin output format. Can be short or long. Defaults to long. +oracle_health_notemp | **Optional.** Set this to true to hide temporary and system tablespaces. Defaults to false. 
+ +Environment Macros: + +Name | Description +--------------------|------------------------------------------------------------------------------------------------------------------------------------------ +ORACLE\_HOME | **Required.** Specifies the location of the oracle instant client libraries. Defaults to "/usr/lib/oracle/11.2/client64/lib". Can be overridden by setting the custom variable `oracle_home`. +LD\_LIBRARY\_PATH | **Required.** Specifies the location of the oracle instant client libraries for the run-time shared library loader. Defaults to "/usr/lib/oracle/11.2/client64/lib". Can be overridden by setting the custom variable `oracle_ld_library_path`. +TNS\_ADMIN | **Required.** Specifies the location of the tnsnames.ora including the database connection strings. Defaults to "/etc/icinga2/plugin-configs". Can be overridden by setting the custom variable `oracle_tns_admin`. + +#### postgres + +The [check_postgres](https://bucardo.org/wiki/Check_postgres) plugin +uses the `psql` binary to monitor a [PostgreSQL](https://www.postgresql.org/about/) database. + +The Git repository is located on [GitHub](https://github.com/bucardo/check_postgres). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|------------------------------------------------------------------------------------------------------------------------------ +postgres_host | **Optional.** Specifies the database hostname or address. Defaults to "$address$" or "$address6$" if the `address` attribute is not set. If "postgres_unixsocket" is set to true, falls back to unix socket. +postgres_port | **Optional.** Specifies the database port. Defaults to 5432. +postgres_dbname | **Optional.** Specifies the database name to connect to. Defaults to "postgres" or "template1". +postgres_dbuser | **Optional.** The username for the database connection. Defaults to "postgres". 
+postgres_dbpass | **Optional.** The password for the database connection. You can use a .pgpass file instead. +postgres_dbservice | **Optional.** Specifies the service name to use inside of pg_service.conf. +postgres_warning | **Optional.** Specifies the warning threshold, range depends on the action. +postgres_critical | **Optional.** Specifies the critical threshold, range depends on the action. +postgres_include | **Optional.** Specifies name(s) items to specifically include (e.g. tables), depends on the action. +postgres_exclude | **Optional.** Specifies name(s) items to specifically exclude (e.g. tables), depends on the action. +postgres_includeuser | **Optional.** Include objects owned by certain users. +postgres_excludeuser | **Optional.** Exclude objects owned by certain users. +postgres_standby | **Optional.** Assume that the server is in continuous WAL recovery mode if set to true. Defaults to false. +postgres_production | **Optional.** Assume that the server is in production mode if set to true. Defaults to false. +postgres_action | **Required.** Determines the test executed. +postgres_unixsocket | **Optional.** If "postgres_unixsocket" is set to true, the unix socket is used instead of an address. Defaults to false. +postgres_query | **Optional.** Query for "custom_query" action. +postgres_valtype | **Optional.** Value type of query result for "custom_query". +postgres_reverse | **Optional.** If "postgres_reverse" is set, warning and critical values are reversed for "custom_query" action. +postgres_tempdir | **Optional.** Specify directory for temporary files. The default directory is dependent on the OS. More details [here](https://perldoc.perl.org/File/Spec.html). + +#### mongodb + +The [check_mongodb.py](https://github.com/mzupan/nagios-plugin-mongodb) plugin +uses the `pymongo` Python library to monitor a [MongoDB](https://docs.mongodb.com/manual/) instance. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|------------------------------------------------------------------------------------------------------------------------------ +mongodb_host | **Required.** Specifies the hostname or address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +mongodb_port | **Required.** The port mongodb is running on. +mongodb_user | **Optional.** The username you want to login as. +mongodb_passwd | **Optional.** The password you want to use for that user. +mongodb_authdb | **Optional.** The database you want to authenticate against. +mongodb_warning | **Optional.** The warning threshold we want to set. +mongodb_critical | **Optional.** The critical threshold we want to set. +mongodb_action | **Required.** The action you want to take. +mongodb_maxlag | **Optional.** Get max replication lag (for replication_lag action only). +mongodb_mappedmemory | **Optional.** Get mapped memory instead of resident (if resident memory can not be read). +mongodb_perfdata | **Optional.** Enable output of Nagios performance data. +mongodb_database | **Optional.** Specify the database to check. +mongodb_alldatabases | **Optional.** Check all databases (action database_size). +mongodb_ssl | **Optional.** Connect using SSL. +mongodb_replicaset | **Optional.** Connect to replicaset. +mongodb_replcheck | **Optional.** If set to true, will enable the mongodb_replicaset value needed for "replica_primary" check. +mongodb_querytype | **Optional.** The query type to check [query\|insert\|update\|delete\|getmore\|command] from queries_per_second. +mongodb_collection | **Optional.** Specify the collection to check. +mongodb_sampletime | **Optional.** Time used to sample number of pages faults. 
+ +#### elasticsearch + +The [check_elasticsearch](https://github.com/anchor/nagios-plugin-elasticsearch) plugin +uses the HTTP API to monitor an [Elasticsearch](https://www.elastic.co/products/elasticsearch) node. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------------|------------------------------------------------------------------------------------------------------- +elasticsearch_host | **Optional.** Hostname or network address to probe. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +elasticsearch_failuredomain | **Optional.** A comma-separated list of ElasticSearch attributes that make up your cluster's failure domain. +elasticsearch_masternodes | **Optional.** Issue a warning if the number of master-eligible nodes in the cluster drops below this number. By default, do not monitor the number of nodes in the cluster. +elasticsearch_port | **Optional.** TCP port to probe. The ElasticSearch API should be listening here. Defaults to 9200. +elasticsearch_prefix | **Optional.** Optional prefix (e.g. 'es') for the ElasticSearch API. Defaults to ''. +elasticsearch_yellowcritical | **Optional.** Instead of issuing a 'warning' for a yellow cluster state, issue a 'critical' alert. Defaults to false. + +#### redis + +The [check_redis.pl](https://github.com/willixix/naglio-plugins/blob/master/check_redis.pl) plugin +uses the `Redis` Perl library to monitor a [Redis](https://redis.io/) instance. The plugin can +measure response time, hitrate, memory utilization, check replication synchronization, etc. It is +also possible to test data in a specified key and calculate averages or summaries on ranges. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------------|-------------------------------------------------------------------------------------------------------------- +redis_hostname | **Required.** Hostname or IP Address to check. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +redis_port | **Optional.** Port number to query. Defaults to "6379". +redis_database | **Optional.** Database name (usually a number) to query, needed for **redis_query**. +redis_password | **Optional.** Password for Redis authentication. Safer alternative is to put them in a file and use **redis_credentials**. +redis_credentials | **Optional.** Credentials file to read for Redis authentication. +redis_timeout | **Optional.** Allows to set timeout for execution of this plugin. +redis_variables | **Optional.** List of variables from info data to do threshold checks on. +redis_warn | **Optional.** This option can only be used if **redis_variables** is used and the number of values listed here must exactly match number of variables specified. +redis_crit | **Optional.** This option can only be used if **redis_variables** is used and the number of values listed here must exactly match number of variables specified. +redis_perfparse | **Optional.** This should only be used with variables and causes variable data not only to be printed as part of main status line but also as perfparse compatible output. Defaults to false. +redis_perfvars | **Optional.** This allows to list variables whose values will go only into perfparse output (and not for threshold checking). +redis_prev_perfdata | **Optional.** If set to true, previous performance data are used to calculate rate of change for counter statistics variables and for proper calculation of hitrate. Defaults to false.
+redis_rate_label | **Optional.** Prefix or Suffix label used to create a new variable which has rate of change of another base variable. You can specify PREFIX or SUFFIX or both as one string separated by ",". Default if not specified is suffix "_rate". +redis_query | **Optional.** Option specifies key to query and optional variable name to assign the results to after. +redis_option | **Optional.** Specifiers are separated by "," and must include NAME or PATTERN. +redis_response_time | **Optional.** If this is used, plugin will measure and output connection response time in seconds. With **redis_perfparse** this would also be provided on perf variables. +redis_hitrate | **Optional.** Calculates the hitrate. The specified values are interpreted as WARNING and CRITICAL thresholds. +redis_memory_utilization | **Optional.** This calculates percent of total memory on system used by redis. Total_memory on server must be specified with **redis_total_memory**. If you specify by itself, the plugin will just output this info. Parameter values are interpreted as WARNING and CRITICAL thresholds. +redis_total_memory | **Optional.** Amount of memory on a system for memory utilization calculation. Use system memory or max_memory setting of redis. +redis_replication_delay | **Optional.** Allows to set threshold on replication delay info. + +#### proxysql + +The [check_proxysql](https://github.com/sysown/proxysql-nagios) plugin, +uses the `proxysql` binary to monitor [proxysql](https://proxysql.com/).
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------------|---------------------------------------------------------------------------------- +proxysql_user | **Optional.** ProxySQL admin username (default=admin) +proxysql_password | **Optional.** ProxySQL admin password (default=admin) +proxysql_host | **Optional.** ProxySQL hostname / IP (default=127.0.0.1) +proxysql_port | **Optional.** ProxySQL admin port (default=6032) +proxysql_defaultfile | **Optional.** ProxySQL defaults file +proxysql_type | **Required.** ProxySQL check type (one of conns,hg,rules,status,var) +proxysql_name | **Optional.** ProxySQL variable name to check +proxysql_lower | **Optional.** Alert if ProxySQL values are LOWER than defined WARN / CRIT thresholds (only applies to 'var' check type) +proxysql_runtime | **Optional.** Force ProxySQL Nagios check to query the runtime_mysql_XXX tables rather than the mysql_XXX tables +proxysql_warning | **Optional.** Warning threshold +proxysql_critical | **Optional.** Critical threshold +proxysql\_include\_hostgroup | **Optional.** ProxySQL hostgroup(s) to include (only applies to '--type hg' checks, accepts comma-separated list) +proxysql\_ignore\_hostgroup | **Optional.** ProxySQL hostgroup(s) to ignore (only applies to '--type hg' checks, accepts comma-separated list) + +#### memcached + +The [check_memcached](https://exchange.icinga.com/exchange/check_memcached) plugin +checks the health of a running [memcached](https://memcached.org/) service. + +On Debian/Ubuntu, it is provided with the `nagios-plugin-contrib` package.
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|---------------------------------------------------------------------------------- +memcached_hostname | **Required.** Hostname or IP address (required) optional ':port' overrides -p +memcached_port | **Optional.** Port number (default: 11211) +memcached_verbose | **Optional.** verbose messages +memcached_keep | **Optional.** Keep up to this many items in the history object in memcached (default: 30) +memcached_minimum_stat_interval | **Optional.** Minimum time interval (in minutes) to use to analyse stats. (default: 30) +memcached_warning_hits_misses | **Optional.** Generate warning if quotient of hits/misses falls below this value (default: 2.0) +memcached_warning_evictions | **Optional.** Generate warning if number of evictions exceeds this threshold. 0=disable. (default: 10) +memcached_timeout | **Optional.** timeout in seconds (default: 1.0) +memcached_key | **Optional.** key name for history object (default: check_memcached) +memcached_expiry | **Optional.** expiry time in seconds for history object (default: 7200) +memcached_performance_output | **Optional.** output performance statistics as rate-per-minute figures (better suited to pnp4nagios) + +### Hardware + +This category includes all plugin check commands for various hardware checks. + +#### hpasm + +The [check_hpasm](https://labs.consol.de/de/nagios/check_hpasm/index.html) plugin +monitors the hardware health of HP Proliant Servers, provided that the `hpasm` +(HP Advanced Server Management) software is installed. It is also able to monitor +the system health of HP Bladesystems and storage systems. + +The plugin can run in two different ways: + +1. Local execution using the `hpasmcli` command line tool. +2. Remote SNMP query which invokes the HP Insight Tools on the remote node. 
+ +You can either set or omit `hpasm_hostname` custom variable and select the corresponding node. + +The `hpasm_remote` attribute enables the plugin to execute remote SNMP queries if set to `true`. +For compatibility reasons this attribute uses `true` as default value, and ensures that +specifying the `hpasm_hostname` always enables remote checks. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|----------------------------------------------------------------------- +hpasm_hostname | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +hpasm_community | **Optional.** SNMP community of the server (SNMP v1/2 only). +hpasm_protocol | **Optional.** The SNMP protocol to use (default: 2c, other possibilities: 1,3). +hpasm_port | **Optional.** The SNMP port to use (default: 161). +hpasm_blacklist | **Optional.** Blacklist some (missing/failed) components. +hpasm_ignore-dimms | **Optional.** Ignore "N/A"-DIMM status on misc. servers (e.g. older DL320). +hpasm_ignore-fan-redundancy | **Optional.** Ignore missing redundancy partners. +hpasm_customthresholds | **Optional.** Use custom thresholds for certain temperatures. +hpasm_eventrange | **Optional.** Period of time before critical IML events respectively become warnings or vanish. A range is described as a number and a unit (s, m, h, d), e.g. --eventrange 1h/20m. +hpasm_perfdata | **Optional.** Output performance data. If your performance data string becomes too long and is truncated by Nagios, then you can use --perfdata=short instead. This will output temperature tags without location information. +hpasm_username | **Optional.** The securityName for the USM security model (SNMPv3 only). +hpasm_authpassword | **Optional.** The authentication password for SNMPv3. 
+hpasm_authprotocol | **Optional.** The authentication protocol for SNMPv3 (md5\|sha). +hpasm_privpassword | **Optional.** The password for authPriv security level. +hpasm_privprotocol | **Optional.** The private protocol for SNMPv3 (des\|aes\|aes128\|3des\|3desde). +hpasm_servertype | **Optional.** The type of the server: proliant (default) or bladesystem. +hpasm_eval-nics | **Optional.** Check network interfaces (and groups). Try it and report to me what you think about it. I need to build up some know how on this subject. If you get an error and think, it is not justified for your configuration, please tell me about it. (always send the output of "snmpwalk -On .... 1.3.6.1.4.1.232" and a description how you setup your nics and why it is correct opposed to the plugin's error message.) +hpasm_remote | **Optional.** Run remote SNMP checks if enabled. Otherwise checks are executed locally using the `hpasmcli` binary. Defaults to `true`. + +#### openmanage + +The [check_openmanage](http://folk.uio.no/trondham/software/check_openmanage.html) plugin +checks the hardware health of Dell PowerEdge (and some PowerVault) servers. +It uses the Dell OpenManage Server Administrator (OMSA) software, which must be running on +the monitored system. check_openmanage can be used remotely with SNMP or locally with icinga2 agent, +check_by_ssh or similar, whichever suits your needs and particular taste. + +The plugin checks the health of the storage subsystem, power supplies, memory modules, +temperature probes etc., and gives an alert if any of the components are faulty or operate outside normal parameters.
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|----------------------------------------------------------------------- +openmanage_all | **Optional.** Check everything, even log content +openmanage_blacklist | **Optional.** Blacklist missing and/or failed components +openmanage_check | **Optional.** Fine-tune which components are checked +openmanage_community | **Optional.** SNMP community string [default=public] +openmanage_config | **Optional.** Specify configuration file +openmanage_critical | **Optional.** Custom temperature critical limits +openmanage_extinfo | **Optional.** Append system info to alerts +openmanage_fahrenheit | **Optional.** Use Fahrenheit as temperature unit +openmanage_hostname | **Optional.** Hostname or IP (required for SNMP) +openmanage_htmlinfo | **Optional.** HTML output with clickable links +openmanage_info | **Optional.** Prefix any alerts with the service tag +openmanage_ipv6 | **Optional.** Use IPv6 instead of IPv4 [default=no] +openmanage_legacy_perfdata | **Optional.** Legacy performance data output +openmanage_no_storage | **Optional.** Don't check storage +openmanage_only | **Optional.** Only check a certain component or alert type +openmanage_perfdata | **Optional.** Output performance data [default=no] +openmanage_port | **Optional.** SNMP port number [default=161] +openmanage_protocol | **Optional.** SNMP protocol version [default=2c] +openmanage_short_state | **Optional.** Prefix alerts with alert state abbreviated +openmanage_show_blacklist | **Optional.** Show blacklistings in OK output +openmanage_state | **Optional.** Prefix alerts with alert state +openmanage_tcp | **Optional.** Use TCP instead of UDP [default=no] +openmanage_timeout | **Optional.** Plugin timeout in seconds [default=30] +openmanage_vdisk_critical | **Optional.** Make any alerts on virtual disks critical +openmanage_warning | **Optional.** 
Custom temperature warning limits + +#### lmsensors + +The [check_lmsensors](https://github.com/jackbenny/check_temp) plugin, +uses the `lm-sensors` binary to monitor temperature sensors. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|---------------------------------------------------------------------------------- +lmsensors_warning | **Required.** Exit with WARNING status if above INTEGER degrees +lmsensors_critical | **Required.** Exit with CRITICAL status if above INTEGER degrees +lmsensors_sensor | **Optional.** Set what to monitor, for example CPU or MB (or M/B). Check sensors for the correct word. Default is CPU. + +#### hddtemp + +The [check_hddtemp](https://github.com/vint21h/nagios-check-hddtemp) plugin, +uses the `hddtemp` binary to monitor hard drive temperature. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|---------------------------------------------------------------------------------- +hddtemp_server | **Required.** server name or address +hddtemp_port | **Optional.** port number +hddtemp_devices | **Optional.** comma separated devices list, or empty for all devices in hddtemp response +hddtemp_separator | **Optional.** hddtemp separator +hddtemp_warning | **Required.** warning temperature +hddtemp_critical | **Required.** critical temperature +hddtemp_timeout | **Optional.** receiving data from hddtemp operation network timeout +hddtemp_performance | **Optional.** If set, return performance data +hddtemp_quiet | **Optional.** If set, be quiet + +The following sane default values are specified: +``` +vars.hddtemp_server = "127.0.0.1" +vars.hddtemp_warning = 55 +vars.hddtemp_critical = 60 +vars.hddtemp_performance = true +vars.hddtemp_timeout = 5 +``` + +#### adaptec-raid + +The
[check_adaptec_raid](https://github.com/thomas-krenn/check_adaptec_raid) plugin +uses the `arcconf` binary to monitor Adaptec RAID controllers. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|----------------------------------------------------------------------- +adaptec_controller_number | **Required.** Controller number to monitor. +arcconf_path | **Required.** Path to the `arcconf` binary, e.g. "/sbin/arcconf". + +#### lsi-raid + +The [check_lsi_raid](https://github.com/thomas-krenn/check_lsi_raid) plugin +uses the `storcli` binary to monitor MegaRAID RAID controllers. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|----------------------------------------------------------------------- +lsi_controller_number | **Optional.** Controller number to monitor. +storcli_path | **Optional.** Path to the `storcli` binary, e.g. "/usr/sbin/storcli". +lsi_enclosure_id | **Optional.** Enclosure numbers to be checked, comma-separated. +lsi_ld_id | **Optional.** Logical devices to be checked, comma-separated. +lsi_pd_id | **Optional.** Physical devices to be checked, comma-separated. +lsi_temp_warning | **Optional.** RAID controller warning temperature. +lsi_temp_critical | **Optional.** RAID controller critical temperature. +lsi_pd_temp_warning | **Optional.** Disk warning temperature. +lsi_pd_temp_critical | **Optional.** Disk critical temperature. +lsi_bbu_temp_warning | **Optional.** Battery warning temperature. +lsi_bbu_temp_critical | **Optional.** Battery critical temperature. +lsi_cv_temp_warning | **Optional.** CacheVault warning temperature. +lsi_cv_temp_critical | **Optional.** CacheVault critical temperature. +lsi_ignored_media_errors | **Optional.** Warning threshold for media errors. 
+lsi_ignored_other_errors | **Optional.** Warning threshold for other errors. +lsi_ignored_predictive_fails | **Optional.** Warning threshold for predictive failures. +lsi_ignored_shield_counters | **Optional.** Warning threshold for shield counter. +lsi_ignored_bbm_counters | **Optional.** Warning threshold for BBM counter. +lsi_bbu | **Optional.** Define if BBU is present and its state should be checked. +lsi_noenclosures | **Optional.** If set to true, does not check enclosures. +lsi_nosudo | **Optional.** If set to true, does not use sudo when running storcli. +lsi_nocleanlogs | **Optional.** If set to true, does not clean up the log files after executing storcli checks. + + +#### smart-attributes + +The [check_smart_attributes](https://github.com/thomas-krenn/check_smart_attributes) plugin +uses the `smartctl` binary to monitor SMART values of SSDs and HDDs. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|----------------------------------------------------------------------- +smart_attributes_config_path | **Required.** Path to the smart attributes config file (e.g. check_smartdb.json). +smart_attributes_device | **Required.** Device name (e.g. /dev/sda) to monitor. + + +### IcingaCLI + +This category includes all plugins using the icingacli provided by Icinga Web 2. + +The user running Icinga 2 needs sufficient permissions to read the Icinga Web 2 configuration directory, e.g. `usermod -a -G icingaweb2 icinga`. You need to restart, not reload, Icinga 2 for the new group membership to work. + +#### Business Process + +This subcommand is provided by the [business process module](https://exchange.icinga.com/icinga/Business+Process) +and executed as `icingacli businessprocess` CLI command. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------------------------|----------------------------------------------------------------------------------------- +icingacli_businessprocess_process | **Required.** Business process to monitor. +icingacli_businessprocess_config | **Optional.** Configuration file containing your business process without file extension. +icingacli_businessprocess_details | **Optional.** Get details for root cause analysis. Defaults to false. +icingacli_businessprocess_statetype | **Optional.** Define which state type to look at, `soft` or `hard`. Overrides the default value inside the businessprocess module, if configured. + +#### Director + +This subcommand is provided by the [director module](https://github.com/Icinga/icingaweb2-module-director) > 1.4.2 and executed as `icingacli director health check`. Please refer to the [documentation](https://github.com/Icinga/icingaweb2-module-director/blob/master/doc/60-CLI.md#health-check-plugin) for all available sub-checks. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------------------------|----------------------------------------------------------------------------------------- +icingacli_director_check | **Optional.** Run only a specific test suite. +icingacli_director_db | **Optional.** Use a specific Icinga Web DB resource. + +#### Elasticsearch + +This subcommand is provided by the [elasticsearch module](https://github.com/Icinga/icingaweb2-module-elasticsearch) and executed as `icingacli elasticsearch check`. + +* The value of `icingacli_elasticsearch_instance` is the same as in the configuration of the module. +* The value of `icingacli_elasticsearch_filter` is a filter for events in Icinga Web 2 syntax, e.g. 
`"beat.hostname=www.example.com" AND severity=critical` +* The thresholds are just numerical values. They get checked against how many events match the filter within the given timeframe. +* The value of `icingacli_elasticsearch_index` is an index pattern. e.g. `logstash*` + +Name | Description +------------------------------------------|----------------------------------------------------------------------------------------- +icingacli_elasticsearch_instance | **Required.** The Elasticsearch to connect to +icingacli_elasticsearch_index | **Required.** Index pattern to use when searching +icingacli_elasticsearch_critical | **Required.** Critical threshold +icingacli_elasticsearch_warning | **Required.** Warning threshold +icingacli_elasticsearch_filter | **Required.** Filter for events +icingacli_elasticsearch_from | **Optional.** Negative value of time to search from now (Default: -5m) + +#### x509 + +This subcommand is provided by the [x509 module](https://github.com/Icinga/icingaweb2-module-x509) and executed as `icingacli x509 check host`. Please refer to the [documentation](https://github.com/Icinga/icingaweb2-module-x509/blob/master/doc/10-Monitoring.md#host-check-command) for more information. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------------------------|----------------------------------------------------------------------------------------- +icingacli_x509_ip | **Required.** A hosts IP address [or] +icingacli_x509_host | **Required.** A hosts name +icingacli_x509_port | **Optional.** The port to check in particular +icingacli_x509_warning | **Optional.** Less remaining time results in state WARNING (Default: 25%) +icingacli_x509_critical | **Optional.** Less remaining time results in state CRITICAL (Default: 10%) +icingacli_x509_allow_self_signed | **Optional.** Ignore if a certificate or its issuer has been self-signed (Default: false) + +### IPMI Devices + +This category includes all plugins for IPMI devices. + +#### ipmi-sensor + +The [check_ipmi_sensor](https://github.com/thomas-krenn/check_ipmi_sensor_v3) plugin +uses the `ipmimonitoring` binary to monitor sensor data for IPMI devices. Please +read the [documentation](https://www.thomas-krenn.com/en/wiki/IPMI_Sensor_Monitoring_Plugin) +for installation and configuration details. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|----------------------------------------------------------------------------------------------------- +ipmi_address | **Required.** Specifies the remote host (IPMI device) to check. Defaults to "$address$". +ipmi_config_file | **Optional.** Path to the FreeIPMI configuration file. It should contain IPMI username, IPMI password, and IPMI privilege-level. +ipmi_username | **Optional.** The IPMI username. +ipmi_password | **Optional.** The IPMI password. +ipmi_privilege_level | **Optional.** The IPMI privilege level of the IPMI user. 
+ipmi_backward_compatibility_mode | **Optional.** Enable backward compatibility mode, useful for FreeIPMI 0.5.\* (this omits FreeIPMI options "--quiet-cache" and "--sdr-cache-recreate"). +ipmi_sensor_type | **Optional.** Limit sensors to query based on IPMI sensor type. Examples for IPMI sensor types are 'Fan', 'Temperature' and 'Voltage'. +ipmi_sel_type | **Optional.** Limit SEL entries to specific types, run 'ipmi-sel -L' for a list of types. All sensors are populated to the SEL and per default all sensor types are monitored. +ipmi_exclude_sensor_id | **Optional.** Exclude sensor matching ipmi_sensor_id. +ipmi_exclude_sensor | **Optional.** Exclude sensor based on IPMI sensor type. (Comma-separated) +ipmi_exclude_sel | **Optional.** Exclude SEL entries of specific sensor types. (comma-separated list). +ipmi_sensor_id | **Optional.** Include sensor matching ipmi_sensor_id. +ipmi_protocol_lan_version | **Optional.** Change the protocol LAN version. Defaults to "LAN_2_0". +ipmi_number_of_active_fans | **Optional.** Number of fans that should be active. Otherwise a WARNING state is returned. +ipmi_show_fru | **Optional.** Print the product serial number if it is available in the IPMI FRU data. +ipmi_show_assettag | **Optional.** Print the assettag if it is available in the IPMI FRU data. (--fru is mandatory) +ipmi_show_board | **Optional.** Print additional motherboard information if it is available in the IPMI FRU data. (--fru is mandatory) +ipmi_no_sel_checking | **Optional.** Turn off system event log checking via ipmi-sel. +ipmi_no_thresholds | **Optional.** Turn off performance data thresholds from output-sensor-thresholds. +ipmi_verbose | **Optional.** Be verbose: multi-line output, also with additional details for warnings. +ipmi_debug | **Optional.** Be verbose: debugging output, followed by normal multi-line output. +ipmi_unify_file | **Optional.** Path to the unify file to unify sensor names. 
+ +#### ipmi-alive + +The `ipmi-alive` check command allows you to create a ping check for the IPMI interface. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------------|----------------------------------------------------------------------------------------------------- +ping_address | **Optional.** The address of the IPMI interface. Defaults to "$address$" if the IPMI interface's `address` attribute is set, "$address6$" otherwise. +ping_wrta | **Optional.** The RTA warning threshold in milliseconds. Defaults to 5000. +ping_wpl | **Optional.** The packet loss warning threshold in %. Defaults to 100. +ping_crta | **Optional.** The RTA critical threshold in milliseconds. Defaults to 5000. +ping_cpl | **Optional.** The packet loss critical threshold in %. Defaults to 100. +ping_packets | **Optional.** The number of packets to send. Defaults to 1. +ping_timeout | **Optional.** The plugin timeout in seconds. Defaults to 0 (no timeout). + + +### Log Management + +This category includes all plugins for log management, for example [Logstash](https://www.elastic.co/products/logstash). + +#### logstash + +The [logstash](https://github.com/NETWAYS/check_logstash) plugin connects to +the Node API of Logstash. This plugin requires at least Logstash version 5.0.x. + +The Node API is not activated by default. You have to configure your Logstash +installation in order to allow plugin connections. + +Name | Description +---------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +logstash_hostname | **Optional.** Hostname where Logstash is running. Defaults to `check_address` +logstash_port | **Optional.** Port where Logstash is listening for API requests. 
Defaults to 9600 +logstash_filedesc_warn | **Optional.** Warning threshold of file descriptor usage in percent. Defaults to 85 (percent). +logstash_filedesc_crit | **Optional.** Critical threshold of file descriptor usage in percent. Defaults to 95 (percent). +logstash_heap_warn | **Optional.** Warning threshold of heap usage in percent. Defaults to 70 (percent). +logstash_heap_crit | **Optional.** Critical threshold of heap usage in percent. Defaults to 80 (percent). +logstash_inflight_warn | **Optional.** Warning threshold of inflight events. +logstash_inflight_crit | **Optional.** Critical threshold of inflight events. +logstash_cpu_warn | **Optional.** Warning threshold for cpu usage in percent. +logstash_cpu_crit | **Optional.** Critical threshold for cpu usage in percent. + +#### logfiles + +The [logfiles](https://labs.consol.de/nagios/check_logfiles/) plugin finds +specified patterns in log files. + +Name | Description +----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +logfiles_tag | **Optional.** A short unique descriptor for this search. It will appear in the output of the plugin and is used to separate the different services. +logfiles_logfile | **Optional.** This is the name of the log file you want to scan. +logfiles_rotation | **Optional.** This is the method how log files are rotated. One of the predefined methods or a regular expression, which helps identify the rotated archives. If this key is missing, check_logfiles assumes that the log file will be simply overwritten instead of rotated. +logfiles_critical_pattern | **Optional.** A regular expression which will trigger a critical error. +logfiles_warning_pattern | **Optional.** A regular expression which will trigger a warning error. 
+logfiles_critical_exception | **Optional.** A regular expression, the exceptions which are not counted as critical errors. +logfiles_warning_exception | **Optional.** A regular expression, the exceptions which are not counted as warning errors. +logfiles_ok_pattern | **Optional.** A regular expression which resets the error counters. +logfiles_no_protocol | **Optional.** Normally all the matched lines are written into a protocol file with this file’s name appearing in the plugin’s output. This option switches this off. +logfiles_syslog_server | **Optional.** With this option you limit the pattern matching to lines originating from the host check_logfiles is running on. +logfiles_syslog_client | **Optional.** With this option you limit the pattern matching to lines originating from the host named in this option. +logfiles_sticky | **Optional.** Errors are propagated through successive runs. +logfiles_unstick | **Optional.** Resets sticky errors. +logfiles_config | **Optional.** The name of a configuration file. +logfiles_configdir | **Optional.** The name of a configuration directory. Configfiles ending in .cfg or .conf are (recursively) imported. +logfiles_searches | **Optional.** A list of tags of those searches which are to be run. Using this parameter, not all searches listed in the config file are run, but only those selected. +logfiles_selectedsearches | **Optional.** A list of tags of those searches which are to be run. Using this parameter, not all searches listed in the config file are run, but only those selected. +logfiles_report | **Optional.** This option turns on multiline output (Default: off). The setting html generates a table which display the last hits in the service details view. Possible values are: short, long, html or off. +logfiles_max_length | **Optional.** With this parameter long lines are truncated (Default: off). Some programs (e.g. 
TrueScan) generate entries in the eventlog of such a length, that the output of the plugin becomes longer than 1024 characters. NSClient++ discards these. +logfiles_winwarncrit | **Optional.** With this parameter messages in the eventlog are classified by the type WARNING/ERROR (Default: off). Replaces or complements warning/criticalpattern. +logfiles_run_unique | **Optional.** This parameter prevents check_logfiles from starting when there’s already another instance using the same config file. (exits with UNKNOWN). +logfiles_timeout | **Optional.** This parameter causes an abort of a running search after a defined number of seconds. It is aborted in a controlled manner, so that the lines which have been read so far, are used for the computation of the final result. +logfiles_warning | **Optional.** Complex handler-scripts can be provided with a warning-parameter this way. Inside the scripts the value is accessible as the macro CL_WARNING. +logfiles_critical | **Optional.** Complex handler-scripts can be provided with a critical-parameter this way. Inside the scripts the value is accessible as the macro CL_CRITICAL. + + +### Metrics + +This category includes all plugins for metric-based checks. + +#### graphite + +The [check_graphite](https://github.com/obfuscurity/nagios-scripts) plugin +uses the `rest-client` Ruby library to monitor a [Graphite](https://graphiteapp.org) instance. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------------------|----------------------------------------------------------------------------------------------------- +graphite_url | **Required.** Target url. +graphite_metric | **Required.** Metric path string. +graphite_shortname | **Optional.** Metric short name (used for performance data). +graphite_duration | **Optional.** Length, in minutes, of data to parse (default: 5). 
+graphite_function | **Optional.** Function applied to metrics for thresholds (default: average). +graphite_warning | **Required.** Warning threshold. +graphite_critical | **Required.** Critical threshold. +graphite_units | **Optional.** Adds a text tag to the metric count in the plugin output. Useful to identify the metric units. Doesn't affect data queries. +graphite_message | **Optional.** Text message to output (default: "metric count:"). +graphite_zero_on_error | **Optional.** Return 0 on a graphite 500 error. +graphite_link_graph | **Optional.** Add a link in the plugin output, showing a 24h graph for this metric in graphite. + +### Network Components + +This category includes all plugins for various network components like routers, switches and firewalls. + +#### interfacetable + +The [check_interfacetable_v3t](http://www.tontonitch.com/tiki/tiki-index.php?page=Nagios+plugins+-+interfacetable_v3t) plugin +generates a html page containing information about the monitored node and all of its interfaces. + +The Git repository is located on [GitHub](https://github.com/Tontonitch/interfacetable_v3t). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------------------|----------------------------------------------------------------------------------------------------- +interfacetable_hostquery | **Required.** Specifies the remote host to poll. Defaults to "$address$". +interfacetable_hostdisplay | **Optional.** Specifies the hostname to display in the HTML link. Defaults to "$host.display_name$". +interfacetable_regex | **Optional.** Interface names and property names for some other options will be interpreted as regular expressions. Defaults to false. +interfacetable_outputshort | **Optional.** Reduce the verbosity of the plugin output. Defaults to false. +interfacetable_exclude | **Optional.** Comma separated list of interfaces globally excluded from the monitoring. 
+interfacetable_include | **Optional.** Comma separated list of interfaces globally included in the monitoring. +interfacetable_aliasmatching | **Optional.** Allow you to specify alias in addition to interface names. Defaults to false. +interfacetable_excludetraffic | **Optional.** Comma separated list of interfaces excluded from traffic checks. +interfacetable_includetraffic | **Optional.** Comma separated list of interfaces included for traffic checks. +interfacetable_warningtraffic | **Optional.** Interface traffic load percentage leading to a warning alert. +interfacetable_criticaltraffic | **Optional.** Interface traffic load percentage leading to a critical alert. +interfacetable_pkt | **Optional.** Add unicast/non-unicast pkt stats for each interface. +interfacetable_trafficwithpkt | **Optional.** Enable traffic calculation using pkt counters instead of octet counters. Useful when using 32-bit counters to track the load on > 1GbE interfaces. Defaults to false. +interfacetable_trackproperty | **Optional.** List of tracked properties. +interfacetable_excludeproperty | **Optional.** Comma separated list of interfaces excluded from the property tracking. +interfacetable_includeproperty | **Optional.** Comma separated list of interfaces included in the property tracking. +interfacetable_community | **Optional.** Specifies the snmp v1/v2c community string. Defaults to "public" if using snmp v1/v2c, ignored using v3. +interfacetable_snmpv2 | **Optional.** Use snmp v2c. Defaults to false. +interfacetable_login | **Optional.** Login for snmpv3 authentication. +interfacetable_passwd | **Optional.** Auth password for snmpv3 authentication. +interfacetable_privpass | **Optional.** Priv password for snmpv3 authentication. +interfacetable_protocols | **Optional.** Authentication protocol,Priv protocol for snmpv3 authentication. +interfacetable_domain | **Optional.** SNMP transport domain. +interfacetable_contextname | **Optional.** Context name for the snmp requests. 
+interfacetable_port | **Optional.** SNMP port. Defaults to standard port. +interfacetable_64bits | **Optional.** Use SNMP 64-bits counters. Defaults to false. +interfacetable_maxrepetitions | **Optional.** Increasing this value may enhance snmp query performances by gathering more results at one time. +interfacetable_snmptimeout | **Optional.** Define the Transport Layer timeout for the snmp queries. +interfacetable_snmpretries | **Optional.** Define the number of times to retry sending a SNMP message. +interfacetable_snmpmaxmsgsize | **Optional.** Size of the SNMP message in octets, useful in case of too long responses. Be careful with network filters. Range 484 - 65535. Apply only to netsnmp perl bindings. The default is 1472 octets for UDP/IPv4, 1452 octets for UDP/IPv6, 1460 octets for TCP/IPv4, and 1440 octets for TCP/IPv6. +interfacetable_unixsnmp | **Optional.** Use unix snmp utilities for snmp requests. Defaults to false, which means use the perl bindings. +interfacetable_enableperfdata | **Optional.** Enable port performance data. Defaults to false. +interfacetable_perfdataformat | **Optional.** Define which performance data will be generated. Possible values are "full" (default), "loadonly", "globalonly". +interfacetable_perfdatathreshold | **Optional.** Define which thresholds are printed in the generated performance data. Possible values are "full" (default), "loadonly", "globalonly". +interfacetable_perfdatadir | **Optional.** When specified, the performance data are also written directly to a file, in the specified location. +interfacetable_perfdataservicedesc | **Optional.** Specify additional parameters for output performance data to PNP. Defaults to "$service.name$", only affects **interfacetable_perfdatadir**. +interfacetable_grapher | **Optional.** Specify the used graphing solution. Possible values are "pnp4nagios" (default), "nagiosgrapher", "netwaysgrapherv2" and "ingraph". +interfacetable_grapherurl | **Optional.** Graphing system url. 
Default depends on **interfacetable_grapher**. +interfacetable_portperfunit | **Optional.** Traffic could be reported in bits (counters) or in bps (calculated value). +interfacetable_nodetype | **Optional.** Specify the node type, for specific information to be printed / specific oids to be used. Possible values: "standard" (default), "cisco", "hp", "netscreen", "netapp", "bigip", "bluecoat", "brocade", "brocade-nos", "nortel", "hpux". +interfacetable_duplex | **Optional.** Add the duplex mode property for each interface in the interface table. Defaults to false. +interfacetable_stp | **Optional.** Add the stp state property for each interface in the interface table. Defaults to false. +interfacetable_vlan | **Optional.** Add the vlan attribution property for each interface in the interface table. Defaults to false. This option is available only for the following nodetypes: "cisco", "hp", "nortel" +interfacetable_noipinfo | **Optional.** Remove the ip information for each interface from the interface table. Defaults to false. +interfacetable_alias | **Optional.** Add the alias information for each interface in the interface table. Defaults to false. +interfacetable_accessmethod | **Optional.** Access method for a shortcut to the host in the HTML page. Format is : [:] Where method can be: ssh, telnet, http or https. +interfacetable_htmltablelinktarget | **Optional.** Specifies the windows or the frame where the [details] link will load the generated html page. Possible values are: "_blank", "_self" (default), "_parent", "_top", or a frame name. +interfacetable_delta | **Optional.** Set the delta used for interface throughput calculation in seconds. +interfacetable_ifs | **Optional.** Input field separator. Defaults to ",". +interfacetable_cache | **Optional.** Define the retention time of the cached data in seconds. +interfacetable_noifloadgradient | **Optional.** Disable color gradient from green over yellow to red for the load percentage. Defaults to false. 
+interfacetable_nohuman | **Optional.** Do not translate bandwidth usage in human readable format. Defaults to false. +interfacetable_snapshot | **Optional.** Force the plugin to run as if it were the first launch. Defaults to false. +interfacetable_timeout | **Optional.** Define the global timeout limit of the plugin in seconds. Defaults to "15s". +interfacetable_css | **Optional.** Define the css stylesheet used by the generated html files. Possible values are "classic", "icinga" or "icinga-alternate1". +interfacetable_config | **Optional.** Specify a config file to load. +interfacetable_noconfigtable | **Optional.** Disable configuration table on the generated HTML page. Defaults to false. +interfacetable_notips | **Optional.** Disable the tips in the generated html tables. Defaults to false. +interfacetable_defaulttablesorting | **Optional.** Default table sorting can be "index" (default) or "name". +interfacetable_tablesplit | **Optional.** Generate multiple interface tables, one per interface type. Defaults to false. +interfacetable_notype | **Optional.** Remove the interface type for each interface. Defaults to false. + +#### iftraffic + +The [check_iftraffic](https://exchange.icinga.com/exchange/iftraffic) plugin +checks the utilization of a given interface name using the SNMP protocol. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|--------------------------------------------------------- +iftraffic_address | **Required.** Specifies the remote host. Defaults to "$address$". +iftraffic_community | **Optional.** SNMP community. Defaults to "public" if omitted. +iftraffic_version | **Optional.** SNMP version to use. Defaults to "1" if omitted. Requires v1.0.2+. +iftraffic_interface | **Required.** Queried interface name. +iftraffic_bandwidth | **Required.** Interface maximum speed in kilo/mega/giga/bits per second. 
+iftraffic_units | **Optional.** Interface units can be one of these values: `g` (gigabits/s),`m` (megabits/s), `k` (kilobits/s),`b` (bits/s) +iftraffic_warn | **Optional.** Percent of bandwidth usage necessary to result in warning status (defaults to `85`). +iftraffic_crit | **Optional.** Percent of bandwidth usage necessary to result in critical status (defaults to `98`). +iftraffic_max_counter | **Optional.** Maximum counter value of net devices in kilo/mega/giga/bytes. + +#### iftraffic64 + +The [check_iftraffic64](https://exchange.icinga.com/exchange/check_iftraffic64) plugin +checks the utilization of a given interface name using the SNMP protocol. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|--------------------------------------------------------- +iftraffic64_address | **Required.** Specifies the remote host. Defaults to "$address$". +iftraffic64_community | **Optional.** SNMP community. Defaults to "public" if omitted. +iftraffic64_interface | **Required.** Queried interface name. +iftraffic64_bandwidth | **Required.** Interface maximum speed in kilo/mega/giga/bits per second. +iftraffic64_units | **Optional.** Interface units can be one of these values: `g` (gigabits/s),`m` (megabits/s), `k` (kilobits/s),`b` (bits/s) +iftraffic64_warn | **Optional.** Percent of bandwidth usage necessary to result in warning status (defaults to `85`). +iftraffic64_crit | **Optional.** Percent of bandwidth usage necessary to result in critical status (defaults to `98`). +iftraffic64_max_counter | **Optional.** Maximum counter value of net devices in kilo/mega/giga/bytes. + +#### interfaces + +The [check_interfaces](https://git.netways.org/plugins/check_interfaces) plugin +uses SNMP to monitor network interfaces and their utilization. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------|--------------------------------------------------------- +interfaces_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +interfaces_regex | **Optional.** Interface list regexp. +interfaces_exclude_regex | **Optional.** Interface list negative regexp. +interfaces_errors | **Optional.** Number of in errors (CRC errors for cisco) to consider a warning (default 50). +interface_out_errors | **Optional.** Number of out errors (collisions for cisco) to consider a warning (default same as in errors). +interfaces_perfdata | **Optional.** perfdata from last check result. +interfaces_prefix | **Optional.** Prefix interface names with this label. +interfaces_lastcheck | **Optional.** Last checktime (unixtime). +interfaces_bandwidth | **Optional.** Bandwidth warn level in percent. +interfaces_speed | **Optional.** Override speed detection with this value (bits per sec). +interfaces_trim | **Optional.** Cut this number of characters from the start of interface descriptions. +interfaces_mode | **Optional.** Special operating mode (default,cisco,nonbulk,bintec). +interfaces_auth_proto | **Optional.** SNMPv3 Auth Protocol (SHA\|MD5) +interfaces_auth_phrase | **Optional.** SNMPv3 Auth Phrase +interfaces_priv_proto | **Optional.** SNMPv3 Privacy Protocol (AES\|DES) +interfaces_priv_phrase | **Optional.** SNMPv3 Privacy Phrase +interfaces_user | **Optional.** SNMPv3 User +interfaces_down_is_ok | **Optional.** Disables critical alerts for down interfaces. +interfaces_aliases | **Optional.** Retrieves the interface description. +interfaces_match_aliases | **Optional.** Also match against aliases (Option --aliases automatically enabled). +interfaces_timeout | **Optional.** Sets the SNMP timeout (in ms). 
+interfaces_sleep | **Optional.** Sleep between every SNMP query (in ms). +interfaces_names | **Optional.** If set to true, use ifName instead of ifDescr. + +#### nwc_health + +The [check_nwc_health](https://labs.consol.de/de/nagios/check_nwc_health/index.html) plugin +uses SNMP to monitor network components. The plugin is able to generate interface statistics, +check hardware (CPU, memory, fan, power, etc.), monitor firewall policies, HSRP, load-balancer +pools, processor and memory usage. + +Currently the following network components are supported: Cisco IOS, Cisco Nexus, Cisco ASA, +Cisco PIX, F5 BIG-IP, CheckPoint Firewall1, Juniper NetScreen, HP Procurve, Nortel, Brocade 4100/4900, +EMC DS 4700, EMC DS 24, Allied Telesyn, Blue Coat SG600, Cisco Wireless Lan Controller 5500, +Brocade ICX6610-24-HPOE, Cisco UC Telefonzeugs, FOUNDRY-SN-AGENT-MIB, FRITZ!BOX 7390, FRITZ!DECT 200, +Juniper IVE, Pulse-Gateway MAG4610, Cisco IronPort AsyncOS, Foundry, etc. A complete list can be +found in the plugin [documentation](https://labs.consol.de/nagios/check_nwc_health/index.html). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|--------------------------------------------------------- +nwc_health_hostname | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +nwc_health_mode | **Optional.** The plugin mode. A list of all available modes can be found in the [plugin documentation](https://labs.consol.de/nagios/check_nwc_health/index.html). +nwc_health_timeout | **Optional.** Seconds before plugin times out (default: 15) +nwc_health_blacklist | **Optional.** Blacklist some (missing/failed) components. +nwc_health_port | **Optional.** The SNMP port to use (default: 161). 
+nwc_health_domain | **Optional.** The transport domain to use (default: udp/ipv4, other possible values: udp6, udp/ipv6, tcp, tcp4, tcp/ipv4, tcp6, tcp/ipv6). +nwc_health_protocol | **Optional.** The SNMP protocol to use (default: 2c, other possibilities: 1,3). +nwc_health_community | **Optional.** SNMP community of the server (SNMP v1/2 only). +nwc_health_username | **Optional.** The securityName for the USM security model (SNMPv3 only). +nwc_health_authpassword | **Optional.** The authentication password for SNMPv3. +nwc_health_authprotocol | **Optional.** The authentication protocol for SNMPv3 (md5\|sha). +nwc_health_privpassword | **Optional.** The password for authPriv security level. +nwc_health_privprotocol | **Optional.** The private protocol for SNMPv3 (des\|aes\|aes128\|3des\|3desde). +nwc_health_contextengineid | **Optional.** The context engine id for SNMPv3 (10 to 64 hex characters). +nwc_health_contextname | **Optional.** The context name for SNMPv3 (empty represents the default context). +nwc_health_community2 | **Optional.** SNMP community which can be used to switch the context during runtime. +nwc_health_name | **Optional.** The name of an interface (ifDescr). +nwc_health_drecksptkdb | **Optional.** This parameter must be used instead of --name, because Devel::ptkdb is stealing the latter from the command line. +nwc_health_alias | **Optional.** The alias name of a 64bit-interface (ifAlias) +nwc_health_regexp | **Optional.** A flag indicating that --name is a regular expression +nwc_health_ifspeedin | **Optional.** Override the ifspeed oid of an interface (only inbound) +nwc_health_ifspeedout | **Optional.** Override the ifspeed oid of an interface (only outbound) +nwc_health_ifspeed | **Optional.** Override the ifspeed oid of an interface +nwc_health_units | **Optional.** One of %, B, KB, MB, GB, Bit, KBi, MBi, GBi. (used for e.g. mode interface-usage) +nwc_health_name2 | **Optional.** The secondary name of a component. 
+nwc_health_name3 | **Optional.** The tertiary name of a component. +nwc_health_role | **Optional.** The role of this device in a hsrp group (active/standby/listen). +nwc_health_report | **Optional.** Can be used to shorten the output. Possible values are: 'long' (default), 'short' (to shorten if available), or 'html' (to produce some html outputs if available) +nwc_health_lookback | **Optional.** The amount of time you want to look back when calculating average rates. Use it for mode interface-errors or interface-usage. Without --lookback the time between two runs of check_nwc_health is the base for calculations. If you want your checkresult to be based for example on the past hour, use --lookback 3600. +nwc_health_warning | **Optional.** The warning threshold +nwc_health_critical | **Optional.** The critical threshold +nwc_health_warningx | **Optional.** The extended warning thresholds +nwc_health_criticalx | **Optional.** The extended critical thresholds +nwc_health_mitigation | **Optional.** The parameter allows you to change a critical error to a warning (1) or ok (0). +nwc_health_selectedperfdata | **Optional.** The parameter allows you to limit the list of performance data. It's a perl regexp. Only matching perfdata show up in the output. +nwc_health_morphperfdata | **Optional.** The parameter allows you to change performance data labels. It's a perl regexp and a substitution. --morphperfdata '(.*)ISATAP(.*)'='$1patasi$2' +nwc_health_negate | **Optional.** The parameter allows you to map exit levels, such as warning=critical. +nwc_health_mymodules-dyn-dir | **Optional.** A directory where own extensions can be found. +nwc_health_servertype | **Optional.** The type of the network device: cisco (default). Use it if auto-detection is not possible. +nwc_health_statefilesdir | **Optional.** An alternate directory where the plugin can save files. +nwc_health_oids | **Optional.** A list of oids which are downloaded and written to a cache file. 
Use it together with --mode oidcache. +nwc_health_offline | **Optional.** The maximum number of seconds since the last update of cache file before it is considered too old. +nwc_health_multiline | **Optional.** Multiline output + +#### printer_health + +The [check_printer_health](https://labs.consol.de/nagios/check_printer_health/index.html) plugin +uses SNMP to monitor printers. The plugin is able to generate supply statistics and check hardware. +A complete list can be found in the plugin [documentation](https://labs.consol.de/nagios/check_printer_health/index.html). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|--------------------------------------------------------- +printer_health_hostname | **Required.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +printer_health_mode | **Required.** The plugin mode. A list of all available modes can be found in the [plugin documentation](https://labs.consol.de/nagios/check_printer_health/index.html). +printer_health_timeout | **Optional.** Seconds before plugin times out (default: 15) +printer_health_blacklist | **Optional.** Blacklist some (missing/failed) components. +printer_health_port | **Optional.** The SNMP port to use (default: 161). +printer_health_domain | **Optional.** The transport domain to use (default: udp/ipv4, other possible values: udp6, udp/ipv6, tcp, tcp4, tcp/ipv4, tcp6, tcp/ipv6). +printer_health_protocol | **Optional.** The SNMP protocol to use (default: 2c, other possibilities: 1,3). +printer_health_community | **Optional.** SNMP community of the server (SNMP v1/2 only). +printer_health_username | **Optional.** The securityName for the USM security model (SNMPv3 only). +printer_health_authpassword | **Optional.** The authentication password for SNMPv3.
+printer_health_authprotocol | **Optional.** The authentication protocol for SNMPv3 (md5\|sha). +printer_health_privpassword | **Optional.** The password for authPriv security level. +printer_health_privprotocol | **Optional.** The private protocol for SNMPv3 (des\|aes\|aes128\|3des\|3desde). +printer_health_contextengineid | **Optional.** The context engine id for SNMPv3 (10 to 64 hex characters). +printer_health_contextname | **Optional.** The context name for SNMPv3 (empty represents the default context). +printer_health_community2 | **Optional.** SNMP community which can be used to switch the context during runtime. +printer_health_name | **Optional.** The name of an interface (ifDescr). +printer_health_regexp | **Optional.** A flag indicating that --name is a regular expression +printer_health_ifspeedin | **Optional.** Override the ifspeed oid of an interface (only inbound) +printer_health_ifspeedout | **Optional.** Override the ifspeed oid of an interface (only outbound) +printer_health_ifspeed | **Optional.** Override the ifspeed oid of an interface +printer_health_units | **Optional.** One of %, B, KB, MB, GB, Bit, KBi, MBi, GBi. (used for e.g. mode interface-usage) +printer_health_name2 | **Optional.** The secondary name of a component. +printer_health_name3 | **Optional.** The tertiary name of a component. +printer_health_role | **Optional.** The role of this device in a hsrp group (active/standby/listen). +printer_health_report | **Optional.** Can be used to shorten the output. Possible values are: 'long' (default), 'short' (to shorten if available), or 'html' (to produce some html outputs if available) +printer_health_lookback | **Optional.** The amount of time you want to look back when calculating average rates. Use it for mode interface-errors or interface-usage. Without --lookback the time between two runs of `check_printer_health` is the base for calculations.
If you want your checkresult to be based for example on the past hour, use --lookback 3600. +printer_health_warning | **Optional.** The warning threshold +printer_health_critical | **Optional.** The critical threshold +printer_health_warningx | **Optional.** The extended warning thresholds +printer_health_criticalx | **Optional.** The extended critical thresholds +printer_health_mitigation | **Optional.** The parameter allows you to change a critical error to a warning (1) or ok (0). +printer_health_selectedperfdata | **Optional.** The parameter allows you to limit the list of performance data. It's a perl regexp. Only matching perfdata show up in the output. +printer_health_morphperfdata | **Optional.** The parameter allows you to change performance data labels. It's a perl regexp and a substitution. --morphperfdata '(.*)ISATAP(.*)'='$1patasi$2' +printer_health_negate | **Optional.** The parameter allows you to map exit levels, such as warning=critical. +printer_health_mymodules-dyn-dir | **Optional.** A directory where own extensions can be found. +printer_health_servertype | **Optional.** The type of the network device: cisco (default). Use it if auto-detection is not possible. +printer_health_statefilesdir | **Optional.** An alternate directory where the plugin can save files. +printer_health_oids | **Optional.** A list of oids which are downloaded and written to a cache file. Use it together with --mode oidcache. +printer_health_offline | **Optional.** The maximum number of seconds since the last update of cache file before it is considered too old. +printer_health_multiline | **Optional.** Multiline output + +#### Thola + +The [Thola](https://thola.io) plugin +is a tool for monitoring network devices, that mainly uses SNMP. + +To run these commands you need a server that is running the Thola API. +If you don't know how to do this, you can have a look at the plugin's +[documentation](https://docs.thola.io). 
Also, you have to +put the Thola-client binary into the `PluginContribDir`. + +##### thola-cpu-load + +Checks the CPU load of a network device. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------------------|-------------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to +thola_device_address | **Required.** The host's address. Defaults to "$address$" if the host's address attribute is set, “$address6$” otherwise. +thola_device_snmp_community | **Optional.** SNMP community of the device +thola_device_snmp_protocol | **Optional.** SNMP version to use +thola_cpu_load_critical | **Optional.** Critical threshold for the CPU load in % +thola_cpu_load_warning | **Optional.** Warning threshold for the CPU load in % + +##### thola-interface-metrics + +Checks the interface metrics of a network device. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|----------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to +thola_device_address | **Required.** The host's address. Defaults to "$address$" if the host's address attribute is set, “$address6$” otherwise +thola_device_snmp_community | **Optional.** SNMP community of the device + +##### thola-hardware-health + +Checks the hardware health of a network device. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------------|----------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to +thola_device_address | **Required.** The host's address. 
Defaults to "$address$" if the host's address attribute is set, “$address6$” otherwise +thola_device_snmp_community | **Optional.** SNMP community of the device + +##### thola-identify + +Checks if a device can be identified by the given properties. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------------------|-------------------------------------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to +thola_device_address | **Required.** The host's address. Defaults to "$address$" if the host's address attribute is set, “$address6$” otherwise +thola_device_snmp_community | **Optional.** SNMP community of the device +thola_identify_model | **Optional.** Model that is compared to the actual model of the device +thola_identify_os_version | **Optional.** OS-version that is compared to the actual OS-version of the device +thola_identify_vendor | **Optional.** Vendor that is compared to the actual vendor of the device +thola_identify_serial_number | **Optional.** Serial number that is compared to the actual serial number of the device +thola_identify_discover_retries | **Optional.** The number of discover retries before aborting +thola_identify_discover_timeouts | **Optional.** The number of discover timeouts before aborting + +> **Note**: +> +> One of the variables `thola_identify_model`, `thola_identify_os_version`, +> `thola_identify_vendor` or `thola_identify_serial_number` must be set + +##### thola-memory-usage + +Checks the memory usage of a device. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------------------------|----------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to +thola_device_address | **Required.** The host's address. 
Defaults to "$address$" if the host's address attribute is set, “$address6$” otherwise +thola_device_snmp_community | **Optional.** SNMP community of the device +thola_memory_usage_critical | **Optional.** Critical threshold for the memory usage in % +thola_memory_usage_warning | **Optional.** Warning threshold for the memory usage in % + +##### thola-sbc + +Checks special metrics from sbc network devices. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------------------------|----------------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to +thola_device_address | **Required.** The host's address. Defaults to "$address$" if the host's address attribute is set, “$address6$” otherwise +thola_device_snmp_community | **Optional.** SNMP community of the device +thola_sbc_system_health_score_critical | **Optional.** Critical threshold for the health score in % +thola_sbc_system_health_score_warning | **Optional.** Warning threshold for the health score in % + +##### thola-thola-server + +Checks if a Thola API is running on a given server. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------|----------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to + +##### thola-ups + +Checks whether a UPS device has its main voltage applied. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------------------|----------------------------------------------------------------- +thola_api_address | **Required.** Address of the Thola API to connect to +thola_device_address | **Required.** The host's address. 
Defaults to "$address$" if the host's address attribute is set, “$address6$” otherwise +thola_device_snmp_community | **Optional.** SNMP community of the device +thola_ups_batt_current_critical_max | **Optional.** High critical threshold for the battery current in Volt +thola_ups_batt_current_critical_min | **Optional.** Low critical threshold for the battery current in Volt +thola_ups_batt_current_warning_max | **Optional.** High warning threshold for the battery current in Volt +thola_ups_batt_current_warning_min | **Optional.** Low warning threshold for the battery current in Volt +thola_ups_batt_temperature_critical_max | **Optional.** High critical threshold for the battery temperature in degree celsius +thola_ups_batt_temperature_critical_min | **Optional.** Low critical threshold for the battery temperature in degree celsius +thola_ups_batt_temperature_warning_max | **Optional.** High warning threshold for the battery temperature in degree celsius +thola_ups_batt_temperature_warning_min | **Optional.** Low warning threshold for the battery temperature in degree celsius +thola_ups_current_load_critical_max | **Optional.** High critical threshold for the current load in percent +thola_ups_current_load_critical_min | **Optional.** Low critical threshold for the current load in percent +thola_ups_current_load_warning_max | **Optional.** High warning threshold for the current load in percent +thola_ups_current_load_warning_min | **Optional.** Low warning threshold for the current load in percent +thola_ups_rectifier_current_critical_max | **Optional.** High critical threshold for the current rectifier in Volt +thola_ups_rectifier_current_critical_min | **Optional.** Low critical threshold for the current rectifier in Volt +thola_ups_rectifier_current_warning_max | **Optional.** High warning threshold for the current rectifier in Volt +thola_ups_rectifier_current_warning_min | **Optional.** Low warning threshold for the current rectifier in Volt 
+thola_ups_system_voltage_critical_max | **Optional.** High critical threshold for the system voltage in Volt +thola_ups_system_voltage_critical_min | **Optional.** Low critical threshold for the system voltage in Volt +thola_ups_system_voltage_warning_max | **Optional.** High warning threshold for the system voltage in Volt +thola_ups_system_voltage_warning_min | **Optional.** Low warning threshold for the system voltage in Volt + +### Network Services + +This category contains plugins which receive details about network services + +#### lsyncd + +The [check_lsyncd](https://github.com/ohitz/check_lsyncd) plugin, +uses the `lsyncd` status file to monitor [lsyncd](https://axkibe.github.io/lsyncd/). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|--------------------------------------------------------------------------- +lsyncd_statfile | **Optional.** Set status file path (default: /var/run/lsyncd.status). +lsyncd_warning | **Optional.** Warning if more than N delays (default: 10). +lsyncd_critical | **Optional.** Critical if more than N delays (default: 100). + +#### fail2ban + +The [check_fail2ban](https://github.com/fail2ban/fail2ban/tree/master/files/nagios) plugin +uses the `fail2ban-client` binary to monitor [fail2ban](https://www.fail2ban.org) jails. + +The plugin requires `sudo` permissions. +You can add a sudoers file to allow your monitoring user to use the plugin, i.e.
edit /etc/sudoers.d/icinga and add: +``` +icinga ALL=(root) NOPASSWD:/usr/lib/nagios/plugins/check_fail2ban +``` + +and set the correct permissions: +```bash +chown -c root: /etc/sudoers.d/icinga +chmod -c 0440 /etc/sudoers.d/icinga +``` + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|--------------------------------------------------------------------------- +fail2ban_display | **Optional.** To modify the output display, default is 'CHECK FAIL2BAN ACTIVITY' +fail2ban_path | **Optional.** Specify the path to the fail2ban-client binary, default value is /usr/bin/fail2ban-client +fail2ban_warning | **Optional.** Specify a warning threshold, default is 1 +fail2ban_critical | **Optional.** Specify a critical threshold, default is 2 +fail2ban_socket | **Optional.** Specify a socket path, default is unset +fail2ban_perfdata | **Optional.** If set to true, activate the perfdata output, default value for the plugin is set to true +fail2ban_jail | **Optional.** Specify the name of the specific jail to monitor; omitted by default, i.e. all jails are being monitored. + +### Operating System + +This category contains plugins which receive details about your operating system +or the guest system. + +#### mem + +The [check_mem.pl](https://github.com/justintime/nagios-plugins) plugin checks the +memory usage on linux and unix hosts. It is able to count cache memory as free when +compared to thresholds. More details can be found on [this blog entry](http://sysadminsjourney.com/content/2009/06/04/new-and-improved-checkmempl-nagios-plugin).
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-------------|----------------------------------------------------------------------------------------------------------------------- +mem_used | **Optional.** Tell the plugin to check for used memory as opposed to **mem_free**. Must specify one of these as true. +mem_free | **Optional.** Tell the plugin to check for free memory as opposed to **mem_used**. Must specify one of these as true. +mem_cache | **Optional.** If set to true, plugin will count cache as free memory. Defaults to false. +mem_warning | **Required.** Specify the warning threshold as number interpreted as percent. +mem_critical | **Required.** Specify the critical threshold as number interpreted as percent. + +#### sar-perf + +The [check_sar_perf.py](https://github.com/dnsmichi/check-sar-perf) +plugin collects performance metrics from Linux hosts using the `sar` binary available in the `sysstat` package. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------|----------------------------------------------------------------------------------------------------------------------- +sar_perf_profile | **Required.** Define the run profile: `pagestat`, `cpu`, `memory_util`, `memory_stat`, `io_transfer`, `queueln_load`, `swap_util`, `swap_stat`, `task`, `kernel`, `disk`. Can be a string or an array of multiple profiles. +sar_perf_disk | **Optional.** Disk name for the 'disk' profile. + + +#### running_kernel + +The [check_running_kernel](https://packages.debian.org/stretch/nagios-plugins-contrib) plugin +is provided by the `nagios-plugins-contrib` package on Debian/Ubuntu. + +Custom variables: + +Name | Description +---------------------------|------------- +running\_kernel\_use\_sudo | Whether to run the plugin with `sudo`. Defaults to false except on Ubuntu where it defaults to true.
+ +#### iostats + +The [check_iostats](https://github.com/dnsmichi/icinga-plugins/blob/master/scripts/check_iostats) plugin +uses the `iostat` binary to monitor I/O on a Linux host. The default thresholds are rather high +so you can use a grapher for baselining before setting your own. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------|----------------------------------------------------------------------------------------------------------------------- +iostats\_disk | **Required.** The device to monitor without path. e.g. sda or vda. (default: sda). +iostats\_warning\_tps | **Required.** Warning threshold for tps (default: 3000). +iostats\_warning\_read | **Required.** Warning threshold for KB/s reads (default: 50000). +iostats\_warning\_write | **Required.** Warning threshold for KB/s writes (default: 10000). +iostats\_warning\_wait | **Required.** Warning threshold for % iowait (default: 50). +iostats\_critical\_tps | **Required.** Critical threshold for tps (default: 5000). +iostats\_critical\_read | **Required.** Critical threshold for KB/s reads (default: 80000). +iostats\_critical\_write | **Required.** Critical threshold for KB/s writes (default: 25000). +iostats\_critical\_wait | **Required.** Critical threshold for % iowait (default: 80). + +#### iostat + +The [check_iostat](https://github.com/dnsmichi/icinga-plugins/blob/master/scripts/check_iostat) plugin +uses the `iostat` binary to monitor disk I/O on a Linux host. The default thresholds are rather high +so you can use a grapher for baselining before setting your own. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------|----------------------------------------------------------------------------------------------------------------------- +iostat\_disk | **Required.** The device to monitor without path. e.g. sda or vda. 
(default: sda). +iostat\_wtps | **Required.** Warning threshold for tps (default: 100). +iostat\_wread | **Required.** Warning threshold for KB/s reads (default: 100). +iostat\_wwrite | **Required.** Warning threshold for KB/s writes (default: 100). +iostat\_ctps | **Required.** Critical threshold for tps (default: 200). +iostat\_cread | **Required.** Critical threshold for KB/s reads (default: 200). +iostat\_cwrite | **Required.** Critical threshold for KB/s writes (default: 200). + +#### systemd + +The [check_systemd.py](https://github.com/Josef-Friedrich/check_systemd) plugin +will report a degraded system to your monitoring solution. It requires only the [nagiosplugin](https://nagiosplugin.readthedocs.io/en/stable) library. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------- +systemd\_unit | **Optional.** Name of the systemd unit that is being tested. +systemd\_exclude\_unit | **Optional.** Exclude a systemd unit from the checks. This option can be applied multiple times. Also supports regular expressions. +systemd\_no\_startup\_time | **Optional.** Don’t check the startup time. Using this option the options `systemd_warning` and `systemd_critical` have no effect. (Default: `false`) +systemd\_warning | **Optional.** Startup time in seconds to result in a warning status. (Default: `60s`) +systemd\_critical | **Optional.** Startup time in seconds to result in a critical status. (Default: `120s`) +systemd\_dead\_timers | **Optional.** Detect dead / inactive timers. (Default: `false`) +systemd\_dead\_timers\_warning | **Optional.** Time ago in seconds for dead / inactive timers to trigger a warning state (by default 6 days). 
+systemd\_dead\_timers\_critical | **Optional.** Time ago in seconds for dead / inactive timers to trigger a critical state (by default 7 days). +systemd\_verbose\_level | **Optional.** Increase verbosity level (Accepted values: `1`, `2` or `3`). (Defaults to none) + +#### yum + +The [check_yum](https://github.com/calestyo/check_yum) plugin checks the YUM package +management system for package updates. +The plugin requires the `yum-plugin-security` package to differentiate between security and normal updates. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +yum_all_updates | **Optional.** Set to true to not distinguish between security and non-security updates, but returns critical for any available update. This may be used if the YUM security plugin is absent or you want to maintain every single package at the latest version. You may want to use **yum_warn_on_any_update** instead of this option. Defaults to false. +yum_warn_on_any_update | **Optional.** Set to true to warn if there are any (non-security) package updates available. Defaults to false. +yum_cache_only | **Optional.** If set to true, plugin runs entirely from cache and does not update the cache when running YUM. Useful if you have `yum makecache` cronned. Defaults to false. +yum_no_warn_on_lock | **Optional.** If set to true, returns OK instead of WARNING when YUM is locked and fails to check for updates due to another instance running. Defaults to false. +yum_no_warn_on_updates | **Optional.** If set to true, returns OK instead of WARNING even when updates are available. The plugin output still shows the number of available updates. Defaults to false. 
+yum_enablerepo | **Optional.** Explicitly enables a repository when calling YUM. Can take a comma separated list of repositories. Note that enabling repositories can lead to unexpected results, for example when protected repositories are enabled. +yum_disablerepo | **Optional.** Explicitly disables a repository when calling YUM. Can take a comma separated list of repositories. Note that enabling repositories can lead to unexpected results, for example when protected repositories are enabled. +yum_installroot | **Optional.** Specifies another installation root directory (for example a chroot). +yum_timeout | **Optional.** Set a timeout in seconds after which the plugin will exit (defaults to 55 seconds). + +### Storage + +This category includes all plugins for various storage and object storage technologies. + +#### glusterfs + +The [glusterfs](https://www.unixadm.org/software/nagios-stuff/checks/check_glusterfs) plugin +is used to check the GlusterFS storage health on the server. +The plugin requires `sudo` permissions. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +---------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +glusterfs_perfdata | **Optional.** Print perfdata of all or the specified volume. +glusterfs_warnonfailedheal | **Optional.** Warn if the *heal-failed* log contains entries. The log can be cleared by restarting glusterd. +glusterfs_volume | **Optional.** Only check the specified *VOLUME*. If --volume is not set, all volumes are checked. +glusterfs_disk_warning | **Optional.** Warn if disk usage is above *DISKWARN*. Defaults to 90 (percent). +glusterfs_disk_critical | **Optional.** Return a critical error if disk usage is above *DISKCRIT*. Defaults to 95 (percent). 
+glusterfs_inode_warning | **Optional.** Warn if inode usage is above *DISKWARN*. Defaults to 90 (percent). +glusterfs_inode_critical | **Optional.** Return a critical error if inode usage is above *DISKCRIT*. Defaults to 95 (percent). + +#### ceph + +The [ceph plugin](https://github.com/ceph/ceph-nagios-plugins) +is used to check the Ceph storage health on the server. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------|--------------------------------------------------------- +ceph_exec_dir | **Optional.** Ceph executable. Default /usr/bin/ceph. +ceph_conf_file | **Optional.** Alternative ceph conf file. +ceph_mon_address | **Optional.** Ceph monitor address[:port]. +ceph_client_id | **Optional.** Ceph client id. +ceph_client_name | **Optional.** Ceph client name. +ceph_client_key | **Optional.** Ceph client keyring file. +ceph_whitelist | **Optional.** Whitelist regexp for ceph health warnings. +ceph_details | **Optional.** Run 'ceph health detail'. + +#### btrfs + +The [btrfs plugin](https://github.com/knorrie/python-btrfs/) +is used to check the btrfs storage health on the server. + +The plugin requires `sudo` permissions. +You can add a sudoers file to allow your monitoring user to use the plugin, i.e. edit /etc/sudoers.d/icinga and add: +``` +icinga ALL=(root) NOPASSWD:/usr/lib/nagios/plugins/check_btrfs +``` + +and set the correct permissions: +```bash +chown -c root: /etc/sudoers.d/icinga +chmod -c 0440 /etc/sudoers.d/icinga +``` + +[monitoring-plugins-btrfs](https://packages.debian.org/monitoring-plugins-btrfs) provides the necessary binary on Debian/Ubuntu.
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------|--------------------------------------------------------- +btrfs_awg | **Optional.** Exit with WARNING status if less than the specified amount of disk space (in GiB) is unallocated +btrfs_acg | **Optional.** Exit with CRITICAL status if less than the specified amount of disk space (in GiB) is unallocated +btrfs_awp | **Optional.** Exit with WARNING status if more than the specified percent of disk space is allocated +btrfs_acp | **Optional.** Exit with CRITICAL status if more than the specified percent of disk space is allocated +btrfs_mountpoint | **Required.** Path to the BTRFS mountpoint + +### Virtualization + +This category includes all plugins for various virtualization technologies. + +#### esxi_hardware + +The [check_esxi_hardware.py](https://www.claudiokuenzler.com/monitoring-plugins/check_esxi_hardware.php) plugin +uses the [pywbem](https://pywbem.github.io/pywbem/) Python library to monitor the hardware of ESXi servers +through the [VMWare API](https://www.vmware.com/support/pubs/sdk_pubs.html) and CIM service. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +esxi_hardware_host | **Required.** Specifies the host to monitor. Defaults to "$address$". +esxi_hardware_user | **Required.** Specifies the user for polling. Must be a local user of the root group on the system. Can also be provided as a file path file:/path/to/.passwdfile, then first string of file is used. +esxi_hardware_pass | **Required.** Password of the user. 
Can also be provided as a file path file:/path/to/.passwdfile, then second string of file is used. +esxi_hardware_port | **Optional.** Specifies the CIM port to connect to. Defaults to 5989. +esxi_hardware_sslproto | **Optional.** Specifies the SSL/TLS protocol to use. Defaults to local openssl config. +esxi_hardware_vendor | **Optional.** Defines the vendor of the server: "auto", "dell", "hp", "ibm", "intel", "unknown" (default). +esxi_hardware_html | **Optional.** Add web-links to hardware manuals for Dell servers (use your country extension). Only useful with **esxi_hardware_vendor** = dell. +esxi_hardware_ignore | **Optional.** Comma separated list of CIM elements to ignore. +esxi_hardware_regex | **Optional.** Allow regular expression lookups of elements in ignore list. Defaults to false. +esxi_hardware_perfdata | **Optional.** Add performance data for graphers like PNP4Nagios to the output. Defaults to false. +esxi_hardware_nopower | **Optional.** Do not collect power performance data, when **esxi_hardware_perfdata** is set to true. Defaults to false. +esxi_hardware_novolts | **Optional.** Do not collect voltage performance data, when **esxi_hardware_perfdata** is set to true. Defaults to false. +esxi_hardware_nocurrent | **Optional.** Do not collect current performance data, when **esxi_hardware_perfdata** is set to true. Defaults to false. +esxi_hardware_notemp | **Optional.** Do not collect temperature performance data, when **esxi_hardware_perfdata** is set to true. Defaults to false. +esxi_hardware_nofan | **Optional.** Do not collect fan performance data, when **esxi_hardware_perfdata** is set to true. Defaults to false. +esxi_hardware_nolcd | **Optional.** Do not collect lcd/display status data. Defaults to false. + +#### VMware + +Check commands for the [check_vmware_esx](https://github.com/BaldMansMojo/check_vmware_esx) plugin. + +**vmware-esx-dc-volumes** + +Check command object for the `check_vmware_esx` plugin. Shows all datastore volumes info.
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_subselect | **Optional.** Volume name to be checked the free space. +vmware_gigabyte | **Optional.** Output in GB instead of MB. +vmware_usedspace | **Optional.** Output used space instead of free. Defaults to "false". +vmware_alertonly | **Optional.** List only alerting volumes. Defaults to "false". +vmware_exclude | **Optional.** Blacklist volumes name. No value defined as default. +vmware_include | **Optional.** Whitelist volumes name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_dc_volume_used | **Optional.** Output used space instead of free. Defaults to "true". +vmware_warn | **Optional.** The warning threshold for volumes. Defaults to "80%". +vmware_crit | **Optional.** The critical threshold for volumes. Defaults to "90%". + + +**vmware-esx-dc-runtime-info** + +Check command object for the `check_vmware_esx` plugin. Shows all runtime info for the datacenter/Vcenter. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. 
+vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-dc-runtime-listvms** + +Check command object for the `check_vmware_esx` plugin. List of vmware machines and their power state. BEWARE!! In larger environments systems can cause trouble displaying the information needed due to the mass of data. Use **vmware_alertonly** to avoid this. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_alertonly | **Optional.** List only alerting VMs. Important here to avoid masses of data. +vmware_exclude | **Optional.** Blacklist VMs name. No value defined as default. +vmware_include | **Optional.** Whitelist VMs name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This means technically that a multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-dc-runtime-listhost** + +Check command object for the `check_vmware_esx` plugin. List of VMware ESX hosts and their power state. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false".
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_alertonly | **Optional.** List only alerting hosts. Important here to avoid masses of data. +vmware_exclude | **Optional.** Blacklist VMware ESX hosts. No value defined as default. +vmware_include | **Optional.** Whitelist VMware ESX hosts. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This means technically that a multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-dc-runtime-listcluster** + +Check command object for the `check_vmware_esx` plugin. List of VMware clusters and their states. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false".
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_alertonly | **Optional.** List only alerting hosts. Important here to avoid masses of data. +vmware_exclude | **Optional.** Blacklist VMware cluster. No value defined as default. +vmware_include | **Optional.** Whitelist VMware cluster. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This means technically that a multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-dc-runtime-issues** + +Check command object for the `check_vmware_esx` plugin. All issues for the host. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false".
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist issues. No value defined as default. +vmware_include | **Optional.** Whitelist issues. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This means technically that a multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-dc-runtime-status** + +Check command object for the `check_vmware_esx` plugin. Overall object status (gray/green/red/yellow). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default.
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-dc-runtime-tools** + +Check command object for the `check_vmware_esx` plugin. Vmware Tools status. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Required.** Datacenter/vCenter hostname. +vmware_cluster | **Optional.** ESX or ESXi clustername. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_poweredonly | **Optional.** List only VMs which are powered on. No value defined as default. +vmware_alertonly | **Optional.** List only alerting VMs. Important here to avoid masses of data. +vmware_exclude | **Optional.** Blacklist VMs. No value defined as default. +vmware_include | **Optional.** Whitelist VMs. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This means technically that a multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. +vmware_openvmtools | **Optional.** Prevent CRITICAL state for installed and running Open VM Tools. + + +**vmware-esx-soap-host-check** + +Check command object for the `check_vmware_esx` plugin. Simple check to verify a successful connection to VMware SOAP API. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement.
+vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-uptime** + +Check command object for the `check_vmware_esx` plugin. Displays uptime of the VMware host. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-cpu** + +Check command object for the `check_vmware_esx` plugin. CPU usage in percentage. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in percent. Defaults to "80%". +vmware_crit | **Optional.** The critical threshold in percent. Defaults to "90%". + + +**vmware-esx-soap-host-cpu-ready** + +Check command object for the `check_vmware_esx` plugin. Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU. CPU ready time is dependent on the number of virtual machines on the host and their CPU loads. High or growing ready time can be a hint of CPU bottlenecks. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default.
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-cpu-wait** + +Check command object for the `check_vmware_esx` plugin. CPU time spent in wait state. The wait total includes time spent in the CPU idle, CPU swap wait, and CPU I/O wait states. High or growing wait time can be a hint of I/O bottlenecks. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-cpu-usage** + +Check command object for the `check_vmware_esx` plugin. Actively used CPU of the host, as a percentage of the total available CPU. Active CPU is approximately equal to the ratio of the used CPU to the available CPU. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in percent. Defaults to "80%". +vmware_crit | **Optional.** The critical threshold in percent. Defaults to "90%". + + +**vmware-esx-soap-host-mem** + +Check command object for the `check_vmware_esx` plugin. All mem info(except overall and no thresholds). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-mem-usage** + +Check command object for the `check_vmware_esx` plugin. Average mem usage in percentage. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in percent. Defaults to "80%". +vmware_crit | **Optional.** The critical threshold in percent. Defaults to "90%". + + +**vmware-esx-soap-host-mem-consumed** + +Check command object for the `check_vmware_esx` plugin. Amount of machine memory used on the host. Consumed memory includes memory used by the Service Console, the VMkernel vSphere services, plus the total consumed metrics for all running virtual machines in MB. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. 
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in percent. No value defined as default. +vmware_crit | **Optional.** The critical threshold in percent. No value defined as default. + + +**vmware-esx-soap-host-mem-swapused** + +Check command object for the `check_vmware_esx` plugin. Amount of memory that is used by swap. Sum of memory swapped of all powered on VMs and vSphere services on the host in MB. In case of an error all VMs with their swap used will be displayed. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. 
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in percent. No value defined as default. +vmware_crit | **Optional.** The critical threshold in percent. No value defined as default. +vmware_multiline | **Optional.** Multiline output in overview. This means technically that a multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-mem-overhead** + +Check command object for the `check_vmware_esx` plugin. Additional mem used by VM Server in MB. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. 
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in percent. No value defined as default. +vmware_crit | **Optional.** The critical threshold in percent. No value defined as default. + + +**vmware-esx-soap-host-mem-memctl** + +Check command object for the `check_vmware_esx` plugin. The sum of all vmmemctl values in MB for all powered-on virtual machines, plus vSphere services on the host. If the balloon target value is greater than the balloon value, the VMkernel inflates the balloon, causing more virtual machine memory to be reclaimed. If the balloon target value is less than the balloon value, the VMkernel deflates the balloon, which allows the virtual machine to consume additional memory if needed (used by VM memory control driver). In case of an error all VMs with their vmmemctl values will be displayed. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. 
+vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in percent. No value defined as default. +vmware_crit | **Optional.** The critical threshold in percent. No value defined as default. +vmware_multiline | **Optional.** Multiline output in overview. This means technically that a multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-net** + +Check command object for the `check_vmware_esx` plugin. Shows net info. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. 
No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist NICs. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist expression as regexp. + + +**vmware-esx-soap-host-net-usage** + +Check command object for the `check_vmware_esx` plugin. Overall network usage in KBps(Kilobytes per Second). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in KBps(Kilobytes per Second). No value defined as default. +vmware_crit | **Optional.** The critical threshold in KBps(Kilobytes per Second). No value defined as default. + + +**vmware-esx-soap-host-net-receive** + +Check command object for the `check_vmware_esx` plugin. Data receive in KBps(Kilobytes per Second). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in KBps(Kilobytes per Second). No value defined as default. +vmware_crit | **Optional.** The critical threshold in KBps(Kilobytes per Second). No value defined as default. + + +**vmware-esx-soap-host-net-send** + +Check command object for the `check_vmware_esx` plugin. Data send in KBps(Kilobytes per Second). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold in KBps(Kilobytes per Second). No value defined as default. +vmware_crit | **Optional.** The critical threshold in KBps(Kilobytes per Second). No value defined as default. + + +**vmware-esx-soap-host-net-nic** + +Check command object for the `check_vmware_esx` plugin. Check all active NICs. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist NICs. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist expression as regexp. + + +**vmware-esx-soap-host-volumes** + +Check command object for the `check_vmware_esx` plugin. Shows all datastore volumes info. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_subselect | **Optional.** Volume name to be checked the free space. +vmware_gigabyte | **Optional.** Output in GB instead of MB. +vmware_usedspace | **Optional.** Output used space instead of free. Defaults to "false". +vmware_alertonly | **Optional.** List only alerting volumes. Defaults to "false". +vmware_exclude | **Optional.** Blacklist volumes name. No value defined as default. +vmware_include | **Optional.** Whitelist volumes name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_warn | **Optional.** The warning threshold for volumes. Defaults to "80%". +vmware_crit | **Optional.** The critical threshold for volumes. Defaults to "90%". +vmware_spaceleft | **Optional.** This has to be used in conjunction with thresholds as mentioned above. + + +**vmware-esx-soap-host-io** + +Check command object for the `check_vmware_esx` plugin. Shows all disk io info. Without subselect no thresholds can be given. All I/O values are aggregated from historical intervals over the past 24 hours with a 5 minute sample rate. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". 
+vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-io-aborted** + +Check command object for the `check_vmware_esx` plugin. Number of aborted SCSI commands. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-resets** + +Check command object for the `check_vmware_esx` plugin. Number of SCSI bus resets. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-read** + +Check command object for the `check_vmware_esx` plugin. Average number of kilobytes read from the disk each second. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-read-latency** + +Check command object for the `check_vmware_esx` plugin. Average amount of time (ms) to process a SCSI read command issued from the Guest OS to the virtual machine. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-write** + +Check command object for the `check_vmware_esx` plugin. Average number of kilobytes written to disk each second. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-write-latency** + +Check command object for the `check_vmware_esx` plugin. Average amount of time (ms) taken to process a SCSI write command issued by the Guest OS to the virtual machine. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-usage** + +Check command object for the `check_vmware_esx` plugin. Aggregated disk I/O rate. For hosts, this metric includes the rates for all virtual machines running on the host. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-kernel-latency** + +Check command object for the `check_vmware_esx` plugin. Average amount of time (ms) spent by VMkernel processing each SCSI command. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-device-latency** + +Check command object for the `check_vmware_esx` plugin. Average amount of time (ms) to complete a SCSI command from the physical device. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-queue-latency** + +Check command object for the `check_vmware_esx` plugin. Average amount of time (ms) spent in the VMkernel queue. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-io-total-latency** + +Check command object for the `check_vmware_esx` plugin. Average amount of time (ms) taken during the collection interval to process a SCSI command issued by the guest OS to the virtual machine. The sum of kernelWriteLatency and deviceWriteLatency. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. 
No value defined as default. +vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-host-media** + +Check command object for the `check_vmware_esx` plugin. Lists VMs with attached host-mounted media such as CD, DVD or floppy drives. This is important for monitoring because a virtual machine with a mounted CD or DVD drive cannot be moved to another host. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 1 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/response trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY! Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password.
No value defined as default. +vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist VMs name. No value defined as default. +vmware_include | **Optional.** Whitelist VMs name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. Technically, this means that multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-service** + +Check command object for the `check_vmware_esx` plugin. Shows host service info. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 1 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/response trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY! Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server.
No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist services name. No value defined as default. +vmware_include | **Optional.** Whitelist services name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. Technically, this means that multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-runtime** + +Check command object for the `check_vmware_esx` plugin. Shows runtime info: VMs, overall status, connection state, health, storagehealth, temperature and sensor are represented as one value and without thresholds. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 1 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/response trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp".
+vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY! Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist VMs name. No value defined as default. +vmware_include | **Optional.** Whitelist VMs name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. + + +**vmware-esx-soap-host-runtime-con** + +Check command object for the `check_vmware_esx` plugin. Shows connection state. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-runtime-listvms** + +Check command object for the `check_vmware_esx` plugin. List of VMware machines and their status. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist VMs name. No value defined as default. +vmware_include | **Optional.** Whitelist VMs name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This mean technically that a multiline output uses a HTML **\** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-runtime-status** + +Check command object for the `check_vmware_esx` plugin. Overall object status (gray/green/red/yellow). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". 
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-host-runtime-health** + +Check command object for the `check_vmware_esx` plugin. Checks cpu/storage/memory/sensor status. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist status name. No value defined as default. +vmware_include | **Optional.** Whitelist status name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. + + +**vmware-esx-soap-host-runtime-health-listsensors** + +Check command object for the `check_vmware_esx` plugin. List all available sensors(use for listing purpose only). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist status name. No value defined as default. +vmware_include | **Optional.** Whitelist status name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. + + +**vmware-esx-soap-host-runtime-health-nostoragestatus** + +Check command object for the `check_vmware_esx` plugin. This is to avoid a double alarm if you use **vmware-esx-soap-host-runtime-health** and **vmware-esx-soap-host-runtime-storagehealth**. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. 
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist status name. No value defined as default. +vmware_include | **Optional.** Whitelist status name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. + + +**vmware-esx-soap-host-runtime-storagehealth** + +Check command object for the `check_vmware_esx` plugin. Local storage status check. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist storage name. No value defined as default. +vmware_include | **Optional.** Whitelist storage name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This mean technically that a multiline output uses a HTML **\** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-runtime-temp** + +Check command object for the `check_vmware_esx` plugin. Lists all temperature sensors. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". 
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist sensor name. No value defined as default. +vmware_include | **Optional.** Whitelist sensor name. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This mean technically that a multiline output uses a HTML **\** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-runtime-issues** + +Check command object for the `check_vmware_esx` plugin. Lists all configuration issues for the host. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". 
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist configuration issues. No value defined as default. +vmware_include | **Optional.** Whitelist configuration issues. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This mean technically that a multiline output uses a HTML **\** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-storage** + +Check command object for the `check_vmware_esx` plugin. Shows Host storage info. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". 
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist adapters, luns and paths. No value defined as default. +vmware_include | **Optional.** Whitelist adapters, luns and paths. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. + + +**vmware-esx-soap-host-storage-adapter** + +Check command object for the `check_vmware_esx` plugin. List host bus adapters. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist adapters. No value defined as default. +vmware_include | **Optional.** Whitelist adapters. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This mean technically that a multiline output uses a HTML **\** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-storage-lun** + +Check command object for the `check_vmware_esx` plugin. List SCSI logical units. The listing will include: LUN, canonical name of the disc, all of displayed name which is not part of the canonical name and status. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". 
+vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY! Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_exclude | **Optional.** Blacklist luns. No value defined as default. +vmware_include | **Optional.** Whitelist luns. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. This mean technically that a multiline output uses a HTML **\** for the GUI. No value defined as default. + + +**vmware-esx-soap-host-storage-path** + +Check command object for the `check_vmware_esx` plugin. List multipaths and the associated paths. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_host | **Required.** ESX or ESXi hostname. +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. In case the check is done through a Datacenter/vCenter host. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. 
No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_alertonly | **Optional.** List only alerting units. Important here to avoid masses of data. Defaults to "false". +vmware_exclude | **Optional.** Blacklist paths. No value defined as default. +vmware_include | **Optional.** Whitelist paths. No value defined as default. +vmware_isregexp | **Optional.** Treat blacklist and whitelist expressions as regexp. +vmware_multiline | **Optional.** Multiline output in overview. Technically, this means that multiline output uses an HTML **\<br\>** for the GUI. No value defined as default. +vmware_standbyok | **Optional.** For storage systems where a standby multipath is ok and not a warning. Defaults to "false". + + +**vmware-esx-soap-vm-cpu** + +Check command object for the `check_vmware_esx` plugin. Shows all CPU usage info. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/response trace. +vmware_sessionfile | **Optional.** Session file name enhancement.
+vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY! Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + + +**vmware-esx-soap-vm-cpu-ready** + +Check command object for the `check_vmware_esx` plugin. Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-cpu-wait** + +Check command object for the `check_vmware_esx` plugin. CPU time spent in wait state. The wait total includes time spent the CPU idle, CPU swap wait, and CPU I/O wait states. High or growing wait time can be a hint I/O bottlenecks. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. 
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password to establish the session. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-cpu-usage** + +Check command object for the `check_vmware_esx` plugin. Amount of actively used virtual CPU, as a percentage of total available CPU. This is the host's view of the CPU usage, not the guest operating system view. It is the average CPU utilization over all available virtual CPUs in the virtual machine. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". 
+vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** Warning threshold in percent. Defaults to "80%". +vmware_crit | **Optional.** Critical threshold in percent. Defaults to "90%". + + +**vmware-esx-soap-vm-mem** + +Check command object for the `check_vmware_esx` plugin. Shows all memory info, except overall. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password for the session connect. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-vm-mem-usage** + +Check command object for the `check_vmware_esx` plugin. Average mem usage in percentage of configured virtual machine "physical" memory. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** Warning threshold in percent. Defaults to "80%". +vmware_crit | **Optional.** Critical threshold in percent. Defaults to "90%". + + +**vmware-esx-soap-vm-mem-consumed** + +Check command object for the `check_vmware_esx` plugin. Amount of guest physical memory in MB consumed by the virtual machine for guest memory. Consumed memory does not include overhead memory. It includes shared memory and memory that might be reserved, but not actually used. Use this metric for charge-back purposes.
+**vm consumed memory = memory granted -- memory saved** + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-mem-memctl** + +Check command object for the `check_vmware_esx` plugin. Amount of guest physical memory that is currently reclaimed from the virtual machine through ballooning. This is the amount of guest physical memory that has been allocated and pinned by the balloon driver. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. 
No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password for the session connect. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + + +**vmware-esx-soap-vm-net** + +Check command object for the `check_vmware_esx` plugin. Shows net info. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-vm-net-usage** + +Check command object for the `check_vmware_esx` plugin. Overall network usage in KBps(Kilobytes per Second). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-net-receive** + +Check command object for the `check_vmware_esx` plugin. Receive in KBps(Kilobytes per Second). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password for the session connect. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-net-send** + +Check command object for the `check_vmware_esx` plugin. Send in KBps(Kilobytes per Second). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password for the session connect. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-io** + +Check command object for the `check_vmware_esx` plugin. Shows all disk io info. Without subselect no thresholds can be given. All I/O values are aggregated from historical intervals over the past 24 hours with a 5 minute sample rate. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. 
+vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use an auth file instead of username/password for the session connect. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-vm-io-read** + +Check command object for the `check_vmware_esx` plugin. Average number of kilobytes read from the disk each second. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session - IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-io-write** + +Check command object for the `check_vmware_esx` plugin. Average number of kilobytes written to disk each second. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password for the session connect. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-io-usage** + +Check command object for the `check_vmware_esx` plugin. Aggregated disk I/O rate. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-runtime** + +Check command object for the `check_vmware_esx` plugin. Shows virtual machine runtime info. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use an auth file instead of username/password for the session connect. No effect if **vmware_username** and **vmware_password** are defined.
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-vm-runtime-con** + +Check command object for the `check_vmware_esx` plugin. Shows the connection state. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-vm-runtime-powerstate** + +Check command object for the `check_vmware_esx` plugin. Shows virtual machine power state: poweredOn, poweredOff or suspended. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-vm-runtime-status** + +Check command object for the `check_vmware_esx` plugin. Overall object status (gray/green/red/yellow). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + + +**vmware-esx-soap-vm-runtime-consoleconnections** + +Check command object for the `check_vmware_esx` plugin. Console connections to virtual machine. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_warn | **Optional.** The warning threshold. No value defined as default. +vmware_crit | **Optional.** The critical threshold. No value defined as default. + + +**vmware-esx-soap-vm-runtime-gueststate** + +Check command object for the `check_vmware_esx` plugin. Guest OS status. Needs VMware Tools installed and running. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. 
+vmware_authfile | **Optional.** Use auth file instead of username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd + +**vmware-esx-soap-vm-runtime-tools** + +Check command object for the `check_vmware_esx` plugin. Guest OS status. VMware tools status. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_openvmtools | **Optional** Prevent CRITICAL state for installed and running Open VM Tools. + + +**vmware-esx-soap-vm-runtime-issues** + +Check command object for the `check_vmware_esx` plugin. All issues for the virtual machine. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +vmware_datacenter | **Optional.** Datacenter/vCenter hostname. Conflicts with **vmware_host**. +vmware_host | **Optional.** ESX or ESXi hostname. Conflicts with **vmware_datacenter**. +vmware_vmname | **Required.** Virtual machine name. +vmware_sslport | **Optional.** SSL port connection. Defaults to "443". +vmware_ignoreunknown | **Optional.** Sometimes 3 (unknown) is returned from a component. But the check itself is ok. With this option the plugin will return OK (0) instead of UNKNOWN (3). Defaults to "false". +vmware_ignorewarning | **Optional.** Sometimes 2 (warning) is returned from a component. But the check itself is ok (from an operator view). With this option the plugin will return OK (0) instead of WARNING (1). Defaults to "false". +vmware_timeout | **Optional.** Seconds before plugin times out. Defaults to "90". +vmware_trace | **Optional.** Set verbosity level of vSphere API request/respond trace. +vmware_sessionfile | **Optional.** Session file name enhancement. +vmware_sessionfiledir | **Optional.** Path to store the **vmware_sessionfile** file. Defaults to "/var/spool/icinga2/tmp". +vmware_nosession | **Optional.** No auth session -- IT SHOULD BE USED FOR TESTING PURPOSES ONLY!. Defaults to "false". +vmware_username | **Optional.** The username to connect to Host or vCenter server. No value defined as default. +vmware_password | **Optional.** The username's password. No value defined as default. +vmware_authfile | **Optional.** Use auth file instead username/password to session connect. 
No effect if **vmware_username** and **vmware_password** are defined
**Authentication file content:**
username=vmuser
password=p@ssw0rd +vmware_multiline | **Optional.** Multiline output in overview. This mean technically that a multiline output uses a HTML **\** for the GUI. No value defined as default. + + +### Web + +This category includes all plugins for web-based checks. + +#### apache-status + +The [check_apache_status.pl](https://github.com/lbetz/check_apache_status) plugin +uses the [/server-status](https://httpd.apache.org/docs/current/mod/mod_status.html) +HTTP endpoint to monitor status metrics for the Apache webserver. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|---------------------------------------------------------------------------------- +apache_status_address | **Optional.** Host address. Defaults to "$address$" if the host's `address` attribute is set, `address6` otherwise. +apache_status_port | **Optional.** HTTP port. +apache_status_uri | **Optional.** URL to use, instead of the default (http://`apache_status_address`/server-status). +apache_status_ssl | **Optional.** Set to use SSL connection. +apache_status_no_validate | **Optional.** Skip SSL certificate validation. +apache_status_username | **Optional.** Username for basic auth. +apache_status_password | **Optional.** Password for basic auth. +apache_status_timeout | **Optional.** Timeout in seconds. +apache_status_unreachable | **Optional.** Return CRITICAL if socket timed out or http code >= 500. +apache_status_warning | **Optional.** Warning threshold (number of open slots, busy workers and idle workers that will cause a WARNING) like ':20,50,:50'. +apache_status_critical | **Optional.** Critical threshold (number of open slots, busy workers and idle workers that will cause a CRITICAL) like ':10,25,:20'. + + +#### ssl_cert + +The [check_ssl_cert](https://github.com/matteocorti/check_ssl_cert) plugin +uses the openssl binary (and optional curl) to check a X.509 certificate. 
+ +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------|-------------- +ssl_cert_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +ssl_cert_port | **Optional.** TCP port number (default: 443). +ssl_cert_file | **Optional.** Local file path. Works only if `ssl_cert_address` is set to "localhost". +ssl_cert_warn | **Optional.** Minimum number of days a certificate has to be valid. +ssl_cert_critical | **Optional.** Minimum number of days a certificate has to be valid to issue a critical status. +ssl_cert_cn | **Optional.** Pattern to match the CN of the certificate. +ssl_cert_altnames | **Optional.** Matches the pattern specified in -n with alternate +ssl_cert_issuer | **Optional.** Pattern to match the issuer of the certificate. +ssl_cert_org | **Optional.** Pattern to match the organization of the certificate. +ssl_cert_email | **Optional.** Pattern to match the email address contained in the certificate. +ssl_cert_serial | **Optional.** Pattern to match the serial number. +ssl_cert_noauth | **Optional.** Ignore authority warnings (expiration only) +ssl_cert_match_host | **Optional.** Match CN with the host name. +ssl_cert_selfsigned | **Optional.** Allow self-signed certificate. +ssl_cert_sni | **Optional.** Sets the TLS SNI (Server Name Indication) extension. +ssl_cert_timeout | **Optional.** Seconds before connection times out (default: 15) +ssl_cert_protocol | **Optional.** Use the specific protocol {http,smtp,pop3,imap,ftp,xmpp,irc,ldap} (default: http). +ssl_cert_clientcert | **Optional.** Use client certificate to authenticate. +ssl_cert_clientpass | **Optional.** Set passphrase for client certificate. 
+ssl_cert_ssllabs | **Optional.** SSL Labs assessment +ssl_cert_ssllabs_nocache | **Optional.** Forces a new check by SSL Labs +ssl_cert_rootcert | **Optional.** Root certificate or directory to be used for certificate validation. +ssl_cert_ignore_signature | **Optional.** Do not check if the certificate was signed with SHA1 or MD5. +ssl_cert_ssl_version | **Optional.** Force specific SSL version out of {ssl2,ssl3,tls1,tls1_1,tls1_2}. +ssl_cert_disable_ssl_versions | **Optional.** Disable specific SSL versions out of {ssl2,ssl3,tls1,tls1_1,tls1_2}. Multiple versions can be given as array. +ssl_cert_cipher | **Optional.** Cipher selection: force {ecdsa,rsa} authentication. +ssl_cert_ignore_expiration | **Optional.** Ignore expiration date. +ssl_cert_ignore_ocsp | **Optional.** Do not check revocation with OCSP. +ssl_cert_ignore_sct | **Optional.** Do not check for signed certificate timestamps. + + +#### jmx4perl + +The [check_jmx4perl](https://metacpan.org/pod/distribution/jmx4perl/scripts/check_jmx4perl) plugin +uses the HTTP API exposed by the [Jolokia](https://jolokia.org) +web application and queries Java message beans on an application server. It is +part of the `JMX::Jmx4Perl` Perl module which includes detailed +[documentation](https://metacpan.org/pod/distribution/jmx4perl/scripts/check_jmx4perl). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +-----------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------- +jmx4perl_url | **Required.** URL to agent web application. Defaults to "http://$address$:8080/jolokia". +jmx4perl_product | **Optional.** Name of app server product (e.g. jboss), by default it uses an auto detection facility. +jmx4perl_alias | **Optional.** Alias name for attribute (e.g. MEMORY_HEAP_USED).
All available aliases can be viewed by executing `jmx4perl aliases` on the command line. +jmx4perl_mbean | **Optional.** MBean name (e.g. java.lang:type=Memory). +jmx4perl_attribute | **Optional.** Attribute name (e.g. HeapMemoryUsage). +jmx4perl_operation | **Optional.** Operation to execute. +jmx4perl_value | **Optional.** Shortcut for specifying mbean/attribute/path. Slashes within names must be escaped with backslash. +jmx4perl_delta | **Optional.** Switches on incremental mode. The optional argument is the number of seconds used for normalizing. +jmx4perl_path | **Optional.** Inner path for extracting a single value from a complex attribute or return value (e.g. used). +jmx4perl_target | **Optional.** JSR-160 Service URL specifying the target server. +jmx4perl_target_user | **Optional.** Username to use for JSR-160 connection. +jmx4perl_target_password | **Optional.** Password to use for JSR-160 connection. +jmx4perl_proxy | **Optional.** Proxy to use. +jmx4perl_user | **Optional.** User for HTTP authentication. +jmx4perl_password | **Optional.** Password for HTTP authentication. +jmx4perl_name | **Optional.** Name to use for output, by default a standard value based on the MBean and attribute will be used. +jmx4perl_method | **Optional.** HTTP method to use, either get or post. By default a method is determined automatically based on the request type. +jmx4perl_base | **Optional.** Base name, which when given, interprets critical and warning values as relative in the range 0 .. 100%. Must be given in the form mbean/attribute/path. +jmx4perl_base_mbean | **Optional.** Base MBean name, interprets critical and warning values as relative in the range 0 .. 100%. Requires "jmx4perl_base_attribute". +jmx4perl_base_attribute | **Optional.** Base attribute for a relative check. Requires "jmx4perl_base_mbean". +jmx4perl_base_path | **Optional.** Base path for relative checks, where this path is used on the base attribute's value.
+jmx4perl_unit | **Optional.** Unit of measurement of the data retrieved. Recognized values are [B\|KB\|MB\|GB\|TB] for memory values and [us\|ms\|s\|m\|h\|d] for time values. +jmx4perl_null | **Optional.** Value which should be used in case of a null return value of an operation or attribute. Defaults to null. +jmx4perl_string | **Optional.** Force string comparison for critical and warning checks. Defaults to false. +jmx4perl_numeric | **Optional.** Force numeric comparison for critical and warning checks. Defaults to false. +jmx4perl_critical | **Optional.** Critical threshold for value. +jmx4perl_warning | **Optional.** Warning threshold for value. +jmx4perl_label | **Optional.** Label to be used for printing out the result of the check. For placeholders which can be used see the documentation. +jmx4perl_perfdata | **Optional.** Whether performance data should be omitted, which are included by default. Defaults to "on" for numeric values, to "off" for strings. +jmx4perl_unknown_is_critical | **Optional.** Map UNKNOWN errors to errors with a CRITICAL status. Defaults to false. +jmx4perl_timeout | **Optional.** Seconds before plugin times out. Defaults to "15". +jmx4perl_config | **Optional.** Path to configuration file. +jmx4perl_server | **Optional.** Symbolic name of server url to use, which needs to be configured in the configuration file. +jmx4perl_check | **Optional.** Name of a check configuration as defined in the configuration file, use array if you need arguments. + + +#### kdc + +The [check_kdc](https://exchange.nagios.org/directory/Plugins/Security/check_kdc/details) plugin +uses the Kerberos `kinit` binary to monitor Kerberos 5 KDC by acquiring a ticket. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------------------------------------------------------------------- +kdc_address | **Optional.** The host's address.
Defaults to "$address$" if the host's `address` attribute is set, `address6` otherwise. +kdc_port | **Optional** Port on which KDC runs (default 88). +kdc_principal | **Required** Principal name to authenticate as (including realm). +kdc_keytab | **Required** Keytab file containing principal's key. + + +#### nginx_status + +The [check_nginx_status.pl](https://github.com/regilero/check_nginx_status) plugin +uses the [/nginx_status](https://nginx.org/en/docs/http/ngx_http_stub_status_module.html) +HTTP endpoint which provides metrics for monitoring Nginx. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------------|---------------------------------------------------------------------------------- +nginx_status_host_address | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, `address6` otherwise. +nginx_status_port | **Optional.** the http port. +nginx_status_url | **Optional.** URL to use, instead of the default (http://`nginx_status_hostname`/nginx_status). +nginx_status_servername | **Optional.** ServerName to use if you specified an IP to match the good Virtualhost in your target. +nginx_status_ssl | **Optional.** set to use ssl connection. +nginx_status_disable_sslverify | **Optional.** set to disable SSL hostname verification. +nginx_status_user | **Optional.** Username for basic auth. +nginx_status_pass | **Optional.** Password for basic auth. +nginx_status_realm | **Optional.** Realm for basic auth. +nginx_status_maxreach | **Optional.** Number of max processes reached (since last check) that should trigger an alert. +nginx_status_timeout | **Optional.** timeout in seconds. +nginx_status_warn | **Optional.** Warning threshold (number of active connections, ReqPerSec or ConnPerSec that will cause a WARNING) like '10000,100,200'. 
+nginx_status_critical | **Optional.** Critical threshold (number of active connections, ReqPerSec or ConnPerSec that will cause a CRITICAL) like '20000,200,300'. + + +#### rbl + +The [check_rbl](https://github.com/matteocorti/check_rbl) plugin +uses the `Net::DNS` Perl library to check whether your SMTP server +is blacklisted. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------|-------------------------------------------------------------------------- +rbl_hostname | **Optional.** The address or name of the SMTP server to check. Defaults to "$address$" if the host's `address` attribute is set, `address6` otherwise. +rbl_server | **Required** List of RBL servers as an array. +rbl_warning | **Optional** Number of blacklisting servers for a warning. +rbl_critical | **Optional** Number of blacklisting servers for a critical. +rbl_timeout | **Optional** Seconds before plugin times out (default: 15). + + +#### squid + +The [check_squid](https://exchange.icinga.com/exchange/check_squid) plugin +uses the `squidclient` binary to monitor a [Squid proxy](http://www.squid-cache.org). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|---------------------------------------------------------------------------------- +squid_hostname | **Optional.** The host's address. Defaults to "$address$" if the host's `address` attribute is set, "$address6$" otherwise. +squid_data | **Optional.** Data to fetch (default: Connections) available data: Connections Cache Resources Memory FileDescriptors. +squid_port | **Optional.** Port number (default: 3128). +squid_user | **Optional.** WWW user. +squid_password | **Optional.** WWW password. +squid_warning | **Optional.** Warning threshold. See http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT for the threshold format. 
+squid_critical | **Optional.** Critical threshold. See http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT for the threshold format. +squid_client | **Optional.** Path of squidclient (default: /usr/bin/squidclient). +squid_timeout | **Optional.** Seconds before plugin times out (default: 15). + + +#### webinject + +The [check_webinject](https://labs.consol.de/de/nagios/check_webinject/index.html) plugin +uses [WebInject](http://www.webinject.org/manual.html) to test web applications +and web services in an automated fashion. +It can be used to test individual system components that have HTTP interfaces +(JSP, ASP, CGI, PHP, AJAX, Servlets, HTML Forms, XML/SOAP Web Services, REST, etc), +and can be used as a test harness to create a suite of HTTP level automated functional, +acceptance, and regression tests. A test harness allows you to run many test cases +and collect/report your results. WebInject offers real-time results +display and may also be used for monitoring system response times. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|-------------- +webinject_config_file | **Optional.** There is a configuration file named 'config.xml' that is used to store configuration settings for your project. You can use this to specify which test case files to run and to set some constants and settings to be used by WebInject. +webinject_output | **Optional.** This option is followed by a directory name or a prefix to prepended to the output files. This is used to specify the location for writing output files (http.log, results.html, and results.xml). If a directory name is supplied (use either an absolute or relative path and make sure to add the trailing slash), all output files are written to this directory. If the trailing slash is omitted, it is assumed to a prefix and this will be prepended to the output files. 
You may also use a combination of a directory and prefix. +webinject_no_output | **Optional.** Suppresses all output to STDOUT except the results summary. +webinject_timeout | **Optional.** The value [given in seconds] will be compared to the global time elapsed to run all the tests. If the tests have all been successful, but have taken more time than the 'globaltimeout' value, a warning message is sent back to Icinga. +webinject_report_type | **Optional.** This setting is used to enable output formatting that is compatible for use with specific external programs. The available values you can set this to are: nagios, mrtg, external and standard. +webinject_testcase_file | **Optional.** When you launch WebInject in console mode, you can optionally supply an argument for a testcase file to run. It will look for this file in the directory that webinject.pl resides in. If no filename is passed from the command line, it will look in config.xml for testcasefile declarations. If no files are specified, it will look for a default file named 'testcases.xml' in the current [webinject] directory. If none of these are found, the engine will stop and give you an error. + +#### varnish + +The [check_varnish](https://github.com/varnish/varnish-nagios) plugin, +also available in the [monitoring-plugins-contrib](https://packages.debian.org/sid/nagios-plugins-contrib) on debian, +uses the `varnishstat` binary to monitor [varnish](https://varnish-cache.org/). + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|---------------------------------------------------------------------------------- +varnish_name | **Optional.** Specify the Varnish instance name +varnish_param | **Optional.** Specify the parameter to check (see below). The default is 'ratio'. 
+varnish_critical | **Optional.** Set critical threshold: [@][lo:]hi +varnish_warning | **Optional.** Set warning threshold: [@][lo:]hi + +For *varnish_param*, all items reported by varnishstat(1) are available - use the +identifier listed in the left column by `varnishstat -l`. In +addition, the following parameters are available: + +Name | Description +------------------------|---------------------------------------------------------------------------------- +uptime | How long the cache has been running (in seconds) +ratio | The cache hit ratio expressed as a percentage of hits to hits + misses. Default thresholds are 95 and 90. +usage | Cache file usage as a percentage of the total cache space. + +#### haproxy + +The [check_haproxy](https://salsa.debian.org/nagios-team/pkg-nagios-plugins-contrib/blob/master/check_haproxy/check_haproxy) plugin, +also available in the [monitoring-plugins-contrib](https://packages.debian.org/nagios-plugins-contrib) on debian, +uses the `haproxy` csv statistics page to monitor [haproxy](https://www.haproxy.org/) response time. The plugin output performance data for backends sessions and statistics response time. + +This plugin need to access the csv statistics page. You can configure it in haproxy by adding a new frontend: +``` +frontend stats + bind 127.0.0.1:80 + stats enablestats + stats uri /stats +``` + +The statistics page will be available at `http://127.0.0.1/stats;csv;norefresh`. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +------------------------|---------------------------------------------------------------------------------- +haproxy_username | **Optional.** Username for HTTP Auth +haproxy_password | **Optional.** Password for HTTP Auth +haproxy_url | **Required.** URL of the HAProxy csv statistics page. 
+haproxy_timeout | **Optional.** Seconds before plugin times out (default: 10) +haproxy_warning | **Optional.** Warning request time threshold (in seconds) +haproxy_critical | **Optional.** Critical request time threshold (in seconds) + +#### haproxy_status + +The [check_haproxy_status](https://github.com/jonathanio/monitoring-nagios-haproxy) plugin, +uses the `haproxy` statistics socket to monitor [haproxy](https://www.haproxy.org/) frontends/backends. + +This plugin need read/write access to the statistics socket with an operator level. You can configure it in the global section of haproxy to allow icinga user to use it: +``` +stats socket /run/haproxy/admin.sock user haproxy group icinga mode 660 level operator +``` + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +----------------------------|---------------------------------------------------------------------------------- +haproxy\_status\_default | **Optional.** Set/Override the defaults which will be applied to all checks (unless specifically set by --overrides). +haproxy\_status\_frontends | **Optional.** Enable checks for the frontends in HAProxy (that they're marked as OPEN and the session limits haven't been reached). +haproxy\_status\_nofrontends| **Optional.** Disable checks for the frontends in HAProxy (that they're marked as OPEN and the session limits haven't been reached). +haproxy\_status\_backends | **Optional.** Enable checks for the backends in HAProxy (that they have the required quorum of servers, and that the session limits haven't been reached). +haproxy\_status\_nobackends | **Optional.** Disable checks for the backends in HAProxy (that they have the required quorum of servers, and that the session limits haven't been reached). +haproxy\_status\_servers | **Optional.** Enable checks for the servers in HAProxy (that they haven't reached the limits for the sessions or for queues). 
+haproxy\_status\_noservers | **Optional.** Disable checks for the servers in HAProxy (that they haven't reached the limits for the sessions or for queues). +haproxy\_status\_overrides | **Optional.** Override the defaults for a particular frontend or backend, in the form {name}:{override}, where {override} is the same format as --defaults above. +haproxy\_status\_socket | **Required.** Path to the socket check_haproxy should connect to + +#### phpfpm_status + +The [check_phpfpm_status](https://github.com/regilero/check_phpfpm_status) plugin, +uses the `php-fpm` status page to monitor php-fpm. + +Custom variables passed as [command parameters](03-monitoring-basics.md#command-passing-parameters): + +Name | Description +--------------------------|---------------------------------------------------------------------------------- +phpfpm\_status\_hostname | **Required.** name or IP address of host to check +phpfpm\_status\_port | **Optional.** Http port, or Fastcgi port when using --fastcgi +phpfpm\_status\_url | **Optional.** Specific URL (only the path part of it in fact) to use, instead of the default /fpm-status +phpfpm\_status\_servername| **Optional.** ServerName, (host header of HTTP request) use it if you specified an IP in -H to match the good Virtualhost in your target +phpfpm\_status\_fastcgi | **Optional.** If set, connect directly to php-fpm via network or local socket, using fastcgi protocol instead of HTTP. +phpfpm\_status\_user | **Optional.** Username for basic auth +phpfpm\_status\_pass | **Optional.** Password for basic auth +phpfpm\_status\_realm | **Optional.** Realm for basic auth +phpfpm\_status\_debug | **Optional.** If set, debug mode (show http request response) +phpfpm\_status\_timeout | **Optional.** timeout in seconds (Default: 15) +phpfpm\_status\_ssl | **Optional.** Whether we should use HTTPS instead of HTTP. Note that you can give some extra parameters to this setting.
Default value is 'TLSv1' but you could use things like 'TLSv1_1' or 'TLSv1_2' (or even 'SSLv23:!SSLv2:!SSLv3' for old stuff). +phpfpm\_status\_verifyssl | **Optional.** If set, verify certificate and hostname from ssl cert, default is 0 (no security), set it to 1 to really make SSL peer name and certificate checks. +phpfpm\_status\_cacert    | **Optional.** Full path to the cacert.pem certificate authority used to verify ssl certificates (use with --verifyssl). If not given the cacert from Mozilla::CA cpan plugin will be used. +phpfpm\_status\_warn      | **Optional.** MIN_AVAILABLE_PROCESSES,PROC_MAX_REACHED,QUEUE_MAX_REACHED number of available workers, or max states reached that will cause a warning. -1 for no warning +phpfpm\_status\_critical  | **Optional.** MIN_AVAILABLE_PROCESSES,PROC_MAX_REACHED,QUEUE_MAX_REACHED number of available workers, or max states reached that will cause an error, -1 for no CRITICAL diff --git a/doc/11-cli-commands.md b/doc/11-cli-commands.md new file mode 100644 index 0000000..2324ab7 --- /dev/null +++ b/doc/11-cli-commands.md @@ -0,0 +1,734 @@ +# Icinga 2 CLI Commands + +Icinga 2 comes with a number of CLI commands which support bash autocompletion. + +These CLI commands will allow you to use certain functionality +provided by and around Icinga 2. + +Each CLI command provides its own help and usage information, so please +make sure to always run them with the `--help` parameter. + +Run `icinga2` without any arguments to get a list of all available global +options. 
+ +``` +# icinga2 +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 [] + +Supported commands: + * api setup (setup for API) + * ca list (lists all certificate signing requests) + * ca restore (restores a removed certificate request) + * ca remove (removes an outstanding certificate request) + * ca sign (signs an outstanding certificate request) + * console (Icinga debug console) + * daemon (starts Icinga 2) + * feature disable (disables specified feature) + * feature enable (enables specified feature) + * feature list (lists all available features) + * node setup (set up node) + * node wizard (wizard for node setup) + * object list (lists all objects) + * pki new-ca (sets up a new CA) + * pki new-cert (creates a new CSR) + * pki request (requests a certificate) + * pki save-cert (saves another Icinga 2 instance's certificate) + * pki sign-csr (signs a CSR) + * pki ticket (generates a ticket) + * pki verify (verify TLS certificates: CN, signed by CA, is CA; Print certificate) + * variable get (gets a variable) + * variable list (lists all variables) + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Report bugs at +Icinga home page: +``` + + +## Icinga 2 CLI Bash Autocompletion + +Bash Auto-Completion (pressing ``) is provided only for the corresponding context. + +While `--config` suggests and auto-completes files and directories on disk, +`feature enable` only suggests disabled features. 
+ +RPM and Debian packages install the bash completion files into +`/etc/bash_completion.d/icinga2`. + +You need to install the `bash-completion` package if not already installed. + +RHEL/CentOS/Fedora: + +```bash +yum install bash-completion +``` + +SUSE: + +```bash +zypper install bash-completion +``` + +Debian/Ubuntu: + +```bash +apt-get install bash-completion +``` + +Ensure that the `bash_completion.d` directory is added to your shell +environment. You can manually source the icinga2 bash-completion file +into your current session and test it: + +```bash +source /etc/bash_completion.d/icinga2 +``` + + +## Icinga 2 CLI Global Options + +### Application Type + +By default the `icinga2` binary loads the `icinga` library. A different application type +can be specified with the `--app` command-line option. +Note: This is not needed by the average Icinga user, only developers. + +### Libraries + +Instead of loading libraries using the [`library` config directive](17-language-reference.md#library) +you can also use the `--library` command-line option. +Note: This is not needed by the average Icinga user, only developers. + +### Constants + +[Global constants](17-language-reference.md#constants) can be set using the `--define` command-line option. + +### Config Include Path + +When including files you can specify that the include search path should be +checked. You can do this by putting your configuration file name in angle +brackets like this: + +``` +include <test.conf> +``` + +This causes Icinga 2 to search its include path for the configuration file +`test.conf`. By default the installation path for the [Icinga Template Library](10-icinga-template-library.md#icinga-template-library) +is the only search directory. + +Using the `--include` command-line option additional search directories can be +added. + +## CLI command: Api + +Provides helper functions to enable and set up the +[Icinga 2 API](12-icinga2-api.md#icinga2-api-setup). 
+ +### CLI command: Api Setup + +``` +# icinga2 api setup --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 api setup [] + +Setup for Icinga 2 API. + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Command options: + --cn arg The certificate's common name + +Report bugs at +Get support: +Documentation: +Icinga home page: +``` + +## CLI command: Ca + +List and manage incoming certificate signing requests. More details +can be found in the [signing methods](06-distributed-monitoring.md#distributed-monitoring-setup-sign-certificates-master) +chapter. This CLI command is available since v2.8. + +``` +# icinga2 ca --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 [] + +Supported commands: + * ca list (lists all certificate signing requests) + * ca sign (signs an outstanding certificate request) + * ca restore (restores a removed certificate request) + * ca remove (removes an outstanding certificate request) + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. 
+ The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Report bugs at +Icinga home page: +``` + + +### CLI command: Ca List + +``` +icinga2 ca list --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 ca list [] + +Lists pending certificate signing requests. + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Command options: + --all List all certificate signing requests, including + signed. Note: Old requests are automatically + cleaned by Icinga after 1 week. + --removed List all removed CSRs (for use with 'ca restore') + --json encode output as JSON + +Report bugs at +Get support: +Documentation: +Icinga home page: +``` + +## CLI command: Console + +The CLI command `console` can be used to debug and evaluate Icinga 2 config expressions, +e.g. to test [functions](17-language-reference.md#functions) in your local sandbox. + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => function test(name) { +<1> .. log("Hello " + name) +<1> .. } +null +<2> => test("World") +information/config: Hello World +null +<3> => +``` + +Further usage examples can be found in the [library reference](18-library-reference.md#library-reference) chapter. + +``` +# icinga2 console --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 console [] + +Interprets Icinga script expressions. 
+ +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Command options: + -c [ --connect ] arg connect to an Icinga 2 instance + -e [ --eval ] arg evaluate expression and terminate + -r [ --file ] arg evaluate a file and terminate + --syntax-only only validate syntax (requires --eval or --file) + --sandbox enable sandbox mode + +Report bugs at +Icinga home page: +``` + + +On operating systems without the `libedit` library installed there is no +support for line-editing or a command history. However you can +use the `rlwrap` program if you require those features: + +```bash +rlwrap icinga2 console +``` + +The debug console can be used to connect to a running Icinga 2 instance using +the [REST API](12-icinga2-api.md#icinga2-api). [API permissions](12-icinga2-api.md#icinga2-api-permissions) +are required for executing config expressions and auto-completion. + +> **Note** +> +> The debug console does not currently support TLS certificate verification. +> +> Runtime modifications are not validated and might cause the Icinga 2 +> daemon to crash or behave in an unexpected way. Use these runtime changes +> at your own risk and rather *inspect and debug objects read-only*. + +You can specify the API URL using the `--connect` parameter. + +Although the password can be specified there process arguments on UNIX platforms are +usually visible to other users (e.g. through `ps`). 
In order to securely specify the +user credentials the debug console supports two environment variables: + + Environment variable | Description + ---------------------|------------- + ICINGA2_API_USERNAME | The API username. + ICINGA2_API_PASSWORD | The API password. + +Here's an example: + +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' +Icinga 2 (version: v2.11.0) +<1> => +``` + +Once connected you can inspect variables and execute other expressions by entering them at the prompt: + +``` +<1> => var h = get_host("icinga2-agent1.localdomain") +null +<2> => h.last_check_result +{ + active = true + check_source = "icinga2-agent1.localdomain" + command = [ "/usr/local/sbin/check_ping", "-H", "127.0.0.1", "-c", "5000,100%", "-w", "3000,80%" ] + execution_end = 1446653527.174983 + execution_start = 1446653523.152673 + exit_status = 0.000000 + output = "PING OK - Packet loss = 0%, RTA = 0.11 ms" + performance_data = [ "rta=0.114000ms;3000.000000;5000.000000;0.000000", "pl=0%;80;100;0" ] + schedule_end = 1446653527.175133 + schedule_start = 1446653583.150000 + state = 0.000000 + type = "CheckResult" + vars_after = { + attempt = 1.000000 + reachable = true + state = 0.000000 + state_type = 1.000000 + } + vars_before = { + attempt = 1.000000 + reachable = true + state = 0.000000 + state_type = 1.000000 + } +} +<3> => +``` + +You can use the `--eval` parameter to evaluate a single expression in batch mode. +Using the `--file` option you can specify a file which should be evaluated. +The output format for batch mode is JSON. + +The `--syntax-only` option can be used in combination with `--eval` or `--file` +to check a script for syntax errors. In this mode the script is parsed to identify +syntax errors but not evaluated. 
+ +Here's an example that retrieves the command that was used by Icinga to check the `icinga2-agent1.localdomain` host: + +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' --eval 'get_host("icinga2-agent1.localdomain").last_check_result.command' | python -m json.tool +[ + "/usr/local/sbin/check_ping", + "-H", + "127.0.0.1", + "-c", + "5000,100%", + "-w", + "3000,80%" +] +``` + +## CLI command: Daemon + +The CLI command `daemon` provides the functionality to start/stop Icinga 2. +Furthermore it allows to run the [configuration validation](11-cli-commands.md#config-validation). + +``` +# icinga2 daemon --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 daemon [] + +Starts Icinga 2. + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Command options: + -c [ --config ] arg parse a configuration file + -z [ --no-config ] start without a configuration file + -C [ --validate ] exit after validating the configuration + -e [ --errorlog ] arg log fatal errors to the specified log file (only + works in combination with --daemonize or + --close-stdio) + -d [ --daemonize ] detach from the controlling terminal + --close-stdio do not log to stdout (or stderr) after startup + +Report bugs at +Icinga home page: +``` + +### Config Files + +You can specify one or more configuration files with the `--config` option. 
+Configuration files are processed in the order they're specified on the command-line. + +When no configuration file is specified and the `--no-config` is not used +Icinga 2 automatically falls back to using the configuration file +`ConfigDir + "/icinga2.conf"` (where ConfigDir is usually `/etc/icinga2`). + +### Validation + +The `--validate` option can be used to check if configuration files +contain errors. If any errors are found, the exit status is 1, otherwise 0 +is returned. More details in the [configuration validation](11-cli-commands.md#config-validation) chapter. + +## CLI command: Feature + +The `feature enable` and `feature disable` commands can be used to enable and disable features: + +``` +# icinga2 feature disable +--app --define --include --log-level --version checker graphite mainlog +--color --help --library --script-debugger api command ido-mysql notification +``` + +``` +# icinga2 feature enable +--app --define --include --log-level --version debuglog ido-pgsql livestatus perfdata syslog +--color --help --library --script-debugger compatlog gelf influxdb opentsdb statusdata +``` + +The `feature list` command shows which features are currently enabled: + +``` +# icinga2 feature list +Disabled features: compatlog debuglog gelf ido-pgsql influxdb livestatus opentsdb perfdata statusdata syslog +Enabled features: api checker command graphite ido-mysql mainlog notification +``` + +## CLI command: Node + +Provides the functionality to setup master and client +nodes in a [distributed monitoring](06-distributed-monitoring.md#distributed-monitoring) scenario. 
+ +``` +# icinga2 node --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 [] + +Supported commands: + * node setup (set up node) + * node wizard (wizard for node setup) + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Report bugs at +Icinga home page: +``` + +## CLI command: Object + +The `object` CLI command can be used to list all configuration objects and their +attributes. The command also shows where each of the attributes was modified and as such +provides debug information for further configuration problem analysis. +That way you can also identify which objects have been created from your [apply rules](17-language-reference.md#apply). + +Runtime modifications via the [REST API](12-icinga2-api.md#icinga2-api-config-objects) +are not immediately updated. Furthermore there is a known issue with +[group assign expressions](17-language-reference.md#group-assign) which are not reflected in the host object output. +You need to restart Icinga 2 in order to update the `icinga2.debug` cache file. + +More information can be found in the [troubleshooting](15-troubleshooting.md#troubleshooting-list-configuration-objects) section. 
+ +``` +# icinga2 object --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 [] + +Supported commands: + * object list (lists all objects) + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Report bugs at +Icinga home page: +``` + +## CLI command: Pki + +Provides the CLI commands to + +* generate a new certificate authority (CA) +* generate a new CSR or self-signed certificate +* sign a CSR and return a certificate +* save a master certificate manually +* request a signed certificate from the master +* generate a new ticket for the client setup + +This functionality is used by the [node setup/wizard](11-cli-commands.md#cli-command-node) CLI commands. +You will need them in the [distributed monitoring chapter](06-distributed-monitoring.md#distributed-monitoring). 
+ +``` +# icinga2 pki --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.12.0) + +Usage: + icinga2 [] + +Supported commands: + * pki new-ca (sets up a new CA) + * pki new-cert (creates a new CSR) + * pki request (requests a certificate) + * pki save-cert (saves another Icinga 2 instance's certificate) + * pki sign-csr (signs a CSR) + * pki ticket (generates a ticket) + * pki verify (verify TLS certificates: CN, signed by CA, is CA; Print certificate) + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. + The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Report bugs at +Icinga home page: +``` + +## CLI command: Variable + +Lists all configured variables (constants) in a similar fashion like [object list](11-cli-commands.md#cli-command-object). + +``` +# icinga2 variable --help +icinga2 - The Icinga 2 network monitoring daemon (version: v2.11.0) + +Usage: + icinga2 [] + +Supported commands: + * variable get (gets a variable) + * variable list (lists all variables) + +Global options: + -h [ --help ] show this help message + -V [ --version ] show version information + --color use VT100 color codes even when stdout is not a + terminal + -D [ --define ] arg define a constant + -a [ --app ] arg application library name (default: icinga) + -l [ --library ] arg load a library + -I [ --include ] arg add include search directory + -x [ --log-level ] arg specify the log level for the console log. 
+ The valid value is either debug, notice, + information (default), warning, or critical + -X [ --script-debugger ] whether to enable the script debugger + +Report bugs at +Icinga home page: +``` + +## Enabling/Disabling Features + +Icinga 2 provides configuration files for some commonly used features. These +are installed in the `/etc/icinga2/features-available` directory and can be +enabled and disabled using the `icinga2 feature enable` and `icinga2 feature disable` +[CLI commands](11-cli-commands.md#cli-command-feature), respectively. + +The `icinga2 feature enable` CLI command creates symlinks in the +`/etc/icinga2/features-enabled` directory which is included by default +in the example configuration file. + +You can view a list of enabled and disabled features: + +``` +# icinga2 feature list +Disabled features: api command compatlog debuglog graphite icingastatus ido-mysql ido-pgsql livestatus notification perfdata statusdata syslog +Enabled features: checker mainlog notification +``` + +Using the `icinga2 feature enable` command you can enable features: + +``` +# icinga2 feature enable graphite +Enabling feature graphite. Make sure to restart Icinga 2 for these changes to take effect. +``` + +You can disable features using the `icinga2 feature disable` command: + +``` +# icinga2 feature disable ido-mysql livestatus +Disabling feature ido-mysql. Make sure to restart Icinga 2 for these changes to take effect. +Disabling feature livestatus. Make sure to restart Icinga 2 for these changes to take effect. +``` + +The `icinga2 feature enable` and `icinga2 feature disable` commands do not +restart Icinga 2. You will need to restart Icinga 2 using the init script +after enabling or disabling features. + + + +## Configuration Validation + +Once you've edited the configuration files make sure to tell Icinga 2 to validate +the configuration changes. 
Icinga 2 will log any configuration error including +a hint on the file, the line number and the affected configuration line itself. + +The following example creates an apply rule without any `assign` condition. + +``` +apply Service "my-ping4" { + import "generic-service" + check_command = "ping4" + //assign where host.address +} +``` + +Validate the configuration: + +``` +# icinga2 daemon -C + +[2014-05-22 17:07:25 +0200] critical/ConfigItem: Location: +/etc/icinga2/conf.d/tests/my.conf(5): } +/etc/icinga2/conf.d/tests/my.conf(6): +/etc/icinga2/conf.d/tests/my.conf(7): apply Service "my-ping4" { + ^^^^^^^^^^^^^ +/etc/icinga2/conf.d/tests/my.conf(8): import "test-generic-service" +/etc/icinga2/conf.d/tests/my.conf(9): check_command = "ping4" + +Config error: 'apply' is missing 'assign' +[2014-05-22 17:07:25 +0200] critical/ConfigItem: 1 errors, 0 warnings. +Icinga 2 detected configuration errors. +``` + +If you encounter errors during configuration validation, please make sure +to read the [troubleshooting](15-troubleshooting.md#troubleshooting) chapter. + +You can also use the [CLI command](11-cli-commands.md#cli-command-object) `icinga2 object list` +after validation passes to analyze object attributes, inheritance or created +objects by apply rules. +Find more on troubleshooting with `object list` in [this chapter](15-troubleshooting.md#troubleshooting-list-configuration-objects). + + +## Reload on Configuration Changes + +Every time you have changed your configuration you should first tell Icinga 2 +to [validate](11-cli-commands.md#config-validation). If there are no validation errors, you can +safely reload the Icinga 2 daemon. + +```bash +systemctl reload icinga2 +``` + +The `reload` action will send the `SIGHUP` signal to the Icinga 2 daemon +which will validate the configuration in a separate process and not stop +the other events like check execution, notifications, etc. 
diff --git a/doc/12-icinga2-api.md b/doc/12-icinga2-api.md new file mode 100644 index 0000000..409e27b --- /dev/null +++ b/doc/12-icinga2-api.md @@ -0,0 +1,2996 @@ +# REST API + +* [Setup](12-icinga2-api.md#icinga2-api-setup) +* [Introduction](12-icinga2-api.md#icinga2-api-introduction) +* Endpoints + * [Config Objects](12-icinga2-api.md#icinga2-api-config-objects) + * [Actions](12-icinga2-api.md#icinga2-api-actions) + * [Event Streams](12-icinga2-api.md#icinga2-api-event-streams) + * [Status and Statistics](12-icinga2-api.md#icinga2-api-status) + * [Config Management](12-icinga2-api.md#icinga2-api-config-management) + * [Types](12-icinga2-api.md#icinga2-api-types) + * [Templates](12-icinga2-api.md#icinga2-api-config-templates) + * [Variables](12-icinga2-api.md#icinga2-api-variables) + * [Debug Console](12-icinga2-api.md#icinga2-api-console) +* [API Clients](12-icinga2-api.md#icinga2-api-clients) + * [Programmatic Examples](12-icinga2-api.md#icinga2-api-clients-programmatic-examples) + + +## Setting up the API + +You can run the CLI command `icinga2 api setup` to enable the +`api` [feature](11-cli-commands.md#enable-features) and set up +certificates as well as a new API user `root` with an auto-generated password in the +`/etc/icinga2/conf.d/api-users.conf` configuration file: + +```bash +icinga2 api setup +``` + +Make sure to restart Icinga 2 to enable the changes you just made: + +```bash +systemctl restart icinga2 +``` + +If you prefer to set up the API manually, you will have to perform the following steps: + +* Set up X.509 TLS certificates for Icinga 2 +* Enable the `api` feature (`icinga2 feature enable api`) +* Create an `ApiUser` object for authentication + +The next chapter provides a quick overview of how you can use the API. + +## Introduction + +The Icinga 2 API allows you to manage configuration objects +and resources in a simple, programmatic way using HTTP requests. 
+ +The URL endpoints are logically separated allowing you to easily +make calls to + +* query, create, modify and delete [config objects](12-icinga2-api.md#icinga2-api-config-objects) +* perform [actions](12-icinga2-api.md#icinga2-api-actions) (reschedule checks, etc.) +* subscribe to [event streams](12-icinga2-api.md#icinga2-api-event-streams) +* [manage configuration packages](12-icinga2-api.md#icinga2-api-config-management) +* evaluate [script expressions](12-icinga2-api.md#icinga2-api-console) + +### Requests + +Any tool capable of making HTTP requests can communicate with +the API, for example [curl](https://curl.haxx.se/). + +Requests are only allowed to use the HTTPS protocol so that +traffic remains encrypted. + +By default the Icinga 2 API listens on port `5665` which is shared with +the cluster stack. The port can be changed by setting the `bind_port` attribute +for the [ApiListener](09-object-types.md#objecttype-apilistener) +object in the `/etc/icinga2/features-available/api.conf` +configuration file. + +Supported request methods: + + Method | Usage + -------|-------- + GET | Retrieve information about configuration objects. Any request using the GET method is read-only and does not affect any objects. + POST | Update attributes of a specified configuration object. + PUT | Create a new object. The PUT request must include all attributes required to create a new object. + DELETE | Remove an object created by the API. The DELETE method is idempotent and does not require any check if the object actually exists. + +All requests except `GET` require the following `Accept` header: + +``` +Accept: application/json +``` + +Each URL is prefixed with the API version (currently "/v1"). + +HTTP header size is limited to 8KB per request. + +### Responses + +Successful requests will send back a response body containing a `results` +list. Depending on the number of affected objects in your request, the +`results` list may contain more than one entry. 
+ +The output will be sent back as a JSON object: + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Object was created." + } + ] +} +``` + +> **Tip** +> +> You can use the [pretty](12-icinga2-api.md#icinga2-api-parameters-global) parameter to beautify the JSON response. + +You can also use [jq](https://stedolan.github.io/jq/) or `python -m json.tool` +in combination with curl on the CLI. + +```bash +curl ... | jq +curl ... | python -m json.tool +``` + +jq also has additional filter capabilities, as shown in [this blogpost](https://www.netways.de/blog/2018/08/24/json-in-bequem/). + +```bash +curl ... |jq '{name: .results[].name}' +``` + +For programmatic examples in various languages, check the chapter +[below](12-icinga2-api.md#icinga2-api-clients). + +> **Note** +> +> Future versions of Icinga 2 might set additional fields. Your application +> should gracefully handle fields it is not familiar with, for example by +> ignoring them. + +### HTTP Statuses + +The API will return standard [HTTP statuses](https://www.ietf.org/rfc/rfc2616.txt) +including error codes. + +When an error occurs, the response body will contain additional information +about the problem and its source. Set `verbose` to true to retrieve more +insights into what may be causing the error. + +A status code between 200 and 299 generally means that the request was +successful. + +Return codes within the 400 range indicate that there was a problem with the +request. Either you did not authenticate correctly, you are missing the authorization +for your requested action, the requested object does not exist or the request +was malformed. + +A status in the range of 500 generally means that there was a server-side problem +and Icinga 2 is unable to process your request. + +### Security + +* HTTPS only. +* TLS v1.2+ is required. +* TLS cipher lists are hardened [by default](09-object-types.md#objecttype-apilistener). 
+* Authentication is [required](12-icinga2-api.md#icinga2-api-authentication). + +### Authentication + +There are two different ways for authenticating against the Icinga 2 API: + +* Username and password using HTTP basic auth +* X.509 client certificate + +In order to configure a new API user you'll need to add a new [ApiUser](09-object-types.md#objecttype-apiuser) +configuration object. In this example `root` will be the basic auth username +and the `password` attribute contains the basic auth password. + +``` +# vim /etc/icinga2/conf.d/api-users.conf + +object ApiUser "root" { + password = "icinga" +} +``` + +Alternatively you can use X.509 client certificates by specifying the `client_cn` +the API should trust. The X.509 certificate has to be signed by the CA certificate +that is configured in the [ApiListener](09-object-types.md#objecttype-apilistener) object. + +``` +# vim /etc/icinga2/conf.d/api-users.conf + +object ApiUser "root" { + client_cn = "CertificateCommonName" +} +``` + +An `ApiUser` object can have both authentication methods configured. + +#### Authentication Test + +You can test authentication by sending a GET request to the API: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1' +``` + +In case you get an error message make sure to check the API user credentials. + +When using client certificates for authentication you'll need to pass your client certificate +and private key to the curl call: + +```bash +curl -k --cert example.localdomain.crt --key example.localdomain.key 'https://example.localdomain:5665/v1/status' +``` + +In case of an error make sure to verify the client certificate and CA. + +The curl parameter `-k` disables certificate verification and should therefore +only be used for testing. 
In order to securely check each connection you'll need to +specify the trusted CA certificate using the curl parameter `--cacert`: + +```bash +curl -u root:icinga --cacert ca.crt 'https://icinga2.node1.localdomain:5665/v1' +``` + +Read the next chapter on [API permissions](12-icinga2-api.md#icinga2-api-permissions) +in order to configure authorization settings for your newly created API user. + +### Permissions + +By default an API user does not have any permissions to perform +actions on the URL endpoints. + +Permissions for API users must be specified in the `permissions` attribute +as array. The array items can be a list of permission strings with wildcard +matches. Please note that the permission system that is used by the API differs from the permission system used by the Icinga Web 2 frontend or other parts of Icinga 2. + +The permission system mainly relies on the URL scheme of the API endpoints (see listing below). + +Example for an API user with all permissions: + +``` +permissions = [ "*" ] +``` + +Note that you can use wildcards to include all possible hierarchically lower items. Here's another example that only allows the user +to perform read-only object queries for hosts and services: + +``` +permissions = [ "objects/query/Host", "objects/query/Service" ] +``` + +You can also further restrict permissions by specifying a filter expression. The +filter expression has to be a [lambda function](17-language-reference.md#nullary-lambdas) +which must return a boolean value. + +The following example allows the API user to query all hosts and services which have a +custom variable `os` that matches the regular expression `^Linux`. +The [regex function](18-library-reference.md#global-functions-regex) is available as global function.
+ +``` +permissions = [ + { + permission = "objects/query/Host" + filter = {{ regex("^Linux", host.vars.os) }} + }, + { + permission = "objects/query/Service" + filter = {{ regex("^Linux", service.vars.os) }} + } +] +``` + +More information about filters can be found in the [filters](12-icinga2-api.md#icinga2-api-filters) chapter. + +Prior to setting complex permissions, ensure to always [test](12-icinga2-api.md#icinga2-api-authentication-test) +them step by step. + + +#### Overview + +Permissions are tied to a maximum HTTP request size to prevent abuse, responses sent by Icinga are not limited. +An API user with all permissions ("\*") may send up to 512 MB regardless of the endpoint. + +Available permissions for specific URL endpoints: + + Permissions | URL Endpoint | Supports filters | Max body size in MB + ------------------------------|---------------|-------------------|--------------------- + actions/<action> | /v1/actions | Yes | 1 + config/query | /v1/config | No | 1 + config/modify | /v1/config | No | 512 + console | /v1/console | No | 1 + events/<type> | /v1/events | No | 1 + objects/query/<type> | /v1/objects | Yes | 1 + objects/create/<type> | /v1/objects | No | 1 + objects/modify/<type> | /v1/objects | Yes | 1 + objects/delete/<type> | /v1/objects | Yes | 1 + status/query | /v1/status | Yes | 1 + templates/<type> | /v1/templates | Yes | 1 + types | /v1/types | Yes | 1 + variables | /v1/variables | Yes | 1 + +The required actions or types can be replaced by using a wildcard match ("\*"). + + +### Parameters + +Depending on the request method there are two ways of passing parameters to the request: + +* JSON object as request body (all request methods other than `GET`) +* Query string as URL parameter (all request methods) + +Reserved characters by the HTTP protocol must be [URL-encoded](https://en.wikipedia.org/wiki/Percent-encoding) +as query string, e.g. a space character becomes `%20`. 
+ +Example for a URL-encoded query string: + +``` +/v1/objects/hosts?filter=match(%22example.localdomain*%22,host.name)&attrs=name&attrs=state +``` + +Here are the exact same query parameters as a JSON object: + +```json +{ "filter": "match(\"example.localdomain*\",host.name)", "attrs": [ "host.name", "host.state" ] } +``` + +The [match function](18-library-reference.md#global-functions-match) is available as global function +in Icinga 2. + +Whenever filters and other URL parameters don't work due to encoding issues, +consider passing them in the request body. For GET requests, this method is explained +[here](12-icinga2-api.md#icinga2-api-requests-method-override). + +You can use [jo](https://github.com/jpmens/jo) to format JSON strings on the shell. An example +for API actions shown [here](12-icinga2-api.md#icinga2-api-actions-unix-timestamps). + + +### Global Parameters + +Name | Description +----------------|-------------------- +pretty | Pretty-print the JSON response. +verbose | Add verbose debug information inside the `diagnostic_information` key into the response if available. This helps with troubleshooting failing requests. + +Example as URL parameter: + +``` +/v1/objects/hosts?pretty=1 +``` + +Example as JSON object: + +```json +{ "pretty": true } +``` + +### Request Method Override + +`GET` requests do not allow you to send a request body. In case you cannot pass everything as URL +parameters (e.g. complex filters or JSON-encoded dictionaries) you can use the `X-HTTP-Method-Override` +header. This comes in handy when you are using HTTP proxies disallowing `PUT` or `DELETE` requests too. 
+ +Query an existing object by sending a `POST` request with `X-HTTP-Method-Override: GET` as request header: + +```bash +curl -k -s -S -i -u 'root:icinga' -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://localhost:5665/v1/objects/hosts' +``` + +Delete an existing object by sending a `POST` request with `X-HTTP-Method-Override: DELETE` as request header: + +```bash +curl -k -s -S -i -u 'root:icinga' -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: DELETE' -X POST \ + 'https://localhost:5665/v1/objects/hosts/example.localdomain' +``` + +Query objects with complex filters. For a detailed introduction into filter, please +read the [following chapter](12-icinga2-api.md#icinga2-api-filters). + +```bash +curl -k -s -S -i -u 'root:icinga' -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://localhost:5665/v1/objects/services' \ + -d '{ "filter": "service.state==2 && match(\"ping*\",service.name)" }' +``` + +### Filters + +#### Simple Filters + +By default actions and queries operate on all objects unless further restricted by the user. For +example, the following query returns all `Host` objects: + +``` +https://localhost:5665/v1/objects/hosts +``` + +If you're only interested in a single object, you can limit the output to that object by specifying its name: + +``` +https://localhost:5665/v1/objects/hosts?host=localhost +``` + +**The name of the URL parameter is the lower-case version of the type the query applies to.** For +example, for `Host` objects the URL parameter therefore is `host`, for `Service` objects it is +`service` and so on. + +You can also specify multiple objects: + +``` +https://localhost:5665/v1/objects/hosts?hosts=first-host&hosts=second-host +``` + +Again -- like in the previous example -- the name of the URL parameter is the lower-case version of the type. However, because we're specifying multiple objects here the **plural form** of the type is used. 
+ +When specifying names for objects which have composite names like for example services the +full name has to be used: + +``` +https://localhost:5665/v1/objects/services?service=localhost!ping6 +``` + +The full name of an object can be obtained by looking at the `__name` attribute. + +#### Advanced Filters + +Most of the information provided in this chapter applies to both permission filters (as used when +configuring `ApiUser` objects) and filters specified in queries. + +Advanced filters allow users to filter objects using lambda expressions. +The syntax for these filters is the same like for [apply rule expressions](03-monitoring-basics.md#using-apply-expressions). + +The `filter` parameter can only be specified once, complex filters must +be defined once in the provided string value. + +> **Note** +> +> Filters used as URL parameter must be URL-encoded. The following examples +> are **not URL-encoded** for better readability. + +Example matching all services in NOT-OK state: + +``` +https://localhost:5665/v1/objects/services?filter=service.state!=ServiceOK +``` + +Example [matching](18-library-reference.md#global-functions-match) all hosts by a name string pattern: + +``` +https://localhost:5665/v1/objects/hosts?filter=match("example.localdomain*",host.name) +``` + +Example for all hosts which are in the host group `linux-servers`: +``` +https://localhost:5665/v1/objects/hosts?filter="linux-servers" in host.groups +``` + +> **Tip** +> +> Best practice for filters is to use [X-HTTP-Method-Override](12-icinga2-api.md#icinga2-api-requests-method-override) +> for GET requests and always pass them in the request body. + +User-specified filters are run in a sandbox environment which ensures that filters cannot +modify Icinga's state, for example object attributes or global variables. + +When querying objects of a specific type the filter expression is evaluated for each object +of that type. 
The object is made available to the filter expression as a variable whose name +is the lower-case version of the object's type name. + +For example when querying objects of type `Host` the variable in the filter expression is named +`host`. Additionally related objects such as the host's check command are also made available +(e.g., via the `check_command` variable). The variable names are the exact same as for the `joins` +query parameter; see [object query joins](12-icinga2-api.md#icinga2-api-config-objects-query-joins) +for details. + +The object is also made available via the `obj` variable. This makes it easier to build +filters which can be used for more than one object type (e.g., for permissions). + +Some queries can be performed for more than just one object type. One example is the 'reschedule-check' +action which can be used for both hosts and services. When using advanced filters you will also have to specify the +type using the `type` parameter: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' -X POST \ + 'https://localhost:5665/v1/actions/reschedule-check' \ + -d '{ "type": "Service", "filter": "service.name==\"ping6\"", "pretty": true }' +``` + +##### Filter Variables + +Filter values need to be escaped in the same way as in the Icinga 2 DSL. + +The example below is not valid: + +``` +-d '{ "type": "Host", "filter": ""linux-servers" in host.groups" }' +``` + +The double quotes need to be escaped with a preceding backslash: + +``` +-d '{ "type": "Host", "filter": "\"linux-servers\" in host.groups" }' +``` + +You can use the `filter_vars` attribute to avoid additional escaping. +This follows the same principle as with parameter binding known from RDBMS. +Specify a placeholder variable inside the `filter` string, and actually +assign its value inside the `filter_vars` dictionary. + +That way you can also keep the `filter` string the same for different +requests with only changing the `filter_vars`.
+ +```bash +curl -k -s -S -i -u 'root:icinga' -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://localhost:5665/v1/objects/hosts' \ + -d '{ "filter": "group in host.groups", "filter_vars": { "group": "linux-servers" }, "pretty": true }' +``` + +We're using [X-HTTP-Method-Override](12-icinga2-api.md#icinga2-api-requests-method-override) here because +the HTTP specification does not allow message bodies for GET requests. + +The `filter_vars` attribute can only be used inside the request body, but not as +a URL parameter because there is no way to specify a dictionary in a URL. + +The example from [X-HTTP-Method-Override](12-icinga2-api.md#icinga2-api-requests-method-override) +can be enhanced to avoid additional parameter value escaping. + +```bash +curl -k -s -S -i -u 'root:icinga' -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://localhost:5665/v1/objects/services' \ + -d '{ "filter": "service.state==state && match(pattern,service.name)", "filter_vars": { "state": 2, "pattern": "ping*" } }' +``` + +## Config Objects + +Provides methods to manage configuration objects: + +* [creating objects](12-icinga2-api.md#icinga2-api-config-objects-create) +* [querying objects](12-icinga2-api.md#icinga2-api-config-objects-query) +* [modifying objects](12-icinga2-api.md#icinga2-api-config-objects-modify) +* [deleting objects](12-icinga2-api.md#icinga2-api-config-objects-delete) + +### API Objects and Cluster Config Sync + +Newly created or updated objects can be synced throughout your +Icinga 2 cluster. Set the `zone` attribute to the zone this object +belongs to and let the API and cluster handle the rest. + +Objects without a zone attribute are only synced in the same zone the Icinga instance belongs to. + +> **Note** +> +> Cluster nodes must accept configuration for creating, modifying +> and deleting objects.
Ensure that `accept_config` is set to `true` +> in the [ApiListener](09-object-types.md#objecttype-apilistener) object +> on each node. + +If you add a new cluster instance, or reconnect an instance which has been offline +for a while, Icinga 2 takes care of the initial object sync for all objects +created by the API. + +### Querying Objects + +You can request information about configuration objects by sending +a `GET` query to the `/v1/objects/<type>` URL endpoint, where `<type>` is the plural name +of the object type you want to query (for example `hosts` or `services`). + +Each response entry in the results array contains the following attributes: + + Attribute | Type | Description + -----------|------------|-------------- + name | String | Full object name. + type | String | Object type. + attrs | Dictionary | Object attributes (can be filtered using the URL parameter `attrs`). + joins | Dictionary | [Joined object types](12-icinga2-api.md#icinga2-api-config-objects-query-joins) as key, attributes as nested dictionary. Disabled by default. + meta | Dictionary | Contains `used_by` object references. Disabled by default, enable it using `?meta=used_by` as URL parameter. + +#### Object Query Joins + +Icinga 2 knows about object relations. For example it can optionally return +information about the host when querying service objects. + +The following query retrieves all host attributes: + +``` +https://localhost:5665/v1/objects/services?joins=host +``` + +Instead of requesting all host attributes you can also limit the output to specific +attributes: + +``` +https://localhost:5665/v1/objects/services?joins=host.name&joins=host.address +``` + +You can request that all available joins are returned in the result set by using +the `all_joins` query parameter. + +``` +https://localhost:5665/v1/objects/services?all_joins=1 +``` + +> **Note** +> +> For performance reasons you should only request attributes which your application +> requires.
+ +Please note that the object type refers to the URL endpoint with `/v1/objects/` +where the following joins are available: + + Object Type | Object Relations (`joins` prefix name) + -------------|------------------------------------------ + Service | host, check\_command, check\_period, event\_command, command\_endpoint + Host | check\_command, check\_period, event\_command, command\_endpoint + Notification | host, service, command, period + Dependency | child\_host, child\_service, parent\_host, parent\_service, period + User | period + Zones | parent + +Here's an example that retrieves all service objects for hosts which have had their `os` +custom variable set to `Linux`. The result set contains the `display_name` and `check_command` +attributes for the service. The query also returns the host's `name` and `address` attribute +via a join: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/objects/services?attrs=display_name&attrs=check_command&joins=host.name&joins=host.address&filter=host.vars.os==%22Linux%22&pretty=1' +``` + +```json +{ + "results": [ + { + "attrs": { + "check_command": "ping4", + "display_name": "ping4" + }, + "joins": { + "host": { + "address": "192.168.1.1", + "name": "example.localdomain" + } + }, + "meta": {}, + "name": "example.localdomain!ping4", + "type": "Service" + }, + { + "attrs": { + "check_command": "ssh", + "display_name": "ssh" + }, + "joins": { + "host": { + "address": "192.168.1.1", + "name": "example.localdomain" + } + }, + "meta": {}, + "name": "example.localdomain!ssh", + "type": "Service" + } + ] +} +``` + +> **Tip** +> +> Use [X-HTTP-Method-Override](12-icinga2-api.md#icinga2-api-requests-method-override) +> and pass everything in the request body like this: + +```bash +curl -k -s -S -i -u 'root:icinga' -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://localhost:5665/v1/objects/services' \ + -d '{ "attrs": [ "display_name", "check_command" ], "joins": [ 
"host.name", "host.address" ], "filter": "host.vars.os==\"Linux\"", "pretty": true }' +``` + +In case you want to fetch all [comments](09-object-types.md#objecttype-comment) +for hosts and services, you can use the following query URL (similar example +for downtimes): + +``` +https://localhost:5665/v1/objects/comments?joins=host&joins=service +``` + +This is another example for listing all service objects which are unhandled problems (state is not OK +and no downtime or acknowledgement set). We're using [X-HTTP-Method-Override](12-icinga2-api.md#icinga2-api-requests-method-override) +here because we want to pass all query attributes in the request body. + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://127.0.0.1:5665/v1/objects/services' \ +-d '{ "joins": [ "host.name", "host.address" ], "attrs": [ "name", "state", "downtime_depth", "acknowledgement" ], "filter": "service.state != ServiceOK && service.downtime_depth == 0.0 && service.acknowledgement == 0.0", "pretty": true }' +``` + +```json +{ + "results": [ + { + "attrs": { + "acknowledgement": 0.0, + "downtime_depth": 0.0, + "name": "10807-service", + "state": 3.0 + }, + "joins": { + "host": { + "address": "", + "name": "10807-host" + } + }, + "meta": {}, + "name": "10807-host!10807-service", + "type": "Service" + } + ] +} +``` + +In order to list all acknowledgements without expire time, you query the `/v1/objects/comments` +URL endpoint with `joins` and `filter` request parameters using the [X-HTTP-Method-Override](12-icinga2-api.md#icinga2-api-requests-method-override) +method: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://localhost:5665/v1/objects/comments' \ + -d '{ "joins": [ "service.name", "service.acknowledgement", "service.acknowledgement_expiry" ], "attrs": [ "author", "text" ], "filter": "service.acknowledgement!=0 && 
service.acknowledgement_expiry==0", "pretty": true }' +``` + +```json +{ + "results": [ + { + "attrs": { + "author": "icingaadmin", + "text": "maintenance work" + }, + "joins": { + "service": { + "__name": "example.localdomain!disk /", + "acknowledgement": 1.0, + "acknowledgement_expiry": 0.0 + } + }, + "meta": {}, + "name": "example.localdomain!disk /!example.localdomain-1495457222-0", + "type": "Comment" + } + ] +} +``` + +### Creating Config Objects + +New objects must be created by sending a `PUT` request. The following +parameters need to be passed inside the JSON body: + + Parameters | Type | Description + ------------------|--------------|-------------------------- + templates | Array | **Optional.** Import existing configuration templates for this object type. Note: These templates must either be statically configured or provided in [config packages](12-icinga2-api.md#icinga2-api-config-management). + attrs | Dictionary | **Required.** Set specific object attributes for this [object type](09-object-types.md#object-types). + ignore\_on\_error | Boolean | **Optional.** Ignore object creation errors and return an HTTP 200 status instead. + +The object name must be specified as part of the URL path. For objects with composite names (e.g. services) +the full name (e.g. `example.localdomain!http`) must be specified. + +If attributes are of the Dictionary type, you can also use the indexer format. This might be necessary to only override specific custom variables and keep all other existing custom variables (e.g.
from templates): + +``` +"attrs": { "vars.os": "Linux" } +``` + +Example for creating the new host object `example.localdomain`: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X PUT 'https://localhost:5665/v1/objects/hosts/example.localdomain' \ + -d '{ "templates": [ "generic-host" ], "attrs": { "address": "192.168.1.1", "check_command": "hostalive", "vars.os" : "Linux" }, "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Object was created." + } + ] +} +``` + +If the configuration validation fails, the new object will not be created and the response body +contains a detailed error message. The following example is missing the `check_command` attribute +which is required for host objects: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X PUT 'https://localhost:5665/v1/objects/hosts/example.localdomain' \ + -d '{ "attrs": { "address": "192.168.1.1", "vars.os" : "Linux" }, "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 500.0, + "errors": [ + "Error: Validation failed for object 'example.localdomain' of type 'Host'; Attribute 'check_command': Attribute must not be empty." + ], + "status": "Object could not be created." 
+ } + ] +} +``` + +Service objects must be created using their full name ("hostname!servicename") referencing an existing host object: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X PUT 'https://localhost:5665/v1/objects/services/example.localdomain!realtime-load' \ + -d '{ "templates": [ "generic-service" ], "attrs": { "check_command": "load", "check_interval": 1,"retry_interval": 1 } }' +``` + +Example for a new CheckCommand object: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X PUT 'https://localhost:5665/v1/objects/checkcommands/mytest' \ + -d '{ "templates": [ "plugin-check-command" ], "attrs": { "command": [ "/usr/local/sbin/check_http" ], "arguments": { "-I": "$mytest_iparam$" } } }' +``` + +### Modifying Objects + +Existing objects must be modified by sending a `POST` request. The following +parameters need to be passed inside the JSON body: + + Parameters | Type | Description + -----------|------------|--------------------------- + attrs | Dictionary | **Required.** Set specific object attributes for this [object type](09-object-types.md#object-types). + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) +parameter should be provided. + +> **Note**: +> +> Modified attributes do not trigger a re-evaluation of existing +> static [apply rules](03-monitoring-basics.md#using-apply) and [group assignments](03-monitoring-basics.md#group-assign-intro). +> Delete and re-create the objects if you require such changes or +> consider funding [this feature request](https://github.com/Icinga/icinga2/issues/4084). +> +> Furthermore you cannot modify templates which have already been resolved +> during [object creation](12-icinga2-api.md#icinga2-api-config-objects-create). +> There are attributes which can only be set for [PUT requests](12-icinga2-api.md#icinga2-api-config-objects-create) such as `groups` +> or `zone`. 
A complete list of `no_user_modify` attributes can be fetched from the [types](12-icinga2-api.md#icinga2-api-types) URL endpoint. + +If attributes are of the [Dictionary](17-language-reference.md#dictionary) type, you can also use the indexer format: + +``` +"attrs": { "vars.os": "Linux" } +``` + +The following example updates the `address` attribute and the custom variable `os` for the `example.localdomain` host: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/objects/hosts/example.localdomain' \ + -d '{ "attrs": { "address": "192.168.1.2", "vars.os" : "Windows" }, "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "name": "example.localdomain", + "status": "Attributes updated.", + "type": "Host" + } + ] +} +``` + +### Deleting Objects + +You can delete objects created using the API by sending a `DELETE` +request. + + Parameters | Type | Description + -----------|---------|--------------- + cascade | Boolean | **Optional.** Delete objects depending on the deleted objects (e.g. services on a host). + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) should be provided. + +Example for deleting the host object `example.localdomain`: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X DELETE 'https://localhost:5665/v1/objects/hosts/example.localdomain?cascade=1&pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "name": "example.localdomain", + "status": "Object was deleted.", + "type": "Host" + } + ] +} +``` + +## Actions + +There are several actions available for Icinga 2 provided by the `/v1/actions` +URL endpoint. You can run actions by sending a `POST` request. + +The following actions are also used by [Icinga Web 2](https://icinga.com/products/icinga-web-2/): + +* sending check results to Icinga from scripts, remote agents, etc. 
+* scheduling downtimes from external scripts or cronjobs +* acknowledging problems +* adding comments + +All actions return a 200 `OK` or an appropriate error code for each +action performed on each object matching the supplied filter. + +Actions which affect the Icinga Application itself such as disabling +notification on a program-wide basis must be applied by updating the +[IcingaApplication object](12-icinga2-api.md#icinga2-api-config-objects) +called `app`. + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/objects/icingaapplications/app' \ + -d '{ "attrs": { "enable_notifications": false } }' +``` + +### Unix Timestamp Handling + +If you don't want to write JSON manually, especially for adding the `start_time` +and `end_time` parameters, you can use [jo](https://github.com/jpmens/jo) to format this. + +```bash +jo -p pretty=true type=Service filter="service.name==\"ping4\"" author=icingaadmin comment="IPv4 network maintenance" fixed=true start_time=$(date +%s -d "+0 hour") end_time=$(date +%s -d "+1 hour") +``` + +```json +{ + "pretty": true, + "type": "Service", + "filter": "service.name==\"ping4\"", + "author": "icingaadmin", + "comment": "IPv4 network maintenance", + "fixed": true, + "start_time": 1557414097, + "end_time": 1557417697 +} +``` + +Now wrap this into the actual curl command: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/schedule-downtime' \ + -d "$(jo -p pretty=true type=Service filter="service.name==\"ping4\"" author=icingaadmin comment="IPv4 network maintenance" fixed=true start_time=$(date +%s -d "+0 hour") end_time=$(date +%s -d "+1 hour"))" +``` + +Note: This requires GNU date. On macOS, install `coreutils` from Homebrew and use `gdate`. + +### process-check-result + +Process a check result for a host or a service. + +Send a `POST` request to the URL endpoint `/v1/actions/process-check-result`.
+ + Parameter | Type | Description + ------------------ | -------------- | -------------- + exit\_status | Number | **Required.** For services: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN, for hosts: 0=UP, 1=DOWN. + plugin\_output | String | **Required.** One or more lines of the plugin main output. Does **not** contain the performance data. + performance\_data | Array|String | **Optional.** The performance data as array of strings. The raw performance data string can be used too. + check\_command | Array|String | **Optional.** The first entry should be the check command's path, then one entry for each command line option followed by an entry for each of its arguments. Alternatively, a single string can be used. + check\_source | String | **Optional.** Usually the name of the `command_endpoint`. + execution\_start | Timestamp | **Optional.** The timestamp where a script/process started its execution. + execution\_end | Timestamp | **Optional.** The timestamp where a script/process ended its execution. This timestamp is used in features to determine e.g. the metric timestamp. + ttl | Number | **Optional.** Time-to-live duration in seconds for this check result. The next expected check result is `now + ttl` where freshness checks are executed. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`.
+ +Example for the service `passive-ping`: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/process-check-result' \ +-d '{ "type": "Service", "filter": "host.name==\"icinga2-master1.localdomain\" && service.name==\"passive-ping\"", "exit_status": 2, "plugin_output": "PING CRITICAL - Packet loss = 100%", "performance_data": [ "rta=5000.000000ms;3000.000000;5000.000000;0.000000", "pl=100%;80;100;0" ], "check_source": "example.localdomain", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully processed check result for object 'icinga2-master1.localdomain!passive-ping'." + } + ] +} +``` + +You can avoid URL encoding of white spaces in object names by using the `filter` attribute in the request body. + +Example for using the `Host` type and filter by the host name: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/process-check-result' \ + -d '{ "filter": "host.name==\"example.localdomain\"", "type": "Host", "exit_status": 1, "plugin_output": "Host is not available." }' +``` + + +> **Note** +> +> Multi-line plugin output requires the following format: The first line is treated as `short` plugin output corresponding +> to the first line of the plugin output. Subsequent lines are treated as `long` plugin output. Please note that the +> performance data is separated from the plugin output and has to be passed as `performance_data` attribute. + +### reschedule-check + +Reschedule a check for hosts and services. The check can be forced if required. + +Send a `POST` request to the URL endpoint `/v1/actions/reschedule-check`. + + Parameter | Type | Description + -------------|-----------|-------------- + next\_check | Timestamp | **Optional.** The next check will be run at this time. If omitted, the current time is used. + force | Boolean | **Optional.** Defaults to `false`. 
If enabled, the checks are executed regardless of time period restrictions and checks being disabled per object or on a global basis. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`. + +The example reschedules all services with the name "ping6" to immediately perform a check +(`next_check` default), ignoring any time periods or whether active checks are +allowed for the service (`force=true`). + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/reschedule-check' \ + -d '{ "type": "Service", "filter": "service.name==\"ping6\"", "force": true, "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully rescheduled check for object 'icinga2-master1.localdomain!ping6'." + } + ] +} +``` + +### send-custom-notification + +Send a custom notification for hosts and services. This notification +type can be forced being sent to all users. + +Send a `POST` request to the URL endpoint `/v1/actions/send-custom-notification`. + + Parameter | Type | Description + ----------|---------|-------------- + author | String | **Required.** Name of the author, may be empty. + comment | String | **Required.** Comment text, may be empty. + force | Boolean | **Optional.** Default: false. If true, the notification is sent regardless of downtimes or whether notifications are enabled or not. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`. 
+ +Example for a custom host notification announcing a global maintenance to +host owners: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/send-custom-notification' \ + -d '{ "type": "Host", "author": "icingaadmin", "comment": "System is going down for maintenance", "force": true, "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully sent custom notification for object 'host0'." + }, + { + "code": 200.0, + "status": "Successfully sent custom notification for object 'host1'." + } + ] +} +``` + +### delay-notification + +Delay notifications for a host or a service. +Note that this will only have an effect if the service stays in the same problem +state that it is currently in. If the service changes to another state, a new +notification may go out before the time you specify in the `timestamp` argument. + +Send a `POST` request to the URL endpoint `/v1/actions/delay-notification`. + + Parameter | Type | Description + ----------|-----------|-------------- + timestamp | Timestamp | **Required.** Delay notifications until this timestamp. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`. + +Example: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/delay-notification' \ + -d '{ "type": "Service", "timestamp": 1446389894, "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully delayed notifications for object 'host0!service0'." + }, + { + "code": 200.0, + "status": "Successfully delayed notifications for object 'host1!service1'." + } + ] +} +``` + +### acknowledge-problem + +Allows you to acknowledge the current problem for hosts or services. 
By +acknowledging the current problem, future notifications (for the same state if `sticky` is set to `false`) +are disabled. + +Send a `POST` request to the URL endpoint `/v1/actions/acknowledge-problem`. + + Parameter | Type | Description + ---------------------|-----------|-------------- + author | String | **Required.** Name of the author, may be empty. + comment | String | **Required.** Comment text, may be empty. + expiry | Timestamp | **Optional.** Whether the acknowledgement will be removed at the timestamp. + sticky | Boolean | **Optional.** Whether the acknowledgement will be set until the service or host fully recovers. Defaults to `false`. + notify | Boolean | **Optional.** Whether a notification of the `Acknowledgement` type will be sent. Defaults to `false`. + persistent | Boolean | **Optional.** When the comment is of type `Acknowledgement` and this is set to `true`, the comment will remain after the acknowledgement recovers or expires. Defaults to `false`. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`. + +The following example acknowledges all services which are in a hard critical state and sends out +a notification for them: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/acknowledge-problem' \ + -d '{ "type": "Service", "filter": "service.state==2 && service.state_type==1", "author": "icingaadmin", "comment": "Global outage. Working on it.", "notify": true, "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully acknowledged problem for object 'icinga2-satellite1.localdomain!ping4'." + }, + { + "code": 200.0, + "status": "Successfully acknowledged problem for object 'icinga2-satellite2.localdomain!ping4'." + } + ] +} +``` + +### remove-acknowledgement + +Removes the acknowledgements for services or hosts. 
Once the acknowledgement has +been removed the next notification will be sent again. + +Send a `POST` request to the URL endpoint `/v1/actions/remove-acknowledgement`. + + Parameter | Type | Description + ----------|--------|-------------- + author | String | **Optional.** Name of the removal requestor. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`. + +The example removes all service acknowledgements: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/remove-acknowledgement' \ + -d '{ "type": "Service", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully removed acknowledgement for object 'host0!service0'." + }, + { + "code": 200.0, + "status": "Successfully removed acknowledgement for object 'example2.localdomain!aws-health'." + } + ] +} +``` + +### add-comment + +Adds a `comment` from an `author` to services or hosts. + +Send a `POST` request to the URL endpoint `/v1/actions/add-comment`. + + Parameter | Type | Description + ----------|-----------|-------------- + author | String | **Required.** Name of the author, may be empty. + comment | String | **Required.** Comment text, may be empty. + expiry | Timestamp | **Optional.** Comment expiry time. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`.
+ +The following example adds a comment for all `ping4` services: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/add-comment' \ + -d '{ "type": "Service", "filter": "service.name==\"ping4\"", "author": "icingaadmin", "comment": "Troubleticket #123456789 opened.", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "legacy_id": 26.0, + "name": "icinga2-satellite1.localdomain!ping4!7e7861c8-8008-4e8d-9910-2a0bb26921bd", + "status": "Successfully added comment 'icinga2-satellite1.localdomain!ping4!7e7861c8-8008-4e8d-9910-2a0bb26921bd' for object 'icinga2-satellite1.localdomain!ping4'." + }, + { + "code": 200.0, + "legacy_id": 27.0, + "name": "icinga2-satellite2.localdomain!ping4!9a4c43f5-9407-a536-18bf-4a6cc4b73a9f", + "status": "Successfully added comment 'icinga2-satellite2.localdomain!ping4!9a4c43f5-9407-a536-18bf-4a6cc4b73a9f' for object 'icinga2-satellite2.localdomain!ping4'." + } + ] +} +``` + +### remove-comment + +Remove the comment using its `name` attribute, returns `OK` if the +comment did not exist. +**Note**: This is **not** the legacy ID but the comment name returned by +Icinga 2 when [adding a comment](12-icinga2-api.md#icinga2-api-actions-add-comment). + +Send a `POST` request to the URL endpoint `/v1/actions/remove-comment`. + + Parameter | Type | Description + ----------|--------|-------------- + author | String | **Optional.** Name of the removal requestor. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host`, `Service` and `Comment`.
+ +Example for a simple filter using the `comment` URL parameter: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/remove-comment' \ + -d '{ "comment": "icinga2-satellite2.localdomain!ping4!9a4c43f5-9407-a536-18bf-4a6cc4b73a9f", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully removed comment 'icinga2-satellite2.localdomain!ping4!9a4c43f5-9407-a536-18bf-4a6cc4b73a9f'." + } + ] +} +``` + +Example for removing all service comments using a service name filter for `ping4`: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/remove-comment' \ + -d '{ "type": "Service", "filter": "service.name==\"ping4\"", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully removed all comments for object 'icinga2-satellite1.localdomain!ping4'." + }, + { + "code": 200.0, + "status": "Successfully removed all comments for object 'icinga2-satellite2.localdomain!ping4'." + } + ] +} +``` + +### schedule-downtime + +Schedule a downtime for hosts and services. + +Send a `POST` request to the URL endpoint `/v1/actions/schedule-downtime`. + + Parameter | Type | Description + --------------|-----------|-------------- + author | String | **Required.** Name of the author. + comment | String | **Required.** Comment text. + start\_time | Timestamp | **Required.** Timestamp marking the beginning of the downtime. + end\_time | Timestamp | **Required.** Timestamp marking the end of the downtime. + fixed | Boolean | **Optional.** Defaults to `true`. If true, the downtime is `fixed` otherwise `flexible`. See [downtimes](08-advanced-topics.md#downtimes) for more information. + duration | Number | **Required for flexible downtimes.** Duration of the downtime in seconds if `fixed` is set to false.
+ all\_services | Boolean | **Optional for host downtimes.** Sets downtime for [all services](12-icinga2-api.md#icinga2-api-actions-schedule-downtime-host-all-services) for the matched host objects. If `child_options` are set, all child hosts and their services will schedule a downtime too. Defaults to `false`. + trigger\_name | String | **Optional.** Sets the trigger for a triggered downtime. See [downtimes](08-advanced-topics.md#downtimes) for more information on triggered downtimes. + child\_options| String | **Optional.** Schedule child downtimes. `DowntimeNoChildren` does not do anything, `DowntimeTriggeredChildren` schedules child downtimes triggered by this downtime, `DowntimeNonTriggeredChildren` schedules non-triggered downtimes. Defaults to `DowntimeNoChildren`. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host` and `Service`. + +Example for scheduling a downtime for all `ping4` services: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/schedule-downtime' \ + -d '{ "type": "Service", "filter": "service.name==\"ping4\"", "start_time": 1446388806, "end_time": 1446389806, "duration": 1000, "author": "icingaadmin", "comment": "IPv4 network maintenance", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "legacy_id": 2.0, + "name": "icinga2-satellite1.localdomain!ping4!ecc5fa55-a5b8-4189-a013-a5d4bb47af34", + "status": "Successfully scheduled downtime 'icinga2-satellite1.localdomain!ping4!ecc5fa55-a5b8-4189-a013-a5d4bb47af34' for object 'icinga2-satellite1.localdomain!ping4'." 
+ }, + { + "code": 200.0, + "legacy_id": 3.0, + "name": "icinga2-satellite2.localdomain!ping4!abc59032-4589-abcd-4567-ecf67856c347", + "status": "Successfully scheduled downtime 'icinga2-satellite2.localdomain!ping4!abc59032-4589-abcd-4567-ecf67856c347' for object 'icinga2-satellite2.localdomain!ping4'." + } + ] +} +``` + +In case you want to target just a single service on a host, modify the filter +like this: + +``` +"filter": "host.name==\"icinga2-satellite1.localdomain\" && service.name==\"ping4\"" +``` + +#### Schedule Host Downtime(s) with all Services + +Schedule a downtime for one (or multiple) hosts and all of their services. +Note the `all_services` attribute. + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/schedule-downtime' \ + -d "$(jo -p pretty=true type=Host filter="match(\"*satellite*\", host.name)" all_services=true author=icingaadmin comment="Cluster upgrade maintenance" fixed=true start_time=$(date +%s -d "+0 hour") end_time=$(date +%s -d "+1 hour"))" +``` + +### remove-downtime + +Remove the downtime using its `name` attribute, returns `OK` if the +downtime did not exist. +**Note**: This is **not** the legacy ID but the downtime name returned by +Icinga 2 when [scheduling a downtime](12-icinga2-api.md#icinga2-api-actions-schedule-downtime). + +Send a `POST` request to the URL endpoint `/v1/actions/remove-downtime`. + + Parameter | Type | Description + ----------|--------|-------------- + author | String | **Optional.** Name of the removal requestor. + +In addition to these parameters a [filter](12-icinga2-api.md#icinga2-api-filters) must be provided. The valid types for this action are `Host`, `Service` and `Downtime`. + +When removing a host downtime, service downtimes on this host are automatically deleted if they were created using +the `all_services` option. Other downtimes created using the `child_options` option are not affected.
+ +Example for a simple filter using the `downtime` URL parameter: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/remove-downtime' \ + -d '{ "downtime": "icinga2-satellite2.localdomain!ping4!abc59032-4589-abcd-4567-ecf67856c347", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully removed downtime 'icinga2-satellite2.localdomain!ping4!abc59032-4589-abcd-4567-ecf67856c347'." + } + ] +} +``` + +Example for removing all host downtimes using a host name filter for `icinga2-satellite2.localdomain`: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/remove-downtime' \ + -d '{ "type": "Host", "filter": "host.name==\"icinga2-satellite2.localdomain\"", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully removed all downtimes for object 'icinga2-satellite2.localdomain'." + } + ] +} +``` + +Example for removing a downtime from a host but not the services filtered by the author name. This example uses +filter variables explained in the [advanced filters](12-icinga2-api.md#icinga2-api-advanced-filters) chapter. + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/remove-downtime' \ + -d $'{ + "type": "Downtime", + "filter": "host.name == filterHost && !service && downtime.author == filterAuthor", + "filter_vars": { + "filterHost": "icinga2-satellite1.localdomain", + "filterAuthor": "icingaadmin" + }, + "pretty": true +}' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Successfully removed downtime 'icinga2-satellite1.localdomain!ecc5fa55-a5b8-ef34-abcd-a5d41234af34'." + } + ] +} +``` + +### shutdown-process + +Shuts down Icinga. May or may not return. + +Send a `POST` request to the URL endpoint `/v1/actions/shutdown-process`. 
+ +This action does not support a target type or filter. + +Example: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/shutdown-process?pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Shutting down Icinga 2." + } + ] +} +``` + +### restart-process + +Restarts Icinga. May or may not return. + +Send a `POST` request to the URL endpoint `/v1/actions/restart-process`. + +This action does not support a target type or filter. + +Example: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/restart-process?pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Restarting Icinga 2." + } + ] +} +``` + +### generate-ticket + +Generates a PKI ticket for [CSR auto-signing](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing). +This can be used in combination with satellite/client setups requesting this ticket number. + +> **Note** +> +> This must be used on the local host, or e.g. by a Puppet master. +> Doing so remotely may result in security issues with cluster +> trust relationships. + +Send a `POST` request to the URL endpoint `/v1/actions/generate-ticket`. + + Parameter | Type | Description + --------------|-----------|-------------- + cn | String | **Required.** The host's common name for which the ticket should be generated. 
+ +Example: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/generate-ticket' \ + -d '{ "cn": "icinga2-agent1.localdomain", "pretty": true }' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Generated PKI ticket '4f75d2ecd253575fe9180938ebff7cbca262f96e' for common name 'icinga2-agent1.localdomain'.", + "ticket": "4f75d2ecd253575fe9180938ebff7cbca262f96e" + } + ] +} +``` + +### execute-command + +Executes a particular check/notification/event-command on a particular +endpoint in the context of a particular checkable. Example use cases: + +* Test a check command without actually triggering notifications +* Reboot a node via an event command +* Test a notification command without actually reproducing the notification reason + +Send a `POST` request to the URL endpoint `/v1/actions/execute-command`. + + Parameter | Type | Description + --------------|------------|-------------- + ttl | Number | **Required.** The time to live of the execution expressed in seconds. + command_type | String | **Optional.** The command type: `CheckCommand` or `EventCommand` or `NotificationCommand`. Default: `EventCommand` + command | String | **Optional.** The command to execute. Its type must be the same as `command_type`. It can be a macro string. Default: depending on the `command_type` it's either `$check_command$`, `$event_command$` or `$notification_command$` + endpoint | String | **Optional.** The endpoint to execute the command on. It can be a macro string. Default: `$command_endpoint$`. + macros | Dictionary | **Optional.** Macro overrides. Default: `{}` + user | String | **Optional.** The user used for the notification command. + notification | String | **Optional.** The notification used for the notification command.
+ +Example: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/actions/execute-command' \ + -d '{"type": "Service", "service": "agent!custom_service", "ttl": 15, "macros": { "command_endpoint": "master", "ls_dir": "/tmp/foo" }, "command": "custom_command", "command_type": "CheckCommand" }' +``` + +```json +{ + "results": [ + { + "checkable": "agent!custom_service", + "code": 202.0, + "execution": "3541d906-9afe-4c0e-ae6d-f549ee9bb3e7", + "status": "Accepted" + } + ] +} +``` + +You may poll the state of the execution by [querying](#icinga2-api-config-objects-query) the checkable's attribute `executions`. + +## Event Streams + +Event streams can be used to receive check results, downtimes, comments, +acknowledgements, etc. as a "live stream" from Icinga. + +You can for example forward these types into your own backend. Process the +metrics and correlate them with notifications and state changes e.g. in Elasticsearch +with the help of [Icingabeat](https://icinga.com/docs/icingabeat/latest/). Another use +case are aligned events and creating/resolving tickets automatically in your ticket system. + +You can subscribe to event streams by sending a `POST` request to the URL endpoint `/v1/events`. +The following parameters need to be specified (either as URL parameters or in a JSON-encoded message body): + + Parameter | Type | Description + -----------|--------------|------------- + types | Array | **Required.** Event type(s). Multiple types as URL parameters are supported. + queue | String | **Required.** Unique queue name. Multiple HTTP clients can use the same queue as long as they use the same event types and filter. + filter | String | **Optional.** Filter for specific event attributes using [filter expressions](12-icinga2-api.md#icinga2-api-filters). 
+ +### Event Stream Types + +The following event stream types are available: + + Type | Description + -----------------------|-------------- + CheckResult | Check results for hosts and services. + StateChange | Host/service state changes. + Notification | Notification events including notified users for hosts and services. + AcknowledgementSet | Acknowledgement set on hosts and services. + AcknowledgementCleared | Acknowledgement cleared on hosts and services. + CommentAdded | Comment added for hosts and services. + CommentRemoved | Comment removed for hosts and services. + DowntimeAdded | Downtime added for hosts and services. + DowntimeRemoved | Downtime removed for hosts and services. + DowntimeStarted | Downtime started for hosts and services. + DowntimeTriggered | Downtime triggered for hosts and services. + ObjectCreated | Object created for all Icinga 2 objects. + ObjectDeleted | Object deleted for all Icinga 2 objects. + ObjectModified | Object modified for all Icinga 2 objects. + +Note: Each type requires [API permissions](12-icinga2-api.md#icinga2-api-permissions) +being set. + +Example for all downtime events: + +``` +&types=DowntimeAdded&types=DowntimeRemoved&types=DowntimeTriggered + +-d '{ "types": ["DowntimeAdded", "DowntimeRemoved", "DowntimeTriggered"] }' +``` + +Example for all object events: + +``` +&types=ObjectCreated&types=ObjectDeleted&types=ObjectModified + +-d '{ "types": ["ObjectCreated", "ObjectDeleted", "ObjectModified"] }' +``` + +#### Event Stream Type: CheckResult + + Name | Type | Description + -----------------|---------------|-------------------------- + type | String | Event type `CheckResult`. + timestamp | Timestamp | Unix timestamp when the event happened. + host | String | [Host](09-object-types.md#objecttype-host) name. + service | String | [Service](09-object-types.md#objecttype-service) name. Optional if this is a host check result. 
+ check\_result | CheckResult | Serialized [CheckResult](08-advanced-topics.md#advanced-value-types-checkresult) value type. + downtime\_depth | Number | Amount of active downtimes on the checkable. + acknowledgement | Boolean | Whether the object is acknowledged. + +#### Event Stream Type: StateChange + + Name | Type | Description + -----------------|---------------|-------------------------- + type | String | Event type `StateChange`. + timestamp | Timestamp | Unix timestamp when the event happened. + host | String | [Host](09-object-types.md#objecttype-host) name. + service | String | [Service](09-object-types.md#objecttype-service) name. Optional if this is a host state change. + state | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state. + state\_type | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state type. + check\_result | CheckResult | Serialized [CheckResult](08-advanced-topics.md#advanced-value-types-checkresult) value type. + downtime\_depth | Number | Amount of active downtimes on the checkable. + acknowledgement | Boolean | Whether the object is acknowledged. + +#### Event Stream Type: Notification + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `Notification`. + timestamp | Timestamp | Unix timestamp when the event happened. + host | String | [Host](09-object-types.md#objecttype-host) name. + service | String | [Service](09-object-types.md#objecttype-service) name. Optional if this is a host notification. + command | String | [NotificationCommand](09-object-types.md#objecttype-notificationcommand) name. + users | Array | List of notified [user](09-object-types.md#objecttype-user) names. + notification\_type | String | [$notification.type$](03-monitoring-basics.md#notification-runtime-macros) runtime macro value. 
+ author | String | [$notification.author$](03-monitoring-basics.md#notification-runtime-macros) runtime macro value. + text | String | [$notification.comment$](03-monitoring-basics.md#notification-runtime-macros) runtime macro value. + check\_result | CheckResult | Serialized [CheckResult](08-advanced-topics.md#advanced-value-types-checkresult) value type. + +#### Event Stream Type: Flapping + + Name | Type | Description + ------------------|---------------|-------------------------- + type | String | Event type `Flapping`. + timestamp | Timestamp | Unix timestamp when the event happened. + host | String | [Host](09-object-types.md#objecttype-host) name. + service | String | [Service](09-object-types.md#objecttype-service) name. Optional if this is a host flapping event. + state | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state. + state\_type | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state type. + is\_flapping | Boolean | Whether this object is flapping. + current\_flapping | Number | Current flapping value in percent (added in 2.8). + threshold\_low | Number | Low threshold in percent (added in 2.8). + threshold\_high | Number | High threshold in percent (added in 2.8). + +#### Event Stream Type: AcknowledgementSet + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `AcknowledgementSet`. + timestamp | Timestamp | Unix timestamp when the event happened. + host | String | [Host](09-object-types.md#objecttype-host) name. + service | String | [Service](09-object-types.md#objecttype-service) name. Optional if this is a host acknowledgement. + state | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state. 
+ state\_type | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state type. + author | String | Acknowledgement author set via [acknowledge-problem](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) action. + comment | String | Acknowledgement comment set via [acknowledge-problem](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) action. + acknowledgement\_type | Number | 0 = None, 1 = Normal, 2 = Sticky. `sticky` can be set via [acknowledge-problem](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) action. + notify | Boolean | Notifications were enabled via [acknowledge-problem](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) action. + expiry | Timestamp | Acknowledgement expire time set via [acknowledge-problem](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) action. + +#### Event Stream Type: AcknowledgementCleared + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `AcknowledgementCleared`. + timestamp | Timestamp | Unix timestamp when the event happened. + host | String | [Host](09-object-types.md#objecttype-host) name. + service | String | [Service](09-object-types.md#objecttype-service) name. Optional if this is a host acknowledgement. + state | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state. + state\_type | Number | [Host](09-object-types.md#objecttype-host) or [service](09-object-types.md#objecttype-service) state type. + +#### Event Stream Type: CommentAdded + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `CommentAdded`. + timestamp | Timestamp | Unix timestamp when the event happened. + comment | Dictionary | Serialized [Comment](09-object-types.md#objecttype-comment) object. 
+ +#### Event Stream Type: CommentRemoved + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `CommentRemoved`. + timestamp | Timestamp | Unix timestamp when the event happened. + comment | Dictionary | Serialized [Comment](09-object-types.md#objecttype-comment) object. + +#### Event Stream Type: DowntimeAdded + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `DowntimeAdded`. + timestamp | Timestamp | Unix timestamp when the event happened. + downtime | Dictionary | Serialized [Downtime](09-object-types.md#objecttype-downtime) object. + +#### Event Stream Type: DowntimeRemoved + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `DowntimeRemoved`. + timestamp | Timestamp | Unix timestamp when the event happened. + downtime | Dictionary | Serialized [Downtime](09-object-types.md#objecttype-downtime) object. + + +#### Event Stream Type: DowntimeStarted + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `DowntimeStarted`. + timestamp | Timestamp | Unix timestamp when the event happened. + downtime | Dictionary | Serialized [Downtime](09-object-types.md#objecttype-downtime) object. + + +#### Event Stream Type: DowntimeTriggered + + Name | Type | Description + --------------|---------------|-------------------------- + type | String | Event type `DowntimeTriggered`. + timestamp | Timestamp | Unix timestamp when the event happened. + downtime | Dictionary | Serialized [Downtime](09-object-types.md#objecttype-downtime) object. + + +### Event Stream Filter + +Event streams can be filtered by attributes using the prefix `event.`.
+ +Example for the `CheckResult` type with the `exit_status` set to `2`: + +``` +&types=CheckResult&filter=event.check_result.exit_status==2 + +-d '{ "types": [ "CheckResult" ], "filter": "event.check_result.exit_status==2" }' +``` + +Example for the `CheckResult` type with the service [matching](18-library-reference.md#global-functions-match) +the string pattern "random\*": + +``` +&types=CheckResult&filter=match%28%22random*%22,event.service%29 + +-d '{ "types": [ "CheckResult" ], "filter": "match(\"random*\", event.service)" }' +``` + +### Event Stream Response + +The event stream response is separated with new lines. The HTTP client +must support long-polling and HTTP/1.1. HTTP/1.0 is not supported. + +Example: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/events' \ + -d '{ "queue": "myqueue", "types": [ "CheckResult" ], "filter": "event.check_result.exit_status==2" }' +``` + +``` +{"check_result":{ ... },"host":"example.localdomain","service":"ping4","timestamp":1445421319.7226390839,"type":"CheckResult"} +{"check_result":{ ... },"host":"example.localdomain","service":"ping4","timestamp":1445421324.7226390839,"type":"CheckResult"} +{"check_result":{ ... },"host":"example.localdomain","service":"ping4","timestamp":1445421329.7226390839,"type":"CheckResult"} +``` + +## Status and Statistics + +Send a `GET` request to the URL endpoint `/v1/status` to retrieve status information and statistics for Icinga 2. + +Example: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/status?pretty=1' +``` + +``` +{ + "results": [ + { + "name": "ApiListener", + "perfdata": [ ... ], + "status": [ ... ] + }, + ... + { + "name": "IcingaApplication", + "perfdata": [ ... ], + "status": [ ... ] + }, + ... + ] +} +``` + +You can limit the output by specifying a status type in the URL, e.g.
`IcingaApplication`: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/status/IcingaApplication?pretty=1' +``` + +```json +{ + "results": [ + { + "perfdata": [], + "status": { + "icingaapplication": { + "app": { + "enable_event_handlers": true, + "enable_flapping": true, + "enable_host_checks": true, + "enable_notifications": true, + "enable_perfdata": true, + "enable_service_checks": true, + "node_name": "example.localdomain", + "pid": 59819.0, + "program_start": 1443019345.093372, + "version": "v2.3.0-573-g380a131" + } + } + } + } + ] +} +``` + +## Configuration Management + +The main idea behind configuration management is that external applications +can create configuration packages and stages based on configuration files and +directory trees. This replaces any additional SSH connection and whatnot to +dump configuration files to Icinga 2 directly. + +In case you are pushing a new configuration stage to a package, Icinga 2 will +validate the configuration asynchronously and populate a status log which +can be fetched in a separate request. Once the validation succeeds, +a reload is triggered by default. + +This functionality was primarily developed for the [Icinga Director](https://icinga.com/docs/director/latest/) +but can be used with your own deployments too. It also solves the problem +with certain runtime objects (zones, endpoints) and can be used to +deploy global templates in [global cluster zones](06-distributed-monitoring.md#distributed-monitoring-global-zone-config-sync). + + +### Create a Config Package + +Send a `POST` request to a new config package called `example-cmdb` in this example. This +creates a new empty configuration package. + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ +-X POST 'https://localhost:5665/v1/config/packages/example-cmdb?pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "package": "example-cmdb", + "status": "Created package."
+ } + ] +} +``` + +Package names with the `_` prefix are reserved for internal packages and must not be used. +You can recognize `_api`, `_etc` and `_cluster` when querying specific objects and packages. + +Each configuration object stores the package source in the `package` attribute. + +### Create a Stage: Upload Configuration + +Configuration files in packages are managed in stages. Stages provide a way +to maintain multiple configuration versions for a package. Once a new stage +is deployed, the content is validated and set as active stage on success. + +On failure, the older stage remains active, and the caller can fetch the `startup.log` +from this stage deployment attempt to see what exactly failed. You can see that +in the Director's deployment log. + +Send a `POST` request to the URL endpoint `/v1/config/stages` and add the name of an existing +configuration package to the URL path (e.g. `example-cmdb`). +The request body must contain the `files` attribute with the value being +a dictionary of file targets and their content. + +Optional attributes include `reload` (defaults to `true`) and `activate` (defaults to `true`). +The `reload` attribute will tell icinga2 to reload after stage config validation. +The `activate` attribute will tell icinga2 to activate the stage if it validates. +If `activate` is set to `false`, `reload` must also be `false`. + +The file path requires one of these two directories inside its path: + + Directory | Description + ------------|------------------------------------ + conf.d | Local configuration directory. + zones.d | Configuration directory for cluster zones, each zone must be put into its own zone directory underneath. Supports the [cluster config sync](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync). 
+ +Example for a local configuration in the `conf.d` directory: + +``` +"files": { "conf.d/host1.conf": "object Host \"local-host\" { address = \"127.0.0.1\", check_command = \"hostalive\" }" } +``` + +Example for a host configuration inside the `satellite` zone in the `zones.d` directory: + +``` +"files": { "zones.d/satellite/host2.conf": "object Host \"satellite-host\" { address = \"192.168.1.100\", check_command = \"hostalive\" }" } +``` + + +The example below will create a new file called `test.conf` in the `conf.d` +directory. Note: This example contains an error (`chec_command`). This is +intentional. + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' -X POST \ +-d '{ "files": { "conf.d/test.conf": "object Host \"cmdb-host\" { chec_command = \"dummy\" }" }, "pretty": true }' \ +'https://localhost:5665/v1/config/stages/example-cmdb' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "package": "example-cmdb", + "stage": "7e7861c8-8008-4e8d-9910-2a0bb26921bd", + "status": "Created stage. Reload triggered." + } + ] +} +``` + +The Icinga 2 API returns the `package` name this stage was created for, and also +generates a unique name for the `stage` attribute you'll need for later requests. + +Icinga 2 automatically restarts the daemon in order to activate the new config stage. This +can be disabled by setting `reload` to `false` in the request. +If the validation for the new config stage failed, the old stage +and its configuration objects will remain active. + +Activation may be inhibited even for stages that validate correctly by setting +`activate` to `false`. This may be useful for validating the contents of a stage +without making it active, for example in a CI (continuous integration) system. + +> **Note** +> +> Old stages are not purged automatically. You can [remove stages](12-icinga2-api.md#icinga2-api-config-management-delete-config-stage) that are no longer in use. 
+ +Icinga 2 creates the following files in the configuration package +stage after configuration validation: + + File | Description + ------------|-------------- + status | Contains the [configuration validation](11-cli-commands.md#config-validation) exit code (everything else than 0 indicates an error). + startup.log | Contains the [configuration validation](11-cli-commands.md#config-validation) output. + +You can [fetch these files](12-icinga2-api.md#icinga2-api-config-management-fetch-config-package-stage-files) +in order to verify that the new configuration was deployed successfully. Please follow the chapter below +to learn more about this. + + +### List Configuration Packages and their Stages + +A list of packages and their stages can be retrieved by sending a `GET` request to the URL endpoint `/v1/config/packages`. + +The following example contains one configuration package `example-cmdb`. The package does not currently +have an active stage. + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/config/packages?pretty=1' +``` + +```json +{ + "results": [ + { + "active-stage": "", + "name": "example-cmdb", + "stages": [ + "7e7861c8-8008-4e8d-9910-2a0bb26921bd" + ] + } + ] +} +``` + +### List Configuration Package Stage Files + +In order to retrieve a list of files for a stage you can send a `GET` request to +the URL endpoint `/v1/config/stages`. You need to include +the package name (`example-cmdb`) and stage name (`7e7861c8-8008-4e8d-9910-2a0bb26921bd`) in the URL: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/config/stages/example-cmdb/7e7861c8-8008-4e8d-9910-2a0bb26921bd?pretty=1' +``` + +``` +{ + "results": [ +... 
+ { + "name": "startup.log", + "type": "file" + }, + { + "name": "status", + "type": "file" + }, + { + "name": "conf.d", + "type": "directory" + }, + { + "name": "zones.d", + "type": "directory" + }, + { + "name": "conf.d/test.conf", + "type": "file" + } + ] +} +``` + +### Fetch Configuration Package Stage Files + +Send a `GET` request to the URL endpoint `/v1/config/files` and add +the package name, the stage name and the relative path to the file to the URL path. + +> **Note** +> +> The returned files are plain-text instead of JSON-encoded. + +The following example fetches the configuration file `conf.d/test.conf`: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/config/files/example-cmdb/7e7861c8-8008-4e8d-9910-2a0bb26921bd/conf.d/test.conf' +``` + +``` +object Host "cmdb-host" { chec_command = "dummy" } +``` + +You can fetch a [list of existing files](12-icinga2-api.md#icinga2-api-config-management-list-config-package-stage-files) +in a configuration stage and then specifically request their content. + +### Configuration Package Stage Errors + +Now that we don't have an active stage for `example-cmdb` yet seen [here](12-icinga2-api.md#icinga2-api-config-management-list-config-packages), +there must have been an error. + +In order to check for validation errors you can fetch the `startup.log` file +by sending a `GET` request to the URL endpoint `/v1/config/files`. You must include +the package name, stage name and the `startup.log` in the URL path. + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/config/files/example-cmdb/7e7861c8-8008-4e8d-9910-2a0bb26921bd/startup.log' +``` + +``` +[...] +critical/config: Error: Attribute 'chec_command' does not exist. 
+Location: +/var/lib/icinga2/api/packages/example-cmdb/7e7861c8-8008-4e8d-9910-2a0bb26921bd/conf.d/test.conf(1): object Host "cmdb-host" { chec_command = "dummy" } + ^^^^^^^^^^^^^^^^^^^^^^ + +critical/config: 1 error +``` + +The output is exactly the same as known from [configuration validation](11-cli-commands.md#config-validation). + +> **Note** +> +> The returned output is plain-text instead of JSON-encoded. + + +### Deleting Configuration Package Stage + +You can send a `DELETE` request to the URL endpoint `/v1/config/stages` +in order to purge a configuration stage. You must include the package and +stage name inside the URL path. + +The following example removes the failed configuration stage `7e7861c8-8008-4e8d-9910-2a0bb26921bd` +in the `example-cmdb` configuration package: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X DELETE 'https://localhost:5665/v1/config/stages/example-cmdb/7e7861c8-8008-4e8d-9910-2a0bb26921bd?pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Stage deleted." + } + ] +} +``` + +### Deleting Configuration Package + +In order to completely purge a configuration package and its stages +you can send a `DELETE` request to the URL endpoint `/v1/config/packages` +with the package name in the URL path. + +This example entirely deletes the configuration package `example-cmdb`: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' -X DELETE \ +'https://localhost:5665/v1/config/packages/example-cmdb?pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "package": "example-cmdb", + "status": "Deleted package." + } + ] +} +``` + +## Types + +You can retrieve the configuration object types by sending a `GET` request to the URL +endpoint `/v1/types`. + +Each response entry in the results array contains the following attributes: + + Attribute | Type | Description + ----------------|--------------|--------------------- + name | String | The type name. 
+ plural\_name | String | The plural type name. + fields | Dictionary | Available fields including details on e.g. the type and attribute accessibility. + abstract | Boolean | Whether objects can be instantiated for this type. + base | Boolean | The base type (e.g. `Service` inherits fields and prototype methods from `Checkable`). + prototype\_keys | Array | Available prototype methods. + +In order to view a specific configuration object type specify its name inside the URL path: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/types/Object?pretty=1' +``` + +```json +{ + "results": [ + { + "abstract": false, + "fields": { + "type": { + "array_rank": 0.0, + "attributes": { + "config": false, + "navigation": false, + "no_user_modify": false, + "no_user_view": false, + "required": false, + "state": false + }, + "id": 0.0, + "type": "String" + } + }, + "name": "Object", + "plural_name": "Objects", + "prototype_keys": [ + "clone", + "notify_attribute", + "to_string" + ] + } + ] +} +``` + +## Config Templates + +Provides methods to manage configuration templates: + +* [querying templates](12-icinga2-api.md#icinga2-api-config-templates-query) + +Creation, modification and deletion of templates at runtime is not supported. + +### Querying Templates + +You can request information about configuration templates by sending +a `GET` query to the `/v1/templates/<type>` URL endpoint. `<type>` has +to be replaced with the plural name of the object type you are interested in. + +## Variables + +Provides methods to manage global variables: + +* [querying variables](12-icinga2-api.md#icinga2-api-variables-query) + +### Querying Variables + +You can request information about global variables by sending +a `GET` query to the `/v1/variables/` URL endpoint: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/variables' +``` + +A [filter](12-icinga2-api.md#icinga2-api-filters) may be provided for this query type. 
The +variable information object can be accessed in the filter using the `variable` variable. 
+The `filter` attribute is passed inside the request body thus requiring to use [X-HTTP-Method-Override](12-icinga2-api.md#icinga2-api-requests-method-override) +here. + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -H 'X-HTTP-Method-Override: GET' -X POST \ + 'https://localhost:5665/v1/variables' \ + -d '{ "filter": "variable.type in [ \"String\", \"Number\" ]" }' +``` + +Instead of using a filter you can optionally specify the variable name in the +URL path when querying a single variable: + +```bash +curl -k -s -S -i -u root:icinga 'https://localhost:5665/v1/variables/PrefixDir' +``` + +The result set contains the type, name and value of the global variable. + +## Debug Console + +You can inspect variables and execute other expressions by sending a `POST` request to the URL endpoint `/v1/console/execute-script`. +In order to receive auto-completion suggestions, send a `POST` request to the URL endpoint `/v1/console/auto-complete-script`. + +> **Note** +> +> This functionality is used by the [debug console](11-cli-commands.md#cli-command-console). Do not use this in production, unless +> you are aware of the fact that expressions and commands may crash the daemon, or lead to +> unwanted behaviour. Use this URL endpoint **read-only** when needed. + +The following parameters need to be specified (either as URL parameters or in a JSON-encoded message body): + + Parameter | Type | Description + -----------|--------------|------------- + session | String | **Optional.** The session ID. Ideally this should be a GUID or some other unique identifier. + command | String | **Required.** Command expression for execution or auto-completion. + sandboxed | Number | **Optional.** Whether runtime changes are allowed or forbidden. Defaults to disabled. + +The [API permission](12-icinga2-api.md#icinga2-api-permissions) `console` is required for executing +expressions. 
+ +> **Note** +> +> Runtime modifications via `execute-script` calls are not validated and might cause the Icinga 2 +> daemon to crash or behave in an unexpected way. Use these runtime changes at your own risk. + +If you specify a session identifier, the same script context can be reused for multiple requests. This allows you to, for example, set a local variable in a request and use that local variable in another request. Sessions automatically expire after a set period of inactivity (currently 30 minutes). + +Example for fetching the command line from the local host's last check result: + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/console/execute-script?command=get_host(NodeName).last_check_result.command&sandboxed=0&session=bb75fd7c-c686-407d-9688-582c04227756&pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "result": [ + "/usr/local/sbin/check_ping", + "-H", + "127.0.0.1", + "-c", + "5000,100%", + "-w", + "3000,80%" + ], + "status": "Executed successfully." + } + ] +} +``` + +Example for fetching auto-completion suggestions for the `Host.` type. This works in a +similar fashion when pressing TAB inside the [console CLI command](11-cli-commands.md#cli-command-console): + +```bash +curl -k -s -S -i -u root:icinga -H 'Accept: application/json' \ + -X POST 'https://localhost:5665/v1/console/auto-complete-script?command=Host.&sandboxed=0&session=bb75fd7c-c686-407d-9688-582c04227756&pretty=1' +``` + +```json +{ + "results": [ + { + "code": 200.0, + "status": "Auto-completed successfully.", + "suggestions": [ + "Host.type", + "Host.name", + "Host.prototype", + "Host.base", + "Host.register_attribute_handler", + "Host.clone", + "Host.notify_attribute", + "Host.to_string" + ] + } + ] +} +``` + +## API Clients + +After its initial release in 2015, community members +and developers have been working hard to add more REST API +clients and integrations into DevOps tools. 
+ +* [Libraries](12-icinga2-api.md#icinga2-api-clients-libraries) +* [Status](12-icinga2-api.md#icinga2-api-clients-status) +* [Management](12-icinga2-api.md#icinga2-api-clients-management) +* [Event Streams](12-icinga2-api.md#icinga2-api-clients-event-streams) +* [Actions](12-icinga2-api.md#icinga2-api-clients-actions) +* [REST API Apps](12-icinga2-api.md#icinga2-api-clients-apps) + +Additional [programmatic examples](12-icinga2-api.md#icinga2-api-clients-programmatic-examples) +will help you getting started using the Icinga 2 API in your environment. + +### Libraries + +Name | Language | Description +------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------- +[ruby-icinga2](https://github.com/bodsch/ruby-icinga2) | Ruby | Ruby library +[python-icinga2_api](https://github.com/KevinHonka/Icinga2_Python_API) | Python | Python library +[python-icinga2-api](https://github.com/fmnisme/python-icinga2api) | Python | Python bindings for Icinga 2 interaction +[python-icinga2-api-continued](https://github.com/joni1993/icinga2apic) | Python | Python bindings for Icinga 2 interaction forked and continued from fmnisme's python binding +[go-icinga2](https://github.com/xert/go-icinga2) | Golang | Golang functions and type definitions +[go-icinga2-api](https://github.com/lrsmith/go-icinga2-api/) | Golang | Golang implementation used inside the Terraform provider +[go-icinga2-client](https://github.com/Nexinto/go-icinga2-client) | Golang | Golang implementation for the Rancher integration. +[Monitoring::Icinga2::Client::REST](https://metacpan.org/release/THESEAL/Monitoring-Icinga2-Client-REST-2.0.0) | Perl | Perl bindings. +[Icinga 2 API in PHP](https://github.com/uniwue-rz/icinga2-api) | PHP | PHP implementation. For other examples, look into Icinga Web 2 and Director. 
+ +### Status + +Name | Language | Description +------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------- +[Dashing](https://github.com/dnsmichi/dashing-icinga2) | Ruby, HTML | Dashboard for Dashing querying the REST API for current host/service/global status +[InfluxDB Telegraf Input](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/icinga2/README.md) | Golang | [Telegraf](https://github.com/influxdata/telegraf) is an agent written in Go for collecting, processing, aggregating, and writing metrics. +[Icinga Slack Bot](https://github.com/bb-Ricardo/icinga-slack-bot) | Python | It can be used to interact with Icinga2 from your Slack client. It uses the Icinga2 API to get Host/Service status details. Simple status filters can be used to narrow down the returned status list. +[Icinga 2 Slack Bot](https://github.com/mlabouardy/icinga2-slack-bot) | Golang | Query host/service details from a [Slack](https://slack.com/) channel +[icinga2bot](https://github.com/reikoNeko/icinga2bot) | Python | [Errbot](https://errbot.io/en/latest/user_guide/setup.html) plugin to fetch status and event stream information and forward to XMPP, IRC, etc. +[IcingaBusyLightAgent](https://github.com/stdevel/IcingaBusylightAgent) | C# | Notification Agent in Systray +[BitBar for OSX](https://getbitbar.com/plugins/Dev/Icinga2/icinga2.24m.py) | Python | macOS tray app for highlighting the host/service status +[Icinga 2 Multistatus](https://chrome.google.com/webstore/detail/icinga-multi-status/khabbhcojgkibdeipanmiphceeoiijal/related) | - | Chrome Extension +[Naglite4](https://github.com/wftech/icinga2-naglite4) | Python | Naglite3 rewrite using the Icinga 2 REST API. 
+[icinga-telegram-bot](https://github.com/joni1993/icinga-telegram-bot) | Python | Telegram Bot using the Icinga 2 REST API + +### Manage Objects + +Name | Language | Description +------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------- +[Icinga Director](https://icinga.com/docs/director/latest) | PHP, JS | Icinga 2 configuration interface with a nice frontend, and automated imports for nearly any source. +[Terraform Provider](https://github.com/terraform-providers/terraform-provider-icinga2) | Golang | Register hosts from Terraform in Icinga 2. [Official docs](https://www.terraform.io/docs/providers/icinga2/index.html). +[Kube Icinga](https://github.com/gyselroth/kube-icinga) | Typescript | Monitor Kubernetes services / resources using icinga2 (including autodiscovery support) +[Logstash output for Icinga](https://www.icinga.com/products/integrations/elastic/) | Ruby | Forward check results and create objects from log events +[Foreman Smart Proxy Monitoring](https://github.com/theforeman/smart_proxy_monitoring) | Ruby | Smart Proxy extension for Foreman creating and deleting hosts and services in Icinga 2 +[Rancher integration](https://github.com/Nexinto/rancher-icinga) | Golang | Registers [Rancher](https://rancher.com) resources in Icinga 2 for monitoring. +[AWS/EC2](https://github.com/Icinga/icinga2-api-examples/tree/master/aws-ec2) | Ruby | Example script for creating and deleting AWS instances in Icinga 2 +[Ansible Host Module](https://docs.ansible.com/ansible/latest/modules/icinga2_host_module.html) | Python | In progress, [Ansible Feature](https://docs.ansible.com/ansible/latest/modules/icinga2_feature_module.html#icinga2-feature-module) is also there. 
+[gocinga](https://gitlab.com/sambadevi/gocinga) | Golang | CLI Tool for Icinga, written in go + +### Event Streams + +Name | Language | Description +------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------- +[Elastic Icingabeat](https://icinga.com/docs/icingabeat/latest/) | Golang | Process events and send to Elasticsearch/Logstash outputs +[Request Tracker ticket integration](https://github.com/bytemine/icinga2rt) | Golang | Create and update RT tickets +[Logstash input event stream](https://github.com/bobapple/logstash-input-icinga_eventstream) | Ruby | Forward events as Logstash input +[Flapjack events](https://github.com/sol1/flapjack-icinga2) | Golang | Dumping events into Redis for Flapjack processing +[Stackstorm integration](https://github.com/StackStorm-Exchange/stackstorm-icinga2) | Python | Processing events and fetching status information +[NodeJS consumer](https://community.icinga.com/t/consume-api-event-stream/1010/6) | NodeJS | Example from our community :) + +### Actions + +Name | Language | Description +------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------- +[Icinga Web 2](https://icinga.com/docs/icingaweb2/latest/) | PHP | Trigger actions via command transport +[Logstash output for Icinga](https://www.icinga.com/products/integrations/elastic/) | Ruby | Forward check results and create objects from log events +[OTRS SystemMonitoring](https://github.com/OTRS/SystemMonitoring) | Perl | Acknowledge problems in Icinga 2 from OTRS tickets +[mqttwarn](https://github.com/jpmens/mqttwarn#icinga2) | Python | Forward check results from mqttwarn to Icinga 2 +[Lita handler](https://github.com/tuxmea/lita-icinga2) | Ruby | List, recheck and acknowledge through a #chatops bot called [Lita](https://github.com/litaio/lita) +[Sakuli 
forwarder](http://sakuli.readthedocs.io/en/latest/forwarder-icinga2api/) | Java | Forward check results from tests from [Sakuli](https://github.com/ConSol/sakuli) to Icinga 2 +[OpsGenie actions](https://www.opsgenie.com/docs/integrations/icinga2-integration) | Golang, Java | Integrate Icinga 2 into OpsGenie + + +### REST API Apps + +Name | Language | Description +------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------- +Browser plugins | - | [Postman for Chrome](https://www.getpostman.com), [RESTED for Firefox](https://addons.mozilla.org/en-US/firefox/addon/rested/) +[Postman](https://www.getpostman.com/) | - | App instead of browser plugin +[Cocoa Rest Client](https://mmattozzi.github.io/cocoa-rest-client/) | - | macOS app +[Paw for MacOS](https://paw.cloud) | (exported) | Paw is a full-featured HTTP client that lets you test and describe the APIs you build or consume. It has a beautiful native macOS interface to compose requests, inspect server responses, generate client code and export API definitions. 
+ + +### Programmatic Examples + +The following languages are covered: + +* [Python](12-icinga2-api.md#icinga2-api-clients-programmatic-examples-python) +* [Ruby](12-icinga2-api.md#icinga2-api-clients-programmatic-examples-ruby) +* [PHP](12-icinga2-api.md#icinga2-api-clients-programmatic-examples-php) +* [Perl](12-icinga2-api.md#icinga2-api-clients-programmatic-examples-perl) +* [Golang](12-icinga2-api.md#icinga2-api-clients-programmatic-examples-golang) +* [Powershell](12-icinga2-api.md#icinga2-api-clients-programmatic-examples-powershell) + +The [request method](12-icinga2-api.md#icinga2-api-requests) is `POST` using [X-HTTP-Method-Override: GET](12-icinga2-api.md#icinga2-api-requests-method-override) +which allows you to send a JSON request body. The examples request specific service +attributes joined with host attributes. 
`attrs` and `joins` are therefore specified +as array. +The `filter` attribute [matches](18-library-reference.md#global-functions-match) +on all services with `ping` in their name. + +#### Example API Client in Python + +The following example uses **Python** and the `requests` and `json` module: + +``` +# pip install requests +# pip install json + +$ vim icinga.py + +#!/usr/bin/env python + +import requests, json + +# Replace 'localhost' with your FQDN and certificate CN +# for TLS verification +request_url = "https://localhost:5665/v1/objects/services" +headers = { + 'Accept': 'application/json', + 'X-HTTP-Method-Override': 'GET' + } +data = { + "attrs": [ "name", "state", "last_check_result" ], + "joins": [ "host.name", "host.state", "host.last_check_result" ], + "filter": "match(\"ping*\", service.name)", +} + +r = requests.post(request_url, + headers=headers, + auth=('root', 'icinga'), + data=json.dumps(data), + verify="pki/icinga2-ca.crt") + +print "Request URL: " + str(r.url) +print "Status code: " + str(r.status_code) + +if (r.status_code == 200): + print "Result: " + json.dumps(r.json()) +else: + print r.text + r.raise_for_status() + +$ python icinga.py +``` + +#### Example API Client in Ruby + +The following example uses **Ruby** and the `rest_client` gem: + +``` +# gem install rest_client + +$ vim icinga.rb + +#!/usr/bin/ruby + +require 'rest_client' + +# Replace 'localhost' with your FQDN and certificate CN +# for TLS verification +request_url = "https://localhost:5665/v1/objects/services" +headers = { + "Accept" => "application/json", + "X-HTTP-Method-Override" => "GET" +} +data = { + "attrs" => [ "name", "state", "last_check_result" ], + "joins" => [ "host.name", "host.state", "host.last_check_result" ], + "filter" => "match(\"ping*\", service.name)", +} + +r = RestClient::Resource.new( + URI.encode(request_url), + :headers => headers, + :user => "root", + :password => "icinga", + :ssl_ca_file => "pki/icinga2-ca.crt") + +begin + response = 
r.post(data.to_json) +rescue => e + response = e.response +end + +puts "Status: " + response.code.to_s +if response.code == 200 + puts "Result: " + (JSON.pretty_generate JSON.parse(response.body)) +else + puts "Error: " + response +end + +$ ruby icinga.rb +``` + +A more detailed example can be found in the [Dashing demo](https://github.com/Icinga/dashing-icinga2). + +#### Example API Client in PHP + +The following example uses **PHP** and its `curl` library: + +``` +$ vim icinga.php + +#!/usr/bin/env php +<?php +# Replace 'localhost' with your FQDN and certificate CN +# for TLS verification +$request_url = "https://localhost:5665/v1/objects/services"; +$username = "root"; +$password = "icinga"; +$headers = array( + 'Accept: application/json', + 'X-HTTP-Method-Override: GET' +); +$data = array( + 'attrs' => array('name', 'state', 'last_check_result'), + 'joins' => array('host.name', 'host.state', 'host.last_check_result'), + 'filter' => 'match("ping*", service.name)', +); + +$ch = curl_init(); +curl_setopt_array($ch, array( + CURLOPT_URL => $request_url, + CURLOPT_HTTPHEADER => $headers, + CURLOPT_USERPWD => $username . ":" . $password, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_CAINFO => "pki/icinga2-ca.crt", + CURLOPT_POST => count($data), + CURLOPT_POSTFIELDS => json_encode($data) +)); + +$response = curl_exec($ch); +if ($response === false) { + print "Error: " . curl_error($ch) . "(" . $response . ")\n"; +} + +$code = curl_getinfo($ch, CURLINFO_HTTP_CODE); +curl_close($ch); +print "Status: " . $code . 
"\n"; + +if ($code == 200) { + $response = json_decode($response, true); + print_r($response); +} +?> + +$ php icinga.php +``` + +#### Example API Client in Perl + +The following example uses **Perl** and the `REST::Client` module: + +``` +# perl -MCPAN -e 'install REST::Client' +# perl -MCPAN -e 'install JSON' +# perl -MCPAN -e 'install MIME::Base64' +# perl -MCPAN -e 'install Data::Dumper' + +$ vim icinga.pl + +#!/usr/bin/env perl + +use strict; +use warnings; +use REST::Client; +use MIME::Base64; +use JSON; +use Data::Dumper; + +# Replace 'localhost' with your FQDN and certificate CN +# for TLS verification +my $request_host = "https://localhost:5665"; +my $userpass = "root:icinga"; + +my $client = REST::Client->new(); +$client->setHost($request_host); +$client->setCa("pki/icinga2-ca.crt"); +$client->addHeader("Accept", "application/json"); +$client->addHeader("X-HTTP-Method-Override", "GET"); +$client->addHeader("Authorization", "Basic " . encode_base64($userpass)); +my %json_data = ( + attrs => ['name', 'state', 'last_check_result'], + joins => ['host.name', 'host.state', 'host.last_check_result'], + filter => 'match("ping*", service.name)', +); +my $data = encode_json(\%json_data); +$client->POST("/v1/objects/services", $data); + +my $status = $client->responseCode(); +print "Status: " . $status . "\n"; +my $response = $client->responseContent(); +if ($status == 200) { + print "Result: " . Dumper(decode_json($response)) . "\n"; +} else { + print "Error: " . $response . "\n"; +} + +$ perl icinga.pl +``` + + +#### Example API Client in Golang + +Requires the Golang build chain. 
+ +``` +$ vim icinga.go + +package main + +import ( + "bytes" + "crypto/tls" + "log" + "io/ioutil" + "net/http" +) + +func main() { + var urlBase= "https://localhost:5665" + var apiUser= "root" + var apiPass= "icinga" + + urlEndpoint := urlBase + "/v1/objects/services" + + tr := &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + httpClient := &http.Client{Transport: tr} + + var requestBody = []byte(`{ + "attrs": [ "name", "state", "last_check_result" ], + "joins": [ "host.name", "host.state", "host.last_check_result" ], + "filter": "match(\"ping*\", service.name)" + }`) + + req, err := http.NewRequest("POST", urlEndpoint, bytes.NewBuffer(requestBody)) + req.Header.Set("Accept", "application/json") + req.Header.Set("X-HTTP-Method-Override", "GET") + + req.SetBasicAuth(apiUser, apiPass) + + resp, err := httpClient.Do(req) + if err != nil { + log.Fatal("Server error:", err) + return + } + defer resp.Body.Close() + + log.Print("Response status:", resp.Status) + + bodyBytes, _ := ioutil.ReadAll(resp.Body) + bodyString := string(bodyBytes) + + if resp.StatusCode == http.StatusOK { + log.Print("Result: " + bodyString) + } else { + log.Fatal(bodyString) + } +} +``` + +Build the binary: + +```bash +go build icinga.go +./icinga +``` + +#### Example API Client in Powershell + +This example compares the given certificate with the certificate from icinga2 for a trusted connection. +More info: https://stackoverflow.com/a/58494718/9397788 + +Invoke-RestMethod with PUT is buggy with Powershell 3.0. So we need at least Powershell 4.0. +https://stackoverflow.com/questions/18278977/powershell-v3-invoke-restmethod-headers + + +``` +$icingaApiHost = "icinga.master.local" +$IcingaApiPort = 5665 +$icingaApiUser = "root" +$icingaApiPassword = "icinga" + +$requestUrl = "https://{0}:{1}/v1/objects/services" -f $icingaApiHost,$IcingaApiPort + + +# Put the certificate from your master (/etc/icinga2/pki/*.crt) here. 
+# You will get it with "openssl s_client -connect :5665" too. + +$cert64=@" + -----BEGIN CERTIFICATE----- + MIIE5TCCAs2gAwIBAgIBAjANBgkqhkiG9w0BAQsFADAUMRIwEAYDVQQDDAlJY2lu + Z2EgQ0EwHhcNMTYwNzA3MDYxOTM4WhcNMzEwNzA0MDYxOTM4WjAiMSAwHgYDVQQD + DBdpY2luZ2EuZXh0ZXJuMS56bXQuaW5mbzCCAiIwDQYJKoZIhvcNAQEBBQADggIP + ADCCAgoCggIBAJ2/ufxCb1m8PbUCxLkZqZNLxZ/vpulOcKmOGYm6VBWbOXQA50on + IewnMRUDGF9DHajLk1nyUu1TyKxGzBbja+06/kVd/8Muv0MUNF6iC1U3F3h0W9da + kk5rK1L+A534csHCFcG3dZkbdOMrh5hy4kMf9c2FEpviL54Fo4e+b3ZJFA6rv5D9 + 7LzaxfLcsMwXIZ/WRnxjjfnA+RenHeYLlNM8Uk3vqI6tBc1qpFzFeRWMbclFzSLN + 5x+J6cuyFjVi+Vv8c6SU6W3ykw8Vvq1QQUixl9lwxqNJNsWWfGR8ycmFiv1ZYxiu + HpmuLslExZ2qxdGe/raMBEOGgVsUTDZNyTm/9TxgOa3m9cv3R0YIFUmfoBQ3d51S + wburJG2eC0ETpnt0TptwMdTfL+HYVWB71djg2Pb8R3vldnhFVpy9vyx3FyHoN7ZQ + h7+r6HK2jpwWo7/jK9ExpglVoV8vUbNYqXiA/lZGEkT3YLwTyUhqXKei3Xu2CGGR + UId6fAj6OWk9TLW+OaR9BcS74mpiTWNDlbEP+/LQnUhte8scX5cSqBzy4vpuG1G+ + NGDbYcG4xn6Pc6qt/QddKU/pB/GbJv9SlHU8SjSt09oG9GtuXVjPoZX5msi3NmMy + DpAcab5Lx4MgOS/GwRLRI3IjZ3ZK+UkLvRgesSH5/JPUIgfTdr/Eg5dVAgMBAAGj + NDAyMAwGA1UdEwEB/wQCMAAwIgYDVR0RBBswGYIXaWNpbmdhLmV4dGVybjEuem10 + LmluZm8wDQYJKoZIhvcNAQELBQADggIBAEpEJt35KZuvDzU/xrVaVC3ct6CHmXOh + DDj5PdwkYtO0vw9WE7aVc88Fs6uhW2BxFkLvm7TpJ6g05egtBozHYrhTEir/fPna + rVAD9wEQU6KuSayeToXlgWhKDRAAv1lrQwU4xAAdJP8faxQGc7nAwN/h0g14UTmU + LSkyJU4a+1SkEUOs2YCq9vChS3MowO+6I35e98dIA1swHLeQ/QJowspODQvi6pGX + VH8FaaqfGwhv+gMwDoAW9hB74VZXO8I3mueZUccPiJXlaojx5hpaHRNRvpdBPclA + HHLRQniEOkai2Wg2cft/wq6/fYLE/yv5ej15MNyt3Wjj41DEK5B/bvmN/chOrZlv + 8rh3ek12ngVtXF+Jcmfsij8+hj/IOM6SeELtW+c0KRaPoVR7oR9o6ce/dyfiw6Hv + iQsAV6x//kytpRnUY3VAH4QTJzQ5bgz1Cwr6H+cWE2ca4MHCtPYaZnDiOv4b/Yz7 + 97Nrc7QPGewMl0hYeykpLP2hBJldw01NXhztuq1j38vYY38lKCN6v1INUujEUZg7 + NwgfHUvJmGIE/fwLAvP7do8gf+1MGPEimsgvias5YtDtrEOz7K/oF3Qgk3sepwAz + XXlNLnJAY4p0d/sgCCFQnstQMM95X0Y6cfITzkz3HIUcNF2sbvVnn8xHi0TSH/8J + tPLHO1xOLz7N + -----END CERTIFICATE----- +"@ + +# register callback for comparing the certificate +function set-SSLCertificate { + param( + $Cert + 
) + + if (-not("validateCert" -as [type])) { + add-type -TypeDefinition @" + using System.Net.Security; + using System.Security.Cryptography.X509Certificates; + + public static class ValidateCert { + static X509Certificate2 MyCert; + + public static bool Validate(object sender, + X509Certificate cert, + X509Chain chain, + SslPolicyErrors sslPolicyErrors) { + if (MyCert.Equals(cert)) { + return true; + } else { + return false; + } + } + + public static RemoteCertificateValidationCallback GetDelegate(X509Certificate2 Cert) { + MyCert = Cert; + return new RemoteCertificateValidationCallback(ValidateCert.Validate); + } + } +"@ + } + [System.Net.ServicePointManager]::ServerCertificateValidationCallback = [validateCert]::GetDelegate($Cert) +} + +# convert base64 based certificate to X509 certificate +function get-x509 { + param( + [string] + $Cert64 + ) + + $CertBin=[System.Convert]::FromBase64String(($Cert64.Trim(" ") -replace "-.*-","")) + + Write-Host ($Cert64.Trim(" ") -replace "-.*-","") + + [System.Security.Cryptography.X509Certificates.X509Certificate2]$CertBin +} + +# Allow TLS 1.2. Old powershell (.net) uses TLS 1.0 only. 
Icinga2 >2.10 needs TLS 1.2 +[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]'Ssl3,Tls,Tls11,Tls12' + +$SecPass = ConvertTo-SecureString $icingaApiPassword -AsPlainText -Force +$Cred = New-Object System.Management.Automation.PSCredential($icingaApiUser, $SecPass) + +$Cert = get-x509 $Cert64 +set-SSLCertificate $Cert + +$httpHeaders = @{ + "X-HTTP-Method-Override" = "GET" + "accept" = "application/json" +} + +$attrs = @( "name", "state", "last_check_result" ) +$joins = @( "host.name", "host.state", "host.last_check_result") +$filter = 'match("ping*", service.name)' + +$data = @{ + "attrs" = $attrs + "joins" = $joins + "filter" = $filter +} + +$result = Invoke-RestMethod -Uri $requestUrl -Method "POST" -Body (ConvertTo-Json -InputObject $data) -Credential $Cred -ContentType "application/json" -Headers $httpHeaders + +foreach ($s in $result.results) { + Write-Host "Service " $s.attrs.name " on Host " $s.joins.host.name "State " $s.attrs.state " Output: " $s.attrs.last_check_result.output + # Debug + Write-Host "Debug: Attributes " $s.attrs | ConvertTo-Json + Write-Host "Debug: Joins Host" $s.joins.host | ConvertTo-Json + Write-Host "`n" +} +``` + +Run the Powershell ISE as administrator, and execute the script as you change it. + +![Icinga 2 API Windows Powershell ISE Script](images/api/icinga2_api_powershell_ise.png) + + +Alternatively, save the code and run it in Powershell: + +``` +.\icinga.ps1 +``` diff --git a/doc/13-addons.md b/doc/13-addons.md new file mode 100644 index 0000000..953b7f0 --- /dev/null +++ b/doc/13-addons.md @@ -0,0 +1,258 @@ +# Icinga 2 Addons and Integrations + +For an uptodate overview of all integrations and modules, +please visit [https://icinga.com/products/](https://icinga.com/products/). + +## Syntax Highlighting + +Icinga 2 provides configuration examples for syntax highlighting using the `vim` and `nano` editors. 
+ +### Using Vim + +Install the package `vim-icinga2` with your distribution's package manager. + +Ensure that syntax highlighting is enabled e.g. by editing the user's `vimrc` +configuration file: + +``` +# vim ~/.vimrc +syntax on +``` + +Test it: + +```bash +vim /etc/icinga2/conf.d/templates.conf +``` + +![Vim with syntax highlighting](images/addons/vim-syntax.png "Vim with Icinga 2 syntax highlighting") + + +### Using Nano + +Install the package `nano-icinga2` with your distribution's package manager. + +**Note:** On Debian, Ubuntu and Raspbian, the syntax files are installed with the `icinga2-common` package already. + +Copy the `/etc/nanorc` sample file to your home directory. + +```bash +cp /etc/nanorc ~/.nanorc +``` + +Include the `icinga2.nanorc` file. + +``` +$ vim ~/.nanorc + +## Icinga 2 +include "/usr/share/nano/icinga2.nanorc" +``` + +Test it: + +```bash +nano /etc/icinga2/conf.d/templates.conf +``` + +![Nano with syntax highlighting](images/addons/nano-syntax.png "Nano with Icinga 2 syntax highlighting") + +## Icinga Reporting + +The [Icinga Reporting Module](https://icinga.com/docs/reporting/latest/) +is the framework and foundation we created to handle data collected +by Icinga 2 and other data providers. By definition Icinga Reporting does not collect +or calculate any data. The framework processes usable data from data providers such as +Icinga’s IDO or Icinga Web 2 modules and makes them available in different formats. + +It can display the data directly within the Icinga web interface or export it to PDF, +JSON or CSV format. With scheduled reports you can receive the prepared data periodically +via email. + +![Icinga Reporting](images/addons/icinga_reporting.png) + +Follow along in this [hands-on blog post](https://icinga.com/2019/06/17/icinga-reporting-hands-on/). 
+ + +## Graphs and Metrics + +### Graphite + +[Graphite](https://graphite.readthedocs.org/en/latest/) is a time-series database +storing collected metrics and making them available through restful apis +and web interfaces. + +Graphite consists of 3 software components: + +* carbon -- a Twisted daemon that listens for time-series data +* whisper -- a simple database library for storing time-series data (similar in design to RRD) +* graphite webapp -- a Django webapp that renders graphs on-demand using Cairo + +You need to install Graphite first, then proceed with configuring it in Icinga 2. + +Use the [GraphiteWriter](14-features.md#graphite-carbon-cache-writer) feature +for sending real-time metrics from Icinga 2 to Graphite. + +```bash +icinga2 feature enable graphite +``` + +A popular alternative frontend for Graphite is for example [Grafana](https://grafana.org). + +Integration in Icinga Web 2 is possible by installing the official [graphite module](https://icinga.com/docs/graphite/latest/). + +![Icinga Web 2 Detail View with Graphite](images/addons/icingaweb2_graphite.png) + + +### InfluxDB + +[InfluxDB](https://influxdb.com) is a time series, metrics, and analytics database. +It’s written in Go and has no external dependencies. + +Use the [InfluxdbWriter](14-features.md#influxdb-writer) feature +for sending real-time metrics from Icinga 2 to InfluxDB v1. + +```bash +icinga2 feature enable influxdb +``` + +Use the [Influxdb2Writer](14-features.md#influxdb-writer) feature +for sending real-time metrics from Icinga 2 to InfluxDB v2. + +```bash +icinga2 feature enable influxdb2 +``` + +A popular frontend for InfluxDB is for example [Grafana](https://grafana.org). + +Integration in Icinga Web 2 is possible by installing the community [Grafana module](https://github.com/Mikesch-mp/icingaweb2-module-grafana). + +![Icinga Web 2 Detail View with Grafana](images/addons/icingaweb2_grafana.png) + + +### PNP + +[PNP](https://www.pnp4nagios.org) is a graphing addon. 
+ +[PNP](https://www.pnp4nagios.org) is an addon which adds a graphical representation of the performance data collected +by the monitoring plugins. The data is stored as RRD (round-robin database) files. + +Use your distribution's package manager to install the `pnp4nagios` package. + +If you're planning to use it, configure it to use the +[bulk mode with npcd and npcdmod](https://docs.pnp4nagios.org/pnp-0.6/modes#bulk_mode_with_npcd_and_npcdmod) +in combination with Icinga 2's [PerfdataWriter](14-features.md#writing-performance-data-files). NPCD collects the performance +data files which Icinga 2 generates. + +Enable the performance data writer in Icinga 2: + +```bash +icinga2 feature enable perfdata +``` + +Configure npcd to use the performance data created by Icinga 2: + +```bash +vim /etc/pnp4nagios/npcd.cfg +``` + +Set `perfdata_spool_dir = /var/spool/icinga2/perfdata` and restart the `npcd` daemon. + +There's also an Icinga Web 2 module for direct PNP graph integration +available at [Icinga Exchange](https://exchange.icinga.com/icinga/PNP). + +## Visualization + +### Maps + +This community module displays host objects as markers on OpenStreetMap in Icinga Web 2. +It uses the data provided by the monitoring module and as such the [DB IDO](14-features.md#db-ido) +from Icinga 2. + +If you configure multiple hosts with the same coordinates, e.g. servers in a datacenter, a clustered view is rendered. + +Check the [Map module docs](https://github.com/nbuchwitz/icingaweb2-module-map) for more details on +installation, configuration and integration. + +![Icinga Web 2 Maps](images/addons/icingaweb2_maps.png) + +### Business Process + +Create top-level views of your applications in a graphical editor. +Rules express dependencies between existing hosts and services and +let you alert on application level. Business processes are displayed +in a tree or list overview and can be added to any dashboard.
+ +![Icinga Web 2 Business Process](images/addons/icingaweb2_businessprocess.png) + +Read more [here](https://icinga.com/products/icinga-business-process-modelling/). + +### Certificate Monitoring + +Monitor your certificates in an efficient and comfortable way. Be aware of required +actions and view all details at a glance. + +![Icinga Certificate Monitoring](images/addons/icinga_certificate_monitoring.png) + +Read more [here](https://icinga.com/products/icinga-certificate-monitoring/) +and [here](https://icinga.com/2019/06/03/monitoring-automation-with-icinga-certificate-monitoring/). + +### Dashing Dashboard + +The [Icinga 2 dashboard](https://github.com/dnsmichi/dashing-icinga2) is built +on top of Dashing and uses the [REST API](12-icinga2-api.md#icinga2-api) to visualize what's going +on with your monitoring. It combines several popular widgets and provides development +instructions for your own implementation. + +The dashboard also allows you to embed the [Icinga Web 2](https://icinga.com/products/icinga-web-2/) +host and service problem lists as an iframe. + +![Dashing dashboard](images/addons/dashing_icinga2.png) + + +## Log Monitoring + +Using [Logstash](https://www.elastic.co/guide/en/logstash/current/introduction.html) or +[Graylog](https://www.graylog.org) in your infrastructure and correlating events with your monitoring +is even simpler these days. + +* Use the `GelfWriter` feature to write Icinga 2's check and notification events to Graylog or Logstash. +* Configure the Logstash `nagios` output to send passive traps to Icinga 2 using the external command pipe. +* Execute a plugin to check Graylog alert streams. + +More details can be found in [this blog post](https://icinga.com/2014/12/02/team-icinga-at-osmc-2014/). + +## Notification Scripts and Interfaces + +There's a variety of resources available, for example different notification scripts such as: + +* E-Mail ([examples](03-monitoring-basics.md#alert-notifications) provided) +* SMS +* Pager (XMPP, etc.)
+* Twitter +* IRC +* Ticket systems +* etc. + +Blog posts and howtos: + +* [Environmental Monitoring and Alerting](https://icinga.com/2019/09/02/environmental-monitoring-and-alerting-via-text-message/) + +Additionally, external services can be [integrated with Icinga 2](https://icinga.com/products/integrations/): + +* [PagerDuty](https://icinga.com/products/integrations/pagerduty/) +* [VictorOps](https://icinga.com/products/integrations/victorops/) +* [StackStorm](https://icinga.com/products/integrations/stackstorm/) + +More information can be found on the [Icinga Website](https://icinga.com/). + +## Configuration Management Tools + +Check out these specific integrations: + +* [Ansible Roles](https://icinga.com/products/integrations/) +* [Puppet Module](https://icinga.com/products/integrations/puppet/) +* [Chef Cookbook](https://icinga.com/products/integrations/chef/) + +If you're looking for different config management integrations -- we're happy +to add them upstream, so please get in touch with the [Icinga team](https://icinga.com/community/). diff --git a/doc/14-features.md b/doc/14-features.md new file mode 100644 index 0000000..9436217 --- /dev/null +++ b/doc/14-features.md @@ -0,0 +1,1482 @@ +# Icinga 2 Features + +## Logging + +Icinga 2 supports three different types of logging: + +* File logging +* Syslog (on Linux/UNIX) +* Console logging (`STDOUT` on tty) + +You can enable or disable loggers using the `icinga2 feature enable` +and `icinga2 feature disable` commands: + +Feature | Description +----------------|------------ +debuglog | Debug log (path: `/var/log/icinga2/debug.log`, severity: `debug` or higher) +mainlog | Main log (path: `/var/log/icinga2/icinga2.log`, severity: `information` or higher) +syslog | Syslog (severity: `warning` or higher) +windowseventlog | Windows Event Log (severity: `information` or higher) + +By default, the `mainlog` file logging feature is enabled.
When running Icinga 2 +on a terminal, log messages with severity `information` or higher are +written to the console. + +### Log Rotation + +Packages provide a configuration file for [logrotate](https://linux.die.net/man/8/logrotate) +on Linux/Unix. Typically this is installed into `/etc/logrotate.d/icinga2` +and modifications won't be overwritten on upgrade. + +Instead of sending the reload HUP signal, logrotate +sends the USR1 signal to notify the Icinga daemon +that it has rotated the log file. Icinga then reopens the log +files: + +* `/var/log/icinga2/icinga2.log` (requires `mainlog` enabled) +* `/var/log/icinga2/debug.log` (requires `debuglog` enabled) +* `/var/log/icinga2/error.log` + +By default, log files will be rotated daily. + +## Core Backends + +### REST API + +The REST API is documented [here](12-icinga2-api.md#icinga2-api) as a core feature. + +### Icinga DB + +Icinga DB is a set of components for publishing, synchronizing and +visualizing monitoring data in the Icinga ecosystem, consisting of: + +* Icinga 2 with its `icingadb` feature enabled, + responsible for publishing monitoring data to a Redis server, i.e. configuration and its runtime updates, + check results, state changes, downtimes, acknowledgements, notifications, and other events such as flapping +* The [Icinga DB daemon](https://icinga.com/docs/icinga-db), + which synchronizes the data between the Redis server and a database +* And Icinga Web with the + [Icinga DB Web](https://icinga.com/docs/icinga-db-web) module enabled, + which connects to both Redis and the database to display and work with the most up-to-date data + +![Icinga DB Architecture](images/icingadb/icingadb-architecture.png) + +To set up a Redis server and the Icinga DB feature, please follow the steps from the +Icinga 2 [Installation](02-installation.md) guide. For the feature configuration options, +see its [Icinga DB object type](09-object-types.md#icingadb) documentation.
+ +## Metrics + +Whenever a host or service check is executed, or received via the REST API, +best practice is to provide performance data. + +This data is parsed by features sending metrics to time series databases (TSDB): + +* [Graphite](14-features.md#graphite-carbon-cache-writer) +* [InfluxDB](14-features.md#influxdb-writer) +* [OpenTSDB](14-features.md#opentsdb-writer) + +Metrics, state changes and notifications can be managed with the following integrations: + +* [Elastic Stack](14-features.md#elastic-stack-integration) +* [Graylog](14-features.md#graylog-integration) + + +### Graphite Writer + +[Graphite](13-addons.md#addons-graphing-graphite) is a tool stack for storing +metrics and needs to be running prior to enabling the `graphite` feature. + +Icinga 2 writes parsed metrics directly to Graphite's Carbon Cache +TCP port, defaulting to `2003`. + +You can enable the feature using + +```bash +icinga2 feature enable graphite +``` + +By default, the [GraphiteWriter](09-object-types.md#objecttype-graphitewriter) feature +expects the Graphite Carbon Cache to listen at `127.0.0.1` on TCP port `2003`. + +#### Graphite Schema + +The current naming schema is defined as follows. The [Icinga Web 2 Graphite module](https://icinga.com/products/integrations/graphite/) +depends on this schema. + +The default prefix for hosts and services is configured using +[runtime macros](03-monitoring-basics.md#runtime-macros) like this: + +``` +icinga2.$host.name$.host.$host.check_command$ +icinga2.$host.name$.services.$service.name$.$service.check_command$ +``` + +You can customize the prefix name by using the `host_name_template` and +`service_name_template` configuration attributes. + +The additional levels allow fine-grained filters and also template +capabilities, e.g. by using the check command `disk` for specific +graph templates in web applications rendering the Graphite data.
+ +The following characters are escaped in prefix labels: + + Character | Escaped character + --------------|-------------------------- + whitespace | _ + . | _ + \ | _ + / | _ + +Metric values are stored like this: + +``` +<prefix>.perfdata.<perfdata-label>.value +``` + +The following characters are escaped in performance labels +parsed from plugin output: + + Character | Escaped character + --------------|-------------------------- + whitespace | _ + \ | _ + / | _ + :: | . + +Note that labels may contain dots (`.`), which allows you to +add further levels inside the Graphite tree. +`::` adds support for [multi performance labels](http://my-plugin.de/wiki/projects/check_multi/configuration/performance) +and is therefore replaced by `.`. + +By enabling `enable_send_thresholds` Icinga 2 automatically adds the following threshold metrics: + +``` +<prefix>.perfdata.<perfdata-label>.min +<prefix>.perfdata.<perfdata-label>.max +<prefix>.perfdata.<perfdata-label>.warn +<prefix>.perfdata.<perfdata-label>.crit +``` + +By enabling `enable_send_metadata` Icinga 2 automatically adds the following metadata metrics: + +``` +<prefix>.metadata.current_attempt +<prefix>.metadata.downtime_depth +<prefix>.metadata.acknowledgement +<prefix>.metadata.execution_time +<prefix>.metadata.latency +<prefix>.metadata.max_check_attempts +<prefix>.metadata.reachable +<prefix>.metadata.state +<prefix>.metadata.state_type +``` + +Metadata metric overview: + + metric | description + -------------------|------------------------------------------ + current_attempt | current check attempt + max_check_attempts | maximum check attempts until the hard state is reached + reachable | checked object is reachable + downtime_depth | number of downtimes this object is in + acknowledgement | whether the object is acknowledged or not + execution_time | check execution time + latency | check latency + state | current state of the checked object + state_type | 0=SOFT, 1=HARD state + +The following example illustrates how to configure the storage schemas for Graphite Carbon +Cache. + +``` +[icinga2_default] +# intervals like PNP4Nagios uses them per default +pattern = ^icinga2\.
+retentions = 1m:2d,5m:10d,30m:90d,360m:4y +``` + +#### Graphite in Cluster HA Zones + +The Graphite feature supports [high availability](06-distributed-monitoring.md#distributed-monitoring-high-availability-features) +in cluster zones since 2.11. + +By default, all endpoints in a zone will activate the feature and start +writing metrics to a Carbon Cache socket. In HA enabled scenarios, +it is possible to set `enable_ha = true` in all feature configuration +files. This allows each endpoint to calculate the feature authority, +and only one endpoint actively writes metrics, the other endpoints +pause the feature. + +When the cluster connection breaks at some point, the remaining endpoint(s) +in that zone will automatically resume the feature. This built-in failover +mechanism ensures that metrics are written even if the cluster fails. + +The recommended way of running Graphite in this scenario is a dedicated server +where Carbon Cache/Relay is running as receiver. + + +### InfluxDB Writer + +Once there are new metrics available, Icinga 2 will directly write them to the +defined InfluxDB v1/v2 HTTP API. + +You can enable the feature using + +```bash +icinga2 feature enable influxdb +``` + +or + +```bash +icinga2 feature enable influxdb2 +``` + +By default the +[InfluxdbWriter](09-object-types.md#objecttype-influxdbwriter)/[Influxdb2Writer](09-object-types.md#objecttype-influxdb2writer) +features expect the InfluxDB daemon to listen at `127.0.0.1` on port `8086`. + +Measurement names and tags are fully configurable by the end user. The Influxdb(2)Writer +object will automatically add a `metric` tag to each data point. This correlates to the +perfdata label. Fields (value, warn, crit, min, max, unit) are created from data if available +and the configuration allows it. If a value associated with a tag is not able to be +resolved, it will be dropped and not sent to the target host. 
+ +Backslashes are allowed in tag keys, tag values and field keys, however they are also +escape characters when followed by a space or comma, but cannot be escaped themselves. +As a result all trailing backslashes in these fields are replaced with an underscore. This +predominantly affects Windows paths, e.g. `C:\` becomes `C:_`. + +The database/bucket is assumed to exist; this object currently makes no attempt to create it. + +If [SELinux](22-selinux.md#selinux) is enabled, it will not allow access for Icinga 2 to InfluxDB until the [boolean](22-selinux.md#selinux-policy-booleans) +`icinga2_can_connect_all` is set to true as InfluxDB is not providing its own policy. + +More configuration details can be found [here for v1](09-object-types.md#objecttype-influxdbwriter) +and [here for v2](09-object-types.md#objecttype-influxdb2writer). + +#### Instance Tagging + +Consider the following service check: + +``` +apply Service "disk" for (disk => attributes in host.vars.disks) { + import "generic-service" + check_command = "disk" + display_name = "Disk " + disk + vars.disk_partitions = disk + assign where host.vars.disks +} +``` + +This is a typical pattern for checking individual disks, NICs, TLS certificates, etc. associated +with a host. What would be useful is to have the data points tagged with the specific instance +for that check. This would allow you to query time series data for a check on a host and for a +specific instance, e.g. /dev/sda. To do this, simply add the instance to the service variables: + +``` +apply Service "disk" for (disk => attributes in host.vars.disks) { + ... + vars.instance = disk + ... +} +``` + +Then modify your writer configuration to add this tag to your data points if the instance variable +is associated with the service: + +``` +object InfluxdbWriter "influxdb" { + ...
+ service_template = { + measurement = "$service.check_command$" + tags = { + hostname = "$host.name$" + service = "$service.name$" + instance = "$service.vars.instance$" + } + } + ... +} +``` + +#### InfluxDB in Cluster HA Zones + +The InfluxDB feature supports [high availability](06-distributed-monitoring.md#distributed-monitoring-high-availability-features) +in cluster zones since 2.11. + +By default, all endpoints in a zone will activate the feature and start +writing metrics to the InfluxDB HTTP API. In HA enabled scenarios, +it is possible to set `enable_ha = true` in all feature configuration +files. This allows each endpoint to calculate the feature authority, +and only one endpoint actively writes metrics, the other endpoints +pause the feature. + +When the cluster connection breaks at some point, the remaining endpoint(s) +in that zone will automatically resume the feature. This built-in failover +mechanism ensures that metrics are written even if the cluster fails. + +The recommended way of running InfluxDB in this scenario is a dedicated server +where the InfluxDB HTTP API or Telegraf as Proxy are running. + +### Elastic Stack Integration + +[Icingabeat](https://icinga.com/products/integrations/elastic/) is an Elastic Beat that fetches data +from the Icinga 2 API and sends it either directly to [Elasticsearch](https://www.elastic.co/products/elasticsearch) +or [Logstash](https://www.elastic.co/products/logstash). + +More integrations: + +* [Logstash output](https://icinga.com/products/integrations/elastic/) for the Icinga 2 API. +* [Logstash Grok Pattern](https://icinga.com/products/integrations/elastic/) for Icinga 2 logs. + +#### Elasticsearch Writer + +This feature forwards check results, state changes and notification events +to an [Elasticsearch](https://www.elastic.co/products/elasticsearch) installation over its HTTP API. + +The check results include parsed performance data metrics if enabled. 
+ +> **Note** +> +> Elasticsearch 5.x or 6.x is required. This feature has been successfully tested with +> Elasticsearch 5.6.7 and 6.3.1. + + + +Enable the feature and restart Icinga 2. + +```bash +icinga2 feature enable elasticsearch +``` + +The default configuration expects an Elasticsearch instance running on `localhost` on port `9200` + and writes to an index called `icinga2`. + +More configuration details can be found [here](09-object-types.md#objecttype-elasticsearchwriter). + +#### Current Elasticsearch Schema + +The following event types are written to Elasticsearch: + +* icinga2.event.checkresult +* icinga2.event.statechange +* icinga2.event.notification + +Performance data metrics must be explicitly enabled with the `enable_send_perfdata` +attribute. + +Metric values are stored like this: + +``` +check_result.perfdata.<perfdata-label>.value +``` + +The following characters are escaped in perfdata labels: + + Character | Escaped character + ------------|-------------------------- + whitespace | _ + \ | _ + / | _ + :: | . + +Note that perfdata labels may contain dots (`.`), which allows you to +add further levels inside the tree. +`::` adds support for [multi performance labels](http://my-plugin.de/wiki/projects/check_multi/configuration/performance) +and is therefore replaced by `.`. + +Icinga 2 automatically adds the following threshold metrics +if they exist: + +``` +check_result.perfdata.<perfdata-label>.min +check_result.perfdata.<perfdata-label>.max +check_result.perfdata.<perfdata-label>.warn +check_result.perfdata.<perfdata-label>.crit +``` + +#### Elasticsearch in Cluster HA Zones + +The Elasticsearch feature supports [high availability](06-distributed-monitoring.md#distributed-monitoring-high-availability-features) +in cluster zones since 2.11. + +By default, all endpoints in a zone will activate the feature and start +writing events to the Elasticsearch HTTP API. In HA enabled scenarios, +it is possible to set `enable_ha = true` in all feature configuration +files.
This allows each endpoint to calculate the feature authority, +and only one endpoint actively writes events, the other endpoints +pause the feature. + +When the cluster connection breaks at some point, the remaining endpoint(s) +in that zone will automatically resume the feature. This built-in failover +mechanism ensures that events are written even if the cluster fails. + +The recommended way of running Elasticsearch in this scenario is a dedicated server +where you either have the Elasticsearch HTTP API, or a TLS secured HTTP proxy, +or Logstash for additional filtering. + +### Graylog Integration + +#### GELF Writer + +The `Graylog Extended Log Format` (short: [GELF](https://docs.graylog.org/en/latest/pages/gelf.html)) +can be used to send application logs directly to a TCP socket. + +While it has been specified by the [Graylog](https://www.graylog.org) project as their +[input resource standard](https://docs.graylog.org/en/latest/pages/sending_data.html), other tools such as +[Logstash](https://www.elastic.co/products/logstash) also support `GELF` as +[input type](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-gelf.html). + +You can enable the feature using + +```bash +icinga2 feature enable gelf +``` + +By default the `GelfWriter` object expects the GELF receiver to listen at `127.0.0.1` on TCP port `12201`. +The default `source` attribute is set to `icinga2`. You can customize that for your needs if required. + +Currently these events are processed: +* Check results +* State changes +* Notifications + +#### Graylog/GELF in Cluster HA Zones + +The Gelf feature supports [high availability](06-distributed-monitoring.md#distributed-monitoring-high-availability-features) +in cluster zones since 2.11. + +By default, all endpoints in a zone will activate the feature and start +writing events to the Graylog HTTP API. In HA enabled scenarios, +it is possible to set `enable_ha = true` in all feature configuration +files. 
This allows each endpoint to calculate the feature authority, +and only one endpoint actively writes events, the other endpoints +pause the feature. + +When the cluster connection breaks at some point, the remaining endpoint(s) +in that zone will automatically resume the feature. This built-in failover +mechanism ensures that events are written even if the cluster fails. + +The recommended way of running Graylog in this scenario is a dedicated server +where you have the Graylog HTTP API listening. + +### OpenTSDB Writer + +While there are some OpenTSDB collector scripts and daemons like tcollector available for +Icinga 1.x, it's more reasonable to directly process the check and plugin performance data +in memory in Icinga 2. Once there are new metrics available, Icinga 2 will directly +write them to the defined TSDB TCP socket. + +You can enable the feature using + +```bash +icinga2 feature enable opentsdb +``` + +By default the `OpenTsdbWriter` object expects the TSD to listen at +`127.0.0.1` on port `4242`. + +The current default naming schema is: + +``` +icinga.host.<perfdata_metric_label> +icinga.service.<servicename>.<perfdata_metric_label> +``` + +for host and service checks. The tag `host` is always applied. + +Icinga also sends perfdata warning, critical, minimum and maximum threshold values to OpenTSDB. +These are stored as new OpenTSDB metric names appended with `_warn`, `_crit`, `_min`, `_max`. +Values are only stored when the corresponding threshold exists in Icinga's perfdata. + +Example: +``` +icinga.service.<servicename>.<perfdata_metric_label>
+icinga.service.<servicename>.<perfdata_metric_label>._warn +icinga.service.<servicename>.<perfdata_metric_label>._crit +icinga.service.<servicename>.<perfdata_metric_label>._min +icinga.service.<servicename>.<perfdata_metric_label>._max +``` + +To make sure Icinga 2 writes a valid metric into OpenTSDB some characters are replaced +with `_` in the target name: + +``` +\ : (and space) +``` + +The resulting name in OpenTSDB might look like: + +``` +www-01 / http-cert / response time +icinga.http_cert.response_time +``` + +In addition to the performance data retrieved from the check plugin, Icinga 2 sends +internal check statistic data to OpenTSDB: + + metric | description + -------------------|------------------------------------------ + current_attempt | current check attempt + max_check_attempts | maximum check attempts until the hard state is reached + reachable | checked object is reachable + downtime_depth | number of downtimes this object is in + acknowledgement | whether the object is acknowledged or not + execution_time | check execution time + latency | check latency + state | current state of the checked object + state_type | 0=SOFT, 1=HARD state + +While reachable, state and state_type are metrics for the host or service, the +other metrics follow the current naming schema + +``` +icinga.check.<metricname> +``` + +with the following tags + + tag | description + --------|------------------------------------------ + type | the check type, one of [host, service] + host | hostname the check ran on + service | the service name (if type=service) + +> **Note** +> +> You might want to set the tsd.core.auto_create_metrics setting to `true` +> in your opentsdb.conf configuration file. + +#### OpenTSDB Metric Prefix +Functionality exists to modify the built-in OpenTSDB metric names that the plugin +writes to. By default this is `icinga.host` and `icinga.service.<servicename>`. + +These prefixes can be modified as necessary to any arbitrary string. The prefix +configuration also supports Icinga macros, so if you would rather use `<checkcommand>` +or any other variable instead of `<servicename>` you may do so.
+ +To configure OpenTSDB metric name prefixes, create or modify the `host_template` and/or +`service_template` blocks in the `opentsdb.conf` file, to add a `metric` definition. +These modifications go hand in hand with the **OpenTSDB Custom Tag Support** detailed below, +and more information around macro use can be found there. + +Additionally, using custom Metric Prefixes or your own macros in the prefix may be +helpful if you are using the **OpenTSDB Generic Metric** functionality detailed below. + +An example configuration which includes prefix name modification: + +``` +object OpenTsdbWriter "opentsdb" { + host = "127.0.0.1" + port = 4242 + host_template = { + metric = "icinga.myhost" + tags = { + location = "$host.vars.location$" + checkcommand = "$host.check_command$" + } + } + service_template = { + metric = "icinga.service.$service.check_command$" + } +} +``` + +The above configuration will output the following naming schema: +``` +icinga.myhost.<perfdata_metric_label> +icinga.service.<check_command_name>.<perfdata_metric_label> +``` +Note how `<perfdata_metric_label>` is always appended in the default naming schema mode. + +#### OpenTSDB Generic Metric Naming Schema + +An alternate naming schema (`Generic Metrics`) is available where OpenTSDB metric names are more generic +and do not include the Icinga perfdata label in the metric name. Instead, +perfdata labels are stored in a tag `label` which is stored along with each perfdata value. + +This ultimately reduces the number of unique OpenTSDB metric names, which may make +querying aggregate data easier. This also allows you to store all perfdata values for a +particular check inside one OpenTSDB metric name for each check. + +This alternate naming schema can be enabled by setting the following in the `OpenTsdbWriter` config: +`enable_generic_metrics = true` + +> **Tip** +> Consider using `Generic Metrics` along with the **OpenTSDB Metric Prefix** naming options +> described above. + +An example of this naming schema when compared to the default is: + +``` +icinga.host +icinga.service.<servicename>
+``` + +> **Note** +> Note how `` does not appear in the OpenTSDB metric name +> when using `Generic Metrics`. Instead, a new tag `label` appears on each value written +> to OpenTSDB which contains the perfdata label. + +#### Custom Tags + +In addition to the default tags listed above, it is possible to send +your own custom tags with your data to OpenTSDB. + +Note that custom tags are sent **in addition** to the default hostname, +type and service name tags. If you do not include this section in the +config file, no custom tags will be included. + +Custom tags can be custom attributes or built-in attributes. + +Consider a host object: + +``` +object Host "my-server1" { + address = "10.0.0.1" + check_command = "hostalive" + vars.location = "Australia" +} +``` + +and a service object: + +``` +object Service "ping" { + host_name = "localhost" + check_command = "my-ping" + + vars.ping_packets = 10 +} +``` + +It is possible to send `vars.location` and `vars.ping_packets` along +with performance data. Additionally, any other attribute can be sent +as a tag, such as `check_command`. + +You can make use of the `host_template` and `service_template` blocks +in the `opentsdb.conf` configuration file. + +An example OpenTSDB configuration file which makes use of custom tags: + +``` +object OpenTsdbWriter "opentsdb" { + host = "127.0.0.1" + port = 4242 + host_template = { + tags = { + location = "$host.vars.location$" + checkcommand = "$host.check_command$" + } + } + service_template = { + tags = { + location = "$host.vars.location$" + pingpackets = "$service.vars.ping_packets$" + checkcommand = "$service.check_command$" + } + } +} +``` + +The keyword the macro begins with determines which +attributes are available in the macro context. The table below explains +which attributes are available, with links to each object type. 
+ + start of macro | description + ---------------|------------------------------------------ + \$host...$ | Attributes available on a [Host object](09-object-types.md#objecttype-host) + \$service...$ | Attributes available on a [Service object](09-object-types.md#objecttype-service) + \$icinga...$ | Attributes available on the [IcingaApplication object](09-object-types.md#objecttype-icingaapplication) + +> **Note** +> +> Ensure you do not name your custom attributes with a dot in the name. +> Dots located inside a macro tell the interpreter to expand a +> dictionary. +> +> Do not do this in your object configuration: +> +> `vars["my.attribute"]` +> +> as you will be unable to reference `my.attribute` because it is not a +> dictionary. +> +> Instead, use underscores or another character: +> +> `vars.my_attribute` or `vars["my_attribute"]` + + + +#### OpenTSDB in Cluster HA Zones + +The OpenTSDB feature supports [high availability](06-distributed-monitoring.md#distributed-monitoring-high-availability-features) +in cluster zones since 2.11. + +By default, all endpoints in a zone will activate the feature and start +writing events to the OpenTSDB listener. In HA enabled scenarios, +it is possible to set `enable_ha = true` in all feature configuration +files. This allows each endpoint to calculate the feature authority, +and only one endpoint actively writes metrics, the other endpoints +pause the feature. + +When the cluster connection breaks at some point, the remaining endpoint(s) +in that zone will automatically resume the feature. This built-in failover +mechanism ensures that metrics are written even if the cluster fails. + +The recommended way of running OpenTSDB in this scenario is a dedicated server +where you have OpenTSDB running. + + +### Writing Performance Data Files + +PNP and Graphios use performance data collector daemons to fetch +the current performance files for their backend updates. 
+ +Therefore the Icinga 2 [PerfdataWriter](09-object-types.md#objecttype-perfdatawriter) +feature allows you to define the output template format for host and services helped +with Icinga 2 runtime vars. + +``` +host_format_template = "DATATYPE::HOSTPERFDATA\tTIMET::$icinga.timet$\tHOSTNAME::$host.name$\tHOSTPERFDATA::$host.perfdata$\tHOSTCHECKCOMMAND::$host.check_command$\tHOSTSTATE::$host.state$\tHOSTSTATETYPE::$host.state_type$" +service_format_template = "DATATYPE::SERVICEPERFDATA\tTIMET::$icinga.timet$\tHOSTNAME::$host.name$\tSERVICEDESC::$service.name$\tSERVICEPERFDATA::$service.perfdata$\tSERVICECHECKCOMMAND::$service.check_command$\tHOSTSTATE::$host.state$\tHOSTSTATETYPE::$host.state_type$\tSERVICESTATE::$service.state$\tSERVICESTATETYPE::$service.state_type$" +``` + +The default templates are already provided with the Icinga 2 feature configuration +which can be enabled using + +```bash +icinga2 feature enable perfdata +``` + +By default all performance data files are rotated in a 15 seconds interval into +the `/var/spool/icinga2/perfdata/` directory as `host-perfdata.` and +`service-perfdata.`. +External collectors need to parse the rotated performance data files and then +remove the processed files. + +#### Perfdata Files in Cluster HA Zones + +The Perfdata feature supports [high availability](06-distributed-monitoring.md#distributed-monitoring-high-availability-features) +in cluster zones since 2.11. + +By default, all endpoints in a zone will activate the feature and start +writing metrics to the local spool directory. In HA enabled scenarios, +it is possible to set `enable_ha = true` in all feature configuration +files. This allows each endpoint to calculate the feature authority, +and only one endpoint actively writes metrics, the other endpoints +pause the feature. + +When the cluster connection breaks at some point, the remaining endpoint(s) +in that zone will automatically resume the feature. 
This built-in failover +mechanism ensures that metrics are written even if the cluster fails. + +The recommended way of running Perfdata is to mount the perfdata spool +directory via NFS on a central server where PNP with the NPCD collector +is running on. + + + + + +## Deprecated Features + +### IDO Database (DB IDO) + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +The IDO (Icinga Data Output) feature for Icinga 2 takes care of exporting all +configuration and status information into a database. The IDO database is used +by Icinga Web 2 as data backend. You can either use a +[MySQL](#ido-with-mysql) or [PostgreSQL](#ido-with-postgresql) database. + +#### IDO with MySQL + +##### Install IDO Feature + +The next step is to install the `icinga2-ido-mysql` package using your +distribution's package manager. + +###### Debian / Ubuntu + +```bash +apt-get install icinga2-ido-mysql +``` + +!!! note + + The packages provide a database configuration wizard by + default. You can skip the automated setup and install/upgrade the + database manually if you prefer. + +###### CentOS 7 + +!!! info + + Note that installing `icinga2-ido-mysql` is only supported on CentOS 7 as CentOS 8 is EOL. 
+ +```bash +yum install icinga2-ido-mysql +``` + +###### RHEL 8 + +```bash +dnf install icinga2-ido-mysql +``` + +###### RHEL 7 + +```bash +yum install icinga2-ido-mysql +``` + +###### SLES + +```bash +zypper install icinga2-ido-mysql +``` + +###### Amazon Linux 2 + +```bash +yum install icinga2-ido-mysql +``` + +##### Set up MySQL database + +Set up a MySQL database for Icinga 2: + +```bash +# mysql -u root -p + +CREATE DATABASE icinga; +GRANT ALTER, CREATE, SELECT, INSERT, UPDATE, DELETE, DROP, CREATE VIEW, INDEX, EXECUTE ON icinga.* TO 'icinga'@'localhost' IDENTIFIED BY 'icinga'; +quit +``` + +Please note that the example above uses the very simple password 'icinga' (in `IDENTIFIED BY 'icinga'`). +Please choose a better password for your installation. + +After creating the database you can import the Icinga 2 IDO schema using the +following command. Enter the icinga password into the prompt when asked. + +```bash +mysql -u icinga -p icinga < /usr/share/icinga2-ido-mysql/schema/mysql.sql +``` + +##### Enable the IDO MySQL feature + +The package provides a new configuration file that is installed in +`/etc/icinga2/features-available/ido-mysql.conf`. You can update +the database credentials in this file. + +All available attributes are explained in the +[IdoMysqlConnection object](09-object-types.md#objecttype-idomysqlconnection) +chapter. + +Enable the `ido-mysql` feature configuration file using the `icinga2` command: + +```bash +# icinga2 feature enable ido-mysql +Module 'ido-mysql' was enabled. +Make sure to restart Icinga 2 for these changes to take effect. +``` + +Restart Icinga 2. + +```bash +systemctl restart icinga2 +``` + +#### IDO with PostgreSQL + +##### Install IDO Feature + +The next step is to install the `icinga2-ido-pgsql` package using your +distribution's package manager. + +###### Debian / Ubuntu + +```bash +apt-get install icinga2-ido-pgsql +``` + +!!! note + + Upstream Debian packages provide a database configuration wizard by default. 
+ You can skip the automated setup and install/upgrade the database manually + if you prefer that. + +###### CentOS 7 + +!!! info + + Note that installing `icinga2-ido-pgsql` is only supported on CentOS 7 as CentOS 8 is EOL. + +```bash +yum install icinga2-ido-pgsql +``` + +###### RHEL 8 + +```bash +dnf install icinga2-ido-pgsql +``` + +###### RHEL 7 + +```bash +yum install icinga2-ido-pgsql +``` + +###### SLES + +```bash +zypper install icinga2-ido-pgsql +``` + +###### Amazon Linux 2 + +```bash +yum install icinga2-ido-pgsql +``` + +##### Set up PostgreSQL database + +Set up a PostgreSQL database for Icinga 2: + +```bash +cd /tmp +sudo -u postgres psql -c "CREATE ROLE icinga WITH LOGIN PASSWORD 'icinga'" +sudo -u postgres createdb -O icinga -E UTF8 icinga +``` + +!!! note + + It is assumed here that your locale is set to utf-8, you may run into problems otherwise. + +Locate your `pg_hba.conf` configuration file and add the icinga user with `md5` as authentication method +and restart the postgresql server. Common locations for `pg_hba.conf` are either +`/etc/postgresql/*/main/pg_hba.conf` or `/var/lib/pgsql/data/pg_hba.conf`. + +``` +# icinga +local icinga icinga md5 +host icinga icinga 127.0.0.1/32 md5 +host icinga icinga ::1/128 md5 + +# "local" is for Unix domain socket connections only +local all all ident +# IPv4 local connections: +host all all 127.0.0.1/32 ident +# IPv6 local connections: +host all all ::1/128 ident +``` + +Restart PostgreSQL: + +```bash +systemctl restart postgresql +``` + +After creating the database and permissions you need to import the IDO database +schema using the following command: + +```bash +export PGPASSWORD=icinga +psql -U icinga -d icinga < /usr/share/icinga2-ido-pgsql/schema/pgsql.sql +``` + +##### Enable the IDO PostgreSQL feature + +The package provides a new configuration file that is installed in +`/etc/icinga2/features-available/ido-pgsql.conf`. You can update +the database credentials in this file. 
+ +All available attributes are explained in the +[IdoPgsqlConnection object](09-object-types.md#objecttype-idopgsqlconnection) +chapter. + +Enable the `ido-pgsql` feature configuration file using the `icinga2` command: + +``` +# icinga2 feature enable ido-pgsql +Module 'ido-pgsql' was enabled. +Make sure to restart Icinga 2 for these changes to take effect. +``` + +Restart Icinga 2. + +```bash +systemctl restart icinga2 +``` + +#### Configuration + +Details on the configuration can be found in the +[IdoMysqlConnection](09-object-types.md#objecttype-idomysqlconnection) and +[IdoPgsqlConnection](09-object-types.md#objecttype-idopgsqlconnection) +object configuration documentation. + +#### DB IDO Health + +If the monitoring health indicator is critical in Icinga Web 2, +you can use the following queries to manually check whether Icinga 2 +is actually updating the IDO database. + +Icinga 2 writes its current status to the `icinga_programstatus` table +every 10 seconds. The query below checks 60 seconds into the past which is a reasonable +amount of time -- adjust it for your requirements. If the condition is not met, +the query returns an empty result. + +> **Tip** +> +> Use [check plugins](05-service-monitoring.md#service-monitoring-plugins) to monitor the backend. + +Replace the `default` string with your instance name if different. 
+ +Example for MySQL: + +``` +# mysql -u root -p icinga -e "SELECT status_update_time FROM icinga_programstatus ps + JOIN icinga_instances i ON ps.instance_id=i.instance_id + WHERE (UNIX_TIMESTAMP(ps.status_update_time) > UNIX_TIMESTAMP(NOW())-60) + AND i.instance_name='default';" + ++---------------------+ +| status_update_time | ++---------------------+ +| 2014-05-29 14:29:56 | ++---------------------+ +``` + +Example for PostgreSQL: + +``` +# export PGPASSWORD=icinga; psql -U icinga -d icinga -c "SELECT ps.status_update_time FROM icinga_programstatus AS ps + JOIN icinga_instances AS i ON ps.instance_id=i.instance_id + WHERE ((SELECT extract(epoch from status_update_time) FROM icinga_programstatus) > (SELECT extract(epoch from now())-60)) + AND i.instance_name='default'"; + +status_update_time +------------------------ + 2014-05-29 15:11:38+02 +(1 Zeile) +``` + +A detailed list on the available table attributes can be found in the [DB IDO Schema documentation](24-appendix.md#schema-db-ido). + +#### DB IDO in Cluster HA Zones + +The DB IDO feature supports [High Availability](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido) in +the Icinga 2 cluster. + +By default, both endpoints in a zone calculate the +endpoint which activates the feature, the other endpoint +automatically pauses it. If the cluster connection +breaks at some point, the paused IDO feature automatically +does a failover. + +You can disable this behaviour by setting `enable_ha = false` +in both feature configuration files. + +#### DB IDO Cleanup + +Objects get deactivated when they are deleted from the configuration. +This is visible with the `is_active` column in the `icinga_objects` table. +Therefore all queries need to join this table and add `WHERE is_active=1` as +condition. Deleted objects preserve their history table entries for later SLA +reporting. + +Historical data isn't purged by default. 
You can configure the maximum +age of kept data inside the `cleanup` configuration attribute for the +IDO features [IdoMysqlConnection](09-object-types.md#objecttype-idomysqlconnection) +and [IdoPgsqlConnection](09-object-types.md#objecttype-idopgsqlconnection). + +Example if you prefer to keep notification history for 30 days: + +``` + cleanup = { + notifications_age = 30d + contactnotifications_age = 30d + } +``` + +The historical tables are populated depending on the data `categories` specified. +Some tables are empty by default. + +#### DB IDO Tuning + +As with any application database, there are ways to optimize and tune the database performance. + +General tips for performance tuning: + +* [MariaDB KB](https://mariadb.com/kb/en/library/optimization-and-tuning/) +* [PostgreSQL Wiki](https://wiki.postgresql.org/wiki/Performance_Optimization) + +Re-creation of indexes, changed column values, etc. will increase the database size. Make sure to +add health checks for this, and monitor the trend in your Grafana dashboards. + +In order to optimize the tables, there are different approaches. Always keep in mind to have a +current backup and schedule maintenance downtime for these kinds of tasks! + +MySQL: + +``` +mariadb> OPTIMIZE TABLE icinga_statehistory; +``` + +> **Important** +> +> Tables might not support optimization at runtime. This can take a **long** time. +> +> `Table does not support optimize, doing recreate + analyze instead`. + +If you want to optimize all tables in a specified database, there is a script called `mysqlcheck`. +This also allows you to repair broken tables in case of emergency. + +```bash +mysqlcheck --optimize icinga +``` + +PostgreSQL: + +``` +icinga=# vacuum; +VACUUM +``` + +> **Note** +> +> Don't use `VACUUM FULL` as this has a severe impact on performance. + +### Status Data Files + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). 
+ +Icinga 1.x writes object configuration data and status data in a cyclic +interval to its `objects.cache` and `status.dat` files. Icinga 2 provides +the `StatusDataWriter` object which dumps all configuration objects and +status updates in a regular interval. + +```bash +icinga2 feature enable statusdata +``` + +If you are not using any web interface or addon which uses these files, +you can safely disable this feature. + +### Compat Log Files + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +The Icinga 1.x log format is considered being the `Compat Log` +in Icinga 2 provided with the `CompatLogger` object. + +These logs are used for informational representation in +external web interfaces parsing the logs, but also to generate +SLA reports and trends. +The [Livestatus](14-features.md#setting-up-livestatus) feature uses these logs +for answering queries to historical tables. + +The `CompatLogger` object can be enabled with + +```bash +icinga2 feature enable compatlog +``` + +By default, the Icinga 1.x log file called `icinga.log` is located +in `/var/log/icinga2/compat`. Rotated log files are moved into +`/var/log/icinga2/compat/archives`. + +### External Command Pipe + +> **Note** +> +> Please use the [REST API](12-icinga2-api.md#icinga2-api) as a modern and secure alternative +> for external actions. + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +Icinga 2 provides an external command pipe for processing commands +triggering specific actions (for example rescheduling a service check +through the web interface). 
+ +In order to enable the `ExternalCommandListener` configuration use the +following command and restart Icinga 2 afterwards: + +```bash +icinga2 feature enable command +``` + +Icinga 2 creates the command pipe file as `/var/run/icinga2/cmd/icinga2.cmd` +using the default configuration. + +Web interfaces and other Icinga addons are able to send commands to +Icinga 2 through the external command pipe, for example for rescheduling +a forced service check: + +``` +# /bin/echo "[`date +%s`] SCHEDULE_FORCED_SVC_CHECK;localhost;ping4;`date +%s`" >> /var/run/icinga2/cmd/icinga2.cmd + +# tail -f /var/log/messages + +Oct 17 15:01:25 icinga-server icinga2: Executing external command: [1382014885] SCHEDULE_FORCED_SVC_CHECK;localhost;ping4;1382014885 +Oct 17 15:01:25 icinga-server icinga2: Rescheduling next check for service 'ping4' +``` + +A list of currently supported external commands can be found [here](24-appendix.md#external-commands-list-detail). + +Detailed information on the commands and their required parameters can be found +on the [Icinga 1.x documentation](https://docs.icinga.com/latest/en/extcommands2.html). + + +### Check Result Files + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +Icinga 1.x writes its check result files to a temporary spool directory +where they are processed in a regular interval. +While this is extremely inefficient in performance regards it has been +rendered useful for passing passive check results directly into Icinga 1.x +skipping the external command pipe. + +Several clustered/distributed environments and check-aggregation addons +use that method. In order to support step-by-step migration of these +environments, Icinga 2 supports the `CheckResultReader` object. + +There is no feature configuration available, but it must be defined +on-demand in your Icinga 2 objects configuration. 
+ +``` +object CheckResultReader "reader" { + spool_dir = "/data/check-results" +} +``` + +### Livestatus + +> **Note** +> +> This feature is DEPRECATED and may be removed in future releases. +> Check the [roadmap](https://github.com/Icinga/icinga2/milestones). + +The [MK Livestatus](https://mathias-kettner.de/checkmk_livestatus.html) project +implements a query protocol that lets users query their Icinga instance for +status information. It can also be used to send commands. + +The Livestatus component that is distributed as part of Icinga 2 is a +re-implementation of the Livestatus protocol which is compatible with MK +Livestatus. + +> **Tip** +> +> Only install the Livestatus feature if your web interface or addon requires +> you to do so. +> [Icinga Web 2](https://icinga.com/docs/icinga-web-2/latest/doc/02-Installation/) does not need +> Livestatus. + +Details on the available tables and attributes with Icinga 2 can be found +in the [Livestatus Schema](24-appendix.md#schema-livestatus) section. + +You can enable Livestatus using icinga2 feature enable: + +```bash +icinga2 feature enable livestatus +``` + +After that you will have to restart Icinga 2: + +```bash +systemctl restart icinga2 +``` + +By default the Livestatus socket is available in `/var/run/icinga2/cmd/livestatus`. + +In order for queries and commands to work you will need to add your query user +(e.g. your web server) to the `icingacmd` group: + +```bash +usermod -a -G icingacmd www-data +``` + +The Debian packages use `nagios` as the user and group name. Make sure to change `icingacmd` to +`nagios` if you're using Debian. + +Change `www-data` to the user you're using to run queries. + +In order to use the historical tables provided by the livestatus feature (for example, the +`log` table) you need to have the `CompatLogger` feature enabled. By default these logs +are expected to be in `/var/log/icinga2/compat`. A different path can be set using the +`compat_log_path` configuration attribute. 
+ +```bash +icinga2 feature enable compatlog +``` + +#### Livestatus Sockets + +Other to the Icinga 1.x Addon, Icinga 2 supports two socket types + +* Unix socket (default) +* TCP socket + +Details on the configuration can be found in the [LivestatusListener](09-object-types.md#objecttype-livestatuslistener) +object configuration. + +#### Livestatus GET Queries + +> **Note** +> +> All Livestatus queries require an additional empty line as query end identifier. +> The `nc` tool (`netcat`) provides the `-U` parameter to communicate using +> a unix socket. + +There also is a Perl module available in CPAN for accessing the Livestatus socket +programmatically: [Monitoring::Livestatus](https://metacpan.org/release/NIERLEIN/Monitoring-Livestatus-0.74) + + +Example using the unix socket: + +``` +# echo -e "GET services\n" | /usr/bin/nc -U /var/run/icinga2/cmd/livestatus + +Example using the tcp socket listening on port `6558`: + +# echo -e 'GET services\n' | netcat 127.0.0.1 6558 + +# cat servicegroups < + +A list of available external commands and their parameters can be found [here](24-appendix.md#external-commands-list-detail) + +```bash +echo -e 'COMMAND ' | netcat 127.0.0.1 6558 +``` + +#### Livestatus Filters + +and, or, negate + + Operator | Negate | Description + ----------|----------|------------- + = | != | Equality + ~ | !~ | Regex match + =~ | !=~ | Equality ignoring case + ~~ | !~~ | Regex ignoring case + < | | Less than + > | | Greater than + <= | | Less than or equal + >= | | Greater than or equal + + +#### Livestatus Stats + +Schema: "Stats: aggregatefunction aggregateattribute" + + Aggregate Function | Description + -------------------|-------------- + sum |   + min |   + max |   + avg | sum / count + std | standard deviation + suminv | sum (1 / value) + avginv | suminv / count + count | ordinary default for any stats query if not aggregate function defined + +Example: + +``` +GET hosts +Filter: has_been_checked = 1 +Filter: check_type = 0 +Stats: sum 
execution_time +Stats: sum latency +Stats: sum percent_state_change +Stats: min execution_time +Stats: min latency +Stats: min percent_state_change +Stats: max execution_time +Stats: max latency +Stats: max percent_state_change +OutputFormat: json +ResponseHeader: fixed16 +``` + +#### Livestatus Output + +* CSV + +CSV output uses two levels of array separators: The members array separator +is a comma (1st level) while extra info and host|service relation separator +is a pipe (2nd level). + +Separators can be set using ASCII codes like: + +``` +Separators: 10 59 44 124 +``` + +* JSON + +Default separators. + +#### Livestatus Error Codes + + Code | Description + ----------|-------------- + 200 | OK + 404 | Table does not exist + 452 | Exception on query + +#### Livestatus Tables + + Table | Join |Description + --------------|-----------|---------------------------- + hosts |   | host config and status attributes, services counter + hostgroups |   | hostgroup config, status attributes and host/service counters + services | hosts | service config and status attributes + servicegroups |   | servicegroup config, status attributes and service counters + contacts |   | contact config and status attributes + contactgroups |   | contact config, members + commands |   | command name and line + status |   | programstatus, config and stats + comments | services | status attributes + downtimes | services | status attributes + timeperiods |   | name and is inside flag + endpoints |   | config and status attributes + log | services, hosts, contacts, commands | parses [compatlog](09-object-types.md#objecttype-compatlogger) and shows log attributes + statehist | hosts, services | parses [compatlog](09-object-types.md#objecttype-compatlogger) and aggregates state change attributes + hostsbygroup | hostgroups | host attributes grouped by hostgroup and its attributes + servicesbygroup | servicegroups | service attributes grouped by servicegroup and its attributes + servicesbyhostgroup 
| hostgroups | service attributes grouped by hostgroup and its attributes + +The `commands` table is populated with `CheckCommand`, `EventCommand` and `NotificationCommand` objects. + +A detailed list on the available table attributes can be found in the [Livestatus Schema documentation](24-appendix.md#schema-livestatus). diff --git a/doc/15-troubleshooting.md b/doc/15-troubleshooting.md new file mode 100644 index 0000000..727c56c --- /dev/null +++ b/doc/15-troubleshooting.md @@ -0,0 +1,1914 @@ +# Icinga 2 Troubleshooting + +## Required Information + +Please ensure to provide any detail which may help reproduce and understand your issue. +Whether you ask on the [community channels](https://community.icinga.com) or you +create an issue at [GitHub](https://github.com/Icinga), make sure +that others can follow your explanations. If necessary, draw a picture and attach it for +better illustration. This is especially helpful if you are troubleshooting a distributed +setup. + +We've come around many community questions and compiled this list. Add your own +findings and details please. + +* Describe the expected behavior in your own words. +* Describe the actual behavior in one or two sentences. +* Ensure to provide general information such as: + * How was Icinga 2 installed (and which repository in case) and which distribution are you using + * `icinga2 --version` + * `icinga2 feature list` + * `icinga2 daemon -C` + * [Icinga Web 2](https://icinga.com/products/icinga-web-2/) version (screenshot from System - About) + * [Icinga Web 2 modules](https://icinga.com/products/icinga-web-2-modules/) e.g. 
the Icinga Director (optional) +* Configuration insights: + * Provide complete configuration snippets explaining your problem in detail + * Your [icinga2.conf](04-configuration.md#icinga2-conf) file + * If you run multiple Icinga 2 instances, the [zones.conf](04-configuration.md#zones-conf) file (or `icinga2 object list --type Endpoint` and `icinga2 object list --type Zone`) from all affected nodes. +* Logs + * Relevant output from your main and [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) in `/var/log/icinga2`. Please add step-by-step explanations with timestamps if required. + * The newest Icinga 2 crash log if relevant, located in `/var/log/icinga2/crash` +* Additional details + * If the check command failed, what's the output of your manual plugin tests? + * In case of [debugging](21-development.md#development) Icinga 2, the full back traces and outputs + +## Analyze your Environment + +There are many components involved on a server running Icinga 2. When you +analyze a problem, keep in mind that basic system administration knowledge +is also key to identifying bottlenecks and issues. + +> **Tip** +> +> [Monitor Icinga 2](08-advanced-topics.md#monitoring-icinga) and use the hints for further analysis. + +* Analyze the system's performance and identify bottlenecks and issues. +* Collect details about all applications (e.g. Icinga 2, MySQL, Apache, Graphite, Elastic, etc.). +* If data is exchanged via network (e.g. central MySQL cluster) make sure to monitor the bandwidth capabilities too. +* Add graphs from Grafana or Graphite as screenshots to your issue description + +Install tools which help you to do so. Opinions differ, let us know if you have any additions here! + +### Analyse your Linux/Unix Environment + +[htop](https://hisham.hm/htop/) is a better replacement for `top` and helps to analyze processes +interactively. 
+ +```bash +yum install htop +apt-get install htop +``` + +If you are for example experiencing performance issues, open `htop` and take a screenshot. +Add it to your question and/or bug report. + +Analyse disk I/O performance in Grafana, take a screenshot and obfuscate any sensitive details. +Attach it when posting a question to the community channels. + +The [sysstat](https://github.com/sysstat/sysstat) package provides a number of tools to +analyze the performance on Linux. On FreeBSD you could use `systat` for example. + +```bash +yum install sysstat +apt-get install sysstat +``` + +Example for `vmstat` (summary of memory, processes, etc.): + +```bash +# summary +vmstat -s +# print timestamps, format in MB, stats every 1 second, 5 times +vmstat -t -S M 1 5 +``` + +Example for `iostat`: + +```bash +watch -n 1 iostat +``` + +Example for `sar`: + +```bash +sar # cpu +sar -r # ram +sar -q # load avg +sar -b # I/O +``` + +`sysstat` also provides the `iostat` binary. On FreeBSD you could use `systat` for example. + +If you are missing checks and metrics found in your analysis, add them to your monitoring! + +### Analyze your Windows Environment + +A good tip for Windows are the tools found inside the [Sysinternals Suite](https://technet.microsoft.com/en-us/sysinternals/bb842062.aspx). + +You can also start `perfmon` and analyze specific performance counters. +Keep notes which could be important for your monitoring, and add service +checks later on. + +> **Tip** +> +> Use an administrative Powershell to gain more insights. + +``` +cd C:\ProgramData\icinga2\var\log\icinga2 + +Get-Content .\icinga2.log -tail 10 -wait +``` + +## Enable Debug Output + +### Enable Debug Output on Linux/Unix + +Enable the `debuglog` feature: + +```bash +icinga2 feature enable debuglog +service icinga2 restart +``` + +The debug log file can be found in `/var/log/icinga2/debug.log`. 
+ +You can tail the log files with an administrative shell: + +```bash +cd /var/log/icinga2 +tail -f debug.log +``` + +Alternatively you may run Icinga 2 in the foreground with debugging enabled. Specify the console +log severity as an additional parameter argument to `-x`. + +```bash +/usr/sbin/icinga2 daemon -x notice +``` + +The [log severity](09-object-types.md#objecttype-filelogger) can be one of `critical`, `warning`, `information`, `notice` +and `debug`. + +### Enable Debug Output on Windows + +Open a Powershell with administrative privileges and enable the debug log feature. + +``` +C:\> cd C:\Program Files\ICINGA2\sbin + +C:\Program Files\ICINGA2\sbin> .\icinga2.exe feature enable debuglog +``` + +Ensure that the Icinga 2 service already writes the main log into `C:\ProgramData\icinga2\var\log\icinga2`. +Restart the Icinga 2 service in an administrative Powershell and open the newly created `debug.log` file. + +``` +C:\> Restart-Service icinga2 + +C:\> Get-Service icinga2 +``` + +You can tail the log files with an administrative Powershell: + +``` +C:\> cd C:\ProgramData\icinga2\var\log\icinga2 + +C:\ProgramData\icinga2\var\log\icinga2> Get-Content .\debug.log -tail 10 -wait +``` + +## Configuration Troubleshooting + +### List Configuration Objects + +The `icinga2 object list` CLI command can be used to list all configuration objects and their +attributes. The tool also shows where each of the attributes was modified. + +> **Tip** +> +> Use the Icinga 2 API to access [config objects at runtime](12-icinga2-api.md#icinga2-api-config-objects) directly. + +That way you can also identify which objects have been created from your [apply rules](17-language-reference.md#apply). 
+ +``` +# icinga2 object list + +Object 'localhost!ssh' of type 'Service': + * __name = 'localhost!ssh' + * check_command = 'ssh' + % = modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 5:3-5:23 + * check_interval = 60 + % = modified in '/etc/icinga2/conf.d/templates.conf', lines 24:3-24:21 + * host_name = 'localhost' + % = modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 4:3-4:25 + * max_check_attempts = 3 + % = modified in '/etc/icinga2/conf.d/templates.conf', lines 23:3-23:24 + * name = 'ssh' + * retry_interval = 30 + % = modified in '/etc/icinga2/conf.d/templates.conf', lines 25:3-25:22 + * templates = [ 'ssh', 'generic-service' ] + % += modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 1:0-7:1 + % += modified in '/etc/icinga2/conf.d/templates.conf', lines 22:1-26:1 + * type = 'Service' + * vars + % += modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 6:3-6:19 + * sla = '24x7' + % = modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 6:3-6:19 + +[...] 
+``` + +On Windows, use an administrative Powershell: + +``` +C:\> cd C:\Program Files\ICINGA2\sbin + +C:\Program Files\ICINGA2\sbin> .\icinga2.exe object list +``` + +You can also filter by name and type: + +``` +# icinga2 object list --name *ssh* --type Service +Object 'localhost!ssh' of type 'Service': + * __name = 'localhost!ssh' + * check_command = 'ssh' + % = modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 5:3-5:23 + * check_interval = 60 + % = modified in '/etc/icinga2/conf.d/templates.conf', lines 24:3-24:21 + * host_name = 'localhost' + % = modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 4:3-4:25 + * max_check_attempts = 3 + % = modified in '/etc/icinga2/conf.d/templates.conf', lines 23:3-23:24 + * name = 'ssh' + * retry_interval = 30 + % = modified in '/etc/icinga2/conf.d/templates.conf', lines 25:3-25:22 + * templates = [ 'ssh', 'generic-service' ] + % += modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 1:0-7:1 + % += modified in '/etc/icinga2/conf.d/templates.conf', lines 22:1-26:1 + * type = 'Service' + * vars + % += modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 6:3-6:19 + * sla = '24x7' + % = modified in '/etc/icinga2/conf.d/hosts/localhost/ssh.conf', lines 6:3-6:19 + +Found 1 Service objects. + +[2014-10-15 14:27:19 +0200] information/cli: Parsed 175 objects. +``` + +Runtime modifications via the [REST API](12-icinga2-api.md#icinga2-api-config-objects) +are not immediately updated. Furthermore there is a known issue with +[group assign expressions](17-language-reference.md#group-assign) which are not reflected in the host object output. +You need to restart Icinga 2 in order to update the `icinga2.debug` cache file. + +### Apply rules do not match + +You can analyze apply rules and matching objects by using the [script debugger](20-script-debugger.md#script-debugger). + +### Where are the check command definitions? 
+ +Icinga 2 features a number of built-in [check command definitions](10-icinga-template-library.md#icinga-template-library) which are +included with + +``` +include <itl> +include <plugins> +``` + +in the [icinga2.conf](04-configuration.md#icinga2-conf) configuration file. These files are not considered +configuration files and will be overridden on upgrade, so please send modifications as proposed patches upstream. +The default include path is set to `/usr/share/icinga2/includes` with the constant `IncludeConfDir`. + +You should add your own command definitions to a new file in `conf.d/` called `commands.conf` +or similar. + +### Configuration is ignored + +* Make sure that the line(s) are not [commented out](17-language-reference.md#comments) (starting with `//` or `#`, or +encapsulated by `/* ... */`). +* Is the configuration file included in [icinga2.conf](04-configuration.md#icinga2-conf)? + +Run the [configuration validation](11-cli-commands.md#config-validation) and add `notice` as log severity. +Search for the file which should be included i.e. using the `grep` CLI command. + +```bash +icinga2 daemon -C -x notice | grep command +``` + +### Configuration attributes are inherited from + +Icinga 2 allows you to import templates using the [import](17-language-reference.md#template-imports) keyword. If these templates +contain additional attributes, your objects will automatically inherit them. You can override +or modify these attributes in the current object. + +The [object list](15-troubleshooting.md#troubleshooting-list-configuration-objects) CLI command allows you to verify the attribute origin. + +### Configuration Value with Single Dollar Sign + +In case your configuration validation fails with a missing closing dollar sign error message, you +did not properly escape the single dollar sign preventing its usage as [runtime macro](03-monitoring-basics.md#runtime-macros).
+ +``` +critical/config: Error: Validation failed for Object 'ping4' (Type: 'Service') at /etc/icinga2/zones.d/global-templates/windows.conf:24: Closing $ not found in macro format string 'top-syntax=${list}'. +``` + +Correct the custom variable value to + +``` +"top-syntax=$${list}" +``` + + +## Checks Troubleshooting + +### Executed Command for Checks + +* Use the Icinga 2 API to [query](12-icinga2-api.md#icinga2-api-config-objects-query) host/service objects +for their check result containing the executed shell command. +* Use the Icinga 2 [console cli command](11-cli-commands.md#cli-command-console) +to fetch the checkable object, its check result and the executed shell command. +* Alternatively enable the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) and look for the executed command. + +Example for a service object query using a [regex match](18-library-reference.md#global-functions-regex) +on the name: + +``` +$ curl -k -s -u root:icinga -H 'Accept: application/json' -H 'X-HTTP-Method-Override: GET' -X POST 'https://localhost:5665/v1/objects/services' \ +-d '{ "filter": "regex(pattern, service.name)", "filter_vars": { "pattern": "^http" }, "attrs": [ "__name", "last_check_result" ], "pretty": true }' +{ + "results": [ + { + "attrs": { + "__name": "example.localdomain!http", + "last_check_result": { + "active": true, + "check_source": "example.localdomain", + "command": [ + "/usr/local/sbin/check_http", + "-I", + "127.0.0.1", + "-u", + "/" + ], + + ... + + } + }, + "joins": {}, + "meta": {}, + "name": "example.localdomain!http", + "type": "Service" + } + ] +} +``` + +Alternatively when using the Director, navigate into the Service Detail View +in Icinga Web and pick `Inspect` to query the details. 
+ +Example for using the `icinga2 console` CLI command evaluation functionality: + +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' \ +--eval 'get_service("example.localdomain", "http").last_check_result.command' | python -m json.tool +[ + "/usr/local/sbin/check_http", + "-I", + "127.0.0.1", + "-u", + "/" +] +``` + +Example for searching the debug log: + +```bash +icinga2 feature enable debuglog +systemctl restart icinga2 +tail -f /var/log/icinga2/debug.log | grep "notice/Process" +``` + + +### Checks are not executed + +* First off, decide whether the checks are executed locally, or remote in a distributed setup. + +If the master does not receive check results from the satellite, move your analysis to the satellite +and verify why the checks are not executed there. + +* Check the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) to see if the check command gets executed. +* Verify that failed dependencies do not prevent command execution. +* Make sure that the plugin is executable by the Icinga 2 user (run a manual test). +* Make sure the [checker](11-cli-commands.md#enable-features) feature is enabled. +* Use the Icinga 2 API [event streams](12-icinga2-api.md#icinga2-api-event-streams) to receive live check result streams. + +Test a plugin as icinga user. + +```bash +sudo -u icinga /usr/lib/nagios/plugins/check_ping -4 -H 127.0.0.1 -c 5000,100% -w 3000,80% +``` + +> **Note** +> +> **Never test plugins as root, but the icinga daemon user.** The environment and permissions differ. +> +> Also, the daemon user **does not** spawn a terminal shell (Bash, etc.) so it won't read anything from .bashrc +> and variants. The Icinga daemon only relies on sysconfig environment variables being set. + + +Enable the checker feature. + +``` +# icinga2 feature enable checker +The feature 'checker' is already enabled. 
+``` + +Fetch all check result events matching the `event.service` name `random`: + +```bash +curl -k -s -u root:icinga -H 'Accept: application/json' -X POST \ + 'https://localhost:5665/v1/events?queue=debugchecks&types=CheckResult&filter=match%28%22random*%22,event.service%29' +``` + + +### Analyze Check Source + +Sometimes checks are not executed on the remote host, but on the master and so on. +This could lead into unwanted results or NOT-OK states. + +The `check_source` attribute is the best indication where a check command +was actually executed. This could be a satellite with synced configuration +or a client as remote command bridge -- both will return the check source +as where the plugin is called. + +Example for retrieving the check source from all `disk` services using a +[regex match](18-library-reference.md#global-functions-regex) on the name: + +``` +$ curl -k -s -u root:icinga -H 'Accept: application/json' -H 'X-HTTP-Method-Override: GET' -X POST 'https://localhost:5665/v1/objects/services' \ +-d '{ "filter": "regex(pattern, service.name)", "filter_vars": { "pattern": "^disk" }, "attrs": [ "__name", "last_check_result" ], "pretty": true }' +{ + "results": [ + { + "attrs": { + "__name": "icinga2-agent1.localdomain!disk", + "last_check_result": { + "active": true, + "check_source": "icinga2-agent1.localdomain", + + ... + + } + }, + "joins": {}, + "meta": {}, + "name": "icinga2-agent1.localdomain!disk", + "type": "Service" + } + ] +} +``` + +Alternatively when using the Director, navigate into the Service Detail View +in Icinga Web and pick `Inspect` to query the details. 
+ +Example with the debug console: + +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' \ +--eval 'get_service("icinga2-agent1.localdomain", "disk").last_check_result.check_source' | python -m json.tool + +"icinga2-agent1.localdomain" +``` + + +### NSClient++ Check Errors with nscp-local + +The [nscp-local](10-icinga-template-library.md#nscp-check-local) CheckCommand object definitions call the local `nscp.exe` command. +If a Windows client service check fails to find the `nscp.exe` command, the log output would look like this: + +``` +Command ".\nscp.exe" "client" "-a" "drive=d" "-a" "show-all" "-b" "-q" "check_drivesize" failed to execute: 2, "The system cannot find the file specified." +``` + +or + +``` +Command ". +scp.exe" "client" "-a" "drive=d" "-a" "show-all" "-b" "-q" "check_drivesize" failed to execute: 2, "The system cannot find the file specified." +``` + +The above actually prints `.\\nscp.exe` where the escaped `\n` character gets interpreted as new line. + +Both errors lead to the assumption that the `NscpPath` constant is empty or set to a `.` character. +This could mean the following: + +* The command is **not executed on the Windows client**. Check the [check_source](15-troubleshooting.md#checks-check-source) attribute from the check result. +* You are using an outdated NSClient++ version (0.3.x or 0.4.x) which is not compatible with Icinga 2. 
+* You are using a custom NSClient++ installer which does not register the correct GUID for NSClient++ + +More troubleshooting: + +Retrieve the `NscpPath` constant on your Windows client: + +``` +C:\Program Files\ICINGA2\sbin\icinga2.exe variable get NscpPath +``` + +If the variable is returned empty, manually test how Icinga 2 would resolve +its path (this can be found inside the ITL): + +``` +C:\Program Files\ICINGA2\sbin\icinga2.exe console --eval "dirname(msi_get_component_path(\"{5C45463A-4AE9-4325-96DB-6E239C034F93}\"))" +``` + +If this command does not return anything, NSClient++ is not properly installed. +Verify that inside the `Programs and Features` (`appwiz.cpl`) control panel. + +You can run the bundled NSClient++ installer from the Icinga 2 Windows package. +The msi package is located in `C:\Program Files\ICINGA2\sbin`. + +The bundled NSClient++ version has properly been tested with Icinga 2. Keep that +in mind when using a different package. + + +### Check Thresholds Not Applied + +This could happen with [clients as command endpoint execution](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint). + +If you have for example a client host `icinga2-agent1.localdomain` +and a service `disk` check defined on the master, the warning and +critical thresholds are sometimes not applied and unwanted notification +alerts are raised. + +This happens because the client itself includes a host object with +its `NodeName` and a basic set of checks in the [conf.d](04-configuration.md#conf-d) +directory, i.e. `disk` with the default thresholds. + +Clients which have the `checker` feature enabled will attempt +to execute checks for local services and send their results +back to the master. + +If you now have the same host and service objects on the +master you will receive wrong check results from the client. + +Solution: + +* Disable the `checker` feature on clients: `icinga2 feature disable checker`.
+* Remove the inclusion of [conf.d](04-configuration.md#conf-d) as suggested in the [client setup docs](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint). + +### Check Fork Errors + +Newer versions of systemd on Linux limit spawned processes for +services. + +* v227 introduces the `TasksMax` setting to units which allows to specify the spawned process limit. +* v228 adds `DefaultTasksMax` in the global `systemd-system.conf` with a default setting of 512 processes. +* v231 changes the default value to 15% + +This can cause problems with Icinga 2 in large environments with many +commands executed in parallel starting with systemd v228. Some distributions +also may have changed the defaults. + +The error message could look like this: + +``` +2017-01-12T11:55:40.742685+01:00 icinga2-master1 kernel: [65567.582895] cgroup: fork rejected by pids controller in /system.slice/icinga2.service +``` + +In order to solve the problem, increase the value for `DefaultTasksMax` +or set it to `infinity`. + +```bash +mkdir /etc/systemd/system/icinga2.service.d +cat >/etc/systemd/system/icinga2.service.d/limits.conf <<EOF +[Service] +TasksMax=infinity +EOF + +systemctl daemon-reload +systemctl restart icinga2 +``` + +### Systemd Watchdog + +Usually Icinga 2 is a mission critical part of infrastructure and should be +online at all times. In case of a recoverable crash (e.g. OOM) you may want to +restart Icinga 2 automatically. With systemd it is as easy as overriding some +settings of the Icinga 2 systemd service by creating +`/etc/systemd/system/icinga2.service.d/override.conf` with the following +content: + +``` +[Service] +Restart=always +RestartSec=1 +StartLimitInterval=10 +StartLimitBurst=3 +``` + +Using the watchdog can also help with monitoring Icinga 2, to activate and use it add the following to the override: + +``` +WatchdogSec=30s +``` + +This way systemd will kill Icinga 2 if it does not notify for over 30 seconds. A timeout of less than 10 seconds is not +recommended.
When the watchdog is activated, `Restart=` can be set to `watchdog` to restart Icinga 2 in the case of a +watchdog timeout. + +Run `systemctl daemon-reload && systemctl restart icinga2` to apply the changes. +Now systemd will always try to restart Icinga 2 (except if you run +`systemctl stop icinga2`). After three failures in ten seconds it will stop +trying because you probably have a problem that requires manual intervention. + +### Late Check Results + +[Icinga Web 2](https://icinga.com/products/icinga-web-2/) provides +a dashboard overview for `overdue checks`. + +The REST API provides the [status](12-icinga2-api.md#icinga2-api-status) URL endpoint with some generic metrics +on Icinga and its features. + +```bash +curl -k -s -u root:icinga 'https://localhost:5665/v1/status?pretty=1' | less +``` + +You can also calculate late check results via the REST API: + +* Fetch the `last_check` timestamp from each object +* Compare the timestamp with the current time and add `check_interval` multiple times (change it to see which results are really late, like five times check_interval) + +You can use the [icinga2 console](11-cli-commands.md#cli-command-console) to connect to the instance, fetch all data +and calculate the differences. More information can be found in [this blogpost](https://icinga.com/2016/08/11/analyse-icinga-2-problems-using-the-console-api/).
+ +``` +# ICINGA2_API_USERNAME=root ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://localhost:5665/' + +<1> => var res = []; for (s in get_objects(Service).filter(s => s.last_check < get_time() - 2 * s.check_interval)) { res.add([s.__name, DateTime(s.last_check).to_string()]) }; res + +[ [ "10807-host!10807-service", "2016-06-10 15:54:55 +0200" ], [ "mbmif.int.netways.de!disk /", "2016-01-26 16:32:29 +0100" ] ] +``` + +Or if you are just interested in numbers, call [len](18-library-reference.md#array-len) on the result array `res`: + +``` +<2> => var res = []; for (s in get_objects(Service).filter(s => s.last_check < get_time() - 2 * s.check_interval)) { res.add([s.__name, DateTime(s.last_check).to_string()]) }; res.len() + +2.000000 +``` + +If you need to analyze that problem multiple times, just add the current formatted timestamp +and repeat the commands. + +``` +<23> => DateTime(get_time()).to_string() + +"2017-04-04 16:09:39 +0200" + +<24> => var res = []; for (s in get_objects(Service).filter(s => s.last_check < get_time() - 2 * s.check_interval)) { res.add([s.__name, DateTime(s.last_check).to_string()]) }; res.len() + +8287.000000 +``` + +More details about the Icinga 2 DSL and its possibilities can be +found in the [language](17-language-reference.md#language-reference) and [library](18-library-reference.md#library-reference) reference chapters. + +### Late Check Results in Distributed Environments + +When it comes to a distributed HA setup, each node is responsible for a load-balanced amount of checks. +Host and Service objects provide the attribute `paused`. If this is set to `false`, the current node +actively attempts to schedule and execute checks. Otherwise the node does not feel responsible. + +``` +<3> => var res = {}; for (s in get_objects(Service).filter(s => s.last_check < get_time() - 2 * s.check_interval)) { res[s.paused] += 1 }; res +{ + @false = 2.000000 + @true = 1.000000 +} +``` + +You may ask why this analysis is important? 
Fair enough - if the numbers are not inverted in a HA zone +with two members, this may give a hint that the cluster nodes are in a split-brain scenario, or you've +found a bug in the cluster. + + +If you are running a cluster setup where the master/satellite executes checks on the client via +[top down command endpoint](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) mode, +you might want to know which zones are affected. + +This analysis assumes that clients which are not connected, have the string `connected` in their +service check result output and their state is `UNKNOWN`. + +``` +<4> => var res = {}; for (s in get_objects(Service)) { if (s.state==3) { if (match("*connected*", s.last_check_result.output)) { res[s.zone] += [s.host_name] } } }; for (k => v in res) { res[k] = len(v.unique()) }; res + +{ + Asia = 31.000000 + Europe = 214.000000 + USA = 207.000000 +} +``` + +The result set shows the configured zones and their affected hosts in a unique list. The output also just prints the numbers +but you can adjust this by omitting the `len()` call inside the for loop. + +## Notifications Troubleshooting + +### Notifications are not sent + +* Check the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) to see if a notification is triggered. +* If yes, verify that all conditions are satisfied. +* Are any errors on the notification command execution logged? + +Please ensure to add these details with your own description +to any question or issue posted to the community channels. + +Verify the following configuration: + +* Is the host/service `enable_notifications` attribute set, and if so, to which value? +* Do the [notification](09-object-types.md#objecttype-notification) attributes `states`, `types`, `period` match the notification conditions? +* Do the [user](09-object-types.md#objecttype-user) attributes `states`, `types`, `period` match the notification conditions? 
+* Are there any notification `begin` and `end` times configured? +* Make sure the [notification](11-cli-commands.md#enable-features) feature is enabled. +* Does the referenced NotificationCommand work when executed as Icinga user on the shell? + +If notifications are to be sent via mail, make sure that the mail program specified inside the +[NotificationCommand object](09-object-types.md#objecttype-notificationcommand) exists. +The name and location depends on the distribution so the preconfigured setting might have to be +changed on your system. + + +Examples: + +``` +# icinga2 feature enable notification +The feature 'notification' is already enabled. +``` + +```bash +icinga2 feature enable debuglog +systemctl restart icinga2 + +grep Notification /var/log/icinga2/debug.log > /root/analyze_notification_problem.log +``` + +You can use the Icinga 2 API [event streams](12-icinga2-api.md#icinga2-api-event-streams) to receive live notification streams: + +```bash +curl -k -s -u root:icinga -H 'Accept: application/json' -X POST 'https://localhost:5665/v1/events?queue=debugnotifications&types=Notification' +``` + + +### Analyze Notification Result + +> **Note** +> +> This feature is available since v2.11 and requires all endpoints +> being updated. + +Notifications inside a HA enabled zone are balanced between the endpoints, +just like checks. + +Sometimes notifications may fail, and with looking into the (debug) logs +for both masters, you cannot correlate this correctly. + +The `last_notification_result` runtime attribute is stored and synced for Notification +objects and can be queried via REST API. 
+ +Example for retrieving the notification object and result from all `disk` services using a +[regex match](18-library-reference.md#global-functions-regex) on the name: + +``` +$ curl -k -s -u root:icinga -H 'Accept: application/json' -H 'X-HTTP-Method-Override: GET' -X POST 'https://localhost:5665/v1/objects/notifications' \ +-d '{ "filter": "regex(pattern, service.name)", "filter_vars": { "pattern": "^disk" }, "attrs": [ "__name", "last_notification_result" ], "pretty": true }' +{ + "results": [ + + { + "attrs": { + "last_notification_result": { + "active": true, + "command": [ + "/etc/icinga2/scripts/mail-service-notification.sh", + "-4", + "", + "-6", + "", + "-b", + "", + "-c", + "", + "-d", + "2019-08-02 10:54:16 +0200", + "-e", + "disk", + "-l", + "icinga2-agent1.localdomain", + "-n", + "icinga2-agent1.localdomain", + "-o", + "DISK OK - free space: / 38108 MB (90.84% inode=100%);", + "-r", + "user@localdomain", + "-s", + "OK", + "-t", + "RECOVERY", + "-u", + "disk" + ], + "execution_end": 1564736056.186217, + "execution_endpoint": "icinga2-master1.localdomain", + "execution_start": 1564736056.132323, + "exit_status": 0.0, + "output": "", + "type": "NotificationResult" + } + }, + "joins": {}, + "meta": {}, + "name": "icinga2-agent1.localdomain!disk!mail-service-notification", + "type": "Notification" + } + +... + + ] +} +``` + +Example with the debug console: + +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' --eval 'get_object(Notification, "icinga2-agent1.localdomain!disk!mail-service-notification").last_notification_result.execution_endpoint' | jq + +"icinga2-agent1.localdomain" +``` + +Whenever a notification command failed to execute, you can fetch the output as well. 
+ + +## Feature Troubleshooting + +### Feature is not working + +* Make sure that the feature configuration is enabled by symlinking from `features-available/` +to `features-enabled` and that the latter is included in [icinga2.conf](04-configuration.md#icinga2-conf). +* Are the feature attributes set correctly according to the documentation? +* Any errors on the logs? + +Look up the [object type](09-object-types.md#object-types) for the required feature and verify it is enabled: + +```bash +icinga2 object list --type <feature object type> +``` + +Example for the `graphite` feature: + +```bash +icinga2 object list --type GraphiteWriter +``` + +Look into the log and check whether the feature logs anything specific for this matter. + +```bash +grep GraphiteWriter /var/log/icinga2/icinga2.log +``` + +## REST API Troubleshooting + +In order to analyse errors on API requests, you can explicitly enable the [verbose parameter](12-icinga2-api.md#icinga2-api-parameters-global). + +``` +$ curl -k -s -u root:icinga -H 'Accept: application/json' -X DELETE 'https://localhost:5665/v1/objects/hosts/example-cmdb?pretty=1&verbose=1' +{ + "diagnostic_information": "Error: Object does not exist.\n\n ....", + "error": 404.0, + "status": "No objects found." +} +``` + +### REST API Troubleshooting: No Objects Found + +Please note that the `404` status with no objects being found can also originate +from missing or too strict object permissions for the authenticated user. + +This is a security feature to disable object name guessing. If this would not be the +case, restricted users would be able to get a list of names of your objects just by +trying every character combination. + +In order to analyse and fix the problem, please check the following: + +- use an administrative account with full permissions to check whether the objects are actually there. +- verify the permissions on the affected ApiUser object and fix them. + +### Missing Runtime Objects (Hosts, Downtimes, etc.)
+ +Runtime objects consume the internal config packages shared with +the REST API config packages. Each host, downtime, comment, service, etc. created +via the REST API is stored in the `_api` package. + +This includes downtimes and comments, which were sometimes stored in the wrong +directory path, because the active-stage file was empty/truncated/unreadable at +this point. + +Wrong: + +``` +/var/lib/icinga2/api/packages/_api//conf.d/downtimes/1234-5678-9012-3456.conf +``` + +Correct: + +``` +/var/lib/icinga2/api/packages/_api/dbe0bef8-c72c-4cc9-9779-da7c4527c5b2/conf.d/downtimes/1234-5678-9012-3456.conf +``` + +At creation time, the object lives in memory but its storage is broken. Upon restart, +it is missing and e.g. a missing downtime will re-enable unwanted notifications. + +`abcd-ef12-3456-7890` is the active stage name which wasn't correctly +read by the Icinga daemon. This information is stored in `/var/lib/icinga2/api/packages/_api/active-stage`. + +2.11 now limits the direct active-stage file access (this is hidden from the user), +and caches active stages for packages in-memory. + +It also tries to repair the broken package, and logs a new message: + +``` +systemctl restart icinga2 + +tail -f /var/log/icinga2/icinga2.log + +[2019-05-10 12:27:15 +0200] information/ConfigObjectUtility: Repairing config package '_api' with stage 'dbe0bef8-c72c-4cc9-9779-da7c4527c5b2'. +``` + +If this does not happen, you can manually fix the broken config package, and mark a deployed stage as active +again, carefully do the following steps with creating a backup before: + +Navigate into the API package prefix. + +```bash +cd /var/lib/icinga2/api/packages +``` + +Change into the broken package directory and list all directories and files +ordered by latest changes. + +``` +cd _api +ls -lahtr + +drwx------ 4 michi wheel 128B Mar 27 14:39 ..
+-rw-r--r-- 1 michi wheel 25B Mar 27 14:39 include.conf +-rw-r--r-- 1 michi wheel 405B Mar 27 14:39 active.conf +drwx------ 7 michi wheel 224B Mar 27 15:01 dbe0bef8-c72c-4cc9-9779-da7c4527c5b2 +drwx------ 5 michi wheel 160B Apr 26 12:47 . +``` + +As you can see, the `active-stage` file is missing. When it is there, verify that its content +is set to the stage directory as follows. + +If you have more than one stage directory here, pick the latest modified +directory. Copy the directory name `abcd-ef12-3456-7890` and +add it into a new file `active-stage`. This can be done like this: + +```bash +echo "dbe0bef8-c72c-4cc9-9779-da7c4527c5b2" > active-stage +``` + +`active.conf` needs to have the correct active stage too, add it again +like this. Note: This is deep down in the code, use with care! + +```bash +sed -i 's/ActiveStages\["_api"\] = .*/ActiveStages\["_api"\] = "dbe0bef8-c72c-4cc9-9779-da7c4527c5b2"/g' /var/lib/icinga2/api/packages/_api/active.conf +``` + +Restart Icinga 2. + +```bash +systemctl restart icinga2 +``` + + +> **Note** +> +> The internal `_api` config package structure may change in the future. Do not modify +> things in there manually or with scripts unless guided here or asked by a developer. + + +## Certificate Troubleshooting + +Tools for analysing certificates and TLS connections: + +- `openssl` binary on Linux/Unix, `openssl.exe` on Windows ([download](https://slproweb.com/products/Win32OpenSSL.html)) +- `sslscan` tool, available [here](https://github.com/rbsec/sslscan) (Linux/Windows) + +Note: You can also execute sslscan on Windows using Powershell. + + +### Certificate Verification + +Whenever the TLS handshake fails when a client connects to the cluster or the REST API, +ensure to verify the used certificates. + +Print the CA and client certificate and ensure that the following attributes are set: + +* Version must be 3. +* Serial number is a hex-encoded string. 
+* Issuer should be your certificate authority (defaults to `Icinga CA` for all certificates generated by CLI commands and automated signing requests). +* Validity: The certificate must not be expired. +* Subject with the common name (CN) matches the client endpoint name and its FQDN. +* v3 extensions must set the basic constraint for `CA:TRUE` (ca.crt) or `CA:FALSE` (client certificate). +* Subject Alternative Name is set to the resolvable DNS name (required for REST API and browsers). + +Navigate into the local certificate store: + +```bash +cd /var/lib/icinga2/certs/ +``` + +Make sure to verify the agents' certificate and its stored `ca.crt` in `/var/lib/icinga2/certs` and ensure that +all instances (master, satellite, agent) are signed by the **same CA**. + +Compare the `ca.crt` file from the agent node with your master's `ca.crt` file. + + +Since 2.12, you can use the built-in CLI command `pki verify` to perform TLS certificate validation tasks. + +> **Hint** +> +> The CLI command uses exit codes aligned to the [Plugin API specification](05-service-monitoring.md#service-monitoring-plugin-api). +> Run the commands followed with `echo $?` to see the exit code. + +These CLI commands can be used on Windows agents too without requiring the OpenSSL binary. + +#### Print TLS Certificate + +Pass the certificate file to the `--cert` CLI command parameter to print its details. +This prints a shorter version of `openssl x509 -in <file> -text`.
+ +``` +$ icinga2 pki verify --cert icinga2-agent2.localdomain.crt + +information/cli: Printing certificate 'icinga2-agent2.localdomain.crt' + + Version: 3 + Subject: CN = icinga2-agent2.localdomain + Issuer: CN = Icinga CA + Valid From: Feb 14 11:29:36 2020 GMT + Valid Until: Feb 10 11:29:36 2035 GMT + Serial: 12:fe:a6:22:f5:e3:db:a2:95:8e:92:b2:af:1a:e3:01:44:c4:70:e0 + + Signature Algorithm: sha256WithRSAEncryption + Subject Alt Names: icinga2-agent2.localdomain + Fingerprint: 40 98 A0 77 58 4F CA D1 05 AC 18 53 D7 52 8D D7 9C 7F 5A 23 B4 AF 63 A4 92 9D DC FF 89 EF F1 4C +``` + +You can also print the `ca.crt` certificate without any further checks using the `--cert` parameter. + +#### Print and Verify CA Certificate + +The `--cacert` CLI parameter allows to check whether the given certificate file is a public CA certificate. + +``` +$ icinga2 pki verify --cacert ca.crt + +information/cli: Checking whether certificate 'ca.crt' is a valid CA certificate. + + Version: 3 + Subject: CN = Icinga CA + Issuer: CN = Icinga CA + Valid From: Jul 31 12:26:08 2019 GMT + Valid Until: Jul 27 12:26:08 2034 GMT + Serial: 89:fe:d6:12:66:25:3a:c5:07:c1:eb:d4:e6:f2:df:ca:13:6e:dc:e7 + + Signature Algorithm: sha256WithRSAEncryption + Subject Alt Names: + Fingerprint: 9A 11 29 A8 A3 89 F8 56 30 1A E4 0A B2 6B 28 46 07 F0 14 17 BD 19 A4 FC BD 41 40 B5 1A 8F BF 20 + +information/cli: OK: CA certificate file 'ca.crt' was verified successfully. +``` + +In case you pass a wrong certificate, an error is shown and the exit code is `2` (Critical). + +``` +$ icinga2 pki verify --cacert icinga2-agent2.localdomain.crt + +information/cli: Checking whether certificate 'icinga2-agent2.localdomain.crt' is a valid CA certificate. 
+ + Version: 3 + Subject: CN = icinga2-agent2.localdomain + Issuer: CN = Icinga CA + Valid From: Feb 14 11:29:36 2020 GMT + Valid Until: Feb 10 11:29:36 2035 GMT + Serial: 12:fe:a6:22:f5:e3:db:a2:95:8e:92:b2:af:1a:e3:01:44:c4:70:e0 + + Signature Algorithm: sha256WithRSAEncryption + Subject Alt Names: icinga2-agent2.localdomain + Fingerprint: 40 98 A0 77 58 4F CA D1 05 AC 18 53 D7 52 8D D7 9C 7F 5A 23 B4 AF 63 A4 92 9D DC FF 89 EF F1 4C + +critical/cli: CRITICAL: The file 'icinga2-agent2.localdomain.crt' does not seem to be a CA certificate file. +``` + +#### Verify Certificate is signed by CA Certificate + +Pass the certificate file to the `--cert` CLI parameter, and the `ca.crt` file to the `--cacert` parameter. +Common troubleshooting scenarios involve self-signed certificates and untrusted agents resulting in disconnects. + +``` +$ icinga2 pki verify --cert icinga2-agent2.localdomain.crt --cacert ca.crt + +information/cli: Verifying certificate 'icinga2-agent2.localdomain.crt' + + Version: 3 + Subject: CN = icinga2-agent2.localdomain + Issuer: CN = Icinga CA + Valid From: Feb 14 11:29:36 2020 GMT + Valid Until: Feb 10 11:29:36 2035 GMT + Serial: 12:fe:a6:22:f5:e3:db:a2:95:8e:92:b2:af:1a:e3:01:44:c4:70:e0 + + Signature Algorithm: sha256WithRSAEncryption + Subject Alt Names: icinga2-agent2.localdomain + Fingerprint: 40 98 A0 77 58 4F CA D1 05 AC 18 53 D7 52 8D D7 9C 7F 5A 23 B4 AF 63 A4 92 9D DC FF 89 EF F1 4C + +information/cli: with CA certificate 'ca.crt'. + + Version: 3 + Subject: CN = Icinga CA + Issuer: CN = Icinga CA + Valid From: Jul 31 12:26:08 2019 GMT + Valid Until: Jul 27 12:26:08 2034 GMT + Serial: 89:fe:d6:12:66:25:3a:c5:07:c1:eb:d4:e6:f2:df:ca:13:6e:dc:e7 + + Signature Algorithm: sha256WithRSAEncryption + Subject Alt Names: + Fingerprint: 9A 11 29 A8 A3 89 F8 56 30 1A E4 0A B2 6B 28 46 07 F0 14 17 BD 19 A4 FC BD 41 40 B5 1A 8F BF 20 + +information/cli: OK: Certificate with CN 'icinga2-agent2.localdomain' is signed by CA. 
+``` + +#### Verify Certificate matches Common Name (CN) + +This allows to verify the common name inside the certificate with a given string parameter. +Typical troubleshooting involve upper/lower case CNs (Windows). + +``` +$ icinga2 pki verify --cert icinga2-agent2.localdomain.crt --cn icinga2-agent2.localdomain + +information/cli: Verifying common name (CN) 'icinga2-agent2.localdomain in certificate 'icinga2-agent2.localdomain.crt'. + + Version: 3 + Subject: CN = icinga2-agent2.localdomain + Issuer: CN = Icinga CA + Valid From: Feb 14 11:29:36 2020 GMT + Valid Until: Feb 10 11:29:36 2035 GMT + Serial: 12:fe:a6:22:f5:e3:db:a2:95:8e:92:b2:af:1a:e3:01:44:c4:70:e0 + + Signature Algorithm: sha256WithRSAEncryption + Subject Alt Names: icinga2-agent2.localdomain + Fingerprint: 40 98 A0 77 58 4F CA D1 05 AC 18 53 D7 52 8D D7 9C 7F 5A 23 B4 AF 63 A4 92 9D DC FF 89 EF F1 4C + +information/cli: OK: CN 'icinga2-agent2.localdomain' matches certificate CN 'icinga2-agent2.localdomain'. +``` + +In the example below, the certificate uses an upper case CN. + +``` +$ icinga2 pki verify --cert icinga2-agent2.localdomain.crt --cn icinga2-agent2.localdomain + +information/cli: Verifying common name (CN) 'icinga2-agent2.localdomain in certificate 'icinga2-agent2.localdomain.crt'. + + Version: 3 + Subject: CN = ICINGA2-agent2.localdomain + Issuer: CN = Icinga CA + Valid From: Feb 14 11:29:36 2020 GMT + Valid Until: Feb 10 11:29:36 2035 GMT + Serial: 12:fe:a6:22:f5:e3:db:a2:95:8e:92:b2:af:1a:e3:01:44:c4:70:e0 + + Signature Algorithm: sha256WithRSAEncryption + Subject Alt Names: ICINGA2-agent2.localdomain + Fingerprint: 40 98 A0 77 58 4F CA D1 05 AC 18 53 D7 52 8D D7 9C 7F 5A 23 B4 AF 63 A4 92 9D DC FF 89 EF F1 4C + +critical/cli: CRITICAL: CN 'icinga2-agent2.localdomain' does NOT match certificate CN 'icinga2-agent2.localdomain'. 
+``` + + + +### Certificate Signing + +Icinga offers two methods: + +* [CSR Auto-Signing](06-distributed-monitoring.md#distributed-monitoring-setup-csr-auto-signing) which uses a client (an agent or a satellite) ticket generated on the master as trust identifier. +* [On-Demand CSR Signing](06-distributed-monitoring.md#distributed-monitoring-setup-on-demand-csr-signing) which allows to sign pending certificate requests on the master. + +Whenever a signed certificate is not received on the requesting clients, ensure to check the following: + +* The ticket was valid and the master's log shows nothing different (CSR Auto-Signing only) +* If the agent/satellite is directly connected to the CA master, check whether the master actually has performance problems to process the request. If the connection is closed without certificate response, analyse the master's health. It is also advised to upgrade to v2.11 where network stack problems have been fixed. +* If you're using a 3+ level cluster, check whether the satellite really forwarded the CSR signing request and the master processed it. + +Other common errors: + +* The generated ticket is invalid. The client receives this error message, as well as the master logs a warning message. +* The [api](09-object-types.md#objecttype-apilistener) feature does not have the `ticket_salt` attribute set to the generated `TicketSalt` constant by the CLI wizards. + +In case you are using On-Demand CSR Signing, `icinga2 ca list` on the master only lists +pending requests since v2.11. Add `--all` to also see signed requests. Keep in mind that +old requests are purged after 1 week automatically. + + +### TLS Handshake: Ciphers + +Starting with v2.11, the default configured ciphers have been hardened to modern +standards. This includes TLS v1.2 as minimum protocol version too. + +In case the TLS handshake fails with `no shared cipher`, first analyse whether both +instances support the same ciphers. 
+ +#### Client connects to Server + +Connect using `openssl s_client` and try to reproduce the connection problem. + +> **Important** +> +> The endpoint with the server role **accepting** the connection picks the preferred +> cipher. E.g. when a satellite connects to the master, the master chooses the cipher. +> +> Keep this in mind where to simulate the client role connecting to a server with +> CLI tools such as `openssl s_client`. + + +`openssl s_client` tells you about the supported and shared cipher suites +on the remote server. `openssl ciphers` lists locally available ciphers. + +``` +$ openssl s_client -connect 192.168.33.5:5665 +... + +--- +SSL handshake has read 2899 bytes and written 786 bytes +--- +New, TLSv1/SSLv3, Cipher is AES256-GCM-SHA384 +Server public key is 4096 bit +Secure Renegotiation IS supported +Compression: NONE +Expansion: NONE +No ALPN negotiated +SSL-Session: + Protocol : TLSv1.2 + Cipher : AES256-GCM-SHA384 + +... +``` + +You can specifically use one cipher or a list with the `-cipher` parameter: + +```bash +openssl s_client -connect 192.168.33.5:5665 -cipher 'ECDHE-RSA-AES256-GCM-SHA384' +``` + +In order to fully simulate a connecting client, provide the certificates too: + +```bash +CERTPATH='/var/lib/icinga2/certs' +HOSTNAME='icinga2.vagrant.demo.icinga.com' +openssl s_client -connect 192.168.33.5:5665 -cert "${CERTPATH}/${HOSTNAME}.crt" -key "${CERTPATH}/${HOSTNAME}.key" -CAfile "${CERTPATH}/ca.crt" -cipher 'ECDHE-RSA-AES256-GCM-SHA384' +``` + +In case to need to change the default cipher list, +set the [cipher_list](09-object-types.md#objecttype-apilistener) attribute +in the `api` feature configuration accordingly. + +Beware of using insecure ciphers, this may become a +security risk in your organisation. + +#### Server Accepts Client + +If the master node does not actively connect to the satellite/agent node(s), but instead +the child node actively connectsm, you can still simulate a TLS handshake. 
+ +Use `openssl s_server` instead of `openssl s_client` on the master during the connection +attempt. + +```bash +openssl s_server -connect 192.168.56.101:5665 +``` + +Since the server role chooses the preferred cipher suite in Icinga, +you can test-drive the "agent connects to master" mode here, granted that +the TCP connection is not blocked by the firewall. + + +#### Cipher Scan Tools + +You can also use different tools to test the available cipher suites, this is what SSL Labs, etc. +provide for TLS enabled websites as well. [This post](https://superuser.com/questions/109213/how-do-i-list-the-ssl-tls-cipher-suites-a-particular-website-offers) +highlights some tools and scripts such as [sslscan](https://github.com/rbsec/sslscan) or [testssl.sh](https://github.com/drwetter/testssl.sh/) + +Example for sslscan on macOS against a Debian 10 Buster instance +running v2.11: + +``` +$ brew install sslscan + +$ sslscan 192.168.33.22:5665 +Version: 1.11.13-static +OpenSSL 1.0.2f 28 Jan 2016 + +Connected to 192.168.33.22 + +Testing SSL server 192.168.33.22 on port 5665 using SNI name 192.168.33.22 + + TLS Fallback SCSV: +Server supports TLS Fallback SCSV + + TLS renegotiation: +Session renegotiation not supported + + TLS Compression: +Compression disabled + + Heartbleed: +TLS 1.2 not vulnerable to heartbleed +TLS 1.1 not vulnerable to heartbleed +TLS 1.0 not vulnerable to heartbleed + + Supported Server Cipher(s): +Preferred TLSv1.2 256 bits ECDHE-RSA-AES256-GCM-SHA384 Curve P-256 DHE 256 +Accepted TLSv1.2 128 bits ECDHE-RSA-AES128-GCM-SHA256 Curve P-256 DHE 256 +Accepted TLSv1.2 256 bits ECDHE-RSA-AES256-SHA384 Curve P-256 DHE 256 +Accepted TLSv1.2 128 bits ECDHE-RSA-AES128-SHA256 Curve P-256 DHE 256 + + SSL Certificate: +Signature Algorithm: sha256WithRSAEncryption +RSA Key Strength: 4096 + +Subject: icinga2-debian10.vagrant.demo.icinga.com +Altnames: DNS:icinga2-debian10.vagrant.demo.icinga.com +Issuer: Icinga CA + +Not valid before: Jul 12 07:39:55 2019 GMT +Not valid 
after: Jul 8 07:39:55 2034 GMT +``` + +## Distributed Troubleshooting + +This applies to any Icinga 2 node in a [distributed monitoring setup](06-distributed-monitoring.md#distributed-monitoring-scenarios). + +You should configure the [cluster health checks](06-distributed-monitoring.md#distributed-monitoring-health-checks) if you haven't +done so already. + +> **Note** +> +> Some problems just exist due to wrong file permissions or applied packet filters. Make +> sure to check these in the first place. + +### Cluster Troubleshooting Connection Errors + +General connection errors could be one of the following problems: + +* Incorrect network configuration +* Packet loss +* Firewall rules preventing traffic + +Use tools like `netstat`, `tcpdump`, `nmap`, etc. to make sure that the cluster communication +works (default port is `5665`). + +```bash +tcpdump -n port 5665 -i any + +netstat -tulpen | grep icinga + +nmap icinga2-agent1.localdomain +``` + +### Cluster Troubleshooting TLS Errors + +If the cluster communication fails with TLS/SSL error messages, make sure to check +the following + +* File permissions on the TLS certificate files +* Does the used CA match for all cluster endpoints? + * Verify the `Issuer` being your trusted CA + * Verify the `Subject` containing your endpoint's common name (CN) + * Check the validity of the certificate itself + +Try to manually connect from `icinga2-agent1.localdomain` to the master node `icinga2-master1.localdomain`: + +``` +$ openssl s_client -CAfile /var/lib/icinga2/certs/ca.crt -cert /var/lib/icinga2/certs/icinga2-agent1.localdomain.crt -key /var/lib/icinga2/certs/icinga2-agent1.localdomain.key -connect icinga2-master1.localdomain:5665 + +CONNECTED(00000003) +--- +... +``` + +If the connection attempt fails or your CA does not match, [verify the certificates](15-troubleshooting.md#troubleshooting-certificate-verification). 
+ + +#### Cluster Troubleshooting Unauthenticated Clients + +Unauthenticated nodes are able to connect. This is required for agent/satellite setups. + +Master: + +``` +[2015-07-13 18:29:25 +0200] information/ApiListener: New client connection for identity 'icinga2-agent1.localdomain' (unauthenticated) +``` + +Agent as command execution bridge: + +``` +[2015-07-13 18:29:26 +1000] notice/ClusterEvents: Discarding 'execute command' message from 'icinga2-master1.localdomain': Invalid endpoint origin (client not allowed). +``` + +If these messages do not go away, make sure to [verify the master and agent certificates](15-troubleshooting.md#troubleshooting-certificate-verification). + + +### Cluster Troubleshooting Message Errors + +When the network connection is broken or gone, the Icinga 2 instances will be disconnected. +If the connection can't be re-established between endpoints in the same HA zone, +they remain in a Split-Brain-mode and history may differ. + +Although the Icinga 2 cluster protocol stores historical events in a [replay log](15-troubleshooting.md#troubleshooting-cluster-replay-log) +for later synchronisation, you should make sure to check why the network connection failed. + +Ensure to setup [cluster health checks](06-distributed-monitoring.md#distributed-monitoring-health-checks) +to monitor all endpoints and zones connectivity. + + +### Cluster Troubleshooting Command Endpoint Errors + +Command endpoints can be used [for agents](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint) +as well as inside an [High-Availability cluster](06-distributed-monitoring.md#distributed-monitoring-scenarios). + +There is no CLI command for manually executing the check, but you can verify +the following (e.g. by invoking a forced check from the web interface): + +* `/var/log/icinga2/icinga2.log` shows connection and execution errors. 
+ * The ApiListener is not enabled to [accept commands](06-distributed-monitoring.md#distributed-monitoring-top-down-command-endpoint). This is visible as `UNKNOWN` check result output. + * `CheckCommand` definition not found on the remote client. This is visible as `UNKNOWN` check result output. + * Referenced check plugin not found on the remote agent. + * Runtime warnings and errors, e.g. unresolved runtime macros or configuration problems. +* Specific error messages are also populated into `UNKNOWN` check results including a detailed error message in their output. +* Verify the [check source](15-troubleshooting.md#checks-check-source). This is populated by the node executing the check. You can see that in Icinga Web's detail view or by querying the REST API for this checkable object. + +Additional tasks: + +* More verbose logs are found inside the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output). + +* Use the Icinga 2 API [event streams](12-icinga2-api.md#icinga2-api-event-streams) to receive live check result streams. + +Fetch all check result events matching the `event.service` name `remote-client`: + +```bash +curl -k -s -u root:icinga -H 'Accept: application/json' -X POST 'https://localhost:5665/v1/events?queue=debugcommandendpoint&types=CheckResult&filter=match%28%22remote-client*%22,event.service%29' +``` + + +#### Agent Hosts with Command Endpoint require a Zone + +2.11 fixes bugs where agent host checks would never be scheduled on +the master. One requirement is that the checkable host/service +is put into a zone. + +By default, the Director puts the agent host in `zones.d/master` +and you're good to go. If you manually manage the configuration, +the config compiler now throws an error with `command_endpoint` +being set but no `zone` defined. + +In case you previously managed the configuration outside of `zones.d`, +follow along with the following instructions. + +The most convenient way with e.g. 
managing the objects in `conf.d` +is to move them into the `master` zone. + +First, verify the name of your endpoint's zone. The CLI wizards +use `master` by default. + +``` +vim /etc/icinga2/zones.conf + +object Zone "master" { + ... +} +``` + +Then create a new directory in `zones.d` called `master`, if not existing. + +```bash +mkdir -p /etc/icinga2/zones.d/master +``` + +Now move the directory tree from `conf.d` into the `master` zone. + +```bash +mv conf.d/* /etc/icinga2/zones.d/master/ +``` + +Validate the configuration and reload Icinga. + +```bash +icinga2 daemon -C +systemctl restart icinga2 +``` + +Another method is to specify the `zone` attribute manually, but since +this may lead into other unwanted "not checked" scenarios, we don't +recommend this for your production environment. + +### Cluster Troubleshooting Config Sync + +In order to troubleshoot this, remember the key things with the config sync: + +* Within a config master zone, only one configuration master is allowed to have its config in `/etc/icinga2/zones.d`. + * The config master copies the zone configuration from `/etc/icinga2/zones.d` to `/var/lib/icinga2/api/zones`. This storage is the same for all cluster endpoints, and the source for all config syncs. + * The config master puts the `.authoritative` marker on these zone files locally. This is to ensure that it doesn't receive config updates from other endpoints. If you have copied the content from `/var/lib/icinga2/api/zones` to another node, ensure to remove them. +* During startup, the master validates the entire configuration and only syncs valid configuration to other zone endpoints. + +Satellites/Agents < 2.11 store the received configuration directly in `/var/lib/icinga2/api/zones`, validating it and reloading the daemon. 
+Satellites/Agents >= 2.11 put the received configuration into the staging directory `/var/lib/icinga2/api/zones-stage` first, and will only copy this to the production directory `/var/lib/icinga2/api/zones` once the validation was successful. + +The configuration sync logs the operations during startup with the `information` severity level. Received zone configuration is also logged. + +Typical errors are: + +* The api feature doesn't [accept config](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync). This is logged into `/var/lib/icinga2/icinga2.log`. +* The received configuration zone is not configured in [zones.conf](04-configuration.md#zones-conf) and Icinga denies it. This is logged into `/var/lib/icinga2/icinga2.log`. +* The satellite/agent has local configuration in `/etc/icinga2/zones.d` and thinks it is authoritive for this zone. It then denies the received update. Purge the content from `/etc/icinga2/zones.d`, `/var/lib/icinga2/api/zones/*` and restart Icinga to fix this. + +#### New configuration does not trigger a reload + +The debug/notice log dumps the calculated checksums for all files and the comparison. Analyse this to troubleshoot further. + +A complete sync for the `director-global` global zone can look like this: + +``` +[2019-08-01 09:20:25 +0200] notice/JsonRpcConnection: Received 'config::Update' message from 'icinga2-master1.localdomain' +[2019-08-01 09:20:25 +0200] information/ApiListener: Applying config update from endpoint 'icinga2-master1.localdomain' of zone 'master'. +[2019-08-01 09:20:25 +0200] notice/ApiListener: Creating config update for file '/var/lib/icinga2/api/zones/director-global/.checksums'. +[2019-08-01 09:20:25 +0200] notice/ApiListener: Creating config update for file '/var/lib/icinga2/api/zones/director-global/.timestamp'. +[2019-08-01 09:20:25 +0200] notice/ApiListener: Creating config update for file '/var/lib/icinga2/api/zones/director-global/director/001-director-basics.conf'. 
+[2019-08-01 09:20:25 +0200] notice/ApiListener: Creating config update for file '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf'. +[2019-08-01 09:20:25 +0200] information/ApiListener: Received configuration for zone 'director-global' from endpoint 'icinga2-master1.localdomain'. Comparing the checksums. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Checking for config change between stage and production. Old (4): '{"/.checksums":"c4dd1237e36dcad9142f4d9a81324a7cae7d01543a672299 +b8c1bb08b629b7d1","/.timestamp":"f21c0e6551328812d9f5176e5e31f390de0d431d09800a85385630727b404d83","/director/001-director-basics.conf":"f86583eec81c9bf3a1823a761991fb53d640bd0dc +6cd12bf8c5e6a275359970f","/director/host_templates.conf":"831e9b7e3ec1e33288e56a51e63c688da1d6316155349382a101f7fce6229ecc"}' vs. new (4): '{"/.checksums":"c4dd1237e36dcad9142f4d +9a81324a7cae7d01543a672299b8c1bb08b629b7d1","/.timestamp":"f21c0e6551328812d9f5176e5e31f390de0d431d09800a85385630727b404d83","/director/001-director-basics.conf":"f86583eec81c9bf +3a1823a761991fb53d640bd0dc6cd12bf8c5e6a275359970f","/director/host_templates.conf":"831e9b7e3ec1e33288e56a51e63c688da1d6316155349382a101f7fce6229ecc"}'. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Ignoring old internal file '/.checksums'. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Ignoring old internal file '/.timestamp'. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Checking /director/001-director-basics.conf for old checksum: f86583eec81c9bf3a1823a761991fb53d640bd0dc6cd12bf8c5e6a275359970f. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Checking /director/host_templates.conf for old checksum: 831e9b7e3ec1e33288e56a51e63c688da1d6316155349382a101f7fce6229ecc. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Ignoring new internal file '/.checksums'. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Ignoring new internal file '/.timestamp'. 
+[2019-08-01 09:20:25 +0200] debug/ApiListener: Checking /director/001-director-basics.conf for new checksum: f86583eec81c9bf3a1823a761991fb53d640bd0dc6cd12bf8c5e6a275359970f. +[2019-08-01 09:20:25 +0200] debug/ApiListener: Checking /director/host_templates.conf for new checksum: 831e9b7e3ec1e33288e56a51e63c688da1d6316155349382a101f7fce6229ecc. +[2019-08-01 09:20:25 +0200] information/ApiListener: Stage: Updating received configuration file '/var/lib/icinga2/api/zones-stage/director-global//director/001-director-basics.c +onf' for zone 'director-global'. +[2019-08-01 09:20:25 +0200] information/ApiListener: Stage: Updating received configuration file '/var/lib/icinga2/api/zones-stage/director-global//director/host_templates.conf' +for zone 'director-global'. +[2019-08-01 09:20:25 +0200] information/ApiListener: Applying configuration file update for path '/var/lib/icinga2/api/zones-stage/director-global' (2209 Bytes). + +... + +[2019-08-01 09:20:25 +0200] information/ApiListener: Received configuration updates (4) from endpoint 'icinga2-master1.localdomain' are different to production, triggering validation and reload. +[2019-08-01 09:20:25 +0200] notice/Process: Running command '/usr/lib/x86_64-linux-gnu/icinga2/sbin/icinga2' '--no-stack-rlimit' 'daemon' '--close-stdio' '-e' '/var/log/icinga2/e +rror.log' '--validate' '--define' 'System.ZonesStageVarDir=/var/lib/icinga2/api/zones-stage/': PID 4532 +[2019-08-01 09:20:25 +0200] notice/Process: PID 4532 ('/usr/lib/x86_64-linux-gnu/icinga2/sbin/icinga2' '--no-stack-rlimit' 'daemon' '--close-stdio' '-e' '/var/log/icinga2/error.l +og' '--validate' '--define' 'System.ZonesStageVarDir=/var/lib/icinga2/api/zones-stage/') terminated with exit code 0 +[2019-08-01 09:20:25 +0200] information/ApiListener: Config validation for stage '/var/lib/icinga2/api/zones-stage/' was OK, replacing into '/var/lib/icinga2/api/zones/' and trig +gering reload. 
+[2019-08-01 09:20:26 +0200] information/ApiListener: Copying file 'director-global//.checksums' from config sync staging to production zones directory. +[2019-08-01 09:20:26 +0200] information/ApiListener: Copying file 'director-global//.timestamp' from config sync staging to production zones directory. +[2019-08-01 09:20:26 +0200] information/ApiListener: Copying file 'director-global//director/001-director-basics.conf' from config sync staging to production zones directory. +[2019-08-01 09:20:26 +0200] information/ApiListener: Copying file 'director-global//director/host_templates.conf' from config sync staging to production zones directory. + +... + +[2019-08-01 09:20:26 +0200] notice/Application: Got reload command, forwarding to umbrella process (PID 4236) +``` + +In case the received configuration updates are equal to what is running in production, a different message is logged and the validation/reload is skipped. + +``` +[2020-02-05 15:18:19 +0200] information/ApiListener: Received configuration updates (4) from endpoint 'icinga2-master1.localdomain' are equal to production, skipping validation and reload. +``` + + +#### Syncing Binary Files is Denied + +The config sync is built for syncing text configuration files, wrapped into JSON-RPC messages. +Some users have started to use this as binary file sync instead of using tools built for this: +rsync, git, Puppet, Ansible, etc. + +Starting with 2.11, this attempt is now prohibited and logged. + +``` +[2019-08-02 16:03:19 +0200] critical/ApiListener: Ignoring file '/etc/icinga2/zones.d/global-templates/forbidden.exe' for cluster config sync: Does not contain valid UTF8. Binary files are not supported. 
+Context: + (0) Creating config update for file '/etc/icinga2/zones.d/global-templates/forbidden.exe' + (1) Activating object 'api' of type 'ApiListener' +``` + +In order to solve this problem, remove the mentioned files from `zones.d` and use an alternate way +of syncing plugin binaries to your satellites and agents. + + +#### Zones in Zones doesn't work + +The cluster config sync works in the way that configuration +put into `/etc/icinga2/zones.d` only is included when configured +outside in `/etc/icinga2/zones.conf`. + +If you for example create a "Zone Inception" with defining the +`satellite` zone in `zones.d/master`, the config compiler does not +re-run and include this zone config recursively from `zones.d/satellite`. + +Since v2.11, the config compiler is only including directories where a +zone has been configured. Otherwise it would include renamed old zones, +broken zones, etc. and those long-lasting bugs have been now fixed. + +A more concrete example: Masters and Satellites still need to know the Zone hierarchy outside of `zones.d` synced configuration. + +**Doesn't work** + +``` +vim /etc/icinga2/zones.conf + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} +``` + +``` +vim /etc/icinga2/zones.d/master/satellite-zones.conf + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain", "icinga2-satellite1.localdomain" ] +} +``` + +``` +vim /etc/icinga2/zones.d/satellite/satellite-hosts.conf + +object Host "agent" { ... } +``` + +The `agent` host object will never reach the satellite, since the master does not have +the `satellite` zone configured outside of zones.d. + + +**Works** + +Each instance needs to know this, and know about the endpoints first: + +``` +vim /etc/icinga2/zones.conf + +object Endpoint "icinga2-master1.localdomain" { ... } +object Endpoint "icinga2-master2.localdomain" { ... } + +object Endpoint "icinga2-satellite1.localdomain" { ... 
} +object Endpoint "icinga2-satellite2.localdomain" { ... } +``` + +Then the zone hierarchy as trust and also config sync inclusion is required. + +``` +vim /etc/icinga2/zones.conf + +object Zone "master" { + endpoints = [ "icinga2-master1.localdomain", "icinga2-master2.localdomain" ] +} + +object Zone "satellite" { + endpoints = [ "icinga2-satellite1.localdomain", "icinga2-satellite1.localdomain" ] +} +``` + +Once done, you can start deploying actual monitoring objects into the satellite zone. + +``` +vim /etc/icinga2/zones.d/satellite/satellite-hosts.conf + +object Host "agent" { ... } +``` + +That's also explained and described in the [documentation](06-distributed-monitoring.md#distributed-monitoring-scenarios-master-satellite-agents). + +The thing you can do: For `command_endpoint` agents like inside the Director: +Host -> Agent -> yes, there is no config sync for this zone in place. Therefore +it is valid to just sync their zones via the config sync. + +#### Director Changes + +The following restores the Zone/Endpoint objects as config objects outside of `zones.d` +in your master/satellite's zones.conf with rendering them as external objects in the Director. + +[Example](06-distributed-monitoring.md#distributed-monitoring-scenarios-master-satellite-agents) +for a 3 level setup with the masters and satellites knowing about the zone hierarchy +outside defined in [zones.conf](04-configuration.md#zones-conf): + +``` +object Endpoint "icinga-master1.localdomain" { + //define 'host' attribute to control the connection direction on each instance +} + +object Endpoint "icinga-master2.localdomain" { + //... +} + +object Endpoint "icinga-satellite1.localdomain" { + //... +} + +object Endpoint "icinga-satellite2.localdomain" { + //... +} + +//-------------- +// Zone hierarchy with endpoints, required for the trust relationship and that the cluster config sync knows which zone directory defined in zones.d needs to be synced to which endpoint. 
+// That's no different to what is explained in the docs as basic zone trust hierarchy, and is intentionally managed outside in zones.conf there. + +object Zone "master" { + endpoints = [ "icinga-master1.localdomain", "icinga-master2.localdomain" ] +} + +object Zone "satellite" { + endpoints = [ "icinga-satellite1.localdomain", "icinga-satellite2.localdomain" ] + parent = "master" // trust +} +``` + +Prepare the above configuration on all affected nodes, satellites are likely uptodate already. +Then continue with the steps below. + +> * backup your database, just to be on the safe side +> * create all non-external Zone/Endpoint-Objects on all related Icinga Master/Satellite-Nodes (manually in your local zones.conf) +> * while doing so please do NOT restart Icinga, no deployments +> * change the type in the Director DB: +> +> ```sql +> UPDATE icinga_zone SET object_type = 'external_object' WHERE object_type = 'object'; +> UPDATE icinga_endpoint SET object_type = 'external_object' WHERE object_type = 'object'; +> ``` +> +> * render and deploy a new configuration in the Director. It will state that there are no changes. Ignore it, deploy anyways +> +> That's it. All nodes should automatically restart, triggered by the deployed configuration via cluster protocol. + + +### Cluster Troubleshooting Overdue Check Results + +If your master does not receive check results (or any other events) from the child zones +(satellite, clients, etc.), make sure to check whether the client sending in events +is allowed to do so. + +> **Tip** +> +> General troubleshooting hints on late check results are documented [here](15-troubleshooting.md#late-check-results). + +The [distributed monitoring conventions](06-distributed-monitoring.md#distributed-monitoring-conventions) +apply. So, if there's a mismatch between your client node's endpoint name and its provided +certificate's CN, the master will deny all events. 
+ +> **Tip** +> +> [Icinga Web 2](https://icinga.com/docs/icinga-web-2/latest/doc/01-About/) provides a dashboard view +> for overdue check results. + +Enable the [debug log](15-troubleshooting.md#troubleshooting-enable-debug-output) on the master +for more verbose insights. + +If the client cannot authenticate, it's a more general [problem](15-troubleshooting.md#troubleshooting-cluster-unauthenticated-clients). + +The client's endpoint is not configured on nor trusted by the master node: + +``` +Discarding 'check result' message from 'icinga2-agent1.localdomain': Invalid endpoint origin (client not allowed). +``` + +The check result message sent by the client does not belong to the zone the checkable object is +in on the master: + +``` +Discarding 'check result' message from 'icinga2-agent1.localdomain': Unauthorized access. +``` + + +### Cluster Troubleshooting Replay Log + +If your `/var/lib/icinga2/api/log` directory grows, it generally means that your cluster +cannot replay the log on connection loss and re-establishment. A master node for example +will store all events for not connected endpoints in the same and child zones. + +Check the following: + +* All clients are connected? (e.g. [cluster health check](06-distributed-monitoring.md#distributed-monitoring-health-checks)). +* Check your [connection](15-troubleshooting.md#troubleshooting-cluster-connection-errors) in general. +* Does the log replay work, e.g. are all events processed and the directory gets cleared up over time? +* Decrease the `log_duration` attribute value for that specific [endpoint](09-object-types.md#objecttype-endpoint). + +The cluster health checks also measure the `slave_lag` metric. Use this data to correlate +graphs with other events (e.g. disk I/O, network problems, etc). + + +### Cluster Troubleshooting: Windows Agents + + +#### Windows Service Exe Path + +Icinga agents can be installed either as x86 or x64 package. 
If you enable features, or wonder why +logs are not written, the first step is to analyse which path the Windows service `icinga2` is using. + +Start a new administrative Powershell and ensure that the `icinga2` service is running. + +``` +C:\Program Files\ICINGA2\sbin> net start icinga2 +``` + +Use the `Get-WmiObject` function to extract the windows service and its path name. + +``` +C:\Program Files\ICINGA2\sbin> Get-WmiObject win32_service | ?{$_.Name -like '*icinga*'} | select Name, DisplayName, State, PathName + +Name DisplayName State PathName +---- ----------- ----- -------- +icinga2 Icinga 2 Running "C:\Program Files\ICINGA2\sbin\icinga2.exe" --scm "daemon" +``` + +If you have used the `icinga2.exe` from a different path to enable e.g. the `debuglog` feature, +navigate into `C:\Program Files\ICINGA2\sbin\` and use the correct exe to control the feature set. + + +#### Windows Agents consuming 100% CPU + +> **Note** +> +> The network stack was rewritten in 2.11. This fixes several hanging connections and threads +> on older Windows agents and master/satellite nodes. Prior to testing the below, plan an upgrade. + +Icinga 2 requires the `NodeName` [constant](17-language-reference.md#constants) in various places to run. +This includes loading the TLS certificates, setting the proper check source, +and so on. + +Typically the Windows setup wizard and also the CLI commands populate the [constants.conf](04-configuration.md#constants-conf) +file with the auto-detected or user-provided FQDN/Common Name. + +If this constant is not set during startup, Icinga will try to resolve the +FQDN, if that fails, fetch the hostname. If everything fails, it logs +an error and sets this to `localhost`. This results in undefined behaviour +if ignored by the admin. + +Querying the DNS when not reachable is CPU consuming, and may look like Icinga +is doing lots of checks, etc. but actually really is just starting up. 
+ +In order to fix this, edit the `constants.conf` file and populate +the `NodeName` constant with the FQDN. Ensure this is the same value +as the local endpoint object name. + +``` +const NodeName = "windows-agent1.domain.com" +``` + + + +#### Windows blocking Icinga 2 with ephemeral port range + +When you see a message like this in your Windows agent logs: + +``` +critical/TcpSocket: Invalid socket: 10055, "An operation on a socket could not be performed because the system lacked sufficient buffer space or because a queue was full." +``` + +Windows is blocking Icinga 2 and as such, no more TCP connection handling is possible. + +Depending on the version, patch level and installed applications, Windows is changing its +range of [ephemeral ports](https://en.wikipedia.org/wiki/Ephemeral_port#Range). + +In order to solve this, raise the `MaxUserPort` value in the registry. + +``` +HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters + +Value Name: MaxUserPort Value +Type: DWORD +Value data: 65534 +``` + +More details in [this blogpost](https://www.netways.de/blog/2019/01/24/windows-blocking-icinga-2-with-ephemeral-port-range/) +and this [MS help entry](https://support.microsoft.com/en-us/help/196271/when-you-try-to-connect-from-tcp-ports-greater-than-5000-you-receive-t). diff --git a/doc/16-upgrading-icinga-2.md b/doc/16-upgrading-icinga-2.md new file mode 100644 index 0000000..f90e611 --- /dev/null +++ b/doc/16-upgrading-icinga-2.md @@ -0,0 +1,950 @@ +# Upgrading Icinga 2 + +Upgrading Icinga 2 is usually quite straightforward. +Ordinarily the only manual steps involved +are schema updates for the IDO database. + +Specific version upgrades are described below. Please note that version +updates are incremental. An upgrade from v2.6 to v2.8 requires you to +follow the instructions for v2.7 too.
+ +## Upgrading to v2.13 + +### DB IDO Schema Update + +There is an optional schema update on MySQL which increases the max length of object names from 128 to 255 characters. + +Please proceed here for the [MySQL upgrading docs](16-upgrading-icinga-2.md#upgrading-mysql-db). + +### Behavior changes + +#### Deletion of child downtimes on services + +Service downtimes created while using the `all_services` flag on the [schedule-downtime](12-icinga2-api.md#schedule-downtime) API action +will now automatically be deleted when deleting the host's downtime. + +#### Windows Event Log + +Icinga 2.13 now supports logging to the Windows Event Log. Icinga will now also log messages from the early +startup phase to the Windows Event Log. These were previously missing from the log file and you could only +see them by manually starting Icinga in the foreground. + +This feature is now enabled and replaces the existing mainlog feature logging to a file. When upgrading, the installer +will enable the windowseventlog feature and disable the mainlog feature. Logging to a file is still possible. +If you don't want this configuration migration on upgrade, you can opt out by installing +the `%ProgramData%\icinga2\etc\icinga2\features-available\windowseventlog.conf` file before upgrading to Icinga 2.13. + +#### Broken API package name validation + +This version has replaced a broken regex in the API package validation code which results in package names +now being validated correctly. Package names should now only consist of alphanumeric characters, dashes and underscores. + +This change only applies to newly created packages, so that already existing ones remain supported. + +## Upgrading to v2.12 + +* CLI + * New `pki verify` CLI command for better [TLS certificate troubleshooting](15-troubleshooting.md#troubleshooting-certificate-verification) + +### Behavior changes + +The behavior of multi parent [dependencies](03-monitoring-basics.md#dependencies) was fixed to e.g.
render hosts unreachable when both router uplinks are down. + +Previous behaviour: + +1) parentHost1 DOWN, parentHost2 UP => childHost **not reachable** +2) parentHost1 DOWN, parentHost2 DOWN => childHost **not reachable** + +New behavior: + +1) parentHost1 DOWN, parentHost2 UP => childHost **reachable** +2) parentHost1 DOWN, parentHost2 DOWN => childHost **not reachable** + +Please review your [Dependency](09-object-types.md#objecttype-dependency) configuration as 1) may lead to +different results for + +- `last_reachable` via REST API query +- Notifications not suppressed by faulty reachability calculation anymore + +### Breaking changes + +As of v2.12 our [API](12-icinga2-api.md) URL endpoint [`/v1/actions/acknowledge-problem`](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) refuses acknowledging an already acknowledged checkable by overwriting the acknowledgement. +To replace an acknowledgement you have to remove the old one before adding the new one. + +The deprecated parameters `--cert` and `--key` for the `pki save-cert` CLI command +have been removed from the command and documentation. + +## Upgrading to v2.11 + +### Bugfixes for 2.11 + +2.11.1 on agents/satellites fixes a problem where 2.10.x as config master would send out an unwanted config marker file, +thus making the agent think it is authoritative for the config, and never accepting any new +config files for the zone(s). **If your config master is 2.11.x already, you are not affected by this problem.** + +In order to fix this, upgrade to at least 2.11.1, and purge away the local config sync storage once, then restart. + +```bash +yum install icinga2 + +rm -rf /var/lib/icinga2/api/zones/* +rm -rf /var/lib/icinga2/api/zones-stage/* + +systemctl restart icinga2 +``` + +2.11.2 fixes a problem where the newly introduced config sync "check-change-then-reload" functionality +could cause endless reload loops with agents.
The most visible parts are failing command endpoint checks +with "not connected" UNKNOWN state. **Only applies to HA enabled zones with 2 masters and/or 2 satellites.** + +In order to fix this, upgrade all agents/satellites to at least 2.11.2 and restart them. + +### Packages + +EOL distributions where no packages are available with this release: + +* SLES 11 +* Ubuntu 14 LTS +* RHEL/CentOS 6 x86 + +Raspbian Packages are available inside the `icinga-buster` repository +on [https://packages.icinga.com](https://packages.icinga.com/raspbian/). +Please note that Stretch is not supported suffering from compiler +regressions. Upgrade to Raspbian Buster is highly recommended. + +#### Added: Boost 1.66+ + +The rewrite of our core network stack for cluster and REST API +requires newer Boost versions, specifically >= 1.66. For technical +details, please continue reading in [this issue](https://github.com/Icinga/icinga2/issues/7041). + +Distribution | Repository providing Boost Dependencies +---------------------|------------------------------------- +CentOS 7 | [EPEL repository](02-installation.md#centos-repository) +RHEL 7 | [EPEL repository](02-installation.md#rhel-repository) +RHEL/CentOS 6 x64 | [packages.icinga.com](https://packages.icinga.com) +Fedora | Fedora Upstream +Debian 10 Buster | Debian Upstream +Debian 9 Stretch | [Backports repository](02-installation.md#debian-backports-repository) **New since 2.11** +Debian 8 Jessie | [packages.icinga.com](https://packages.icinga.com) +Ubuntu 18 Bionic | [packages.icinga.com](https://packages.icinga.com) +Ubuntu 16 Xenial | [packages.icinga.com](https://packages.icinga.com) +SLES 15 | SUSE Upstream +SLES 12 | [packages.icinga.com](https://packages.icinga.com) (replaces the SDK repository requirement) + +On platforms where EPEL or Backports cannot satisfy this dependency, +we provide Boost as package on our [package repository](https://packages.icinga.com) +for your convenience. 
+ +After upgrade, you may remove the old Boost packages (1.53 or anything above) +if you don't need them anymore. + +#### Added: .NET Framework 4.6 + +We modernized the graphical Windows wizard to use the more recent .NET Framework 4.6. This requires that Windows versions +older than Windows 10/Windows Server 2016 installs at least [.NET Framework 4.6](https://www.microsoft.com/en-US/download/details.aspx?id=53344). Starting with Windows 10/Windows Server 2016 a .NET Framework 4.6 or higher is installed by default. + +The MSI-Installer package checks if the .NET Framework 4.6 or higher is present, if not the installation wizard will abort with an error message telling you to install at least .NET Framework 4.6. + +#### Removed: YAJL + +Our JSON library, namely [YAJL](https://github.com/lloyd/yajl), isn't maintained anymore +and may cause [crashes](https://github.com/Icinga/icinga2/issues/6684). + +It is replaced by [JSON for Modern C++](https://github.com/nlohmann/json) by Niels Lohmann +and compiled into the binary as header only include. It helps our way to C++11 and allows +to fix additional UTF8 issues more easily. Read more about its [design goals](https://github.com/nlohmann/json#design-goals) +and [benchmarks](https://github.com/miloyip/nativejson-benchmark#parsing-time). + +### Core + +#### Reload Handling + +2.11 provides fixes for unwanted notifications during restarts. +The updated systemd service file now uses the `KillMode=mixed` setting. + +The reload handling was improved with an umbrella process, which means +that normal runtime operations include **3 processes**. You may need to +adjust the local instance monitoring of the [procs](08-advanced-topics.md#monitoring-icinga) check. + +More details can be found in the [technical concepts](19-technical-concepts.md#technical-concepts-core-reload) chapter. + +#### Downtime Notifications + +Imagine that a host/service changes to a HARD NOT-OK state, +and its check interval is set to a high interval e.g. 
1 hour. + +A maintenance downtime prevents the notification being sent, +but once it ends and the host/service is still in a downtime, +no immediate notification is re-sent but you'll have to wait +for the next check. + +Another scenario is with one-shot notifications (interval=0) +which would never notify again after the downtime ends and +the problem state being intact. The state change logic requires +to recover and become HARD NOT-OK to notify again. + +In order to solve these problems with filtered/suppressed notifications +in downtimes, v2.11 changes the behaviour like this: + +- If there was a notification suppressed in a downtime, the core stores that information +- Once the downtime ends and the problem state is still intact, Icinga checks whether a re-notification should be sent immediately + +A new cluster message was added to keep this in sync amongst HA masters. + +> **Important** +> +> In order to properly use this new feature, all involved endpoints +> must be upgraded to v2.11. + +### Network Stack + +The core network stack has been rewritten in 2.11 (some say this could be Icinga 3). + +You can read the full story [here](https://github.com/Icinga/icinga2/issues/7041). + +The only visible changes for users are: + +- No more dead-locks with hanging TLS connections (Cluster, REST API) +- Better log messages in error cases +- More robust and stable with using external libraries instead of self-written socket I/O + +Coming with this release, we've also updated TLS specific requirements +explained below. + +#### TLS 1.2 + +v2.11 raises the minimum required TLS version to 1.2. +This is available since OpenSSL 1.0.1 (EL6 & Debian Jessie). + +Older Icinga satellites/agents need to support TLS 1.2 during the TLS +handshake. + +The `api` feature attribute `tls_protocolmin` now only supports the +value `TLSv1.2` being the default. + +#### Hardened Cipher List + +The previous default cipher list allowed weak ciphers. 
There's no sane way +other than explicitly setting the allowed ciphers. + +The new default sets this to: + +``` +ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384:AES128-GCM-SHA256 +``` + +You can override this setting in the [api](09-object-types.md#objecttype-apilistener) +feature with the `cipher_list` attribute. + +In case that one of these ciphers is marked as insecure in the future, +please let us know with an issue on GitHub. + +### Cluster + +#### Agent Hosts with Command Endpoint require a Zone + +2.11 fixes bugs where agent host checks would never be scheduled on +the master. One definite requirement is that the checkable host/service +is put into a zone. + +By default, the Director puts the agent host in `zones.d/master` +and you're good to go. If you manually manage the configuration, +the config compiler now throws an error with `command_endpoint` +being set but no `zone` defined. + +The most convenient way with e.g. managing the objects in `conf.d` +is to move them into the `master` zone. Please continue in the +[troubleshooting docs](15-troubleshooting.md#troubleshooting-cluster-command-endpoint-errors-agent-hosts-command-endpoint-zone) +for further instructions. + +#### Config Sync + +2.11 overhauls the cluster config sync in many ways. This includes the +following under the hood: + +- Synced configuration files are not immediately put into production, but left inside a stage. +- Unsuccessful config validation never puts the config into production, additional logging and API states are available. +- Zone directories which are not configured in zones.conf, are not included anymore on secondary master/satellites/clients. 
+- Synced config change calculation use checksums instead of timestamps to trigger validation/reload. This is more safe, and the usage of timestamps is now deprecated. +- Don't allow parallel cluster syncs to avoid race conditions with overridden files. +- Deleted directories and files are now purged, previous versions had a bug. + +Whenever a newer child endpoint receives a configuration update without +checksums, it will log a warning. + +``` +Received configuration update without checksums from parent endpoint satellite1. This behaviour is deprecated. Please upgrade the parent endpoint to 2.11+ +``` + +This is a gentle reminder to upgrade the master and satellites first, +prior to installing new clients/agents. + +Technical details are available in the [technical concepts](19-technical-concepts.md#technical-concepts-cluster-config-sync) chapter. + +Since the config sync change detection now uses checksums, this may fail +with anything else than syncing configuration text files. Syncing binary +files were never supported, but rumors say that some users do so. + +This is now prohibited and logged. + +``` +[2019-08-02 16:03:19 +0200] critical/ApiListener: Ignoring file '/etc/icinga2/zones.d/global-templates/forbidden.exe' for cluster config sync: Does not contain valid UTF8. Binary files are not supported. +Context: + (0) Creating config update for file '/etc/icinga2/zones.d/global-templates/forbidden.exe' + (1) Activating object 'api' of type 'ApiListener' +``` + +Such binaries wrapped into JSON-RPC cluster messages may always cause changes +and trigger reload loops. In order to prevent such harm in production, +use infrastructure tools such as Foreman, Puppet, Ansible, etc. to install +plugins on the masters, satellites and agents. + +##### Config Sync: Zones in Zones + +The cluster config sync works in the way that configuration +put into `/etc/icinga2/zones.d` only is included when configured +outside in `/etc/icinga2/zones.conf`. 
+ +If you for example create a "Zone Inception" by defining the +`satellite` zone in `zones.d/master`, the config compiler does not +re-run and include this zone config recursively from `zones.d/satellite`. + +Since v2.11, the config compiler is only including directories where a +zone has been configured. Otherwise it would include renamed old zones, +broken zones, etc. and those long-lasting bugs have now been fixed. + +Please consult the [troubleshoot docs](15-troubleshooting.md#troubleshooting-cluster-config-zones-in-zones) +for concrete examples and solutions. + +#### HA-aware Features + +v2.11 introduces additional HA functionality similar to the DB IDO feature. +This enables the feature being active only on one endpoint while the other +endpoint is paused. When one endpoint is shut down, automatic failover happens. + +This feature is turned off by default keeping the current behaviour. If you need +it active on just one endpoint, set `enable_ha = true` on both endpoints in the +feature configuration. + +This affects the following features: + +* [Elasticsearch](09-object-types.md#objecttype-elasticsearchwriter) +* [Gelf](09-object-types.md#objecttype-gelfwriter) +* [Graphite](09-object-types.md#objecttype-graphitewriter) +* [InfluxDB](09-object-types.md#objecttype-influxdbwriter) +* [OpenTsdb](09-object-types.md#objecttype-opentsdbwriter) +* [Perfdata](09-object-types.md#objecttype-perfdatawriter) (for PNP) + +### HA Failover + +The reconnect failover has been improved, and the default `failover_timeout` +for the DB IDO features has been lowered from 60 to 30 seconds. +Object authority updates (required for balancing in the cluster) happen +more frequently (was 30, is 10 seconds). +Also the cold startup without object authority updates has been reduced +from 60 to 30 seconds. This is to allow cluster reconnects (lowered from 60s to 10s in 2.10) +before actually considering a failover/split brain scenario.
+ +The [IdoMysqlConnection](09-object-types.md#objecttype-idomysqlconnection) and [IdoPgsqlConnection](09-object-types.md#objecttype-idopgsqlconnection) +objects provide a new attribute named `last_failover` which shows the last failover timestamp. +This value also is available in the [ido](10-icinga-template-library.md#itl-icinga-ido) CheckCommand output. + + +### CLI Commands + +The `troubleshoot` CLI command has been removed. It was never completed, +and turned out not to provide required details for GitHub issues anyways. + +We didn't ask nor endorse users on GitHub/Discourse in the past 2 years, so +we're removing it without deprecation. + +Issue templates, the troubleshooting docs and support knowledge has +proven to be better. + +#### Permissions + +CLI commands such as `api setup`, `node wizard/setup`, `feature enable/disable/list` +required root permissions previously. Since the file permissions allow +the Icinga user to change things already, and users kept asking to +run Icinga on their own webspace without root permissions, this is now possible +with 2.11. + +If you are running the commands with a different user than the +compiled `ICINGA_USER` and `ICINGA_GROUP` CMake settings (`icinga` everywhere, +except Debian with `nagios` for historical reasons), ensure that this +user has the capabilities to change to a different user. + +If you still encounter problems, run the aforementioned CLI commands as root, +or with sudo. + +#### CA List Behaviour Change + +`ca list` only shows the pending certificate signing requests by default. + +You can use the new `--all` parameter to show all signing requests. +Note that Icinga automatically purges signed requests older than 1 week. + +#### New: CA Remove/Restore + +`ca remove` allows you to remove pending signing requests. Once the +master receives a CSR, and it is marked as removed, the request is +denied. + +`ca restore` allows you to restore a removed signing request. 
You +can list removed signing requests with the new `--removed` parameter +for `ca list`. + +### Configuration + +The deprecated `concurrent_checks` attribute in the [checker feature](09-object-types.md#objecttype-checkercomponent) +has no effect anymore if set. Please use the [MaxConcurrentChecks](17-language-reference.md#icinga-constants-global-config) +constant in [constants.conf](04-configuration.md#constants-conf) instead. + +### REST API + +#### Actions + +The [schedule-downtime](12-icinga2-api.md#icinga2-api-actions-schedule-downtime-host-all-services) +action supports the `all_services` parameter for Host types. Defaults to false. + +#### Config Packages + +Deployed configuration packages require an active stage, with many previous +allowed. This mechanism is used by the Icinga Director as external consumer, +and Icinga itself for storing runtime created objects inside the `_api` +package. + +This includes downtimes and comments, which were sometimes stored in the wrong +directory path, because the active-stage file was empty/truncated/unreadable at +this point. + +2.11 makes this mechanism more stable and detects broken config packages. +It will also attempt to fix them; the following log entry is perfectly fine. + +``` +[2019-05-10 12:12:09 +0200] information/ConfigObjectUtility: Repairing config package '_api' with stage 'dbe0bef8-c72c-4cc9-9779-da7c4527c5b2'. +``` + +If you still encounter problems, please follow [this troubleshooting entry](15-troubleshooting.md#troubleshooting-api-missing-runtime-objects). + +### DB IDO MySQL Schema + +The schema for MySQL contains an optional update which +drops unneeded indexes. You don't necessarily need to apply +this update. + +### Documentation + +* `Custom attributes` have been renamed to `Custom variables` following the name `vars` and their usage in backends and web interfaces. +The term `custom attribute` still applies, but referring from the web to the core docs is easier.
+* The distributed environment term `client` has been refined into `agent`. Wordings and images have been adjusted, and a `client` only is used as +general term when requesting something from a parent server role. +* The images for basics, modes and scenarios in the distributed monitoring chapter have been re-created from scratch. +* `02-getting-started.md` was renamed to `02-installation.md`, `04-configuring-icinga-2.md` into `04-configuration.md`. Apache redirects will be in place. + +## Upgrading to v2.10 + +### Path Constant Changes + +During package upgrades you may see a notice that the configuration +content of features has changed. This is due to a more general approach +with path constants in v2.10. + +The known constants `SysconfDir` and `LocalStateDir` stay intact and won't +break on upgrade. +If you are using these constants in your own custom command definitions +or other objects, you are advised to revise them and update them according +to the [documentation](17-language-reference.md#icinga-constants). + +Example diff: + +``` +object NotificationCommand "mail-service-notification" { +- command = [ SysconfDir + "/icinga2/scripts/mail-service-notification.sh" ] ++ command = [ ConfigDir + "/scripts/mail-service-notification.sh" ] +``` + +If you have the `ICINGA2_RUN_DIR` environment variable configured in the +sysconfig file, you need to rename it to `ICINGA2_INIT_RUN_DIR`. `ICINGA2_STATE_DIR` +has been removed and this setting has no effect. + +> **Note** +> +> This is important if you rely on the sysconfig configuration in your own scripts. + +### New Constants + +New [Icinga constants](17-language-reference.md#icinga-constants) have been added in this release. + +* `Environment` for specifying the Icinga environment. Defaults to not set. +* `ApiBindHost` and `ApiBindPort` to allow overriding the default ApiListener values. This will be used for an Icinga addon only. 
+ +### Configuration: Namespaces + +The keywords `namespace` and `using` are now [reserved](17-language-reference.md#reserved-keywords) for the namespace functionality provided +with v2.10. Read more about how it works [here](17-language-reference.md#namespaces). + +### Configuration: ApiListener + +Anonymous JSON-RPC connections in the cluster can now be configured with `max_anonymous_clients` +attribute. +The corresponding REST API results from `/v1/status/ApiListener` in `json_rpc` have been renamed +from `clients` to `anonymous_clients` to better reflect their purpose. Authenticated clients +are counted as connected endpoints. A similar change is there for the performance data metrics. + +The TLS handshake timeout defaults to 10 seconds since v2.8.2. This can now be configured +with the configuration attribute `tls_handshake_timeout`. Beware of performance issues +with setting a too high value. + +### API: schedule-downtime Action + +The attribute `child_options` was previously accepting 0,1,2 for specific child downtime settings. +This behaviour stays intact, but the new proposed way are specific constants as values (`DowntimeNoChildren`, `DowntimeTriggeredChildren`, `DowntimeNonTriggeredChildren`). + +### Notifications: Recovery and Acknowledgement + +When a user should be notified on `Problem` and `Acknowledgement`, v2.10 now checks during +the `Acknowledgement` notification event whether this user has been notified about a problem before. + +``` + types = [ Problem, Acknowledgement, Recovery ] +``` + +If **no** `Problem` notification was sent, and the types filter includes problems for this user, +the `Acknowledgement` notification is **not** sent. + +In contrast to that, the following configuration always sends `Acknowledgement` notifications. + +``` + types = [ Acknowledgement, Recovery ] +``` + +This change also restores the old behaviour for `Recovery` notifications. 
The above configuration +leaving out the `Problem` type can be used to only receive recovery notifications. If `Problem` +is added to the types again, Icinga 2 checks whether it has notified a user of a problem when +sending the recovery notification. + +More details can be found in [this PR](https://github.com/Icinga/icinga2/pull/6527). + +### Stricter configuration validation + +Some config errors are now fatal. While it never worked before, icinga2 refuses to start now! + +For example the following started to give a fatal error in 2.10: + +``` + object Zone "XXX" { + endpoints = [ "master-server" ] + parent = "global-templates" + } +``` + +```critical/config: Error: Zone 'XXX' can not have a global zone as parent.``` + +### Package Changes + +Debian/Ubuntu drops the `libicinga2` package. `apt-get upgrade icinga2` +won't remove such packages leaving the upgrade in an unsatisfied state. + +Please use `apt-get full-upgrade` or `apt-get dist-upgrade` instead, as explained [here](https://github.com/Icinga/icinga2/issues/6695#issuecomment-430585915). + +On RHEL/CentOS/Fedora, `icinga2-libs` has been obsoleted. Unfortunately yum's dependency +resolver doesn't allow to install older versions than 2.10 then. Please +read [here](https://github.com/Icinga/icinga-packaging/issues/114#issuecomment-429264827) +for details. + +RPM packages dropped the [Classic UI](16-upgrading-icinga-2.md#upgrading-to-2-8-removed-classicui-config-package) +package in v2.8, Debian/Ubuntu packages were forgotten. This is now the case with this +release. Icinga 1.x is EOL by the end of 2018, plan your migration to [Icinga Web 2](https://icinga.com/docs/icingaweb2/latest/). + +## Upgrading to v2.9 + +### Deprecation and Removal Notes + +- Deprecation of 1.x compatibility features: `StatusDataWriter`, `CompatLogger`, `CheckResultReader`. Their removal is scheduled for 2.11. +Icinga 1.x is EOL and will be out of support by the end of 2018. +- Removal of Icinga Studio. 
It always has been experimental and did not satisfy our high quality standards. We've therefore removed it. + +### Sysconfig Changes + +The security fixes in v2.8.2 required moving specific runtime settings +into the Sysconfig file and environment. This included that Icinga 2 +would itself parse this file and read the required variables. This has generated +numerous false-positive log messages and led to many support questions. v2.9.0 +changes this in the standard way to read these variables from the environment, and use +sane compile-time defaults. + +> **Note** +> +> In order to upgrade, remove everything in the sysconfig file and re-apply +> your changes. + +There is a bug with existing sysconfig files where path variables are not expanded +because systemd [does not support](https://github.com/systemd/systemd/issues/2123) +shell variable expansion. This worked with SysVInit though. + +Edit the sysconfig file and either remove everything, or edit this line +on RHEL 7. Modify the path for other distributions. + +``` +vim /etc/sysconfig/icinga2 + +-ICINGA2_PID_FILE=$ICINGA2_RUN_DIR/icinga2/icinga2.pid ++ICINGA2_PID_FILE=/run/icinga2/icinga2.pid +``` + +If you want to adjust the number of open files for the Icinga application +for example, you would just add this setting like this on RHEL 7: + +``` +vim /etc/sysconfig/icinga2 + +ICINGA2_RLIMIT_FILES=50000 +``` + +Restart Icinga 2 afterwards, the systemd service file automatically puts the +value into the application's environment where this is read on startup. + +### Setup Wizard Changes + +Client and satellite setups previously had the example configuration in `conf.d` included +by default. This caused trouble on config sync, or with locally executed checks generating +wrong check results for command endpoint clients. + +In v2.9.0 `node wizard`, `node setup` and the graphical Windows wizard will disable +the inclusion by default. You can opt-out and explicitly enable it again if needed. 
+ +In addition to the default global zones `global-templates` and `director-global`, +the setup wizards also offer to specify your own custom global zones and generate +the required configuration automatically. + +The setup wizards also use full qualified names for Zone and Endpoint object generation, +either the default values (FQDN for clients) or the user supplied input. This removes +the dependency on the `NodeName` and `ZoneName` constant and helps to immediately see +the parent-child relationship. Those doing support will also see the benefit in production. + +### CLI Command Changes + +The [node setup](06-distributed-monitoring.md#distributed-monitoring-automation-cli-node-setup) +parameter `--master_host` was deprecated and replaced with `--parent_host`. +This parameter is now optional to allow connection-less client setups similar to the `node wizard` +CLI command. The `parent_zone` parameter has been added to modify the parent zone name e.g. +for client-to-satellite setups. + +The `api user` command which was released in v2.8.2 turned out to cause huge problems with +configuration validation, windows restarts and OpenSSL versions. It is therefore removed in 2.9, +the `password_hash` attribute for the ApiUser object stays intact but has no effect. This is to ensure +that clients don't break on upgrade. We will revise this feature in future development iterations. + +### Configuration Changes + +The CORS attributes `access_control_allow_credentials`, `access_control_allow_headers` and +`access_control_allow_methods` are now controlled by Icinga 2 and cannot be changed anymore. + +### Unique Generated Names + +With the removal of RHEL 5 as supported platform, we can finally use real unique IDs. +This is reflected in generating names for e.g. API stage names. Previously it was a handcrafted +mix of local FQDN, timestamps and random numbers. 
+ +### Custom Vars not updating + +A rare issue preventing the custom vars of objects created prior to 2.9.0 being updated when changed may occur. To +remedy this, truncate the customvar tables and restart Icinga 2. The following is an example of how to do this with mysql: + +``` +$ mysql -uroot -picinga icinga +MariaDB [icinga]> truncate icinga_customvariables; +Query OK, 0 rows affected (0.05 sec) +MariaDB [icinga]> truncate icinga_customvariablestatus; +Query OK, 0 rows affected (0.03 sec) +MariaDB [icinga]> exit +Bye +$ sudo systemctl restart icinga2 +``` + +Custom vars should now stay up to date. + + +## Upgrading to v2.8.2 + +With version 2.8.2 the location of settings formerly found in `/etc/icinga2/init.conf` has changed. They are now +located in the sysconfig, `/etc/sysconfig/icinga2` (RPM) or `/etc/default/icinga2` (DPKG) on most systems. The +`init.conf` file has been removed and its settings will be ignored. These changes are only relevant if you edited the +`init.conf`. Below is a table displaying the new names for the affected settings. + + Old `init.conf` | New `sysconfig/icinga2` + ----------------|------------------------ + RunAsUser | ICINGA2\_USER + RunAsGroup | ICINGA2\_GROUP + RLimitFiles | ICINGA2\_RLIMIT\_FILES + RLimitProcesses | ICINGA2\_RLIMIT\_PROCESSES + RLimitStack | ICINGA2\_RLIMIT\_STACK + +## Upgrading to v2.8 + +### DB IDO Schema Update to 2.8.0 + +There are additional indexes and schema fixes which require an update. + +Please proceed here for [MySQL](16-upgrading-icinga-2.md#upgrading-mysql-db) or [PostgreSQL](16-upgrading-icinga-2.md#upgrading-postgresql-db). + +> **Note** +> +> `2.8.1.sql` fixes a unique constraint problem with fresh 2.8.0 installations. +> You don't need this update if you are upgrading from an older version. + +### Changed Certificate Paths + +The default certificate path was changed from `/etc/icinga2/pki` to +`/var/lib/icinga2/certs`. 
+ + Old Path | New Path + ---------------------------------------------------|--------------------------------------------------- + `/etc/icinga2/pki/icinga2-agent1.localdomain.crt` | `/var/lib/icinga2/certs/icinga2-agent1.localdomain.crt` + `/etc/icinga2/pki/icinga2-agent1.localdomain.key` | `/var/lib/icinga2/certs/icinga2-agent1.localdomain.key` + `/etc/icinga2/pki/ca.crt` | `/var/lib/icinga2/certs/ca.crt` + +This applies to Windows clients in the same way: `%ProgramData%\etc\icinga2\pki` +was moved to `%ProgramData%\var\lib\icinga2\certs`. + + Old Path | New Path + ----------------------------------------------------------------|---------------------------------------------------------------- + `%ProgramData%\etc\icinga2\pki\icinga2-agent1.localdomain.crt` | `%ProgramData%\var\lib\icinga2\certs\icinga2-agent1.localdomain.crt` + `%ProgramData%\etc\icinga2\pki\icinga2-agent1.localdomain.key` | `%ProgramData%\var\lib\icinga2\certs\icinga2-agent1.localdomain.key` + `%ProgramData%\etc\icinga2\pki\ca.crt` | `%ProgramData%\var\lib\icinga2\certs\ca.crt` + + +> **Note** +> +> The default expected path for client certificates is `/var/lib/icinga2/certs/ + NodeName + {.crt,.key}`. +> The `NodeName` constant is usually the FQDN and certificate common name (CN). Check the [conventions](06-distributed-monitoring.md#distributed-monitoring-conventions) +> section inside the Distributed Monitoring chapter. + +The [setup CLI commands](06-distributed-monitoring.md#distributed-monitoring-setup-master) and the +default [ApiListener configuration](06-distributed-monitoring.md#distributed-monitoring-apilistener) +have been adjusted to these paths too. + +The [ApiListener](09-object-types.md#objecttype-apilistener) object attributes `cert_path`, `key_path` +and `ca_path` have been deprecated and removed from the example configuration. 
+ +#### Migration Path + +> **Note** +> +> Icinga 2 automatically migrates the certificates to the new default location if they +> are configured and detected in `/etc/icinga2/pki`. + +During startup, the migration kicks in and ensures to copy the certificates to the new +location. This will also happen if someone updates the certificate files in `/etc/icinga2/pki` +to ensure that the new certificate location always has the latest files. + +This has been implemented in the Icinga 2 binary to ensure it works on both Linux/Unix +and the Windows platform. + +If you are not using the built-in CLI commands and setup wizards to deploy the client certificates, +please ensure to update your deployment tools/scripts. This mainly affects + +* Puppet modules +* Ansible playbooks +* Chef cookbooks +* Salt recipes +* Custom scripts, e.g. Windows Powershell or self-made implementations + +In order to support a smooth migration between versions older than 2.8 and future releases, +the built-in certificate migration path is planned to exist as long as the deprecated +`ApiListener` object attributes exist. + +You are safe to use the existing configuration paths inside the `api` feature. + +**Example** + +Look at the following example taken from the Director Linux deployment script for clients. + +* Ensure that the default certificate path is changed from `/etc/icinga2/pki` to `/var/lib/icinga2/certs`. + +``` +-ICINGA2_SSL_DIR="${ICINGA2_CONF_DIR}/pki" ++ICINGA2_SSL_DIR="${ICINGA2_STATE_DIR}/lib/icinga2/certs" +``` + +* Remove the ApiListener configuration attributes. + +``` +object ApiListener "api" { +- cert_path = SysconfDir + "/icinga2/pki/${ICINGA2_NODENAME}.crt" +- key_path = SysconfDir + "/icinga2/pki/${ICINGA2_NODENAME}.key" +- ca_path = SysconfDir + "/icinga2/pki/ca.crt" + accept_commands = true + accept_config = true +} +``` + +Test the script with a fresh client installation before putting it into production. 
+ +> **Tip** +> +> Please support module and script developers in their migration. If you find +> any project which would require these changes, create an issue or a patchset in a PR +> and help them out. Thanks in advance! + +### On-Demand Signing and CA Proxy + +Icinga 2 v2.8 supports the following features inside the cluster: + +* Forward signing requests from clients through a satellite instance to a signing master ("CA Proxy"). +* Signing requests without a ticket. The master instance allows to list and sign CSRs ("On-Demand Signing"). + +In order to use these features, **all instances must be upgraded to v2.8**. + +More details in [this chapter](06-distributed-monitoring.md#distributed-monitoring-setup-sign-certificates-master). + +### Windows Client + +Windows versions older than Windows 10/Server 2016 require the [Universal C Runtime for Windows](https://support.microsoft.com/en-us/help/2999226/update-for-universal-c-runtime-in-windows). + +### Removed Bottom Up Client Mode + +This client mode was deprecated in 2.6 and was removed in 2.8. + +The node CLI command does not provide `list` or `update-config` anymore. + +> **Note** +> +> The old migration guide can be found on [GitHub](https://github.com/Icinga/icinga2/blob/v2.7.0/doc/06-distributed-monitoring.md#bottom-up-migration-to-top-down-). + +The clients don't need to have a local `conf.d` directory included. + +Icinga 2 continues to run with the generated and imported configuration. +You are advised to [migrate](https://github.com/Icinga/icinga2/issues/4798) +any existing configuration to the "top down" mode with the help of the +Icinga Director or config management tools such as Puppet, Ansible, etc. + + +### Removed Classic UI Config Package + +The config meta package `classicui-config` and the configuration files +have been removed. You need to manually configure +this legacy interface. Create a backup of the configuration +before upgrading and re-configure it afterwards. 
+ + +### Flapping Configuration + +Icinga 2 v2.8 implements a new flapping detection algorithm which splits the +threshold configuration into low and high settings. + +`flapping_threshold` is deprecated and does not have any effect when flapping +is enabled. Please remove `flapping_threshold` from your configuration. This +attribute will be removed in v2.9. + +Instead you need to use the `flapping_threshold_low` and `flapping_threshold_high` +attributes. More details can be found [here](08-advanced-topics.md#check-flapping). + +### Deprecated Configuration Attributes + + Object | Attribute + --------------|------------------ + ApiListener | cert\_path (migration happens) + ApiListener | key\_path (migration happens) + ApiListener | ca\_path (migration happens) + Host, Service | flapping\_threshold (has no effect) + +## Upgrading to v2.7 + +v2.7.0 provided new notification scripts and commands. Please ensure to +update your configuration accordingly. An advisory has been published [here](https://icinga.com/2017/08/23/advisory-for-icinga-2-v2-7-update-and-mail-notification-scripts/). + +In case you are having trouble with OpenSSL 1.1.0 and the +public CA certificates, please read [this advisory](https://icinga.com/2017/08/30/advisory-for-ssl-problems-with-leading-zeros-on-openssl-1-1-0/) +and check the [troubleshooting chapter](15-troubleshooting.md#troubleshooting). + +If Icinga 2 fails to start with an empty reference to `$ICINGA2_CACHE_DIR` +ensure to set it inside `/etc/sysconfig/icinga2` (RHEL) or `/etc/default/icinga2` (Debian). + +RPM packages will put a file called `/etc/sysconfig/icinga2.rpmnew` +if you have modified the original file. + +Example on CentOS 7: + +``` +vim /etc/sysconfig/icinga2 + +ICINGA2_CACHE_DIR=/var/cache/icinga2 + +systemctl restart icinga2 +``` + +## Upgrading the MySQL database + +If you want to upgrade an existing Icinga 2 instance, check the +`/usr/share/icinga2-ido-mysql/schema/upgrade` directory for incremental schema upgrade file(s). 
+ +> **Note** +> +> If there isn't an upgrade file for your current version available, there's nothing to do. + +Apply all database schema upgrade files incrementally. + +``` +# mysql -u root -p icinga < /usr/share/icinga2-ido-mysql/schema/upgrade/<version>.sql +``` + +The Icinga 2 DB IDO feature checks the required database schema version on startup +and generates a log message if not satisfied. + + +**Example:** You are upgrading Icinga 2 from version `2.4.0` to `2.8.0`. Look into +the `upgrade` directory: + +``` +$ ls /usr/share/icinga2-ido-mysql/schema/upgrade/ +2.0.2.sql 2.1.0.sql 2.2.0.sql 2.3.0.sql 2.4.0.sql 2.5.0.sql 2.6.0.sql 2.8.0.sql +``` + +There are three new upgrade files called `2.5.0.sql`, `2.6.0.sql` and `2.8.0.sql` +which must be applied incrementally to your IDO database. + +```bash +mysql -u root -p icinga < /usr/share/icinga2-ido-mysql/schema/upgrade/2.5.0.sql +mysql -u root -p icinga < /usr/share/icinga2-ido-mysql/schema/upgrade/2.6.0.sql +mysql -u root -p icinga < /usr/share/icinga2-ido-mysql/schema/upgrade/2.8.0.sql +``` + +## Upgrading the PostgreSQL database + +If you want to upgrade an existing Icinga 2 instance, check the +`/usr/share/icinga2-ido-pgsql/schema/upgrade` directory for incremental schema upgrade file(s). + +> **Note** +> +> If there isn't an upgrade file for your current version available, there's nothing to do. + +Apply all database schema upgrade files incrementally. + +``` +# export PGPASSWORD=icinga +# psql -U icinga -d icinga < /usr/share/icinga2-ido-pgsql/schema/upgrade/<version>.sql +``` + +The Icinga 2 DB IDO feature checks the required database schema version on startup +and generates a log message if not satisfied. + +**Example:** You are upgrading Icinga 2 from version `2.4.0` to `2.8.0`. 
Look into +the `upgrade` directory: + +``` +$ ls /usr/share/icinga2-ido-pgsql/schema/upgrade/ +2.0.2.sql 2.1.0.sql 2.2.0.sql 2.3.0.sql 2.4.0.sql 2.5.0.sql 2.6.0.sql 2.8.0.sql +``` + +There are three new upgrade files called `2.5.0.sql`, `2.6.0.sql` and `2.8.0.sql` +which must be applied incrementally to your IDO database. + +```bash +export PGPASSWORD=icinga +psql -U icinga -d icinga < /usr/share/icinga2-ido-pgsql/schema/upgrade/2.5.0.sql +psql -U icinga -d icinga < /usr/share/icinga2-ido-pgsql/schema/upgrade/2.6.0.sql +psql -U icinga -d icinga < /usr/share/icinga2-ido-pgsql/schema/upgrade/2.8.0.sql +``` diff --git a/doc/17-language-reference.md b/doc/17-language-reference.md new file mode 100644 index 0000000..02c49fd --- /dev/null +++ b/doc/17-language-reference.md @@ -0,0 +1,1371 @@ +# Language Reference + +## Object Definition + +Icinga 2 features an object-based configuration format. You can define new +objects using the `object` keyword: + +``` +object Host "host1.example.org" { + display_name = "host1" + + address = "192.168.0.1" + address6 = "2001:db8:1234::42" +} +``` + +In general you need to write each statement on a new line. Expressions started +with `{`, `(` and `[` extend until the matching closing character and can be broken +up into multiple lines. + +Alternatively you can write multiple statements on a single line by separating +them with a semicolon: + +``` +object Host "host1.example.org" { + display_name = "host1" + + address = "192.168.0.1"; address6 = "2001:db8:1234::42" +} +``` + +Each object is uniquely identified by its type (`Host`) and name +(`host1.example.org`). Some types have composite names, e.g. the +`Service` type which uses the `host_name` attribute and the name +you specified to generate its object name. + +Exclamation marks (!) are not permitted in object names. + +Objects can contain a comma-separated list of property +declarations. Instead of commas semicolons may also be used. 
+The following data types are available for property values: + +All objects have at least the following attributes: + +Attribute | Description +---------------------|----------------------------- +name | The name of the object. This attribute can be modified in the object definition to override the name specified with the `object` directive. +type | The type of the object. + +## Expressions + +The following expressions can be used on the right-hand side of assignments. + +### Numeric Literals + +A floating-point number. + +Example: + +``` +27.3 +``` + +### Duration Literals + +Similar to floating-point numbers except for the fact that they support +suffixes to help with specifying time durations. + +Example: + +``` +2.5m +``` + +Supported suffixes include ms (milliseconds), s (seconds), m (minutes), +h (hours) and d (days). + +Duration literals are converted to seconds by the config parser and +are treated like numeric literals. + +### String Literals + +A string. + +Example: + +``` +"Hello World!" +``` + +#### String Literals Escape Sequences + +Certain characters need to be escaped. The following escape sequences +are supported: + +Character | Escape sequence +--------------------------|------------------------------------ +" | \\" +\\ | \\\\ +<TAB> | \\t +<CARRIAGE-RETURN> | \\r +<LINE-FEED> | \\n +<BEL> | \\b +<FORM-FEED> | \\f + +In addition to these pre-defined escape sequences you can specify +arbitrary ASCII characters using the backslash character (\\) followed +by an ASCII character in octal encoding. + +### Multi-line String Literals + +Strings spanning multiple lines can be specified by enclosing them in +{{{ and }}}. + +Example: + +``` +{{{This +is +a multi-line +string.}}} +``` + +Unlike in ordinary strings special characters do not have to be escaped +in multi-line string literals. + +### Boolean Literals + +The keywords `true` and `false` are used to denote truth values. + +### Null Value + +The `null` keyword can be used to specify an empty value. 
+ +### Dictionary + +An unordered list of key-value pairs. Keys must be unique and are +compared in a case-sensitive manner. + +Individual key-value pairs must either be comma-separated or on separate lines. +The comma after the last key-value pair is optional. + +Example: + +``` +{ + address = "192.168.0.1" + port = 443 +} +``` + +Identifiers may not contain certain characters (e.g. space) or start +with certain characters (e.g. digits). If you want to use a dictionary +key that is not a valid identifier, you can enclose the key in double +quotes. + +### Array + +An ordered list of values. + +Individual array elements must be comma-separated. +The comma after the last element is optional. + +Example: + +``` +[ "hello", 42 ] +``` + +An array may simultaneously contain values of different types, such as +strings and numbers. + +### Operators + +The following operators are supported in expressions. The operators are sorted by descending precedence. + +Operator | Precedence | Examples (Result) | Description +---------|------------|-----------------------------------------------|-------------------------------- +`()` | 1 | (3 + 3) * 5 | Groups sub-expressions +`()` | 1 | Math.random() | Calls a function +`[]` | 1 | a[3] | Array subscript +`.` | 1 | a.b | Element access +`!` | 2 | !"Hello" (false), !false (true) | Logical negation of the operand +`~` | 2 | ~true (false) | Bitwise negation of the operand +`+` | 2 | +3 | Unary plus +`-` | 2 | -3 | Unary minus +`&` | 2 | &var (reference to 'var') | Reference operator +`*` | 2 | *var | Indirection operator +`*` | 3 | 5m * 10 (3000) | Multiplies two numbers +`/` | 3 | 5m / 5 (60) | Divides two numbers +`%` | 3 | 17 % 12 (5) | Remainder after division +`+` | 4 | 1 + 3 (4), "hello " + "world" ("hello world") | Adds two numbers; concatenates strings +`-` | 4 | 3 - 1 (2) | Subtracts two numbers +`<<` | 5 | 4 << 8 (1024) | Left shift +`>>` | 5 | 1024 >> 4 (64) | Right shift +`<` | 6 | 3 < 5 (true) | Less than +`>` | 6 | 3 > 5 
(false) | Greater than +`<=` | 6 | 3 <= 3 (true) | Less than or equal +`>=` | 6 | 3 >= 3 (true) | Greater than or equal +`in` | 7 | "foo" in [ "foo", "bar" ] (true) | Element contained in array +`!in` | 7 | "foo" !in [ "bar", "baz" ] (true) | Element not contained in array +`==` | 8 | "hello" == "hello" (true), 3 == 5 (false) | Equal to +`!=` | 8 | "hello" != "world" (true), 3 != 3 (false) | Not equal to +`&` | 9 | 7 & 3 (3) | Binary AND +`^` | 10 | 17 ^ 12 (29) | Bitwise XOR +| | 11 | 2 | 3 (3) | Binary OR +|| | 12 | true || false (true), 0 || 7 (7)| Logical OR +`&&` | 13 | true && false (false), 3 && 7 (7), 0 && 7 (0) | Logical AND +`=` | 14 | a = 3 | Assignment +`=>` | 15 | x => x * x (function with arg x) | Lambda, for loop +`?` | 16 | (2 * 3 > 5) ? 1 : 0 (1) | [Ternary operator](17-language-reference.md#conditional-statements-ternary) + +### References + +A reference to a value can be obtained using the `&` operator. The `*` operator can be used +to dereference a reference: + +``` +var value = "Hello!" +var p = &value /* p refers to value */ +*p = "Hi!" +log(value) // Prints "Hi!" because the variable was changed +``` + +### Namespaces + +Namespaces can be used to organize variables and functions. They are used to avoid name conflicts. The `namespace` +keyword is used to create a new namespace: + +``` +namespace Utils { + function calculate() { + return 2 + 2 + } +} +``` + +The namespace is made available as a global variable which has the namespace's name (e.g. `Utils`): + +``` +Utils.calculate() +``` + +The `using` keyword can be used to make all attributes in a namespace available to a script without having to +explicitly specify the namespace's name for each access: + +``` +using Utils +calculate() +``` + +The `using` keyword only has an effect for the current file and only for code that follows the keyword: + +``` +calculate() // This will not work. 
+using Utils +``` + +The following namespaces are automatically imported as if by using the `using` keyword: + +* System +* System.Configuration +* Types +* Icinga + +### Function Calls + +Functions can be called using the `()` operator: + +``` +const MyGroups = [ "test1", "test" ] + +{ + check_interval = len(MyGroups) * 1m +} +``` + +A list of available functions is available in the [Library Reference](18-library-reference.md#library-reference) chapter. + +## Assignments + +In addition to the `=` operator shown above a number of other operators +to manipulate attributes are supported. Here's a list of all +available operators (the outermost `{` `}` stand for a local variable scope): + +### Operator = + +Sets an attribute to the specified value. + +Example: + +``` +{ + a = 5 + a = 7 +} +``` + +In this example `a` has the value `7` after both instructions are executed. + +### Operator += + +The += operator is a shortcut. The following expression: + +``` +{ + a = [ "hello" ] + a += [ "world" ] +} +``` + +is equivalent to: + +``` +{ + a = [ "hello" ] + a = a + [ "world" ] +} +``` + +### Operator -= + +The -= operator is a shortcut. The following expression: + +``` +{ + a = 10 + a -= 5 +} +``` + +is equivalent to: + +``` +{ + a = 10 + a = a - 5 +} +``` + +### Operator \*= + +The *= operator is a shortcut. The following expression: + +``` +{ + a = 60 + a *= 5 +} +``` + +is equivalent to: + +``` +{ + a = 60 + a = a * 5 +} +``` + +### Operator /= + +The /= operator is a shortcut. The following expression: + +``` +{ + a = 300 + a /= 5 +} +``` + +is equivalent to: + +``` +{ + a = 300 + a = a / 5 +} +``` + +## Indexer + +The indexer syntax provides a convenient way to set dictionary elements. 
+ +Example: + +``` +{ + hello.key = "world" +} +``` + +Example (alternative syntax): + +``` +{ + hello["key"] = "world" +} +``` + +This is equivalent to writing: + +``` +{ + hello += { + key = "world" + } +} +``` + +If the `hello` attribute does not already have a value, it is automatically initialized to an empty dictionary. + +## Template Imports + +Objects can import attributes from other objects. + +Example: + +``` +template Host "default-host" { + vars.colour = "red" +} + +template Host "test-host" { + import "default-host" + + vars.colour = "blue" +} + +object Host "localhost" { + import "test-host" + + address = "127.0.0.1" + address6 = "::1" +} +``` + +The `default-host` and `test-host` objects are marked as templates +using the `template` keyword. Unlike ordinary objects templates are not +instantiated at run-time. Parent objects do not necessarily have to be +templates, however in general they are. + +The `vars` dictionary for the `localhost` object contains all three +custom variables and the custom variable `colour` has the value `"blue"`. + +Parent objects are resolved in the order they're specified using the +`import` keyword. + +Default templates which are automatically imported into all object definitions +can be specified using the `default` keyword: + +``` +template CheckCommand "plugin-check-command" default { + // ... +} +``` + +Default templates are imported before any other user-specified statement in an +object definition is evaluated. + +If there are multiple default templates the order in which they are imported +is unspecified. + +## Constants + +Global constants can be set using the `const` keyword: + +``` +const VarName = "some value" +``` + +Once defined a constant can be accessed from any file. Constants cannot be changed +once they are set. + +> **Tip** +> +> Best practice is to manage constants in the [constants.conf](04-configuration.md#constants-conf) file. 
+ +### Icinga 2 Specific Constants + +Icinga 2 provides a number of special global constants. These include directory paths, global configuration +and runtime parameters for the application version and (build) platform. + +#### Directory Path Constants + +Constant | Description +--------------------|------------------- +ConfigDir |**Read-only.** Main configuration directory. Usually set to `/etc/icinga2`. +DataDir |**Read-only.** Runtime data for the Icinga daemon. Usually set to `/var/lib/icinga2`. +LogDir |**Read-only.** Logfiles from the daemon. Usually set to `/var/log/icinga2`. +CacheDir |**Read-only.** Cached status information of the daemon. Usually set to `/var/cache/icinga2`. +SpoolDir |**Read-only.** Spool directory for certain data outputs. Usually set to `/var/spool/icinga2`. +InitRunDir |**Read-only.** Directory for PID files and sockets in daemon mode. Usually set to `/run/icinga2`. +ZonesDir |**Read-only.** Contains the path of the zones.d directory. Defaults to `ConfigDir + "/zones.d"`. + +#### Global Configuration Constants + +Constant | Description +--------------------|------------------- +Vars |**Read-write.** Contains a dictionary with global custom variables. Not set by default. +NodeName |**Read-write.** Contains the cluster node name. Set to the local hostname by default. +ReloadTimeout |**Read-write.** Defines the reload timeout for child processes. Defaults to `300s`. +Environment |**Read-write.** The name of the Icinga environment. Included in the SNI host name for outbound connections. Not set by default. +RunAsUser |**Read-write.** Defines the user the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig. +RunAsGroup |**Read-write.** Defines the group the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig. +MaxConcurrentChecks |**Read-write.** The number of max checks run simultaneously. Defaults to `512`. +ApiBindHost |**Read-write.** Overrides the default value for the ApiListener `bind_host` attribute. 
Defaults to `::` if IPv6 is supported by the operating system and to `0.0.0.0` otherwise. +ApiBindPort |**Read-write.** Overrides the default value for the ApiListener `bind_port` attribute. Not set by default. + +#### Application Runtime Constants + +Constant | Description +--------------------|------------------- +PlatformName |**Read-only.** The name of the operating system, e.g. `Ubuntu`. +PlatformVersion |**Read-only.** The version of the operating system, e.g. `14.04.3 LTS`. +PlatformKernel |**Read-only.** The name of the operating system kernel, e.g. `Linux`. +PlatformKernelVersion|**Read-only.** The version of the operating system kernel, e.g. `3.13.0-63-generic`. +BuildCompilerName |**Read-only.** The name of the compiler Icinga was built with, e.g. `Clang`. +BuildCompilerVersion|**Read-only.** The version of the compiler Icinga was built with, e.g. `7.3.0.7030031`. +BuildHostName |**Read-only.** The name of the host Icinga was built on, e.g. `acheron`. +ApplicationVersion |**Read-only.** The application version, e.g. `2.9.0`. + +#### Additional Constants + +Writable constants can be specified on the CLI using the `--define/-D` parameter. + +> **Note for v2.10+** +> +> Default paths which include `/etc` and `/var` as base directory continue to work +> based on the `SysconfDir` and `LocalStateDir` constants respectively. + +In addition to that, the constants below are used to define specific file paths. You should never need +to change them, as they are pre-compiled based on the constants above. + +Variable |Description +--------------------|------------------- +StatePath |**Read-write.** Contains the path of the Icinga 2 state file. Defaults to `DataDir + "/icinga2.state"`. +ObjectsPath |**Read-write.** Contains the path of the Icinga 2 objects file. Defaults to `CacheDir + "/icinga2.debug"`. +PidPath |**Read-write.** Contains the path of the Icinga 2 PID file. Defaults to `InitRunDir + "/icinga2.pid"`. 
+PkgDataDir |**Read-only.** Contains the path of the package data directory. Defaults to `PrefixDir + "/share/icinga2"`. + +The constants below have been used until Icinga v2.10, and are still intact. You don't need them +for future builds and configuration based on the newly available constants above. + +Variable |Description +--------------------|------------------- +PrefixDir |**Read-only.** Contains the installation prefix that was specified with `cmake -DCMAKE_INSTALL_PREFIX`. Defaults to `/usr/local`. +SysconfDir |**Read-only.** Contains the path of the sysconf directory. Defaults to `PrefixDir + "/etc"`. +LocalStateDir |**Read-only.** Contains the path of the local state directory. Defaults to `PrefixDir + "/var"`. +RunDir |**Read-only.** Contains the path of the run directory. Defaults to `LocalStateDir + "/run"`. + +#### Advanced Constants and Variables + +Advanced runtime constants. Please only use them if advised by support or developers. + +Variable | Description +---------------------------|------------------- +EventEngine |**Read-write.** The name of the socket event engine, can be `poll` or `epoll`. The epoll interface is only supported on Linux. +AttachDebugger |**Read-write.** Whether to attach a debugger when Icinga 2 crashes. Defaults to `false`. + +Advanced sysconfig environment variables, defined in `/etc/sysconfig/icinga2` (RHEL/SLES) or `/etc/default/icinga2` (Debian/Ubuntu). + +Variable | Description +---------------------------|------------------- +ICINGA2\_RLIMIT\_FILES |**Read-write.** Defines the resource limit for `RLIMIT_NOFILE` that should be set at start-up. Value cannot be set lower than the default `16 * 1024`. 0 disables the setting. Set in Icinga 2 sysconfig. +ICINGA2\_RLIMIT\_PROCESSES |**Read-write.** Defines the resource limit for `RLIMIT_NPROC` that should be set at start-up. Value cannot be set lower than the default `16 * 1024`. 0 disables the setting. Set in Icinga 2 sysconfig. 
+ICINGA2\_RLIMIT\_STACK |**Read-write.** Defines the resource limit for `RLIMIT_STACK` that should be set at start-up. Value cannot be set lower than the default `256 * 1024`. 0 disables the setting. Set in Icinga 2 sysconfig. + +#### Debug Constants and Variables + +These constants are only available in debug builds for developers and help with tracing messages and attaching to debuggers. + +Variable | Description +---------------------------|------------------- +Internal.DebugJsonRpc | **Read-write.** Setting this to `1` prints the raw JSON-RPC message to STDOUT. +Internal.DebugWorkerDelay | **Read-write.** Delays the main worker process by X seconds after forked from the umbrella process. This helps with attaching LLDB which cannot follow child forks like GDB. + +Example: + +``` +$ icinga2 daemon -DInternal.DebugWorkerDelay=120 +Closed FD 6 which we inherited from our parent process. +[2020-01-29 12:22:33 +0100] information/cli: Icinga application loader (version: v2.11.0-477-gfe8701d77; debug) +[2020-01-29 12:22:33 +0100] information/RunWorker: DEBUG: Current PID: 85253. Sleeping for 120 seconds to allow lldb/gdb -p attachment. + +$ lldb -p 85253 +(lldb) b icinga::Checkable::ProcessCheckResult +(lldb) c +``` + + +## Apply + +The `apply` keyword can be used to create new objects which are associated with +another group of objects. + +``` +apply Service "ping" to Host { + import "generic-service" + + check_command = "ping4" + + assign where host.name == "localhost" +} +``` + +In this example the `assign where` condition is a boolean expression which is +evaluated for all objects of type `Host` and a new service with name "ping" +is created for each matching host. [Expression operators](17-language-reference.md#expression-operators) +may be used in `assign where` conditions. + +The `to` keyword and the target type may be omitted if there is only one target +type, e.g. for the `Service` type. 
+ +Depending on the object type used in the `apply` expression additional local +variables may be available for use in the `where` condition: + +Source Type | Target Type | Variables +------------------|-------------|-------------- +Service | Host | host +Dependency | Host | host +Dependency | Service | host, service +Notification | Host | host +Notification | Service | host, service +ScheduledDowntime | Host | host +ScheduledDowntime | Service | host, service + +Any valid config attribute can be accessed using the `host` and `service` +variables. For example, `host.address` would return the value of the host's +"address" attribute -- or null if that attribute isn't set. + +More usage examples are documented in the [monitoring basics](03-monitoring-basics.md#using-apply-expressions) +chapter. + +## Apply For + +[Apply](17-language-reference.md#apply) rules can be extended with the +[for loop](17-language-reference.md#for-loops) keyword. + +``` +apply Service "prefix-" for (key => value in host.vars.dictionary) to Host { + import "generic-service" + + check_command = "ping4" + vars.host_value = value +} +``` + +Any valid config attribute can be accessed using the `host` and `service` +variables. The attribute must be of the Array or Dictionary type. In this example +`host.vars.dictionary` is of the Dictionary type which needs a key-value-pair +as iterator. + +In this example all generated service object names consist of `prefix-` and +the value of the `key` iterator. The prefix string can be omitted if not required. + +The `key` and `value` variables can be used for object attribute assignment, e.g. for +setting the `check_command` attribute or custom variables as command parameters. + +`apply for` rules are first evaluated against all objects matching the `for loop` list +and afterwards the `assign where` and `ignore where` conditions are evaluated. 
+ +It is not necessary to check attributes referenced in the `for loop` expression +for their existence using an additional `assign where` condition. + +More usage examples are documented in the [monitoring basics](03-monitoring-basics.md#using-apply-for) +chapter. + +## Group Assign + +Group objects can be assigned to specific member objects using the `assign where` +and `ignore where` conditions. + +``` +object HostGroup "linux-servers" { + display_name = "Linux Servers" + + assign where host.vars.os == "Linux" +} +``` + +In this example the `assign where` condition is a boolean expression which is evaluated +for all objects of the type `Host`. Each matching host is added as member to the host group +with the name "linux-servers". Membership exclusion can be controlled using the `ignore where` +condition. [Expression operators](17-language-reference.md#expression-operators) may be used in `assign where` and +`ignore where` conditions. + +Source Type | Variables +------------------|-------------- +HostGroup | host +ServiceGroup | host, service +UserGroup | user + + +## Boolean Values + +The `assign where`, `ignore where`, `if` and `while` statements, the `!` operator as +well as the `bool()` function convert their arguments to a boolean value based on the +following rules: + +Description | Example Value | Boolean Value +---------------------|-------------------|-------------- +Empty value | null | false +Zero | 0 | false +Non-zero integer | -23945 | true +Empty string | "" | false +Non-empty string | "Hello" | true +Empty array | [] | false +Non-empty array | [ "Hello" ] | true +Empty dictionary | {} | false +Non-empty dictionary | { key = "value" } | true + +For a list of supported expression operators for `assign where` and `ignore where` +statements, see [expression operators](17-language-reference.md#expression-operators). + +## Comments + +The Icinga 2 configuration format supports C/C++-style and shell-style comments.
+ +Example: + +``` +/* + This is a comment. + */ +object Host "localhost" { + check_interval = 30 // this is also a comment. + retry_interval = 15 # yet another comment +} +``` + +## Includes + +Other configuration files can be included using the `include` directive. +Paths must be relative to the configuration file that contains the +`include` directive. + +Example: + +``` +include "some/other/file.conf" +include "conf.d/*.conf" +``` + +Wildcard includes are not recursive. + +Icinga also supports include search paths similar to how they work in a +C/C++ compiler: + +``` +include <itl> +``` + +Note the use of angle brackets instead of double quotes. This causes the +config compiler to search the include search paths for the specified +file. By default $PREFIX/share/icinga2/include is included in the list of search +paths. Additional include search paths can be added using +[command-line options](11-cli-commands.md#config-include-path). + +Wildcards are not permitted when using angle brackets. + +## Recursive Includes + +The `include_recursive` directive can be used to recursively include all +files in a directory which match a certain pattern. + +Example: + +``` +include_recursive "conf.d", "*.conf" +include_recursive "templates" +``` + +The first parameter specifies the directory from which files should be +recursively included. + +The file names need to match the pattern given in the second parameter. +When no pattern is specified the default pattern "*.conf" is used. + +## Zone Includes + +> **Note** +> +> This is an internal functionality consumed by Icinga itself. +> +> The preferred way for users managing configuration files in +> zones is to use the [cluster config sync](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync) +> or [REST API config packages](12-icinga2-api.md#icinga2-api-config-management). + +The `include_zones` directive recursively includes all subdirectories for the +given path.
+ +In addition to that it sets the `zone` attribute for all objects created +in these subdirectories to the name of the subdirectory. + +Example: + +``` +include_zones "etc", "zones.d", "*.conf" +include_zones "puppet", "puppet-zones" +``` + +The first parameter specifies a tag name for this directive. Each `include_zones` +invocation should use a unique tag name. When copying the zones' configuration +files Icinga uses the tag name as the name for the destination directory in +`/var/lib/icinga2/api/config`. + +The second parameter specifies the directory which contains the subdirectories. + +The file names need to match the pattern given in the third parameter. +When no pattern is specified the default pattern "*.conf" is used. + +## Library directive + +The `library` directive was used to manually load additional +libraries. Starting with version 2.9 it is no longer necessary to explicitly load +libraries and this directive has no effect. + +## Functions + +Functions can be defined using the `function` keyword. + +Example: + +``` +function multiply(a, b) { + return a * b +} +``` + +When encountering the `return` keyword further execution of the function is terminated and +the specified value is supplied to the caller of the function: + +``` +log(multiply(3, 5)) +``` + +In this example the `multiply` function we declared earlier is invoked with two arguments (3 and 5). +The function computes the product of those arguments and makes the result available to the +function's caller. + +When no value is supplied for the `return` statement the function returns `null`. + +Functions which do not have a `return` statement have their return value set to the value of the +last expression which was performed by the function. For example, we could have also written our +`multiply` function like this: + +``` +function multiply(a, b) { + a * b +} +``` + +Anonymous functions can be created by omitting the name in the function definition. 
The +resulting function object can be used like any other value: + +``` +var fn = function() { 3 } + +fn() /* Returns 3 */ +``` + +## Lambda Expressions + +Functions can also be declared using the alternative lambda syntax. + +Example: + +``` +f = (x) => x * x +``` + +Multiple statements can be used by putting the function body into braces: + +``` +f = (x) => { + log("Lambda called") + x * x +} +``` + +Just like with ordinary functions the return value is the value of the last statement. + +For lambdas which take exactly one argument the parentheses around the arguments can be omitted: + +``` +f = x => x * x +``` + +### Lambda Expressions with Closures + +Lambda expressions which take a given number of arguments may need additional +variable values from the outer scope. When the interface of the lambda expression +cannot be changed, [closures](17-language-reference.md#closures) come into play. + +``` +var y + +f = ((x) use(y) => x == y) +``` + +Note that the parentheses around arguments are always required when using closures. + +A more concrete example: + +Within the DSL, you want to [filter](18-library-reference.md#array-filter) an array of HostGroup objects by their name. +The filter function takes one argument being a function callback which either returns +`true` or `false`. Matching items are collected into the result set. + +``` +get_objects(HostGroup).filter((hg) => hg.name == "linux-servers") +``` + +Instead of hardcoding the matching hostgroup name into the lambda scope, you want +to control the value from the outside configuration values, e.g. in a custom variable +or global constant. + +``` +var hg_filter_name = "linux-servers" + +get_objects(HostGroup).filter((hg) use(hg_filter_name) => hg.name == hg_filter_name) +``` + +You can also use this example vice versa and extract host objects matching a specific +host group name.
+ +``` +var hg_search_name = "linux-servers" + +get_objects(Host).filter((h) use (hg_search_name) => hg_search_name in h.groups).map(h => h.name) +``` + +Note that this example makes use of the [map](18-library-reference.md#array-map) method for the Array type which +extracts the host name attribute from the full object into a new array. + +## Abbreviated Lambda Syntax + +Lambdas which take no arguments can also be written using the abbreviated lambda syntax. + +Example: + +``` +f = {{ 3 }} +``` + +This creates a new function which returns the value 3. + +## Variable Scopes + +When setting a variable Icinga checks the following scopes in this order to determine whether the variable +already exists there: + +* Local Scope +* `this` Scope +* Global Scope + +The local scope contains variables which only exist during the invocation of the current function, +object or apply statement. Local variables can be declared using the `var` keyword: + +``` +function multiply(a, b) { + var temp = a * b + return temp +} +``` + +Each time the `multiply` function is invoked a new `temp` variable is used which is in no way +related to previous invocations of the function. + +When setting a variable which has not previously been declared as local using the `var` keyword +the `this` scope is used. + +The `this` scope refers to the current object which the function or object/apply statement +operates on. + +``` +object Host "localhost" { + check_interval = 5m +} +``` + +In this example the `this` scope refers to the "localhost" object. The `check_interval` attribute +is set for this particular host. + +You can explicitly access the `this` scope using the `this` keyword: + +``` +object Host "localhost" { + var check_interval = 5m + + /* This explicitly specifies that the attribute should be set + * for the host, if we had omitted `this.` the (poorly named) + * local variable `check_interval` would have been modified instead.
+ */ + this.check_interval = 1m +} +``` +Similarly the keywords `locals` and `globals` are available to access the local and global scope. + +Functions also have a `this` scope. However unlike for object/apply statements the `this` scope for +a function is set to whichever object was used to invoke the function. Here's an example: + +``` + hm = { + h_word = null + + function init(word) { + h_word = word + } + } + + /* Let's invoke the init() function */ + hm.init("hello") +``` + +We're using `hm.init` to invoke the function which causes the value of `hm` to become the `this` +scope for this function call. + +## Closures + +By default `function`s, `object`s and `apply` rules do not have access to variables declared +outside of their scope (except for global variables). + +In order to access variables which are defined in the outer scope the `use` keyword can be used: + +``` +function MakeHelloFunction(name) { + return function() use(name) { + log("Hello, " + name) + } +} +``` + +In this case a new variable `name` is created inside the inner function's scope which has the +value of the `name` function argument. + +Alternatively a different value for the inner variable can be specified: + +``` +function MakeHelloFunction(name) { + return function() use (greeting = "Hello, " + name) { + log(greeting) + } +} +``` + +## Conditional Statements + +### Conditional Statements: if/else + +Sometimes it can be desirable to only evaluate statements when certain conditions are met. The if/else +construct can be used to accomplish this. + +Example: + +``` +a = 3 + +if (a < 5) { + a *= 7 +} else if (a > 10) { + a *= 5 +} else { + a *= 2 +} +``` + +An if/else construct can also be used in place of any other value. 
The value of an if/else statement +is the value of the last statement which was evaluated for the branch which was taken: + +``` +a = if (true) { + log("Taking the 'true' branch") + 7 * 3 +} else { + log("Taking the 'false' branch") + 9 +} +``` + +This example prints the log message "Taking the 'true' branch" and the `a` variable is set to 21 (7 * 3). + +The value of an if/else construct is null if the condition evaluates to false and no else branch is given. + +### Conditional Statements: Ternary Operator + +Instead of if/else condition chains, you can also use the ternary operator `?` +with assignments. Values are separated with a colon `:` character. + +``` +cond ? cond_val_true : cond_val_false +``` + +If the condition matches, the first value is returned; otherwise the second (else) +branch value is returned. + +The following example evaluates a condition and either assigns `1` or `0` +to the local variable. + +``` +<1> => var x = (2 * 3 > 5) ? 1 : 0 +null +<2> => x +1.000000 +<3> => var x = (2 * 3 > 7) ? 1 : 0 +null +<4> => x +0.000000 +``` + +Additional examples with advanced condition chaining: + +``` +<1> => 1 ? 2 : 3 ? 4 : 5 ? 6 : 7 +2.000000 +<2> => 0 ? 2 : 3 ? 4 : 5 ? 6 : 7 +4.000000 +<3> => 0 ? 2 : 0 ? 4 : 5 ? 6 : 7 +6.000000 +<4> => 0 ? 2 : 0 ? 4 : 0 ? 6 : 7 +7.000000 +<5> => 1 + 0 ? 2 : 3 + 4 +2.000000 +<6> => 0 + 0 ? 2 : 3 + 4 +7.000000 +<7> => (()=>{ return 1 ? 2 : 3 })() +2.000000 +<8> => var x = 1 ? 2 : 3 +null +<9> => x +2.000000 +``` + + +## While Loops + +The `while` statement checks a condition and executes the loop body when the condition evaluates to `true`. +This is repeated until the condition is no longer true. + +Example: + +``` +var num = 5 + +while (num > 0) { + log("Test") + num -= 1 +} +``` + +The `continue` and `break` keywords can be used to control how the loop is executed: The `continue` keyword +skips over the remaining expressions for the loop body and begins the next loop evaluation.
The `break` keyword +breaks out of the loop. + +## For Loops + +The `for` statement can be used to iterate over arrays and dictionaries. + +Example: + +``` +var list = [ "a", "b", "c" ] + +for (var item in list) { + log("Item: " + item) +} +``` + +The loop body is evaluated once for each item in the array. The variable `item` is declared as a local +variable just as if the `var` keyword had been used. + +Iterating over dictionaries can be accomplished in a similar manner: + +``` +var dict = { a = 3, b = 7 } + +for (var key => var value in dict) { + log("Key: " + key + ", Value: " + value) +} +``` + +The `continue` and `break` keywords can be used to control how the loop is executed: The `continue` keyword +skips over the remaining expressions for the loop body and begins the next loop evaluation. The `break` keyword +breaks out of the loop. + +The `var` keyword is optional when declaring variables in the loop's header. Variables declared without the `var` +keyword are nonetheless local to the function. + +## Constructors + +In order to create a new value of a specific type constructor calls may be used. + +Example: + +``` +var pd = PerfdataValue() +pd.label = "test" +pd.value = 10 +``` + +You can also try to convert an existing value to another type by specifying it as an argument for the constructor call. + +Example: + +``` +var s = String(3) /* Sets s to "3". */ +``` + +## Throwing Exceptions + +Built-in commands may throw exceptions to signal errors such as invalid arguments. User scripts can throw exceptions +using the `throw` keyword. + +Example: + +``` +throw "An error occurred." +``` + +## Handling Exceptions + +Exceptions can be handled using the `try` and `except` keywords. When an exception occurs while executing code in the +`try` clause no further statements in the `try` clause are evaluated and the `except` clause is executed instead. 
+ +Example: + +``` +try { + throw "Test" + + log("This statement won't get executed.") +} except { + log("An error occurred in the try clause.") +} +``` + +## Breakpoints + +The `debugger` keyword can be used to insert a breakpoint. It may be used at any place where an assignment would also be a valid expression. + +By default breakpoints have no effect unless Icinga is started with the `--script-debugger` command-line option. When the script debugger is enabled Icinga stops execution of the script when it encounters a breakpoint and spawns a console which lets the user inspect the current state of the execution environment. + +## Types + +All values have a static type. The `typeof` function can be used to determine the type of a value: + +``` +typeof(3) /* Returns an object which represents the type for numbers */ +``` + +The following built-in types are available: + +Type | Examples | Description +-----------|-------------------|------------------------ +Number | 3.7 | A numerical value. +Boolean | true, false | A boolean value. +String | "hello" | A string. +Array | [ "a", "b" ] | An array. +Dictionary | { a = 3 } | A dictionary. + +Depending on which libraries are loaded additional types may become available. The `icinga` +library implements a whole bunch of other [object types](09-object-types.md#object-types), +e.g. Host, Service, CheckCommand, etc. + +Each type has an associated type object which describes the type's semantics. 
These +type objects are made available using global variables which match the type's name: + +``` +/* This logs 'true' */ +log(typeof(3) == Number) +``` + +The type object's `prototype` property can be used to find out which methods a certain type +supports: + +``` +/* This returns: ["contains","find","len","lower","replace","reverse","split","substr","to_string","trim","upper"] */ +keys(String.prototype) +``` + +Additional documentation on type methods is available in the +[library reference](18-library-reference.md#library-reference). + +## Location Information + +The location of the currently executing script can be obtained using the +`current_filename` and `current_line` keywords. + +Example: + +``` +log("Hello from '" + current_filename + "' in line " + current_line) +``` + +## Reserved Keywords + +These keywords are reserved and must not be used as constants or custom variables. + +``` +object +template +include +include_recursive +include_zones +library +null +true +false +const +var +this +globals +locals +use +default +ignore_on_error +current_filename +current_line +apply +to +where +import +assign +ignore +function +return +break +continue +for +if +else +while +throw +try +except +in +using +namespace +``` +You can escape reserved keywords using the `@` character. The following example +tries to set `vars.include` which references a reserved keyword and generates +an error: + +``` +[2014-09-15 17:24:00 +0200] critical/config: Location: +/etc/icinga2/conf.d/hosts/localhost.conf(13): vars.sla = "24x7" +/etc/icinga2/conf.d/hosts/localhost.conf(14): +/etc/icinga2/conf.d/hosts/localhost.conf(15): vars.include = "some cmdb export field" + ^^^^^^^ +/etc/icinga2/conf.d/hosts/localhost.conf(16): } +/etc/icinga2/conf.d/hosts/localhost.conf(17): + +Config error: in /etc/icinga2/conf.d/hosts/localhost.conf: 15:8-15:14: syntax error, unexpected include (T_INCLUDE), expecting T_IDENTIFIER +[2014-09-15 17:24:00 +0200] critical/config: 1 errors, 0 warnings. 
+``` + +You can escape the `include` keyword by prefixing it with an additional `@` character: + +``` +object Host "localhost" { + import "generic-host" + + address = "127.0.0.1" + address6 = "::1" + + vars.os = "Linux" + vars.sla = "24x7" + + vars.@include = "some cmdb export field" +} +``` diff --git a/doc/18-library-reference.md b/doc/18-library-reference.md new file mode 100644 index 0000000..b3d0216 --- /dev/null +++ b/doc/18-library-reference.md @@ -0,0 +1,1960 @@ +# Library Reference + +## Global functions + +These functions are globally available in [assign/ignore where expressions](03-monitoring-basics.md#using-apply-expressions), +[functions](17-language-reference.md#functions), [API filters](12-icinga2-api.md#icinga2-api-filters) +and the [Icinga 2 debug console](11-cli-commands.md#cli-command-console). + +You can use the [Icinga 2 debug console](11-cli-commands.md#cli-command-console) +as a sandbox to test these functions before implementing +them in your scenarios. + +### basename + +Signature: + +``` +function basename(path) +``` + +Returns the filename portion of the specified path. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var path = "/etc/icinga2/scripts/xmpp-notification.pl" +null +<2> => basename(path) +"xmpp-notification.pl" +``` + +### bool + +Signature: + +``` +function bool(value) +``` + +Converts the value to a bool. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => bool(1) +true +<2> => bool(0) +false +``` + +### cidr_match + +Signature: + +``` +function cidr_match(pattern, ip, mode) +``` + +Returns true if the CIDR pattern matches the IP address, false otherwise. + +IPv4 addresses are converted to IPv4-mapped IPv6 addresses before being +matched against the pattern. The `mode` argument is optional and can be +either `MatchAll` (in which case all elements for an array have to match) or `MatchAny` +(in which case at least one element has to match). The default mode is `MatchAll`. 
+ +Example for a single IP address: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => host.address = "192.168.56.101" +null +<2> => cidr_match("192.168.56.0/24", host.address) +true +<3> => cidr_match("192.168.56.0/26", host.address) +false +``` + +Example for an array of IP addresses: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => host.vars.vhost_ips = [ "192.168.56.101", "192.168.56.102", "10.0.10.99" ] +null +<2> => cidr_match("192.168.56.0/24", host.vars.vhost_ips, MatchAll) +false +<3> => cidr_match("192.168.56.0/24", host.vars.vhost_ips, MatchAny) +true +``` + +### dirname + +Signature: + +``` +function dirname(path) +``` + +Returns the directory portion of the specified path. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var path = "/etc/icinga2/scripts/xmpp-notification.pl" +null +<2> => dirname(path) +"/etc/icinga2/scripts" +``` + +### escape_create_process_arg + +Signature: + +``` +function escape_create_process_arg(text) +``` + +Escapes a string for use as an argument for CreateProcess(). Windows only. + +### escape_shell_arg + +Signature: + +``` +function escape_shell_arg(text) +``` + +Escapes a string for use as a single shell argument. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => escape_shell_arg("'$host.name$' '$service.name$'") +"''\\''$host.name$'\\'' '\\''$service.name$'\\'''" +``` + +### escape_shell_cmd + +Signature: + +``` +function escape_shell_cmd(text) +``` + +Escapes shell meta characters in a string. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => escape_shell_cmd("/bin/echo 'shell test' $ENV") +"/bin/echo 'shell test' \\$ENV" +``` + +### get_time + +Signature: + +``` +function get_time() +``` + +Returns the current UNIX timestamp as floating point number. 
+ +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => get_time() +1480072135.633008 +<2> => get_time() +1480072140.401207 +``` + +### getenv + +Signature: + +``` +function getenv(key) +``` + +Returns the value from the specified environment variable key. + +Example: + +``` +$ MY_ENV_VAR=icinga2 icinga2 console +Icinga 2 (version: v2.11.0) +Type $help to view available commands. +<1> => getenv("MY_ENV_VAR") +"icinga2" +``` + +### glob + +Signature: + +``` +function glob(pathSpec, type) +``` + +Returns an array containing all paths which match the +`pathSpec` argument. + +The `type` argument is optional and specifies which types +of paths are matched. This can be a combination of the `GlobFile` +and `GlobDirectory` constants. The default value is `GlobFile | GlobDirectory`. + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var pathSpec = "/etc/icinga2/conf.d/*.conf" +null +<2> => glob(pathSpec) +[ "/etc/icinga2/conf.d/app.conf", "/etc/icinga2/conf.d/commands.conf", ... ] +``` + +### glob\_recursive + +Signature: + +``` +function glob_recursive(path, pattern, type) +``` + +Recursively descends into the specified directory and returns an array containing +all paths which match the `pattern` argument. + +The `type` argument is optional and specifies which types +of paths are matched. This can be a combination of the `GlobFile` +and `GlobDirectory` constants. The default value is `GlobFile | GlobDirectory`. + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var path = "/etc/icinga2/zones.d/" +null +<2> => var pattern = "*.conf" +null +<3> => glob_recursive(path, pattern) +[ "/etc/icinga2/zones.d/global-templates/templates.conf", "/etc/icinga2/zones.d/master/hosts.conf", ... ] +``` + +### intersection + +Signature: + +``` +function intersection(array, array, ...) +``` + +Returns an array containing all unique elements which are common to all +specified arrays. 
+ +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var dev_notification_groups = [ "devs", "slack" ] +null +<2> => var host_notification_groups = [ "slack", "noc" ] +null +<3> => intersection(dev_notification_groups, host_notification_groups) +[ "slack" ] +``` + +### keys + +Signature: + +``` +function keys(dict) +``` + +Returns an array containing the dictionary's keys. + +**Note**: Instead of using this global function you are advised to use the type's +prototype method: [Dictionary#keys](18-library-reference.md#dictionary-keys). + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => host.vars.disks["/"] = {} +null +<2> => host.vars.disks["/var"] = {} +null +<3> => host.vars.disks.keys() +[ "/", "/var" ] +``` + +### len + +Signature: + +``` +function len(value) +``` + +Returns the length of the value, i.e. the number of elements for an array +or dictionary, or the length of the string in bytes. + +**Note**: Instead of using this global function you are advised to use the type's +prototype method: [Array#len](18-library-reference.md#array-len), [Dictionary#len](18-library-reference.md#dictionary-len) and +[String#len](18-library-reference.md#string-len). + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => host.groups = [ "linux-servers", "db-servers" ] +null +<2> => host.groups.len() +2.000000 +<3> => host.vars.disks["/"] = {} +null +<4> => host.vars.disks["/var"] = {} +null +<5> => host.vars.disks.len() +2.000000 +<6> => host.vars.os_type = "Linux/Unix" +null +<7> => host.vars.os_type.len() +10.000000 +``` + +### log + +Signature: + +``` +function log(value) +``` + +Writes a message to the log. Non-string values are converted to a JSON string. + +Signature: + +``` +function log(severity, facility, value) +``` + +Writes a message to the log. `severity` can be one of `LogDebug`, `LogNotice`, +`LogInformation`, `LogWarning`, and `LogCritical`. + +Non-string values are converted to a JSON string. 
+ +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => log(LogCritical, "Console", "First line") +critical/Console: First line +null +<2> => var groups = [ "devs", "slack" ] +null +<3> => log(LogCritical, "Console", groups) +critical/Console: ["devs","slack"] +null +``` + +### match + +Signature: + +``` +function match(pattern, value, mode) +``` + +Returns true if the wildcard (`?*`) `pattern` matches the `value`, false otherwise. +The `value` can be of the type [String](18-library-reference.md#string-type) or [Array](18-library-reference.md#array-type) (which +contains string elements). + +The `mode` argument is optional and can be either `MatchAll` (in which case all elements +for an array have to match) or `MatchAny` (in which case at least one element has to match). +The default mode is `MatchAll`. + +Example for string values: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var name = "db-prod-sfo-657" +null +<2> => match("*prod-sfo*", name) +true +<3> => match("*-dev-*", name) +false +``` + +Example for an array of string values: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0-28) +<1> => host.vars.application_types = [ "web-wp", "web-rt", "db-local" ] +null +<2> => match("web-*", host.vars.application_types, MatchAll) +false +<3> => match("web-*", host.vars.application_types, MatchAny) +true +``` + +### number + +Signature: + +``` +function number(value) +``` + +Converts the value to a number. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => number(false) +0.000000 +<2> => number("78") +78.000000 +``` + +### parse_performance_data + +Signature: + +``` +function parse_performance_data(pd) +``` + +Parses a performance data string and returns an array describing the values. 
+ +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var pd = "'time'=1480074205.197363;;;" +null +<2> => parse_performance_data(pd) +{ + counter = false + crit = null + label = "time" + max = null + min = null + type = "PerfdataValue" + unit = "" + value = 1480074205.197363 + warn = null +} +``` + +### path\_exists + +Signature: + +``` +function path_exists(path) +``` + +Returns true if the specified path exists, false otherwise. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var path = "/etc/icinga2/scripts/xmpp-notification.pl" +null +<2> => path_exists(path) +true +``` + +### random + +Signature: + +``` +function random() +``` + +Returns a random value between 0 and RAND\_MAX (as defined in stdlib.h). + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => random() +1263171996.000000 +<2> => random() +108402530.000000 +``` + +### range + +Signature: + +``` +function range(end) +function range(start, end) +function range(start, end, increment) +``` + +Returns an array of numbers in the specified range. +If you specify one parameter, the first element starts at `0`. +The following array numbers are incremented by `1` and stop before +the specified end. +If you specify the start and end numbers, the returned array +numbers are incremented by `1`. They start at the specified start +number and stop before the end number. +Optionally you can specify the increment step between numbers +as the third parameter. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => range(5) +[ 0.000000, 1.000000, 2.000000, 3.000000, 4.000000 ] +<2> => range(2,4) +[ 2.000000, 3.000000 ] +<3> => range(2,10,2) +[ 2.000000, 4.000000, 6.000000, 8.000000 ] +``` + +### regex + +Signature: + +``` +function regex(pattern, value, mode) +``` + +Returns true if the regular expression `pattern` matches the `value`, false otherwise.
+The `value` can be of the type [String](18-library-reference.md#string-type) or [Array](18-library-reference.md#array-type) (which +contains string elements). + +The `mode` argument is optional and can be either `MatchAll` (in which case all elements +for an array have to match) or `MatchAny` (in which case at least one element has to match). +The default mode is `MatchAll`. + +**Tip**: In case you are looking for regular expression tests try [regex101](https://regex101.com). + +Example for string values: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => host.vars.os_type = "Linux/Unix" +null +<2> => regex("^Linux", host.vars.os_type) +true +<3> => regex("^Linux$", host.vars.os_type) +false +``` + +Example for an array of string values: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => host.vars.databases = [ "db-prod1", "db-prod2", "db-dev" ] +null +<2> => regex("^db-prod\\d+", host.vars.databases, MatchAny) +true +<3> => regex("^db-prod\\d+", host.vars.databases, MatchAll) +false +``` + +### sleep + +Signature: + +``` +function sleep(interval) +``` + +Sleeps for the specified amount of time (in seconds). + +### string + +Signature: + +``` +function string(value) +``` + +Converts the value to a string. 
+ +**Note**: Instead of using this global function you are advised to use the type's +prototype method: + +* [Number#to_string](18-library-reference.md#number-to_string) +* [Boolean#to_string](18-library-reference.md#boolean-to_string) +* [String#to_string](18-library-reference.md#string-to_string) +* [Object#to_string](18-library-reference.md#object-to-string) for Array and Dictionary types +* [DateTime#to_string](18-library-reference.md#datetime-tostring) + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => 5.to_string() +"5" +<2> => false.to_string() +"false" +<3> => "abc".to_string() +"abc" +<4> => [ "dev", "slack" ].to_string() +"[ \"dev\", \"slack\" ]" +<5> => { "/" = {}, "/var" = {} }.to_string() +"{\n\t\"/\" = {\n\t}\n\t\"/var\" = {\n\t}\n}" +<6> => DateTime(2016, 11, 25).to_string() +"2016-11-25 00:00:00 +0100" +``` + +### typeof + +Signature: + +``` +function typeof(value) +``` + +Returns the [Type](18-library-reference.md#type-type) object for a value. + +Example: + +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => typeof(3) == Number +true +<2> => typeof("str") == String +true +<3> => typeof(true) == Boolean +true +<4> => typeof([ 1, 2, 3]) == Array +true +<5> => typeof({ a = 2, b = 3 }) == Dictionary +true +``` + +### union + +Signature: + +``` +function union(array, array, ...) +``` + +Returns an array containing all unique elements from the specified arrays. + +Example: +``` +$ icinga2 console +Icinga 2 (version: v2.11.0) +<1> => var dev_notification_groups = [ "devs", "slack" ] +null +<2> => var host_notification_groups = [ "slack", "noc" ] +null +<3> => union(dev_notification_groups, host_notification_groups) +[ "devs", "noc", "slack" ] +``` + +## Scoped Functions + +This chapter describes functions which are only available +in a specific scope. 
+ +### macro + +Signature: + +``` +function macro("$macro_name$") +``` + +The `macro` function can be used to resolve [runtime macro](03-monitoring-basics.md#runtime-macros) +strings into their values. +The returned value depends on the attribute value which is resolved +from the specified runtime macro. + +This function is only available in runtime evaluated functions, e.g. +for [custom variables](03-monitoring-basics.md#custom-variables-functions) which +use the [abbreviated lambda syntax](17-language-reference.md#nullary-lambdas). + +This example sets the `snmp_address` custom variable +based on `$address$` and `$address6$`. + +``` + vars.snmp_address = {{ + var addr_v4 = macro("$address$") + var addr_v6 = macro("$address6$") + + if (addr_v4) { + return addr_v4 + } else { + return "udp6:[" + addr_v6 + "]" + } + }} +``` + +More reference examples are available inside the [Icinga Template Library](10-icinga-template-library.md#icinga-template-library) +and the [object accessors chapter](08-advanced-topics.md#access-object-attributes-at-runtime). + +## Object Accessor Functions + +These functions can be used to retrieve a reference to another object by name. + +### get_check_command + +Signature: + +``` +function get_check_command(name); +``` + +Returns the CheckCommand object with the specified name, or `null` if no such CheckCommand object exists. + +### get_event_command + +Signature: + +``` +function get_event_command(name); +``` + +Returns the EventCommand object with the specified name, or `null` if no such EventCommand object exists. + +### get_host + +Signature: + +``` +function get_host(host_name); +``` + +Returns the Host object with the specified name, or `null` if no such Host object exists. + +### get_host_group + +Signature: + +``` +function get_host_group(name); +``` + +Returns the HostGroup object with the specified name, or `null` if no such HostGroup object exists. 
+ +### get_notification_command + +Signature: + +``` +function get_notification_command(name); +``` + +Returns the NotificationCommand object with the specified name, or `null` if no such NotificationCommand object exists. + +### get_object + +Signature: + +``` +function get_object(type, name); +``` + +Returns the object with the specified type and name, or `null` if no such object exists. `type` must refer +to a type object. + +### get_objects + +Signature: + +``` +function get_objects(type); +``` + +Returns an array of objects whose type matches the specified type. `type` must refer +to a type object. + +### get_service + +Signature: + +``` +function get_service(host_name, service_name); +function get_service(host, service_name); +``` + +Returns the Service object with the specified host name or object and service name pair, +or `null` if no such Service object exists. + +Example in the [debug console](11-cli-commands.md#cli-command-console) +which fetches the `disk` service object from the current Icinga 2 node: + +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' +Icinga 2 (version: v2.11.0) + +<1> => get_service(NodeName, "disk") +<2> => get_service(NodeName, "disk").__name +"icinga2-master1.localdomain!disk" + +<3> => get_service(get_host(NodeName), "disk").__name +"icinga2-master1.localdomain!disk" +``` + +### get_service_group + +Signature: + +``` +function get_service_group(name); +``` + +Returns the ServiceGroup object with the specified name, or `null` if no such ServiceGroup object exists. + +### get_services + +Signature: + +``` +function get_services(host_name); +function get_services(host); +``` + +Returns an [array](17-language-reference.md#array) of service objects for the specified host name or object, +or `null` if no such host object exists. 
+ +Example in the [debug console](11-cli-commands.md#cli-command-console) +which fetches all service objects from the current Icinga 2 node: + +``` +$ ICINGA2_API_PASSWORD=icinga icinga2 console --connect 'https://root@localhost:5665/' +Icinga 2 (version: v2.11.0) + +<1> => get_services(NodeName).map(s => s.name) +[ "disk", "disk /", "http", "icinga", "load", "ping4", "ping6", "procs", "ssh", "users" ] +``` + +Note: [map](18-library-reference.md#array-map) takes a [lambda function](17-language-reference.md#lambdas) as argument. In this example +we only want to collect and print the `name` attribute with `s => s.name`. + +This works in a similar fashion for a host object where you can extract all service states +in using the [map](18-library-reference.md#array-map) functionality: + +``` +<2> => get_services(get_host(NodeName)).map(s => s.state) +[ 2.000000, 2.000000, 2.000000, 0.000000, 0.000000, 0.000000, 2.000000, 0.000000, 0.000000, 1.000000, 0.000000, 0.000000 ] +``` + +### get_template + +Signature: + +``` +function get_template(type, name); +``` + +Returns the template with the specified type and name, or `null` if no such object exists. `type` must refer +to a type object. + +> **Note** +> +> Only the name and debug info attributes are available for templates accessed in the DSL. +> Object attributes are not available in this scope. + +You can use this functionality to check whether a template exists e.g. on a satellite endpoint +and if not, import a different template. + +``` +object Host "icinga-agent47.localdomain" { + if (get_template(Host, "master-host-tmpl")) { + import "master-host-tmpl" + } else { + import "generic-host" + } +} +``` + +### get_templates + +Signature: + +``` +function get_templates(type); +``` + +Returns an array of templates whose type matches the specified type. `type` must refer +to a type object. + +> **Note** +> +> Only the name and debug info attributes are available for templates accessed in the DSL. 
+> Object attributes are not available in this scope. + +You can use this function to iterate over all available template names, similar to what +the [templates API URL endpoint](12-icinga2-api.md#icinga2-api-config-templates) provides. + +``` +<1> => get_templates(Host).map(n => n.name) +[ "ssh-agent" ] +``` + +### get_time_period + +Signature: + +``` +function get_time_period(name); +``` + +Returns the TimePeriod object with the specified name, or `null` if no such TimePeriod object exists. + +### get_user + +Signature: + +``` +function get_user(name); +``` + +Returns the User object with the specified name, or `null` if no such User object exists. + +### get_user_group + +Signature: + +``` +function get_user_group(name); +``` + +Returns the UserGroup object with the specified name, or `null` if no such UserGroup object exists. + +## Json object + +The global `Json` object can be used to encode and decode JSON. + +### Json.decode + +Signature: + +``` +function decode(x); +``` + +Decodes a JSON string. + +### Json.encode + +Signature: + +``` +function encode(x); +``` + +Encodes an arbitrary value into JSON. + +## Math object + +The global `Math` object can be used to access a number of mathematical constants +and functions. + +### Math.E + +Euler's constant. + +### Math.LN2 + +Natural logarithm of 2. + +### Math.LN10 + +Natural logarithm of 10. + +### Math.LOG2E + +Base 2 logarithm of E. + +### Math.PI + +The mathematical constant Pi. + +### Math.SQRT1_2 + +Square root of 1/2. + +### Math.SQRT2 + +Square root of 2. + +### Math.abs + +Signature: + +``` +function abs(x); +``` + +Returns the absolute value of `x`. + +### Math.acos + +Signature: + +``` +function acos(x); +``` + +Returns the arccosine of `x`. + +### Math.asin + +Signature: + +``` +function asin(x); +``` + +Returns the arcsine of `x`. + +### Math.atan + +Signature: + +``` +function atan(x); +``` + +Returns the arctangent of `x`. 
+ +### Math.atan2 + +Signature: + +``` +function atan2(y, x); +``` +Returns the arctangent of the quotient of `y` and `x`. + +### Math.ceil + +Signature: + +``` +function ceil(x); +``` + +Returns the smallest integer value not less than `x`. + +### Math.cos + +Signature: + +``` +function cos(x); +``` + +Returns the cosine of `x`. + +### Math.exp + +Signature: + +``` +function exp(x); +``` + +Returns E raised to the `x`th power. + +### Math.floor + +Signature: + +``` +function floor(x); +``` + +Returns the largest integer value not greater than `x`. + +### Math.isinf + +Signature: + +``` +function isinf(x); +``` + +Returns whether `x` is infinite. + +### Math.isnan + +Signature: + +``` +function isnan(x); +``` + +Returns whether `x` is NaN (not-a-number). + +### Math.log + +Signature: + +``` +function log(x); +``` + +Returns the natural logarithm of `x`. + +### Math.max + +Signature: + +``` +function max(...); +``` + +Returns the largest argument. A variable number of arguments can be specified. +If no arguments are given, -Infinity is returned. + +### Math.min + +Signature: + +``` +function min(...); +``` + +Returns the smallest argument. A variable number of arguments can be specified. +If no arguments are given, +Infinity is returned. + +### Math.pow + +Signature: + +``` +function pow(x, y); +``` + +Returns `x` raised to the `y`th power. + +### Math.random + +Signature: + +``` +function random(); +``` + +Returns a pseudo-random number between 0 and 1. + +### Math.round + +Signature: + +``` +function round(x); +``` + +Returns `x` rounded to the nearest integer value. + +### Math.sign + +Signature: + +``` +function sign(x); +``` + +Returns -1 if `x` is negative, 1 if `x` is positive +and 0 if `x` is 0. + +### Math.sin + +Signature: + +``` +function sin(x); +``` + +Returns the sine of `x`. + +### Math.sqrt + +Signature: + +``` +function sqrt(x); +``` + +Returns the square root of `x`. 
+ +### Math.tan + +Signature: + +``` +function tan(x); +``` + +Returns the tangent of `x`. + +## Array type + +Inherits methods from the [Object type](18-library-reference.md#object-type). + +### Array#add + +Signature: + +``` +function add(value); +``` + +Adds a new value after the last element in the array. + +### Array#all + +Signature: + +``` +function all(func); +``` + +Returns true if the array contains only elements for which `func(element)` +is true, false otherwise. + +### Array#any + +Signature: + +``` +function any(func); +``` + +Returns true if the array contains at least one element for which `func(element)` +is true, false otherwise. + +### Array#clear + +Signature: + +``` +function clear(); +``` + +Removes all elements from the array. + +### Array#contains + +Signature: + +``` +function contains(value); +``` + +Returns true if the array contains the specified value, false otherwise. + +### Array#filter + +Signature: + +``` +function filter(func); +``` + +Returns a copy of the array containing only the elements for which `func(element)` +is true. + +### Array#freeze + +Signature: + +``` +function freeze() +``` + +Disallows further modifications to this array. Trying to modify the array will result in an exception. + +### Array#get + +Signature: + +``` +function get(index); +``` + +Retrieves the element at the specified zero-based index. + +### Array#join + +Signature: + +``` +function join(separator); +``` + +Joins all elements of the array using the specified separator. + +### Array#len + +Signature: + +``` +function len(); +``` + +Returns the number of elements contained in the array. + +### Array#map + +Signature: + +``` +function map(func); +``` + +Calls `func(element)` for each of the elements in the array and returns +a new array containing the return values of these function calls. 
+ +### Array#reduce + +Signature: + +``` +function reduce(func); +``` + +Reduces the elements of the array into a single value by calling the provided +function `func` as `func(a, b)` repeatedly where `a` and `b` are elements of the array +or results from previous function calls. + +### Array#remove + +Signature: + +``` +function remove(index); +``` + +Removes the element at the specified zero-based index. + +### Array#reverse + +Signature: + +``` +function reverse(); +``` + +Returns a new array with all elements of the current array in reverse order. + +### Array#set + +Signature: + +``` +function set(index, value); +``` + +Sets the element at the zero-based index to the specified value. The `index` must refer to an element +which already exists in the array. + +### Array#shallow_clone + +``` +function shallow_clone(); +``` + +Returns a copy of the array. Note that for elements which are reference values (e.g. objects such +as arrays and dictionaries) only the references are copied. + +### Array#sort + +Signature: + +``` +function sort(less_cmp); +``` + +Returns a copy of the array where all items are sorted. The items are +compared using the `<` (less-than) operator. A custom comparator function +can be specified with the `less_cmp` argument. + +### Array#unique + +Signature: + +``` +function unique(); +``` + +Returns a copy of the array with all duplicate elements removed. The original order +of the array is not preserved. + +## Boolean type + +### Boolean#to_string + +Signature: + +``` +function to_string(); +``` + +The `to_string` method returns a string representation of the boolean value. + +Example: + +``` +var example = true + example.to_string() /* Returns "true" */ +``` + +## DateTime type + +Inherits methods from the [Object type](18-library-reference.md#object-type). 
+ +### DateTime constructor + +Signature: + +``` +function DateTime() +function DateTime(unixTimestamp) +function DateTime(year, month, day) +function DateTime(year, month, day, hours, minutes, seconds) +``` + +Constructs a new DateTime object. When no arguments are specified for the constructor a new +DateTime object representing the current time is created. + +Example: + +``` +var d1 = DateTime() /* current time */ +var d2 = DateTime(2016, 5, 21) /* midnight May 21st, 2016 (local time) */ +``` + +### DateTime arithmetic + +Subtracting two DateTime objects yields the interval between them, in seconds. + +Example: + +``` +var delta = DateTime() - DateTime(2016, 5, 21) /* seconds since midnight May 21st, 2016 */ +``` + +Subtracting a number from a DateTime object yields a new DateTime object that is further in the past: + +Example: + +``` +var dt = DateTime() - 2 * 60 * 60 /* Current time minus 2 hours */ +``` + +Adding a number to a DateTime object yields a new DateTime object that is in the future: + +Example: + +``` +var dt = DateTime() + 24 * 60 * 60 /* Current time plus 24 hours */ +``` + +### DateTime#format + +Signature: + +``` +function format(fmt) +``` + +Returns a string representation for the DateTime object using the specified format string. +The format string may contain format conversion placeholders as specified in strftime(3). + +Example: + +``` +var s = DateTime(2016, 4, 21).format("%A") /* Sets s to "Thursday". */ +``` + +### DateTime#to_string + +Signature: + +``` +function to_string() +``` + +Returns a string representation for the DateTime object. Uses a suitable default format. + +Example: + +``` +var s = DateTime(2016, 4, 21).to_string() /* Sets s to "2016-04-21 00:00:00 +0200". */ +``` + +## Dictionary type + +Inherits methods from the [Object type](18-library-reference.md#object-type). + +### Dictionary#clear + +Signature: + +``` +function clear(); +``` + +Removes all items from the dictionary. 
+ +### Dictionary#contains + +Signature: + +``` +function contains(key); +``` + +Returns true if a dictionary item with the specified `key` exists, false otherwise. + +### Dictionary#freeze + +Signature: + +``` +function freeze() +``` + +Disallows further modifications to this dictionary. Trying to modify the dictionary will result in an exception. + +### Dictionary#get + +Signature: + +``` +function get(key); +``` + +Retrieves the value for the specified `key`. Returns `null` if the `key` does not exist +in the dictionary. + +### Dictionary#keys + +Signature: + +``` +function keys(); +``` + +Returns a list of keys for all items that are currently in the dictionary. + +### Dictionary#len + +Signature: + +``` +function len(); +``` + +Returns the number of items contained in the dictionary. + +### Dictionary#remove + +Signature: + +``` +function remove(key); +``` + +Removes the item with the specified `key`. Trying to remove an item which does not exist +is a no-op. + +### Dictionary#set + +Signature: + +``` +function set(key, value); +``` + +Creates or updates an item with the specified `key` and `value`. + +### Dictionary#shallow_clone + +Signature: + +``` +function shallow_clone(); +``` + +Returns a copy of the dictionary. Note that for elements which are reference values (e.g. objects such +as arrays and dictionaries) only the references are copied. + +### Dictionary#values + +Signature: + +``` +function values(); +``` + +Returns a list of values for all items that are currently in the dictionary. + +## Function type + +Inherits methods from the [Object type](18-library-reference.md#object-type). + +### Function#call + +Signature: + +``` +function call(thisArg, ...); +``` + +Invokes the function using an alternative `this` scope. The `thisArg` argument specifies the `this` +scope for the function. All other arguments are passed directly to the function. 
+ +Example: + +``` +function set_x(val) { + this.x = val +} + +dict = {} + +set_x.call(dict, 7) /* Invokes set_x using `dict` as `this` */ +``` + +### Function#callv + +Signature: + +``` +function callv(thisArg, args); +``` + +Invokes the function using an alternative `this` scope. The `thisArg` argument specifies the `this` +scope for the function. The items in the `args` array are passed to the function as individual arguments. + +Example: + +``` +function set_x(val) { + this.x = val +} + +var dict = {} + +var args = [ 7 ] + +set_x.callv(dict, args) /* Invokes set_x using `dict` as `this` */ +``` + +## Number type + +### Number#to_string + +Signature: + +``` +function to_string(); +``` + +The `to_string` method returns a string representation of the number. + +Example: + +``` +var example = 7 + example.to_string() /* Returns "7" */ +``` + +## Object type + +This is the base type for all types in the Icinga application. + +### Object#clone + +Signature: + +``` + function clone(); +``` + +Returns a copy of the object. Note that for object elements which are +reference values (e.g. objects such as arrays or dictionaries) the entire +object is recursively copied. + +### Object#to_string + +Signature: + +``` +function to_string(); +``` + +Returns a string representation for the object. Unless overridden this returns a string +of the format "Object of type '`<typename>`'" where `<typename>` is the name of the +object's type. + +Example: + +``` +[ 3, true ].to_string() /* Returns "[ 3.000000, true ]" */ +``` + +### Object#type + +Signature: + +String type; + +Returns the object's type name. This attribute is read-only. + +Example: + +``` +get_host("localhost").type /* Returns "Host" */ +``` + +## String type + +### String#contains + +Signature: + +``` +function contains(str); +``` + +Returns `true` if the string `str` was found in the string. If the string +was not found, `false` is returned. Use [find](18-library-reference.md#string-find) +for getting the index instead. 
+ +Example: + +``` +"Hello World".contains("World") /* Returns true */ +``` + +### String#find + +Signature: + +``` +function find(str, start); +``` + +Returns the zero-based index at which the string `str` was found in the string. If the string +was not found, -1 is returned. `start` specifies the zero-based index at which `find` should +start looking for the string (defaults to 0 when not specified). + +Example: + +``` +"Hello World".find("World") /* Returns 6 */ +``` + +### String#len + +Signature: + +``` +function len(); +``` + +Returns the length of the string in bytes. Note that depending on the encoding type of the string +this is not necessarily the number of characters. + +Example: + +``` +"Hello World".len() /* Returns 11 */ +``` + +### String#lower + +Signature: + +``` +function lower(); +``` + +Returns a copy of the string with all of its characters converted to lower-case. + +Example: + +``` +"Hello World".lower() /* Returns "hello world" */ +``` + +### String#replace + +Signature: + +``` +function replace(search, replacement); +``` + +Returns a copy of the string with all occurrences of the string specified in `search` replaced +with the string specified in `replacement`. + +### String#reverse + +Signature: + +``` +function reverse(); +``` + +Returns a copy of the string in reverse order. + +### String#split + +Signature: + +``` +function split(delimiters); +``` + +Splits a string into individual parts and returns them as an array. The `delimiters` argument +specifies the characters which should be used as delimiters between parts. + +Example: + +``` +"x-7,y".split("-,") /* Returns [ "x", "7", "y" ] */ +``` + +### String#substr + +Signature: + +``` +function substr(start, len); +``` + +Returns a part of a string. The `start` argument specifies the zero-based index at which the part begins. +The optional `len` argument specifies the length of the part ("until the end of the string" if omitted). 
+ +Example: + +``` +"Hello World".substr(6) /* Returns "World" */ +``` + +### String#to_string + +Signature: + +``` +function to_string(); +``` + +Returns a copy of the string. + +### String#trim + +Signature: + +``` +function trim(); +``` + +Removes trailing whitespaces and returns the string. + +### String#upper + +Signature: + +``` +function upper(); +``` + +Returns a copy of the string with all of its characters converted to upper-case. + +Example: + +``` +"Hello World".upper() /* Returns "HELLO WORLD" */ +``` + +## Type type + +Inherits methods from the [Object type](18-library-reference.md#object-type). + +The `Type` type provides information about the underlying type of an object or scalar value. + +All types are registered as global variables. For example, in order to obtain a reference to the `String` type the global variable `String` can be used. + +### Type#base + +Signature: + +``` +Type base; +``` + +Returns a reference to the type's base type. This attribute is read-only. + +Example: + +``` +Dictionary.base == Object /* Returns true, because the Dictionary type inherits directly from the Object type. */ +``` + +### Type#name + +Signature: + +``` +String name; +``` + +Returns the name of the type. + +### Type#prototype + +Signature: + +``` +Object prototype; +``` + +Returns the prototype object for the type. When an attribute is accessed on an object that doesn't exist the prototype object is checked to see if an attribute with the requested name exists. If it does, the attribute's value is returned. + +The prototype functionality is used to implement methods. + +Example: + +``` +3.to_string() /* Even though '3' does not have a to_string property the Number type's prototype object does. 
*/ +``` diff --git a/doc/19-technical-concepts.md b/doc/19-technical-concepts.md new file mode 100644 index 0000000..0fb5895 --- /dev/null +++ b/doc/19-technical-concepts.md @@ -0,0 +1,2217 @@ +# Technical Concepts + +This chapter provides technical concepts and design insights +into specific Icinga 2 components such as: + +* [Application](19-technical-concepts.md#technical-concepts-application) +* [Configuration](19-technical-concepts.md#technical-concepts-configuration) +* [Features](19-technical-concepts.md#technical-concepts-features) +* [Check Scheduler](19-technical-concepts.md#technical-concepts-check-scheduler) +* [Checks](19-technical-concepts.md#technical-concepts-checks) +* [Cluster](19-technical-concepts.md#technical-concepts-cluster) +* [TLS Network IO](19-technical-concepts.md#technical-concepts-tls-network-io) + +## Application + +### CLI Commands + +The Icinga 2 application is managed with different CLI sub commands. +`daemon` takes care about loading the configuration files, running the +application as daemon, etc. +Other sub commands allow to enable features, generate and request +TLS certificates or enter the debug console. + +The main entry point for each CLI command parses the command line +parameters and then triggers the required actions. + +### daemon CLI command + +This CLI command loads the configuration files, starting with `icinga2.conf`. +The [configuration compiler](19-technical-concepts.md#technical-concepts-configuration) parses the +file and detects additional file includes, constants, and any other DSL +specific declaration. + +At this stage, the configuration will already be checked against the +defined grammar in the scanner, and custom object validators will also be +checked. + +If the user provided `-C/--validate`, the CLI command returns with the +validation exit code. + +When running as daemon, additional parameters are checked, e.g. 
whether +this application was triggered by a reload, needs to daemonize with fork() +involved and update the object's authority. The latter is important for +HA-enabled cluster zones. + +## Configuration + +### Lexer + +The lexer stage does not understand the DSL itself, it only +maps specific character sequences into identifiers. + +This allows Icinga to detect the beginning of a string with `"`, +reading the following characters and determining the end of the +string with again `"`. + +Other parts covered by the lexer are escape sequences inside a string, +e.g. `"\"abc"`. + +The lexer also identifies logical operators, e.g. `&` or `in`, +specific keywords like `object`, `import`, etc. and comment blocks. + +Please check `lib/config/config_lexer.ll` for details. + +Icinga uses [Flex](https://github.com/westes/flex) in the first stage. + +> Flex (The Fast Lexical Analyzer) +> +> Flex is a fast lexical analyser generator. It is a tool for generating programs +> that perform pattern-matching on text. Flex is a free (but non-GNU) implementation +> of the original Unix lex program. + +### Parser + +The parser stage puts the identifiers from the lexer into more +context with flow control and sequences. + +The following comparison is parsed into a left term, an operator +and a right term. + +``` +x > 5 +``` + +The DSL contains many elements which require a specific order, +and sometimes only a left term for example. + +The parser also takes care of parsing an object declaration for +example. It already knows from the lexer that `object` marks the +beginning of an object. It then expects a type string afterwards, +and the object name - which can be either a string with double quotes +or a previously defined constant. + +An opening bracket `{` in this specific context starts the object +scope, which also is stored for later scope specific variable access. + +If there's an apply rule defined, this follows the same principle. 
+The config parser detects the scope of an apply rule and generates +Icinga 2 C++ code for the parsed string tokens. + +``` +assign where host.vars.sla == "24x7" +``` + +is parsed into an assign token identifier, and the string expression +is compiled into a new `ApplyExpression` object. + +The flow control inside the parser ensures that for example `ignore where` +can only be defined when a previous `assign where` was given - or when +inside an apply for rule. + +Another example are specific object types which allow assign expression, +specifically group objects. Others objects must throw a configuration error. + +Please check `lib/config/config_parser.yy` for more details, +and the [language reference](17-language-reference.md#language-reference) chapter for +documented DSL keywords and sequences. + +> Icinga uses [Bison](https://en.wikipedia.org/wiki/GNU_bison) as parser generator +> which reads a specification of a context-free language, warns about any parsing +> ambiguities, and generates a parser in C++ which reads sequences of tokens and +> decides whether the sequence conforms to the syntax specified by the grammar. + + +### Compiler + +The config compiler initializes the scanner inside the [lexer](19-technical-concepts.md#technical-concepts-configuration-lexer) +stage. + +The configuration files are parsed into memory from inside the [daemon CLI command](19-technical-concepts.md#technical-concepts-application-cli-commands-daemon) +which invokes the config validation in `ValidateConfigFiles()`. This compiles the +files into an AST expression which is executed. + +At this stage, the expressions generate so-called "config items" which +are a pre-stage of the later compiled object. + +`ConfigItem::CommitItems` takes care of committing the items, and doing a +rollback on failure. It also checks against matching apply rules from the previous run +and generates statistics about the objects which can be seen by the config validation. 
+ +`ConfigItem::CommitNewItems` collects the registered types and items, +and checks for a specific required order, e.g. a service object needs +a host object first. + +The following stages happen then: + +- **Commit**: A workqueue then commits the items in a parallel fashion for this specific type. The object gets its name, and the AST expression is executed. It is then registered into the item into `m_Object` as reference. +- **OnAllConfigLoaded**: Special signal for each object to pre-load required object attributes, resolve group membership, initialize functions and timers. +- **CreateChildObjects**: Run apply rules for this specific type. +- **CommitNewItems**: Apply rules may generate new config items, this is to ensure that they again run through the stages. + +Note that the items are now committed and the configuration is validated and loaded +into memory. The final config objects are not yet activated though. + +This only happens after the validation, when the application is about to be run +with `ConfigItem::ActivateItems`. + +Each item has an object created in `m_Object` which is checked in a loop. +Again, the dependency order of activated objects is important here, e.g. logger features come first, then +config objects and last the checker, api, etc. features. This is done by sorting the objects +based on their type specific activation priority. + +The following signals are triggered in the stages: + +- **PreActivate**: Setting the `active` flag for the config object. +- **Activate**: Calls `Start()` on the object, sets the local HA authority and notifies subscribers that this object is now activated (e.g. for config updates in the DB backend). + + +### References + +* [The Icinga Config Compiler: An Overview](https://www.netways.de/blog/2018/07/12/the-icinga-config-compiler-an-overview/) +* [A parser/lexer/compiler for the Leonardo language](https://github.com/EmilGedda/Leonardo) +* [I wrote a programming language. 
Here’s how you can, too.](https://medium.freecodecamp.org/the-programming-language-pipeline-91d3f449c919) +* [http://onoffswitch.net/building-a-custom-lexer/](http://onoffswitch.net/building-a-custom-lexer/) +* [Writing an Interpreter with Lex, Yacc, and Memphis](http://memphis.compilertools.net/interpreter.html) +* [Flex](https://github.com/westes/flex) +* [GNU Bison](https://www.gnu.org/software/bison/) + +## Core + +### Core: Reload Handling + +The initial design of the reload state machine looks like this: + +* receive reload signal SIGHUP +* fork a child process, start configuration validation in parallel work queues +* parent process continues with old configuration objects and the event scheduling +(doing checks, replicating cluster events, triggering alert notifications, etc.) +* validation NOT ok: child process terminates, parent process continues with old configuration state +* validation ok: child process signals parent process to terminate and save its current state (all events until now) into the icinga2 state file +* parent process shuts down writing icinga2.state file +* child process waits for parent process gone, reads the icinga2 state file and synchronizes all historical and status data +* child becomes the new session leader + +Since Icinga 2.6, there are two processes when checked with `ps aux | grep icinga2` or `pidof icinga2`. +This was to ensure that feature file descriptors don't leak into the plugin process (e.g. DB IDO MySQL sockets). + +Icinga 2.9 changed the reload handling a bit with SIGUSR2 signals +and systemd notifies. + +With systemd, it could occur that the tree was broken thus resulting +in killing all remaining processes on stop, instead of a clean exit. +You can read the full story [here](https://github.com/Icinga/icinga2/issues/7309). 
+ +With 2.11 you'll now see 3 processes: + +- The umbrella process which takes care of signal handling and process spawning/stopping +- The main process with the check scheduler, notifications, etc. +- The execution helper process + +During reload, the umbrella process spawns a new reload process which validates the configuration. +Once successful, the new reload process signals the umbrella process that it is finished. +The umbrella process forwards the signal and tells the old main process to shut down. +The old main process writes the icinga2.state file. The umbrella process signals +the reload process that the main process terminated. + +The reload process was in idle wait before, and now continues to read the written +state file and run the event loop (checks, notifications, "events", ...). The reload +process itself also spawns the execution helper process again. + + +## Features + +Features are implemented in specific libraries and can be enabled +using CLI commands. + +Features either write specific data or receive data. + +Examples for writing data: [DB IDO](14-features.md#db-ido), [Graphite](14-features.md#graphite-carbon-cache-writer), [InfluxDB](14-features.md#influxdb-writer), [GELF](14-features.md#gelfwriter), etc. +Examples for receiving data: [REST API](12-icinga2-api.md#icinga2-api), etc. + +The implementation of features makes use of existing libraries +and functionality. This makes the code more abstract, but shorter +and easier to read. + +Features register callback functions on specific events they want +to handle. For example the `GraphiteWriter` feature subscribes to +new CheckResult events. + +Each time Icinga 2 receives and processes a new check result, this +event is triggered and forwarded to all subscribers. + +The GraphiteWriter feature calls the registered function and processes +the received data. Features which connect Icinga 2 to external interfaces +normally parse and reformat the received data into an applicable format.
+ +Since this check result signal is blocking, many of the features include a work queue +with asynchronous task handling. + +The GraphiteWriter uses a TCP socket to communicate with the carbon cache +daemon of Graphite. The InfluxDBWriter is instead writing bulk metric messages +to InfluxDB's HTTP API, similar to Elasticsearch. + + +## Check Scheduler + +The check scheduler starts a thread which loops forever. It waits for +check events being inserted into `m_IdleCheckables`. + +If the current pending check event number is larger than the configured +max concurrent checks, the thread waits until there are free slots again. + +In addition, further checks on enabled checks, check periods, etc. are +performed. Once all conditions have passed, the next check timestamp is +calculated and updated. This also is the timestamp where Icinga expects +a new check result ("freshness check"). + +The object is removed from idle checkables, and inserted into the +pending checkables list. This can be seen via REST API metrics for the +checker component feature as well. + +The actual check execution happens asynchronously using the application's +thread pool. + +Once the check returns, it is removed from pending checkables and again +inserted into idle checkables. This ensures that the scheduler takes this +checkable event into account in the next iteration. + +### Start + +When checkable objects get activated during the startup phase, +the checker feature registers a handler for this event. This is due +to the fact that the `checker` feature is fully optional, and e.g. not +used on command endpoint clients. + +Whenever such an object activation signal is triggered, Icinga 2 checks +whether it is [authoritative for this object](19-technical-concepts.md#technical-concepts-cluster-ha-object-authority). +This means that inside an HA enabled zone with two endpoints, only non-paused checkable objects are +actively inserted into the idle checkable list for the check scheduler.
+ +### Initial Check + +When a new checkable object (host or service) is initially added to the +configuration, Icinga 2 performs the following during startup: + +* `Checkable::Start()` is called and calculates the first check time +* With a spread delta, the next check time is actually set. + +If the next check should happen within a time frame of 60 seconds, +Icinga 2 calculates a delta from a random value. The minimum of `check_interval` +and 60 seconds is used as basis, multiplied with a random value between 0 and 1. + +In the best case, this check gets immediately executed after application start. +The worst case scenario is that the check is scheduled 60 seconds after start +the latest. + +The reasons for delaying and spreading checks during startup is that +the application typically needs more resources at this time (cluster connections, +feature warmup, initial syncs, etc.). Immediate check execution with +thousands of checks could lead into performance problems, and additional +events for each received check results. + +Therefore the initial check window is 60 seconds on application startup, +random seed for all checkables. This is not predictable over multiple restarts +for specific checkable objects, the delta changes every time. + +### Scheduling Offset + +There's a high chance that many checkable objects get executed at the same time +and interval after startup. The initial scheduling spreads that a little, but +Icinga 2 also attempts to ensure to keep fixed intervals, even with high check latency. + +During startup, Icinga 2 calculates the scheduling offset from a random number: + +* `Checkable::Checkable()` calls `SetSchedulingOffset()` with `Utility::Random()` +* The offset is a pseudo-random integral value between `0` and `RAND_MAX`. + +Whenever the next check time is updated with `Checkable::UpdateNextCheck()`, +the scheduling offset is taken into account. 
+ +Depending on the state type (SOFT or HARD), either the `retry_interval` or `check_interval` +is used. If the interval is greater than 1 second, the time adjustment is calculated in the +following way: + +`now * 100 + offset` divided by `interval * 100`, using the remainder (that's what `fmod()` is for) +and dividing this again onto base 100. + +Example: offset is 6500, interval 300, now is 1542190472. + +``` +1542190472 * 100 + 6500 = 154219053714 +300 * 100 = 30000 +154219053714 / 30000 = 5140635.1238 + +(5140635.1238 - 5140635.0) * 30000 = 3714 +3714 / 100 = 37.14 +``` + +37.14 seconds as an offset would be far too much, so this is again used as a calculation divider for the +real offset with the base of 5 times the actual interval. + +Again, the remainder is calculated from the offset and `interval * 5`. This is divided onto base 100 again, +with an additional 0.5 seconds delay. + +Example: offset is 6500, interval 300. + +``` +6500 / 300 = 21.666666666666667 +(21.666666666666667 - 21.0) * 300 = 200 +200 / 100 = 2 +2 + 0.5 = 2.5 +``` + +The minimum value between the first adjustment and the second offset calculation based on the interval is +taken, in the above example `2.5` wins. + +The actual next check time subtracts the adjusted time from the future interval addition to provide +a more widespread scheduling time among all checkable objects. + +`nextCheck = now - adj + interval` + +You may ask, what other values can happen with this offset calculation. Consider calculating more examples +with different interval settings. + +Example: offset is 34567, interval 60, now is 1542190472. + +``` +1542190472 * 100 + 34567 = 154219081767 +60 * 100 = 6000 +154219081767 / 6000 = 25703180.2945 +(25703180.2945 - 25703180.0) * 6000 / 100 = 17.67 + +34567 / 60 = 576.116666666666667 +(576.116666666666667 - 576.0) * 60 / 100 + 0.5 = 1.2 +``` + +`1m` interval starts at `now + 1.2s`. + +Example: offset is 12345, interval 86400, now is 1542190472.
+ +``` +1542190472 * 100 + 12345 = 154219059545 +86400 * 100 = 8640000 +154219059545 / 8640000 = 17849.428188078703704 +(17849.428188078703704 - 17849) * 8640000 = 3699545 +3699545 / 100 = 36995.45 + +12345 / 86400 = 0.142881944444444 +0.142881944444444 * 86400 / 100 + 0.5 = 123.95 +``` + +`1d` interval starts at `now + 2m4s`. + +> **Note** +> +> In case you have a better algorithm at hand, feel free to discuss this in a PR on GitHub. +> It needs to fulfill two things: 1) spread and shuffle execution times on each `next_check` update +> 2) not too narrowed window for both long and short intervals +> Application startup and initial checks need to be handled with care in a slightly different +> fashion. + +When `SetNextCheck()` is called, there are signals registered. One of them sits +inside the `CheckerComponent` class whose handler `CheckerComponent::NextCheckChangedHandler()` +deletes/inserts the next check event from the scheduling queue. This basically +is a list with multiple indexes with the keys for scheduling info and the object. + + +## Checks + +### Check Latency and Execution Time + +Each check command execution logs the start and end time where +Icinga 2 (and the end user) is able to calculate the plugin execution time from it. + +```cpp +GetExecutionEnd() - GetExecutionStart() +``` + +The higher the execution time, the higher the command timeout must be set. Furthermore +users and developers are encouraged to look into plugin optimizations to minimize the +execution time. Sometimes it is better to let an external daemon/script do the checks +and feed them back via REST API. + +Icinga 2 stores the scheduled start and end time for a check. If the actual +check execution time differs from the scheduled time, e.g. due to performance +problems or limited execution slots (concurrent checks), this value is stored +and computed from inside the check result. + +The difference between the two deltas is called `check latency`. 
+ +```cpp +(GetScheduleEnd() - GetScheduleStart()) - CalculateExecutionTime() +``` + +### Severity + +The severity attribute is introduced with Icinga v2.11 and provides +a bit mask calculated value from specific checkable object states. + +The severity value is pre-calculated for visualization interfaces +such as Icinga Web which sorts the problem dashboard by severity by default. + +The higher the severity number is, the more important the problem is. +However, the formula can change across Icinga 2 releases. + + +## Cluster + +This documentation refers to technical roles between cluster +endpoints. + +- The `server` or `parent` role accepts incoming connection attempts and handles requests +- The `client` role actively connects to remote endpoints receiving config/commands, requesting certificates, etc. + +A client role is not necessarily bound to the Icinga agent. +It may also be a satellite which actively connects to the +master. + +### Communication + +Icinga 2 uses its own certificate authority (CA) by default. The +public and private CA keys can be generated on the signing master. + +Each node certificate must be signed by the private CA key. + +Note: The following description uses `parent node` and `child node`. +This also applies to nodes in the same cluster zone. + +During the connection attempt, a TLS handshake is performed. +If the public certificate of a child node is not signed by the same +CA, the child node is not trusted and the connection will be closed. + +If the TLS handshake succeeds, the parent node reads the +certificate's common name (CN) of the child node and looks for +a local Endpoint object name configuration. + +If there is no Endpoint object found, further communication +(runtime and config sync, etc.) is terminated. + +The child node also checks the CN from the parent node's public +certificate. If the child node does not find any local Endpoint +object name configuration, it will not trust the parent node. 
+ +Both checks prevent accepting cluster messages from an untrusted +source endpoint. + +If an Endpoint match was found, there is one additional security +mechanism in place: Endpoints belong to a Zone hierarchy. + +Several cluster messages can only be sent "top down", others like +check results are allowed being sent from the child to the parent node. + +Once this check succeeds the cluster messages are exchanged and processed. + + +### CSR Signing + +In order to make things easier, Icinga 2 provides built-in methods +to allow child nodes to request a signed certificate from the +signing master. + +Icinga 2 v2.8 introduces the possibility to request certificates +from indirectly connected nodes. This is required for multi level +cluster environments with masters, satellites and agents. + +CSR Signing in general starts with the master setup. This step +ensures that the master is in a working CSR signing state with: + +* public and private CA key in `/var/lib/icinga2/ca` +* private `TicketSalt` constant defined inside the `api` feature +* Cluster communication is ready and Icinga 2 listens on port 5665 + +The child node setup which is run with CLI commands will now +attempt to connect to the parent node. This is not necessarily +the signing master instance, but could also be a parent satellite node. + +During this process the child node asks the user to verify the +parent node's public certificate to prevent MITM attacks. + +There are two methods to request signed certificates: + +* Add the ticket into the request. This ticket was generated on the master +beforehand and contains hashed details for which client it has been created. +The signing master uses this information to automatically sign the certificate +request. + +* Do not add a ticket into the request. It will be sent to the signing master +which stores the pending request. Manual user interaction with CLI commands +is necessary to sign the request. 
+ +The certificate request is sent as `pki::RequestCertificate` cluster +message to the parent node. + +If the parent node is not the signing master, it stores the request +in `/var/lib/icinga2/certificate-requests` and forwards the +cluster message to its parent node. + +Once the message arrives on the signing master, it first verifies that +the sent certificate request is valid. This is to prevent unwanted errors +or modified requests from the "proxy" node. + +After verification, the signing master checks if the request contains +a valid signing ticket. It hashes the certificate's common name and +compares the value to the received ticket number. + +If the ticket is valid, the certificate request is immediately signed +with the CA key. The request is sent back to the client inside a `pki::UpdateCertificate` +cluster message. + +If the child node was not the certificate request origin, it only updates +the cached request for the child node and sends another cluster message +down to its child node (e.g. from a satellite to an agent). + + +If no ticket was specified, the signing master waits until the +`ca sign` CLI command manually signed the certificate. + +> **Note** +> +> Push notifications for manual request signing are not yet implemented (TODO). + +Once the child node reconnects it synchronizes all signed certificate requests. +This takes some minutes and requires all nodes to reconnect to each other. + + +#### CSR Signing: Clients without parent connection + +There is an additional scenario: The setup on a child node does +not necessarily need a connection to the parent node. + +This mode leaves the node in a semi-configured state. You need +to manually copy the master's public CA key into `/var/lib/icinga2/certs/ca.crt` +on the client before starting Icinga 2. + +> **Note** +> +> The `client` in this case can be either a satellite or an agent. + +The parent node needs to actively connect to the child node.
+Once this connection succeeds, the child node will actively +request a signed certificate. + +The update procedure works the same way as above. + +### High Availability + +General high availability is automatically enabled between two endpoints in the same +cluster zone. + +**This requires the same configuration and enabled features on both nodes.** + +HA zone members trust each other and share event updates as cluster messages. +This includes for example check results, next check timestamp updates, acknowledgements +or notifications. + +This ensures that both nodes are synchronized. If one node goes away, the +remaining node takes over and continues as normal. + +#### High Availability: Object Authority + +Cluster nodes automatically determine the authority for configuration +objects. By default, all config objects are set to `HARunEverywhere` and +as such the object authority is true for any config object on any instance. + +Specific objects can override and influence this setting, e.g. with `HARunOnce` +instead prior to config object activation. + +This is done when the daemon starts and in a regular interval inside +the ApiListener class, specifically calling `ApiListener::UpdateObjectAuthority()`. + +The algorithm works like this: + +* Determine whether this instance is assigned to a local zone and endpoint. +* Collect all endpoints in this zone if they are connected. +* If there are two endpoints, but only us seeing ourselves and the application start is less than 60 seconds in the past, do nothing (wait for cluster reconnect to take place, grace period). +* Sort the collected endpoints by name. +* Iterate over all config types and their respective objects + * Ignore !active objects + * Ignore objects which are !HARunOnce. This means, they can run multiple times in a zone and don't need an authority update. + * If this instance doesn't have a local zone, set authority to true.
This is for non-clustered standalone environments where everything belongs to this instance. + * Calculate the object authority based on the connected endpoint names. + * Set the authority (true or false) + +The object authority calculation works "offline" without any message exchange. +Each instance calculates the SDBM hash of the config object name and takes it +modulo the number of connected endpoints. +This index is used to look up the corresponding endpoint in the connected endpoints array, +including the local endpoint. Whether the local endpoint is equal to the selected endpoint, +or not, this sets the authority to `true` or `false`. + +```cpp +authority = endpoints[Utility::SDBM(object->GetName()) % endpoints.size()] == my_endpoint; +``` + +`ConfigObject::SetAuthority(bool authority)` triggers the following events: + +* Authority is true and object now paused: Resume the object and set `paused` to `false`. +* Authority is false, object not paused: Pause the object and set `paused` to `true`. + +**This results in activated but paused objects on one endpoint.** You can verify +that by querying the `paused` attribute for all objects via REST API +or debug console on both endpoints. + +Endpoints inside a HA zone calculate the object authority independent from each other. +This object authority is important for selected features explained below. + +Since features are configuration objects too, you must ensure that all nodes +inside the HA zone share the same enabled features. If configured otherwise, +one might have a checker feature on the left node, nothing on the right node. +This leads to late check results because one half is not executed by the right +node which holds half of the object authorities. + +By default, features are enabled to "Run-Everywhere". Specific features which +support HA awareness, provide the `enable_ha` configuration attribute.
When `enable_ha` +is set to `true` (usually the default), "Run-Once" is set and the feature pauses on one side. + +``` +vim /etc/icinga2/features-enabled/graphite.conf + +object GraphiteWriter "graphite" { + ... + enable_ha = true +} +``` + +Once such a feature is paused, there won't be any more event handling, e.g. the Elasticsearch +feature won't process any checkresults nor write to the Elasticsearch REST API. + +When the cluster connection drops, the feature configuration object is updated with +the new object authority by the ApiListener timer and resumes its operation. You can see +that by grepping the log file for `resumed` and `paused`. + +``` +[2018-10-24 13:28:28 +0200] information/GraphiteWriter: 'g-ha' paused. +``` + +``` +[2018-10-24 13:28:28 +0200] information/GraphiteWriter: 'g-ha' resumed. +``` + +Specific features with HA capabilities are explained below. + +#### High Availability: Checker + +The `checker` feature only executes checks for `Checkable` objects (Host, Service) +where it is authoritative. + +That way each node only executes checks for a segment of the overall configuration objects. + +The cluster message routing ensures that all check results are synchronized +to nodes which are not authoritative for this configuration object. + + +#### High Availability: Notifications + +The `notification` feature only sends notifications for `Notification` objects +where it is authoritative. + +That way each node only executes notifications for a segment of all notification objects. + +Notified users and other event details are synchronized throughout the cluster. +This is required if for example the DB IDO feature is active on the other node. + +#### High Availability: DB IDO + +If you don't have HA enabled for the IDO feature, both nodes will +write their status and historical data to their own separate database +backends. 
+ +In order to avoid data separation and a split view (each node would require its +own Icinga Web 2 installation on top), the high availability option was added +to the DB IDO feature. This is enabled by default with the `enable_ha` setting. + +This requires a central database backend. Best practice is to use a MySQL cluster +with a virtual IP. + +Both Icinga 2 nodes require the connection and credential details configured in +their DB IDO feature. + +During startup Icinga 2 calculates whether the feature configuration object +is authoritative on this node or not. The order is an alpha-numeric +comparison, e.g. if you have `master1` and `master2`, Icinga 2 will enable +the DB IDO feature on `master2` by default. + +If the connection between endpoints drops, the object authority is re-calculated. + +In order to prevent data duplication in a split-brain scenario where both +nodes would write into the same database, there is another safety mechanism +in place. + +The split-brain decision which node will write to the database is calculated +from a quorum inside the `programstatus` table. Each node +verifies whether the `endpoint_name` column is not itself on database connect. +In addition to that the DB IDO feature compares the `last_update_time` column +against the current timestamp plus the configured `failover_timeout` offset. + +That way only one active DB IDO feature writes to the database, even if they +are not currently connected in a cluster zone. This prevents data duplication +in historical tables. + +### Health Checks + +#### cluster-zone + +This built-in check provides the possibility to check for connectivity between +zones. + +If you for example need to know whether the `master` zone is connected and processing +messages with the child zone called `satellite` in this example, you can configure +the [cluster-zone](10-icinga-template-library.md#itl-icinga-cluster-zone) check as new service on all `master` zone hosts. 
+ +``` +vim /etc/icinga2/zones.d/master/host1.conf + +object Service "cluster-zone-satellite" { + check_command = "cluster-zone" + host_name = "host1" + + vars.cluster_zone = "satellite" +} +``` + +The check itself changes to NOT-OK if one or more child endpoints in the child zone +are not connected to parent zone endpoints. + +In addition to the overall connectivity check, the log lag is calculated based +on the to-be-sent replay log. Each instance stores that for its configured endpoint +objects. + +This health check iterates over the target zone (`cluster_zone`) and its endpoints. + +The log lag is greater than zero if + +* the replay log synchronization is in progress and not yet finished or +* the endpoint is not connected, and no replay log sync happened (obviously). + +The final log lag value is the worst value detected. If satellite1 has a log lag of +`1.5` and satellite2 only has `0.5`, the computed value will be `1.5`. + +You can control the check state by using optional warning and critical thresholds +for the log lag value. + +If this service exists multiple times, e.g. for each master host object, the log lag +may differ based on the execution time. This happens for example on restart of +an instance when the log replay is in progress and a health check is executed at different +times. +If the endpoint is not connected, both master instances may have saved a different log replay +position from the last synchronisation. + +The lag value is returned as performance metric key `slave_lag`. + +Icinga 2 v2.9+ adds more performance metrics for these values: + +* `last_messages_sent` and `last_messages_received` as UNIX timestamp +* `sum_messages_sent_per_second` and `sum_messages_received_per_second` +* `sum_bytes_sent_per_second` and `sum_bytes_received_per_second` + + +### Config Sync + +The visible feature for the user is to put configuration files in `/etc/icinga2/zones.d/` +and have them synced automatically to all involved zones and endpoints.
+ +This not only includes host and service objects being checked +in a satellite zone, but also additional config objects such as +commands, groups, timeperiods and also templates. + +Additional thoughts and complexity added: + +- Putting files into zone directory names removes the burden to set the `zone` attribute on each object in this directory. This is done automatically by the config compiler. +- Inclusion of `zones.d` happens automatically, the user shouldn't be bothered about this. +- Before the REST API was created, only static configuration files in `/etc/icinga2/zones.d` existed. With the addition of config packages, additional `zones.d` targets must be registered (e.g. used by the Director) +- Only one config master is allowed. This one identifies itself with configuration files in `/etc/icinga2/zones.d`. This is not necessarily the zone master seen in the debug logs, that one is important for message routing internally. +- Objects and templates which cannot be bound into a specific zone (e.g. hosts in the satellite zone) must be made available "globally". +- Users must be able to deny the synchronisation of specific zones, e.g. for security reasons. + +#### Config Sync: Config Master + +All zones must be configured and included in the `zones.conf` config file beforehand. +The zone names are the identifier for the directories underneath the `/etc/icinga2/zones.d` +directory. If a zone is not configured, it will not be included in the config sync - keep this +in mind for troubleshooting. + +When the config master starts, the content of `/etc/icinga2/zones.d` is automatically +included. There's no need for an additional entry in `icinga2.conf` like `conf.d`. 
+You can verify this by running the config validation on debug level: + +``` +icinga2 daemon -C -x debug | grep 'zones.d' + +[2019-06-19 15:16:19 +0200] notice/ConfigCompiler: Compiling config file: /etc/icinga2/zones.d/global-templates/commands.conf +``` + +Once the config validation succeeds, the startup routine for the daemon +copies the files into the "production" directory in `/var/lib/icinga2/api/zones`. +This directory is used for all endpoints where Icinga stores the received configuration. +With the exception of the config master retrieving this from `/etc/icinga2/zones.d` instead. + +These operations are logged for better visibility. + +``` +[2019-06-19 15:26:38 +0200] information/ApiListener: Copying 1 zone configuration files for zone 'global-templates' to '/var/lib/icinga2/api/zones/global-templates'. +[2019-06-19 15:26:38 +0200] information/ApiListener: Updating configuration file: /var/lib/icinga2/api/zones/global-templates//_etc/commands.conf +``` + +The master is finished at this point. Depending on the cluster configuration, +the next iteration is a connected endpoint after successful TLS handshake and certificate +authentication. + +It calls `SendConfigUpdate(client)` which sends the [config::Update](19-technical-concepts.md#technical-concepts-json-rpc-messages-config-update) +JSON-RPC message including all required zones and their configuration file content. + + +#### Config Sync: Receive Config + +The secondary master endpoint and endpoints in a child zone will be connected to the config +master. The endpoint receives the [config::Update](19-technical-concepts.md#technical-concepts-json-rpc-messages-config-update) +JSON-RPC message and processes the content in `ConfigUpdateHandler()`. This method checks +whether config should be accepted. In addition to that, it locks a local mutex to avoid race conditions +with multiple syncs in parallel. + +After that, the received configuration content is analysed. 
+ +> **Note** +> +> The cluster design allows that satellite endpoints may connect to the secondary master first. +> There is no immediate need to always connect to the config master first, especially since +> the satellite endpoints don't know that. +> +> The secondary master not only stores the master zone config files, but also all child zones. +> This is also the case for any HA enabled zone with more than one endpoint. + + +2.11 puts the received configuration files into a staging directory in +`/var/lib/icinga2/api/zones-stage`. Previous versions directly wrote the +files into production which could have led to broken configuration on the +next manual restart. + +``` +[2019-06-19 16:08:29 +0200] information/ApiListener: New client connection for identity 'master1' to [127.0.0.1]:5665 +[2019-06-19 16:08:30 +0200] information/ApiListener: Applying config update from endpoint 'master1' of zone 'master'. +[2019-06-19 16:08:30 +0200] information/ApiListener: Received configuration for zone 'agent' from endpoint 'master1'. Comparing the checksums. +[2019-06-19 16:08:30 +0200] information/ApiListener: Stage: Updating received configuration file '/var/lib/icinga2/api/zones-stage/agent//_etc/host.conf' for zone 'agent'. +[2019-06-19 16:08:30 +0200] information/ApiListener: Applying configuration file update for path '/var/lib/icinga2/api/zones-stage/agent' (176 Bytes). +[2019-06-19 16:08:30 +0200] information/ApiListener: Received configuration for zone 'master' from endpoint 'master1'. Comparing the checksums. +[2019-06-19 16:08:30 +0200] information/ApiListener: Applying configuration file update for path '/var/lib/icinga2/api/zones-stage/master' (17 Bytes). +[2019-06-19 16:08:30 +0200] information/ApiListener: Received configuration from endpoint 'master1' is different to production, triggering validation and reload. +``` + +It then validates the received configuration in its own config stage. 
There is +a parameter override in place which disables the automatic inclusion of the production +config in `/var/lib/icinga2/api/zones`. + +Once completed, the reload is triggered. This follows the same configurable timeout +as with the global reload. + +``` +[2019-06-19 16:52:26 +0200] information/ApiListener: Config validation for stage '/var/lib/icinga2/api/zones-stage/' was OK, replacing into '/var/lib/icinga2/api/zones/' and triggering reload. +[2019-06-19 16:52:27 +0200] information/Application: Got reload command: Started new instance with PID '19945' (timeout is 300s). +[2019-06-19 16:52:28 +0200] information/Application: Reload requested, letting new process take over. +``` + +Whenever the staged configuration validation fails, Icinga logs this including a reference +to the startup log file which includes additional errors. + +``` +[2019-06-19 15:45:27 +0200] critical/ApiListener: Config validation failed for staged cluster config sync in '/var/lib/icinga2/api/zones-stage/'. Aborting. Logs: '/var/lib/icinga2/api/zones-stage//startup.log' +``` + + +#### Config Sync: Changes and Reload + +Whenever a new configuration is received, it is validated and upon success, the +daemon automatically reloads. While the daemon continues with checks, the reload +cannot hand over open TCP connections. That being said, reloading the daemon every time +a configuration is synchronized would lead to many disconnected endpoints. + +Therefore the cluster config sync checks whether the configuration files actually +changed, and will only trigger a reload when such a change happened. + +2.11 calculates a checksum from each file content and compares this to the +production configuration. Previous versions used additional metadata with timestamps from +files which sometimes led to problems with asynchronous dates. + +> **Note** +> +> For compatibility reasons, the timestamp metadata algorithm is still intact, e.g.
+> when the client is 2.11 already, but the parent endpoint is still on 2.10. + +Icinga logs a warning when this happens. + +``` +Received configuration update without checksums from parent endpoint satellite1. This behaviour is deprecated. Please upgrade the parent endpoint to 2.11+ +``` + + +The debug log provides more details on the actual checksums and checks. Future output +may change, use this solely for troubleshooting and debugging whenever the cluster +config sync fails. + +``` +[2019-06-19 16:13:16 +0200] information/ApiListener: Received configuration for zone 'agent' from endpoint 'master1'. Comparing the checksums. +[2019-06-19 16:13:16 +0200] debug/ApiListener: Checking for config change between stage and production. Old (3): '{"/.checksums":"7ede1276a9a32019c1412a52779804a976e163943e268ec4066e6b6ec4d15d73","/.timestamp":"ec4354b0eca455f7c2ca386fddf5b9ea810d826d402b3b6ac56ba63b55c2892c","/_etc/host.conf":"35d4823684d83a5ab0ca853c9a3aa8e592adfca66210762cdf2e54339ccf0a44"}' vs. new (3): '{"/.checksums":"84a586435d732327e2152e7c9b6d85a340cc917b89ae30972042f3dc344ea7cf","/.timestamp":"0fd6facf35e49ab1b2a161872fa7ad794564eba08624373d99d31c32a7a4c7d3","/_etc/host.conf":"0d62075e89be14088de1979644b40f33a8f185fcb4bb6ff1f7da2f63c7723fcb"}'. +[2019-06-19 16:13:16 +0200] debug/ApiListener: Checking /_etc/host.conf for checksum: 35d4823684d83a5ab0ca853c9a3aa8e592adfca66210762cdf2e54339ccf0a44 +[2019-06-19 16:13:16 +0200] debug/ApiListener: Path '/_etc/host.conf' doesn't match old checksum '0d62075e89be14088de1979644b40f33a8f185fcb4bb6ff1f7da2f63c7723fcb' with new checksum '35d4823684d83a5ab0ca853c9a3aa8e592adfca66210762cdf2e54339ccf0a44'. +``` + + +#### Config Sync: Trust + +The config sync follows the "top down" approach, where the master endpoint in the master +zone is allowed to synchronize configuration to the child zone, e.g. the satellite zone. + +Endpoints in the same zone, e.g. 
a secondary master, receive configuration for the same +zone and all child zones. + +Endpoints in the satellite zone trust the parent zone, and will accept the pushed +configuration via JSON-RPC cluster messages. By default, this is disabled and must +be enabled with the `accept_config` attribute in the ApiListener feature (manually or with CLI +helpers). + +The satellite zone will not only accept zone configuration for its own zone, but also +all configured child zones. That is why it is important to configure the zone hierarchy +on the satellite as well. + +Child zones are not allowed to sync configuration up to the parent zone. Each Icinga instance +evaluates this in startup and knows on endpoint connect which config zones need to be synced. + + +Global zones have a special trust relationship: They are synced to all child zones, be it +a satellite zone or agent zone. Since checkable objects such as a Host or a Service object +must have only one endpoint as authority, they cannot be put into a global zone (denied by +the config compiler). + +Apply rules and templates are allowed, since they are evaluated in the endpoint which received +the synced configuration. Keep in mind that there may be differences on the master and the satellite +when e.g. hostgroup membership is used for assign where expressions, but the groups are only +available on the master. + + +### Cluster: Message Routing + +One fundamental part of the cluster message routing is the MessageOrigin object. +This is created when a new JSON-RPC message is received in `JsonRpcConnection::MessageHandler()`. + +It contains + +- FromZone being extracted from the endpoint object which owns the JsonRpcConnection +- FromClient being the JsonRpcConnection bound to the endpoint object + +These attributes are checked in message receive api handlers for security access. E.g. whether a +message origin is from a child zone which is not allowed, etc. 
+This is explained in the [JSON-RPC messages](19-technical-concepts.md#technical-concepts-json-rpc-messages) chapter. + +Whenever such a message is processed on the client, it may trigger additional cluster events +which are sent back to other endpoints. Therefore it is key to always pass the MessageOrigin +`origin` when processing these messages locally. + +Example: + +- Client receives a CheckResult from another endpoint in the same zone, call it `sender` for now +- Calls ProcessCheckResult() to store the CR and calculate states, notifications, etc. +- Calls the OnNewCheckResult() signal to trigger IDO updates + +OnNewCheckResult() also calls a registered cluster handler which forwards the CheckResult to other cluster members. + +Without any origin details, this CheckResult would be relayed to the `sender` endpoint again. +Which processes the message, ProcessCheckResult(), OnNewCheckResult(), sends back and so on. + +That creates a loop which our cluster protocol needs to prevent at all cost. + +RelayMessageOne() takes care of the routing. This involves fetching the targetZone for this message and its endpoints. + +- Don't relay messages to ourselves. +- Don't relay messages to disconnected endpoints. +- Don't relay the message to the zone through more than one endpoint unless this is our own zone. +- Don't relay messages back to the endpoint which we got the message from. **THIS** +- Don't relay messages back to the zone which we got the message from. +- Only relay messages to the zone master if we're not currently the zone master. + +``` + e1 is zone master, e2 and e3 are zone members. + + Message is sent from e2 or e3: + !isMaster == true + targetEndpoint e1 is zone master -> send the message + targetEndpoint e3 is not zone master -> skip it, avoid routing loops + + Message is sent from e1: + !isMaster == false -> send the messages to e2 and e3 being the zone routing master.
+``` + +With passing the `origin` the following condition prevents sending a message back to sender: + +```cpp +if (origin && origin->FromClient && targetEndpoint == origin->FromClient->GetEndpoint()) { +``` + +This message then simply gets skipped for this specific Endpoint and is never sent. + +This analysis originates from a long-lasting [downtime loop bug](https://github.com/Icinga/icinga2/issues/7198). + +## TLS Network IO + +### TLS Connection Handling + +Icinga supports two connection directions, controlled via the `host` attribute +inside the Endpoint objects: + +* Outgoing connection attempts +* Incoming connection handling + +Once the connection is established, higher layers can exchange JSON-RPC and +HTTP messages. It doesn't matter which direction these messages go. + +This offers a big advantage over single direction connections, such as +polling via HTTP only. Also, connections are kept alive as long as data +is transmitted. + +When the master connects to the child zone member(s), this requires more +resources there. Keep this in mind when endpoints are not reachable, the +TCP timeout blocks other resources. Moving a satellite zone in the middle +between masters and agents helps to split the tasks - the master +processes and stores data, deploys configuration and serves the API. The +satellites schedule the checks, connect to the agents and receive +check results. + +Agents/Clients can also connect to the parent endpoints - be it a master or +a satellite. This is the preferred way out of a DMZ, and also reduces the +overhead with connecting to e.g. 2000 agents on the master. You can +benchmark this when TCP connections are broken and timeouts are encountered.
+ +#### Master Processes Incoming Connection + +* The node starts a new ApiListener, this invokes `AddListener()` + * Setup TLS Context (SslContext) + * Initialize global I/O engine and create a TCP acceptor + * Resolve bind host/port (optional) + * Listen on IPv4 and IPv6 + * Re-use socket address and port + * Listen on port 5665 with `INT_MAX` possible sockets +* Spawn a new Coroutine which listens for new incoming connections as 'TCP server' pattern + * Accept new connections asynchronously + * Spawn a new Coroutine which handles the new client connection in a different context, Role: Server + +#### Master Connects Outgoing + +* The node starts a timer in a 10 seconds interval with `ApiReconnectTimerHandler()` as callback + * Loop over all configured zones, exclude global zones and not direct parent/child zones + * Get the endpoints configured in the zones, exclude: local endpoint, no 'host' attribute, already connected or in progress + * Call `AddConnection()` +* Spawn a new Coroutine after making the TLS context + * Use the global I/O engine for socket I/O + * Create TLS stream + * Connect to endpoint host/port details + * Handle the client connection, Role: Client + +#### TLS Handshake + +* Create a TLS connection in sslConn and perform an asynchronous TLS handshake +* Get the peer certificate +* Verify the presented certificate: `ssl::verify_peer` and `ssl::verify_client_once` +* Get the certificate CN and compare it against the endpoint name - if not matching, return and close the connection + +#### Data Exchange + +Everything runs through TLS, we don't use any "raw" connections nor plain message handling. + +HTTP and JSON-RPC messages share the same port and API, so additional handling is required. + +On a new connection and successful TLS handshake, the first byte is read. This either +is a JSON-RPC message in Netstring format starting with a number, or plain HTTP. 
+ +``` +HTTP/1.1 + +2:{} +``` + +Depending on this, `ClientJsonRpc` or `ClientHttp` are assigned. + +JSON-RPC: + +* Create a new JsonRpcConnection object + * When the endpoint object is configured, spawn a Coroutine which takes care of syncing the client (file and runtime config, replay log, etc.) + * No endpoint treats this connection as anonymous client, with a configurable limit. This client may send a CSR signing request for example. + * Start the JsonRpcConnection - this spawns Coroutines to HandleIncomingMessages, WriteOutgoingMessages, HandleAndWriteHeartbeats and CheckLiveness + +HTTP: + +* Create a new HttpServerConnection + * Start the HttpServerConnection - this spawns Coroutines to ProcessMessages and CheckLiveness + + +All the mentioned Coroutines run asynchronously using the global I/O engine's context. +More details on this topic can be found in [this blogpost](https://www.netways.de/blog/2019/04/04/modern-c-programming-coroutines-with-boost/). + +The lower levels of context switching and sharing or event polling are +hidden in Boost ASIO, Beast, Coroutine and Context libraries. + +#### Data Exchange: Coroutines and I/O Engine + +Light-weight and fast operations such as connection handling or TLS handshakes +are performed in the default `IoBoundWorkSlot` pool inside the I/O engine. + +The I/O engine has another pool available: `CpuBoundWork`. + +This is used for processing CPU intensive tasks, such as handling a HTTP request. +Depending on the available CPU cores, this is limited to `std::thread::hardware_concurrency() * 3u / 2u`. + +``` +1 core * 3 / 2 = 1 +2 cores * 3 / 2 = 3 +8 cores * 3 / 2 = 12 +16 cores * 3 / 2 = 24 +``` + +The I/O engine itself is used with all network I/O in Icinga, not only the cluster +and the REST API. Features such as Graphite, InfluxDB, etc. also consume its functionality. + +There are 2 * CPU cores threads available which run the event loop +in the I/O engine. 
This polls the I/O service with `m_IoService.run();` +and triggers an asynchronous event progress for waiting coroutines. + + + +## JSON-RPC Message API + +**The JSON-RPC message API is not a public API for end users.** In case you want +to interact with Icinga, use the [REST API](12-icinga2-api.md#icinga2-api). + +This section describes the internal cluster messages exchanged between endpoints. + +> **Tip** +> +> Debug builds with `icinga2 daemon -DInternal.DebugJsonRpc=1` unveils the JSON-RPC messages. + +### Registered Handler Functions + +Functions by example: + +Event Sender: `Checkable::OnNewCheckResult` + +``` +On.connect(&xyzHandler) +``` + +Event Receiver (Client): `CheckResultAPIHandler` in `REGISTER_APIFUNCTION` + +``` +APIHandler() +``` + +### Messages + +#### icinga::Hello + +> Location: `apilistener.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | icinga::Hello +params | Dictionary + +##### Params + +Key | Type | Description +---------------------|-------------|------------------ +capabilities | Number | Bitmask, see `lib/remote/apilistener.hpp`. +version | Number | Icinga 2 version, e.g. 21300 for v2.13.0. + +##### Functions + +Event Sender: When a new client connects in `NewClientHandlerInternal()`. +Event Receiver: `HelloAPIHandler` + +##### Permissions + +None, this is a required message. + +#### event::Heartbeat + +> Location: `jsonrpcconnection-heartbeat.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::Heartbeat +params | Dictionary + +##### Params + +Key | Type | Description +----------|---------------|------------------ +timeout | Number | Heartbeat timeout, sender sets 120s. + + +##### Functions + +Event Sender: `JsonRpcConnection::HeartbeatTimerHandler` +Event Receiver: `HeartbeatAPIHandler` + +Both sender and receiver exchange this heartbeat message. 
If the sender detects +that a client endpoint hasn't sent anything in the updated timeout span, it disconnects +the client. This is to avoid stale connections with no message processing. + +##### Permissions + +None, this is a required message. + +#### event::CheckResult + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::CheckResult +params | Dictionary + +##### Params + +Key | Type | Description +----------|---------------|------------------ +host | String | Host name +service | String | Service name +cr | Serialized CR | Check result + +##### Functions + +Event Sender: `Checkable::OnNewCheckResult` +Event Receiver: `CheckResultAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Hosts/services do not exist +* Origin is a remote command endpoint different to the configured, and whose zone is not allowed to access this checkable. + +#### event::SetNextCheck + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetNextCheck +params | Dictionary + +##### Params + +Key | Type | Description +------------|---------------|------------------ +host | String | Host name +service | String | Service name +next\_check | Timestamp | Next scheduled time as UNIX timestamp. + +##### Functions + +Event Sender: `Checkable::OnNextCheckChanged` +Event Receiver: `NextCheckChangedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable. 
+ +#### event::SetLastCheckStarted + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetLastCheckStarted +params | Dictionary + +##### Params + +Key | Type | Description +---------------------|-----------|------------------ +host | String | Host name +service | String | Service name +last\_check\_started | Timestamp | Last check's start time as UNIX timestamp. + +##### Functions + +Event Sender: `Checkable::OnLastCheckStartedChanged` +Event Receiver: `LastCheckStartedChangedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable. + +#### event::SetStateBeforeSuppression + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------------------------------- +jsonrpc | 2.0 +method | event::SetStateBeforeSuppression +params | Dictionary + +##### Params + +Key | Type | Description +---------------------------|--------|----------------------------------------------- +host | String | Host name +service | String | Service name +state\_before\_suppression | Number | Checkable state before the current suppression + +##### Functions + +Event Sender: `Checkable::OnStateBeforeSuppressionChanged` +Event Receiver: `StateBeforeSuppressionChangedAPIHandler` + +Used to sync the checkable state from before a notification suppression (for example +because the checkable is in a downtime) started within the same HA zone. + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint is not within the local zone. 
+ +#### event::SetSuppressedNotifications + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetSuppressedNotifications +params | Dictionary + +##### Params + +Key | Type | Description +-------------------------|---------------|------------------ +host | String | Host name +service | String | Service name +supressed\_notifications | Number | Bitmask for suppressed notifications. + +##### Functions + +Event Sender: `Checkable::OnSuppressedNotificationsChanged` +Event Receiver: `SuppressedNotificationsChangedAPIHandler` + +Used to sync the notification state of a host or service object within the same HA zone. + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint is not within the local zone. + +#### event::SetSuppressedNotificationTypes + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetSuppressedNotificationTypes +params | Dictionary + +##### Params + +Key | Type | Description +-------------------------|--------|------------------ +notification | String | Notification name +supressed\_notifications | Number | Bitmask for suppressed notifications. + +Used to sync the state of a notification object within the same HA zone. + +##### Functions + +Event Sender: `Notification::OnSuppressedNotificationsChanged` +Event Receiver: `SuppressedNotificationTypesChangedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Notification does not exist. +* Origin endpoint is not within the local zone. 
+ + +#### event::SetNextNotification + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetNextNotification +params | Dictionary + +##### Params + +Key | Type | Description +-------------------|---------------|------------------ +host | String | Host name +service | String | Service name +notification | String | Notification name +next\_notification | Timestamp | Next scheduled notification time as UNIX timestamp. + +##### Functions + +Event Sender: `Notification::OnNextNotificationChanged` +Event Receiver: `NextNotificationChangedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Notification does not exist. +* Origin endpoint's zone is not allowed to access this checkable. + +#### event::SetForceNextCheck + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetForceNextCheck +params | Dictionary + +##### Params + +Key | Type | Description +----------|---------------|------------------ +host | String | Host name +service | String | Service name +forced | Boolean | Forced next check (execute now) + +##### Functions + +Event Sender: `Checkable::OnForceNextCheckChanged` +Event Receiver: `ForceNextCheckChangedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable. 
+ +#### event::SetForceNextNotification + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetForceNextNotification +params | Dictionary + +##### Params + +Key | Type | Description +----------|---------------|------------------ +host | String | Host name +service | String | Service name +forced | Boolean | Forced next notification (execute now) + +##### Functions + +Event Sender: `Checkable::SetForceNextNotification` +Event Receiver: `ForceNextNotificationChangedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable. + +#### event::SetAcknowledgement + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetAcknowledgement +params | Dictionary + +##### Params + +Key | Type | Description +-----------|---------------|------------------ +host | String | Host name +service | String | Service name +author | String | Acknowledgement author name. +comment | String | Acknowledgement comment content. +acktype | Number | Acknowledgement type (0=None, 1=Normal, 2=Sticky) +notify | Boolean | Notification should be sent. +persistent | Boolean | Whether the comment is persistent. +expiry | Timestamp | Optional expire time as UNIX timestamp. + +##### Functions + +Event Sender: `Checkable::OnAcknowledgementSet` +Event Receiver: `AcknowledgementSetAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable.
+ +#### event::ClearAcknowledgement + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::ClearAcknowledgement +params | Dictionary + +##### Params + +Key | Type | Description +----------|---------------|------------------ +host | String | Host name +service | String | Service name + +##### Functions + +Event Sender: `Checkable::OnAcknowledgementCleared` +Event Receiver: `AcknowledgementClearedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable. + +#### event::SendNotifications + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SendNotifications +params | Dictionary + +##### Params + +Key | Type | Description +----------|---------------|------------------ +host | String | Host name +service | String | Service name +cr | Serialized CR | Check result +type | Number | enum NotificationType, same as `types` for notification objects. +author | String | Author name +text | String | Notification text + +##### Functions + +Event Sender: `Checkable::OnNotificationsRequested` +Event Receiver: `SendNotificationsAPIHandler` + +Signals that notifications have to be sent within the same HA zone. This is relevant if the checkable and its +notifications are active on different endpoints. + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint is not within the local zone. 
+ +#### event::NotificationSentUser + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::NotificationSentUser +params | Dictionary + +##### Params + +Key | Type | Description +--------------|-----------------|------------------ +host | String | Host name +service | String | Service name +notification | String | Notification name. +user | String | Notified user name. +type | Number | enum NotificationType, same as `types` in Notification objects. +cr | Serialized CR | Check result. +author | String | Notification author (for specific types) +text | String | Notification text (for specific types) +command | String | Notification command name. + +##### Functions + +Event Sender: `Checkable::OnNotificationSentToUser` +Event Receiver: `NotificationSentUserAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone the same as the receiver. This binds notification messages to the HA zone. + +#### event::NotificationSentToAllUsers + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::NotificationSentToAllUsers +params | Dictionary + +##### Params + +Key | Type | Description +----------------------------|-----------------|------------------ +host | String | Host name +service | String | Service name +notification | String | Notification name. +users | Array of String | Notified user names. +type | Number | enum NotificationType, same as `types` in Notification objects. +cr | Serialized CR | Check result. +author | String | Notification author (for specific types) +text | String | Notification text (for specific types) +last\_notification | Timestamp | Last notification time as UNIX timestamp. +next\_notification | Timestamp | Next scheduled notification time as UNIX timestamp. 
+notification\_number | Number | Current notification number in problem state. +last\_problem\_notification | Timestamp | Last problem notification time as UNIX timestamp. +no\_more\_notifications | Boolean | Whether to send future notifications when this notification becomes active on this HA node. + +##### Functions + +Event Sender: `Checkable::OnNotificationSentToAllUsers` +Event Receiver: `NotificationSentToAllUsersAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone the same as the receiver. This binds notification messages to the HA zone. + +#### event::ExecuteCommand + +> Location: `clusterevents-check.cpp` and `checkable-check.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::ExecuteCommand +params | Dictionary + +##### Params + +Key | Type | Description +---------------|---------------|------------------ +host | String | Host name. +service | String | Service name. +command\_type | String | `check_command` or `event_command`. +command | String | CheckCommand or EventCommand name. +check\_timeout | Number | Check timeout of the checkable object, if specified as `check_timeout` attribute. +macros | Dictionary | Command arguments as key/value pairs for remote execution. +endpoint | String | The endpoint to execute the command on. +deadline | Number | A Unix timestamp indicating the execution deadline +source | String | The execution UUID + + +##### Functions + +**Event Sender:** This gets constructed directly in `Checkable::ExecuteCheck()`, `Checkable::ExecuteEventHandler()` or `ApiActions::ExecuteCommand()` when a remote command endpoint is configured. + +* `Get{CheckCommand,EventCommand}()->Execute()` simulates an execution and extracts all command arguments into the `macro` dictionary (inside lib/methods tasks). 
+* When the endpoint is connected, the message is constructed and sent directly. +* When the endpoint is not connected and not syncing replay logs and 5m after application start, generate an UNKNOWN check result for the user ("not connected"). + +**Event Receiver:** `ExecuteCommandAPIHandler` + +Special handling, calls `ClusterEvents::EnqueueCheck()` for command endpoint checks. +This function enqueues check tasks into a queue which is controlled in `RemoteCheckThreadProc()`. +If the `endpoint` parameter is specified and is not equal to the local endpoint then the message is forwarded to the correct endpoint zone. + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Origin endpoint's zone is not a parent zone of the receiver endpoint. +* `accept_commands = false` in the `api` feature configuration sends back an UNKNOWN check result to the sender. + +The receiver constructs a virtual host object and looks for the local CheckCommand object. + +Returns UNKNOWN as check result to the sender: + +* when the CheckCommand object does not exist. +* when there was an exception triggered from check execution, e.g. the plugin binary could not be executed or similar. + +The returned messages are synced directly to the sender's endpoint, no cluster broadcast. + +> **Note**: EventCommand errors are just logged on the remote endpoint. + +#### event::UpdateExecutions + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::UpdateExecutions +params | Dictionary + +##### Params + +Key | Type | Description +---------------|---------------|------------------ +host | String | Host name. +service | String | Service name.
+executions | Dictionary | Executions to be updated + +##### Functions + +**Event Sender:** `ClusterEvents::ExecutedCommandAPIHandler`, `ClusterEvents::UpdateExecutionsAPIHandler`, `ApiActions::ExecuteCommand` +**Event Receiver:** `ClusterEvents::UpdateExecutionsAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable. + +#### event::ExecutedCommand + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::ExecutedCommand +params | Dictionary + +##### Params + +Key | Type | Description +---------------|---------------|------------------ +host | String | Host name. +service | String | Service name. +execution | String | The execution ID executed. +exitStatus | Number | The command exit status. +output | String | The command output. +start | Number | The unix timestamp at the start of the command execution +end | Number | The unix timestamp at the end of the command execution + +##### Functions + +**Event Sender:** `ClusterEvents::ExecuteCheckFromQueue`, `ClusterEvents::ExecuteCommandAPIHandler` +**Event Receiver:** `ClusterEvents::ExecutedCommandAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint's zone is not allowed to access this checkable.
+ +#### event::SetRemovalInfo + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetRemovalInfo +params | Dictionary + +##### Params + +Key | Type | Description +---------------|-------------|--------------------------------- +object\_type | String | Object type (`"Comment"` or `"Downtime"`) +object\_name | String | Object name +removed\_by | String | Name of the removal requestor +remove\_time | Timestamp | Time of the remove operation + +##### Functions + +**Event Sender**: `Comment::OnRemovalInfoChanged` and `Downtime::OnRemovalInfoChanged` +**Event Receiver**: `SetRemovalInfoAPIHandler` + +This message is used to synchronize information about manual comment and downtime removals before deleting the +corresponding object. + +##### Permissions + +This message is only accepted from the local zone and from parent zones. + +#### config::Update + +> Location: `apilistener-filesync.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | config::Update +params | Dictionary + +##### Params + +Key | Type | Description +-----------|---------------|------------------ +update | Dictionary | Config file paths and their content. +update\_v2 | Dictionary | Additional meta config files introduced in 2.4+ for compatibility reasons. + +##### Functions + +**Event Sender:** `SendConfigUpdate()` called in `ApiListener::SyncClient()` when a new client endpoint connects. +**Event Receiver:** `ConfigUpdateHandler` reads the config update content and stores it in `/var/lib/icinga2/api`. +When it detects a configuration change, the function requests an application restart. + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* The origin sender is not in a parent zone of the receiver. +* `api` feature does not accept config.
+ +Config updates will be ignored when: + +* The zone is not configured on the receiver endpoint. +* The zone is authoritative on this instance (this only happens on a master which has `/etc/icinga2/zones.d` populated, and prevents sync loops) + +#### config::UpdateObject + +> Location: `apilistener-configsync.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | config::UpdateObject +params | Dictionary + +##### Params + +Key | Type | Description +---------------------|-------------|------------------ +name | String | Object name. +type | String | Object type name. +version | Number | Object version. +config | String | Config file content for `_api` packages. +modified\_attributes | Dictionary | Modified attributes at runtime as key value pairs. +original\_attributes | Array | Original attributes as array of keys. + + +##### Functions + +**Event Sender:** Either on client connect (full sync), or runtime created/updated object + +`ApiListener::SendRuntimeConfigObjects()` gets called when a new endpoint is connected +and runtime created config objects need to be synced. This invokes a call to `UpdateConfigObject()` +to only sync this JsonRpcConnection client. + +`ConfigObject::OnActiveChanged` (created or deleted) or `ConfigObject::OnVersionChanged` (updated) +also call `UpdateConfigObject()`. + +**Event Receiver:** `ConfigUpdateObjectAPIHandler` calls `ConfigObjectUtility::CreateObject()` in order +to create the object if it is not already existing. Afterwards, all modified attributes are applied +and in case, original attributes are restored. The object version is set as well, keeping it in sync +with the sender. + +##### Permissions + +###### Sender + +Client receiver connects: + +The sender only syncs config object updates to a client which can access +the config object, in `ApiListener::SendRuntimeConfigObjects()`. + +In addition to that, the client endpoint's zone is checked whether this zone may access +the config object. 
+ +Runtime updated object: + +Only if the config object belongs to the `_api` package. + + +###### Receiver + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Origin sender endpoint's zone is in a child zone. +* `api` feature does not accept config +* The received config object type does not exist (this is to prevent failures with older nodes and new object types). + +Error handling: + +* Log an error if `CreateObject` fails (only if the object does not already exist) +* Local object version is newer than the received version, object will not be updated. +* Compare modified and original attributes and restore any type of change here. + + +#### config::DeleteObject + +> Location: `apilistener-configsync.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | config::DeleteObject +params | Dictionary + +##### Params + +Key | Type | Description +--------------------|-------------|------------------ +name | String | Object name. +type | String | Object type name. +version | Number | Object version. + +##### Functions + +**Event Sender:** + +`ConfigObject::OnActiveChanged` (created or deleted) or `ConfigObject::OnVersionChanged` (updated) +call `DeleteConfigObject()`. + +**Event Receiver:** `ConfigDeleteObjectAPIHandler` + +##### Permissions + +###### Sender + +Runtime deleted object: + +Only if the config object belongs to the `_api` package. + +###### Receiver + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Origin sender endpoint's zone is in a child zone. +* `api` feature does not accept config +* The received config object type does not exist (this is to prevent failures with older nodes and new object types). +* The object in question was not created at runtime, it does not belong to the `_api` package. 
+ +Error handling: + +* Log an error if `DeleteObject` fails (only if the object does not already exist) + +#### pki::RequestCertificate + +> Location: `jsonrpcconnection-pki.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | pki::RequestCertificate +params | Dictionary + +##### Params + +Key | Type | Description +--------------|---------------|------------------ +ticket | String | Own ticket, or as satellite in CA proxy from local store. +cert\_request | String | Certificate request content from local store, optional. + +##### Functions + +Event Sender: `RequestCertificateHandler` +Event Receiver: `RequestCertificateHandler` + +##### Permissions + +This is an anonymous request, and the number of anonymous clients can be configured +in the `api` feature. + +Only valid certificate request messages are processed, and valid signed certificates +won't be signed again. + +#### pki::UpdateCertificate + +> Location: `jsonrpcconnection-pki.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | pki::UpdateCertificate +params | Dictionary + +##### Params + +Key | Type | Description +---------------------|---------------|------------------ +status\_code | Number | Status code, 0=ok. +cert | String | Signed certificate content. +ca | String | Public CA certificate content. +fingerprint\_request | String | Certificate fingerprint from the CSR. + + +##### Functions + +**Event Sender:** + +* When a client requests a certificate in `RequestCertificateHandler` and the satellite +already has a signed certificate, the `pki::UpdateCertificate` message is constructed and sent back. +* When the endpoint holding the master's CA private key (and TicketSalt private key) is able to sign +the request, the `pki::UpdateCertificate` message is constructed and sent back. 
+ +**Event Receiver:** `UpdateCertificateHandler` + +##### Permissions + +Message updates are dropped when + +* The origin sender is not in a parent zone of the receiver. +* The certificate fingerprint is in an invalid format. + +#### log::SetLogPosition + +> Location: `apilistener.cpp` and `jsonrpcconnection.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | log::SetLogPosition +params | Dictionary + +##### Params + +Key | Type | Description +--------------------|---------------|------------------ +log\_position | Timestamp | The endpoint's log position as UNIX timestamp. + + +##### Functions + +**Event Sender:** + +During log replay to a client endpoint in `ApiListener::ReplayLog()`, each processed +file generates a message which updates the log position timestamp. + +`ApiListener::ApiTimerHandler()` invokes a check to keep all connected endpoints and +their log position in sync during replay log. + +**Event Receiver:** `SetLogPositionHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. diff --git a/doc/20-script-debugger.md b/doc/20-script-debugger.md new file mode 100644 index 0000000..e8ee6db --- /dev/null +++ b/doc/20-script-debugger.md @@ -0,0 +1,177 @@ +# Script Debugger + +You can run the Icinga 2 daemon with the `-X` (`--script-debugger`) +parameter to enable the script debugger: + +```bash +icinga2 daemon -X +``` + +When an exception occurs or the [debugger](17-language-reference.md#breakpoints) +keyword is encountered in a user script, Icinga 2 launches a console that +allows the user to debug the script. + +You can also attach the script debugger to the [configuration validation](11-cli-commands.md#config-validation): + +```bash +icinga2 daemon -C -X +``` + +Here is a list of common errors which can be diagnosed with the script debugger: + +* Configuration errors e.g. 
[apply rules](03-monitoring-basics.md#using-apply) +* Errors in user-defined [functions](17-language-reference.md#functions) + +## Debugging Configuration Errors + +The following example illustrates the problem of a service [apply rule](03-monitoring-basics.md#using-apply-for) +which expects a dictionary value for `config`, but the host custom variable only +provides a string value: + +``` +object Host "script-debugger-host" { + check_command = "icinga" + + vars.http_vhosts["example.org"] = "192.168.1.100" // a string value +} + +apply Service for (http_vhost => config in host.vars.http_vhosts) { + import "generic-service" + + vars += config // expects a dictionary + + check_command = "http" +} +``` + +The error message on config validation will warn about the wrong value type, +but does not provide any context which objects are affected. + +Enable the script debugger and run the config validation: + +``` +# icinga2 daemon -C -X + +Breakpoint encountered in /etc/icinga2/conf.d/services.conf: 59:67-65:1 +Exception: Error: Error while evaluating expression: Cannot convert value of type 'String' to an object. +Location: +/etc/icinga2/conf.d/services.conf(62): check_command = "http" +/etc/icinga2/conf.d/services.conf(63): +/etc/icinga2/conf.d/services.conf(64): vars += config + ^^^^^^^^^^^^^^ +/etc/icinga2/conf.d/services.conf(65): } +/etc/icinga2/conf.d/services.conf(66): +You can inspect expressions (such as variables) by entering them at the prompt. +To leave the debugger and continue the program use "$continue". +<1> => +``` + +You can print the variables `vars` and `config` to get an idea about +their values: + +``` +<1> => vars +null +<2> => config +"192.168.1.100" +<3> => +``` + +The `vars` attribute has to be a dictionary. Trying to set this attribute to a string caused +the error in our configuration example. 
+ +In order to determine the name of the host where the value of the `config` variable came from +you can inspect attributes of the service object: + +``` +<3> => host_name +"script-debugger-host-01" +<4> => name +"http" +``` + +Additionally you can view the service object attributes by printing the value of `this`. + +## Using Breakpoints + +In order to halt execution in a script you can use the `debugger` keyword: + +``` +object Host "script-debugger-host-02" { + check_command = "dummy" + check_interval = 5s + + vars.dummy_text = {{ + var text = "Hello from " + macro("$name$") + debugger + return text + }} +} +``` + +Icinga 2 will spawn a debugger console every time the function is executed: + +``` +# icinga2 daemon -X +... +Breakpoint encountered in /etc/icinga2/tests/script-debugger.conf: 7:5-7:12 +You can inspect expressions (such as variables) by entering them at the prompt. +To leave the debugger and continue the program use "$continue". +<1> => text +"Hello from script-debugger-host-02" +<2> => $continue +``` + +## Debugging API Filters + +Queries against the [Icinga 2 REST API](12-icinga2-api.md#icinga2-api) can use +filters, just like available in `assign where` expressions. If these filters cause +an internal error, they return an empty result to the caller. + +In order to analyse these server-side errors, you can use the script debugger. + +The following example tries filter for all host objects where the custom variable +`os` is set. There are various possibilities to check that, one of them would be +`host.vars.os != ""`. Another idea is to use the [contains](18-library-reference.md#dictionary-contains) method on the custom +attribute dictionary like this: `host.vars.contains("os")`. 
+ +```bash +curl -k -s -u root:icinga -H 'Accept: application/json' -H 'X-HTTP-Method-Override: GET' \ + -X POST 'https://localhost:5665/v1/objects/services' \ + -d '{ "filter": "host.vars.contains(\"os\")", "attrs": [ "__name" ], "joins": [ "host.name", "host.vars" ], "pretty": true }' +``` + +This will fail on all hosts which don't have any custom variable specified. + +``` +# icinga2 daemon -X + +Breakpoint encountered. +Exception: Error: Argument is not a callable object. +Location: in : 1:0-1:23 +You can inspect expressions (such as variables) by entering them at the prompt. +To leave the debugger and continue the program use "$continue". + +<1> => this.host + +... + + vars = null + +<2> => $continue +``` + +By definition, a type method can only be invoked on an actual object. + +In order to stay safe, add more checks to the API filter: + +- `host.vars && host.vars.contains("os")` or +- `host.vars && typeof(host.vars) == Dictionary && host.vars.contains("os")` + +Example: + +```bash +curl -k -s -u root:icinga -H 'Accept: application/json' -H 'X-HTTP-Method-Override: GET' \ + -X POST 'https://localhost:5665/v1/objects/services' \ + -d '{ "filter": "host.vars && typeof(host.vars) == Dictionary && host.vars.contains(\"os\")", "attrs": [ "__name" ], "joins": [ "host.name", "host.vars" ], "pretty": true }' +``` diff --git a/doc/21-development.md b/doc/21-development.md new file mode 100644 index 0000000..01c8620 --- /dev/null +++ b/doc/21-development.md @@ -0,0 +1,2698 @@ +# Development + +This chapter provides hints on Icinga 2 debugging, +development, package builds and tests. 
+ +* [Debug Icinga 2](21-development.md#development-debug) + * [GDB Backtrace](21-development.md#development-debug-gdb-backtrace) + * [Core Dump](21-development.md#development-debug-core-dump) +* [Test Icinga 2](21-development.md#development-tests) + * [Snapshot Packages (Nightly Builds)](21-development.md#development-tests-snapshot-packages) +* [Develop Icinga 2](21-development.md#development-develop) + * [Preparations](21-development.md#development-develop-prepare) + * [Design Patterns](21-development.md#development-develop-design-patterns) + * [Build Tools](21-development.md#development-develop-builds-tools) + * [Unit Tests](21-development.md#development-develop-tests) + * [Style Guide](21-development.md#development-develop-styleguide) +* [Development Environment](21-development.md#development-environment) + * [Linux Dev Environment](21-development.md#development-linux-dev-env) + * [macOS Dev Environment](21-development.md#development-macos-dev-env) + * [Windows Dev Environment](21-development.md#development-windows-dev-env) +* [Package Builds](21-development.md#development-package-builds) + * [RPM](21-development.md#development-package-builds-rpms) + * [DEB](21-development.md#development-package-builds-deb) + * [Windows](21-development.md#development-package-builds-windows) +* [Continuous Integration](21-development.md#development-ci) +* [Advanced Tips](21-development.md#development-advanced) + + + +## Debug Icinga 2 + +This chapter targets all users who have been asked by developers to provide +a stack trace or coredump if the application crashed. It is also useful +for developers working with different debuggers. + +> **Note:** +> +> This is intentionally mentioned before any development insights +> as debugging is a more frequent and commonly asked question. + +### Debug Requirements + +Make sure that the debug symbols are available for Icinga 2. 
+The Icinga 2 packages provide a debug package which must be +installed separately for all involved binaries, like `icinga2-bin` +or `icinga2-ido-mysql`. + +Distribution | Command +-------------------|------------------------------------------ +Debian/Ubuntu | `apt-get install icinga2-dbg` +RHEL/CentOS | `yum install icinga2-debuginfo` +Fedora | `dnf install icinga2-debuginfo icinga2-bin-debuginfo icinga2-ido-mysql-debuginfo` +SLES/openSUSE | `zypper install icinga2-bin-debuginfo icinga2-ido-mysql-debuginfo` + +Furthermore, you may also have to install debug symbols for Boost and your C++ library. + +If you're building your own binaries, you should use the `-DCMAKE_BUILD_TYPE=Debug` cmake +build flag for debug builds. + + +### GDB as Debugger + +Install GDB in your development environment. + +Distribution | Command +-------------------|------------------------------------------ +Debian/Ubuntu | `apt-get install gdb` +RHEL/CentOS | `yum install gdb` +Fedora | `dnf install gdb` +SLES/openSUSE | `zypper install gdb` + +#### GDB Run + +Run the icinga2 binary `/usr/lib{,64}/icinga2/sbin/icinga2` with gdb, `/usr/bin/icinga2` is a shell wrapper. + +``` +gdb --args /usr/lib/icinga2/sbin/icinga2 daemon + +(gdb) set follow-fork-mode child +``` + +When gdb halts on SIGUSR2, press `c` to continue. This signal originates from the umbrella +process and can safely be ignored. + + +> **Note** +> +> Since v2.11 we would attach to the umbrella process spawned with `/usr/lib/icinga2/sbin/icinga2`, +> therefore rather attach to a running process. +> +```bash +# Typically the order of PIDs is: 1) umbrella 2) spawn helper 3) main process +pidof icinga2 + +gdb -p $(pidof icinga2 | cut -d ' ' -f3) +``` + +> **Note** +> +> If gdb tells you it's missing debug symbols, quit gdb and install +> them: `Missing separate debuginfos, use: debuginfo-install ...` + +Run/restart the application. + +``` +(gdb) r +``` + +Kill the running application. + +``` +(gdb) k +``` + +Continue after breakpoint. 
+ +``` +(gdb) c +``` + +#### GDB Core Dump + +Either attach to the running process using `gdb -p PID` or start +a new gdb run. + +``` +(gdb) r +(gdb) generate-core-file +``` + +#### GDB Backtrace + +If Icinga 2 aborted its operation abnormally, generate a backtrace. + +> **Note** +> +> Please install the [required debug symbols](21-development.md#debug-requirements) +> prior to generating a backtrace. + +`thread apply all` is important here since this includes all running threads. +We need this information when e.g. debugging dead locks and hanging features. + +``` +(gdb) bt +(gdb) thread apply all bt full +``` + +If gdb stops at a SIGPIPE signal please disable the signal before +running Icinga 2. This isn't an error, but we need to workaround it. + +``` +(gdb) handle SIGPIPE nostop noprint pass +(gdb) r +``` + +If you create a [new issue](https://github.com/Icinga/icinga2/issues), +make sure to attach as much detail as possible. + +#### GDB Backtrace from Running Process + +If Icinga 2 is still running, generate a full backtrace from the running +process and store it into a new file (e.g. for debugging dead locks). + +> **Note** +> +> Please install the [required debug symbols](21-development.md#debug-requirements) +> prior to generating a backtrace. + +Icinga 2 runs with 2 processes: main and command executor, therefore generate two backtrace logs +and add them to the GitHub issue. + +```bash +for pid in $(pidof icinga2); do gdb -p $pid -batch -ex "thread apply all bt full" -ex "detach" -ex "q" > gdb_bt_${pid}_`date +%s`.log; done +``` + +#### GDB Thread List from Running Process + +Instead of a full backtrace, you sometimes just need a list of running threads. 
+ +```bash +for pid in $(pidof icinga2); do gdb -p $pid -batch -ex "info threads" -ex "detach" -ex "q" > gdb_threads_${pid}_`date +%s`.log; done +``` + +#### GDB Backtrace Stepping + +Identifying the problem may require stepping into the backtrace, analysing +the current scope, attributes, and possible unmet requirements. `p` prints +the value of the selected variable or function call result. + +``` +(gdb) up +(gdb) down +(gdb) p checkable +(gdb) p checkable.px->m_Name +``` + +#### GDB Breakpoints + +To set a breakpoint to a specific function call, or file specific line. + +``` +(gdb) b checkable.cpp:125 +(gdb) b icinga::Checkable::SetEnablePerfdata +``` + +GDB will ask about loading the required symbols later, select `yes` instead +of `no`. + +Then run Icinga 2 until it reaches the first breakpoint. Continue with `c` +afterwards. + +``` +(gdb) run +(gdb) c +``` + +In case you want to step into the next line of code, use `n`. If there is a +function call where you want to step into, use `s`. + +``` +(gdb) n + +(gdb) s +``` + +If you want to delete all breakpoints, use `d` and select `yes`. + +``` +(gdb) d +``` + +> **Tip** +> +> When debugging exceptions, set your breakpoint like this: `b __cxa_throw`. + +Breakpoint Example: + +``` +(gdb) b __cxa_throw +(gdb) r +(gdb) up +.... 
+(gdb) up +#11 0x00007ffff7cbf9ff in icinga::Utility::GlobRecursive(icinga::String const&, icinga::String const&, boost::function const&, int) (path=..., pattern=..., callback=..., type=1) + at /home/michi/coding/icinga/icinga2/lib/base/utility.cpp:609 +609 callback(cpath); +(gdb) l +604 +605 #endif /* _WIN32 */ +606 +607 std::sort(files.begin(), files.end()); +608 BOOST_FOREACH(const String& cpath, files) { +609 callback(cpath); +610 } +611 +612 std::sort(dirs.begin(), dirs.end()); +613 BOOST_FOREACH(const String& cpath, dirs) { +(gdb) p files +$3 = std::vector of length 11, capacity 16 = {{static NPos = 18446744073709551615, m_Data = "/etc/icinga2/conf.d/agent.conf"}, {static NPos = 18446744073709551615, + m_Data = "/etc/icinga2/conf.d/commands.conf"}, {static NPos = 18446744073709551615, m_Data = "/etc/icinga2/conf.d/downtimes.conf"}, {static NPos = 18446744073709551615, + m_Data = "/etc/icinga2/conf.d/groups.conf"}, {static NPos = 18446744073709551615, m_Data = "/etc/icinga2/conf.d/notifications.conf"}, {static NPos = 18446744073709551615, + m_Data = "/etc/icinga2/conf.d/satellite.conf"}, {static NPos = 18446744073709551615, m_Data = "/etc/icinga2/conf.d/services.conf"}, {static NPos = 18446744073709551615, + m_Data = "/etc/icinga2/conf.d/templates.conf"}, {static NPos = 18446744073709551615, m_Data = "/etc/icinga2/conf.d/test.conf"}, {static NPos = 18446744073709551615, + m_Data = "/etc/icinga2/conf.d/timeperiods.conf"}, {static NPos = 18446744073709551615, m_Data = "/etc/icinga2/conf.d/users.conf"}} +``` + + +### Core Dump + +When the Icinga 2 daemon crashes with a `SIGSEGV` signal +a core dump file should be written. This will help +developers to analyze and fix the problem. + +#### Core Dump File Size Limit + +This requires setting the core dump file size to `unlimited`. + + +##### Systemd + +``` +systemctl edit icinga2.service + +[Service] +... 
+LimitCORE=infinity + +systemctl daemon-reload + +systemctl restart icinga2 +``` + +##### Init Script + +``` +vim /etc/init.d/icinga2 +... +ulimit -c unlimited + +service icinga2 restart +``` + +##### Verify + +Verify that the Icinga 2 process core file size limit is set to `unlimited`. + +``` +for pid in $(pidof icinga2); do cat /proc/$pid/limits; done + +... +Max core file size unlimited unlimited bytes +``` + + +#### Core Dump Kernel Format + +The Icinga 2 daemon runs with the SUID bit set. Therefore you need +to explicitly enable core dumps for SUID on Linux. + +```bash +sysctl -w fs.suid_dumpable=2 +``` + +Adjust the coredump kernel format and file location on Linux: + +```bash +sysctl -w kernel.core_pattern=/var/lib/cores/core.%e.%p + +install -m 1777 -d /var/lib/cores +``` + +MacOS: + +```bash +sysctl -w kern.corefile=/cores/core.%P + +chmod 777 /cores +``` + +#### Core Dump Analysis + +Once Icinga 2 crashes again a new coredump file will be written. Please +attach this file to your bug report in addition to the general details. + +Simple test case for a `SIGSEGV` simulation with `sleep`: + +``` +ulimit -c unlimited +sleep 1800& +[1] +kill -SEGV +gdb `which sleep` /var/lib/cores/core.sleep. +(gdb) bt +rm /var/lib/cores/core.sleep.* +``` + +Analyzing Icinga 2: + +``` +gdb /usr/lib64/icinga2/sbin/icinga2 core.icinga2. +(gdb) bt +``` + +### LLDB as Debugger + +LLDB is available on macOS with the Xcode command line tools. + +```bash +xcode-select --install +``` + +In order to run Icinga 2 with LLDB you need to pass the binary as argument. +Since v2.11 we would attach to the umbrella process, therefore rather +attach to a running process. + +```bash +# Typically the order of PIDs is: 1) umbrella 2) spawn helper 3) main process +pidof icinga2 + +lldb -p $(pidof icinga2 | cut -d ' ' -f3) +``` + +In case you'll need to attach to the main process immediately, you can delay +the forked child process and attach to the printed PID. 
+ +``` +$ icinga2 daemon -DInternal.DebugWorkerDelay=120 +Closed FD 6 which we inherited from our parent process. +[2020-01-29 12:22:33 +0100] information/cli: Icinga application loader (version: v2.11.0-477-gfe8701d77; debug) +[2020-01-29 12:22:33 +0100] information/RunWorker: DEBUG: Current PID: 85253. Sleeping for 120 seconds to allow lldb/gdb -p attachment. +``` + +```bash +lldb -p 85253 +``` + +When lldb halts on SIGUSR2, press `c` to continue. This signal originates from the umbrella +process and can safely be ignored. + + +Breakpoint: + +``` +> b checkable.cpp:57 +> b icinga::Checkable::ProcessCheckResult +``` + +Full backtrace: + +``` +> bt all +``` + +Select thread: + +``` +> thr sel 5 +``` + +Step into: + +``` +> s +``` + +Next step: + +``` +> n +``` + +Continue: + +``` +> c +``` + +Up/down in stacktrace: + +``` +> up +> down +``` + + +### Debug on Windows + + +Whenever the application crashes, the Windows error reporting (WER) can be [configured](https://docs.microsoft.com/en-gb/windows/win32/wer/collecting-user-mode-dumps) +to create user-mode dumps. + + +Tail the log file with Powershell: + +``` +Get-Content .\icinga2.log -tail 10 -wait +``` + + +#### Debug on Windows: Dependencies + +Similar to `ldd` or `nm` on Linux/Unix. + +Extract the dependent DLLs from a binary with Visual Studio's `dumpbin` tool +in Powershell: + +``` +C:> &'C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.22.27905\bin\Hostx64\x64\dumpbin.exe' /dependents .\debug\Bin\Debug\Debug\boosttest-test-base.exe +DEBUG: 1+ >>>> &'C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.22.27905\bin\Hostx64\x64\dumpbin.exe' /dependents .\debug\Bin\Debug\Debug\boosttest-test-base.exe +Microsoft (R) COFF/PE Dumper Version 14.22.27905.0 +Copyright (C) Microsoft Corporation. All rights reserved. 
+ + +Dump of file .\debug\Bin\Debug\Debug\boosttest-test-base.exe + +File Type: EXECUTABLE IMAGE + + Image has the following dependencies: + + boost_coroutine-vc142-mt-gd-x64-1_80.dll + boost_date_time-vc142-mt-gd-x64-1_80.dll + boost_filesystem-vc142-mt-gd-x64-1_80.dll + boost_thread-vc142-mt-gd-x64-1_80.dll + boost_regex-vc142-mt-gd-x64-1_80.dll + libssl-1_1-x64.dll + libcrypto-1_1-x64.dll + WS2_32.dll + dbghelp.dll + SHLWAPI.dll + msi.dll + boost_unit_test_framework-vc142-mt-gd-x64-1_80.dll + KERNEL32.dll + SHELL32.dll + ADVAPI32.dll + MSVCP140D.dll + MSWSOCK.dll + bcrypt.dll + VCRUNTIME140D.dll + ucrtbased.dll + + Summary + + 1000 .00cfg + 68000 .data + B000 .idata + 148000 .pdata + 69C000 .rdata + 25000 .reloc + 1000 .rsrc + E7A000 .text + 1000 .tls +``` + + +## Test Icinga 2 + +### Snapshot Packages (Nightly Builds) + +Icinga provides snapshot packages as nightly builds from [Git master](https://github.com/icinga/icinga2). + +These packages contain development code which should be considered "work in progress". +While developers ensure that tests are running fine with CI actions on PRs, +things might break, or changes are not yet documented in the changelog. + +You can help the developers and test the snapshot packages, e.g. when larger +changes or rewrites are taking place for a new major version. Your feedback +is very much appreciated. + +Snapshot packages are available for all supported platforms including +Linux and Windows and can be obtained from [https://packages.icinga.com](https://packages.icinga.com). + +The [Vagrant boxes](https://github.com/Icinga/icinga-vagrant) also use +the Icinga snapshot packages to allow easier integration tests. It is also +possible to use Docker with base OS images and installing the snapshot +packages. + +If you encounter a problem, please [open a new issue](https://github.com/Icinga/icinga2/issues/new/choose) +on GitHub and mention that you're testing the snapshot packages. 
+ +#### RHEL/CentOS + +2.11+ requires the EPEL repository for Boost 1.66+. + +In addition to that, the `icinga-rpm-release` package already provides the `icinga-snapshot-builds` +repository but it is disabled by default. + +```bash +yum -y install https://packages.icinga.com/epel/icinga-rpm-release-7-latest.noarch.rpm +yum -y install epel-release +yum makecache + +yum install --enablerepo=icinga-snapshot-builds icinga2 +``` + +#### Debian + +2.11+ requires Boost 1.66+ which either is provided by the OS, backports or Icinga stable repositories. +It is advised to configure both Icinga repositories, stable and snapshot and selectively +choose the repository with the `-t` flag on `apt-get install`. + +```bash +apt-get update +apt-get -y install apt-transport-https wget gnupg + +wget -O - https://packages.icinga.com/icinga.key | apt-key add - + +DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release); \ + echo "deb https://packages.icinga.com/debian icinga-${DIST} main" > \ + /etc/apt/sources.list.d/${DIST}-icinga.list + echo "deb-src https://packages.icinga.com/debian icinga-${DIST} main" >> \ + /etc/apt/sources.list.d/${DIST}-icinga.list + +DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release); \ + echo "deb http://packages.icinga.com/debian icinga-${DIST}-snapshots main" > \ + /etc/apt/sources.list.d/${DIST}-icinga-snapshots.list + echo "deb-src http://packages.icinga.com/debian icinga-${DIST}-snapshots main" >> \ + /etc/apt/sources.list.d/${DIST}-icinga-snapshots.list + +apt-get update +``` + +On Debian Stretch, you'll also need to add Debian Backports. + +```bash +DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release); \ + echo "deb https://deb.debian.org/debian ${DIST}-backports main" > \ + /etc/apt/sources.list.d/${DIST}-backports.list + +apt-get update +``` + +Then install the snapshot packages. 
+ +```bash +DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release); \ +apt-get install -t icinga-${DIST}-snapshots icinga2 +``` + +#### Ubuntu + +```bash +apt-get update +apt-get -y install apt-transport-https wget gnupg + +wget -O - https://packages.icinga.com/icinga.key | apt-key add - + +. /etc/os-release; if [ ! -z ${UBUNTU_CODENAME+x} ]; then DIST="${UBUNTU_CODENAME}"; else DIST="$(lsb_release -c| awk '{print $2}')"; fi; \ + echo "deb https://packages.icinga.com/ubuntu icinga-${DIST} main" > \ + /etc/apt/sources.list.d/${DIST}-icinga.list + echo "deb-src https://packages.icinga.com/ubuntu icinga-${DIST} main" >> \ + /etc/apt/sources.list.d/${DIST}-icinga.list + +. /etc/os-release; if [ ! -z ${UBUNTU_CODENAME+x} ]; then DIST="${UBUNTU_CODENAME}"; else DIST="$(lsb_release -c| awk '{print $2}')"; fi; \ + echo "deb https://packages.icinga.com/ubuntu icinga-${DIST}-snapshots main" > \ + /etc/apt/sources.list.d/${DIST}-icinga-snapshots.list + echo "deb-src https://packages.icinga.com/ubuntu icinga-${DIST}-snapshots main" >> \ + /etc/apt/sources.list.d/${DIST}-icinga-snapshots.list + +apt-get update +``` + +Then install the snapshot packages. + +```bash +. /etc/os-release; if [ ! -z ${UBUNTU_CODENAME+x} ]; then DIST="${UBUNTU_CODENAME}"; else DIST="$(lsb_release -c| awk '{print $2}')"; fi; \ +apt-get install -t icinga-${DIST}-snapshots icinga2 +``` + +#### SLES + +The required Boost packages are provided with the stable release repository. + +```bash +rpm --import https://packages.icinga.com/icinga.key + +zypper ar https://packages.icinga.com/SUSE/ICINGA-release.repo +zypper ref + +zypper ar https://packages.icinga.com/SUSE/ICINGA-snapshot.repo +zypper ref +``` + +Selectively install the snapshot packages using the `-r` parameter. + +```bash +zypper in -r icinga-snapshot-builds icinga2 +``` + + +### Unit Tests + +Build the binaries and run the tests. 
+ + +```bash +make -j4 -C debug +make test -C debug +``` + +Run a specific boost test: + +```bash +debug/Bin/Debug/boosttest-test-base --run_test=remote_url +``` + + + +## Develop Icinga 2 + +Icinga 2 can be built on many platforms such as Linux, Unix and Windows. +There are limitations in terms of support, e.g. Windows is only supported for agents, +not a full-featured master or satellite. + +Before you start with actual development, there is a couple of pre-requisites. + +### Preparations + +#### Choose your Editor + +Icinga 2 can be developed with your favorite editor. Icinga developers prefer +these tools: + +- vim +- CLion (macOS, Linux) +- MS Visual Studio (Windows) +- Atom + +Editors differ on the functionality. The more helpers you get for C++ development, +the faster your development workflow will be. + +#### Get to know the architecture + +Icinga 2 can run standalone or in distributed environments. It contains a whole lot +more than a simple check execution engine. + +Read more about it in the [Technical Concepts](19-technical-concepts.md#technical-concepts) chapter. + +#### Get to know the code + +First off, you really need to know C++ and portions of C++11 and the boost libraries. +Best is to start with a book or online tutorial to get into the basics. +Icinga developers gained their knowledge through studies, training and self-teaching +code by trying it out and asking senior developers for guidance. + +Here's a few books we can recommend: + +* [Accelerated C++: Practical Programming by Example](https://www.amazon.com/Accelerated-C-Practical-Programming-Example/dp/020170353X) (Andrew Koenig, Barbara E. 
Moo) +* [Effective C++](https://www.amazon.com/Effective-Specific-Improve-Programs-Designs/dp/0321334876) (Scott Meyers) +* [Boost C++ Application Development Cookbook - Second Edition: Recipes to simplify your application development](https://www.amazon.com/dp/1787282244/ref=cm_sw_em_r_mt_dp_U_dN1OCbERS00EQ) (Antony Polukhin) +* [Der C++ Programmierer](https://www.amazon.de/Programmierer-lernen-Professionell-anwenden-L%C3%B6sungen/dp/3446416447), German (Ulrich Breymann) +* [C++11 programmieren](https://www.amazon.de/gp/product/3836217325/), German (Torsten T. Will) + +In addition, it is a good bet to also know SQL when diving into backend development. + +* [SQL Performance Explained](https://www.amazon.de/gp/product/3950307826/) (Markus Winand) + +Last but not least, if you are developing on Windows, get to know the internals about services and the Win32 API. + +### Design Patterns + +Icinga 2 heavily relies on object-oriented programming and encapsulates common +functionality into classes and objects. It also uses modern programming techniques +to e.g. work with shared pointer memory management. + +Icinga 2 consists of libraries bundled into the main binary. Therefore you'll +find many code parts in the `lib/` directory whereas the actual application is +built from `icinga-app/`. Accompanied with Icinga 2, there's the Windows plugins +which are standalone and compiled from `plugins/`. + +Library | Description +---------------|------------------------------------ +base | Objects, values, types, streams, sockets, TLS, utilities, etc. +config | Configuration compiler, expressions, etc. +cli | CLI (sub) commands and helpers. +icinga | Icinga specific objects and event handling. +remote | Cluster and HTTP client/server and REST API related code. +checker | Checker feature, check scheduler. +notification | Notification feature, notification scheduler. +methods | Command execution methods, plugins and built-in checks. 
+perfdata | Performance data related, including Graphite, Elastic, etc. +db\_ido | IDO database abstraction layer. +db\_ido\_mysql | IDO database driver for MySQL. +db\_ido\_pgsql | IDO database driver for PgSQL. +mysql\_shim | Library stub for linking against the MySQL client libraries. +pgsql\_shim | Library stub for linking against the PgSQL client libraries. + +#### Class Compiler + +Another thing you will recognize are the `.ti` files which are compiled +by our own class compiler into actual source code. The meta language allows +developers to easily add object attributes and specify their behaviour. + +Some object attributes need to be stored over restarts in the state file +and therefore have the `state` attribute set. Others are treated as `config` +attribute and automatically get configuration validation functions created. +Hidden or read-only REST API attributes are marked with `no_user_view` and +`no_user_modify`. + +The most beneficial thing are getters and setters being generated. The actual object +inherits from `ObjectImpl` and therefore gets them "for free". + +Example: + +``` +vim lib/perfdata/gelfwriter.ti + + [config] enable_tls; + +vim lib/perfdata/gelfwriter.cpp + + if (GetEnableTls()) { +``` + +The logic is hidden in `tools/mkclass/` in case you want to learn more about it. +The first steps during CMake & make also tell you about code generation. + +### Build Tools + +#### CMake + +In its early development stages in 2012, Icinga 2 was built with autoconf/automake +and separate Windows project files. We've found this very fragile, and have changed +this into CMake as our build tool. + +The most common benefits: + +* Everything is described in CMakeLists.txt in each directory +* CMake only needs to know that a sub directory needs to be included. +* The global CMakeLists.txt acts as main entry point for requirement checks and library/header includes. +* Separate binary build directories, the actual source tree stays clean. 
+* CMake automatically generates a Visual Studio project file `icinga2.sln` on Windows. + +#### Unity Builds + +Another thing you should be aware of: Unity builds on and off. + +Typically, we already use caching mechanisms to reduce recompile time with ccache. +For release builds, there's always a new build needed as the difference is huge compared +to a previous (major) release. + +Therefore we've invented the Unity builds, which basically concatenates all source files +into one big library source code file. The compiler then doesn't need to load the many small +files but compiles and links this huge one. + +Unity builds require more memory which is why you should disable them for development +builds in small sized VMs (Linux, Windows) and also Docker containers. + +There's a couple of header files which are included everywhere. If you touch/edit them, +the cache is invalidated and you need to recompile a lot more files then. `base/utility.hpp` +and `remote/zone.hpp` are good candidates for this. + +### Unit Tests + +New functions and classes must implement new unit tests. Whenever +you decide to add new functions, ensure that you don't need a complex +mock or runtime attributes in order to test them. Better isolate +code into function interfaces which can be invoked in the Boost tests +framework. + +Look into the existing tests in the [test/](https://github.com/Icinga/icinga2/tree/master/test) directory +and adopt new test cases. + +Specific tests require special time windows, they are only +enabled in debug builds for developers. This is the case e.g. +for testing the flapping algorithm with expected state change +detection at a specific point from now. + + +### Style Guide + +Overview of project files: + +File Type | File Name/Extension | Description +---------------|---------------------|----------------------------- +Header | .hpp | Classes, enums, typedefs inside the icinga Namespace. 
+Source | .cpp | Method implementation for class functions, static/global variables. +CMake | CMakeLists.txt | Build configuration, source and header file references. +CMake Source | .cmake | Source/Header files generated from CMake placeholders. +ITL/conf.d | .conf | Template library and example files as configuration +Class Compiler | .ti | Object classes in our own language, generates source code as `-ti.{c,h}pp`. +Lexer/Parser | .ll, .yy | Flex/Bison code generated into source code from CMake builds. +Docs | .md | Markdown docs and READMEs. + +Anything else are additional tools and scripts for developers and build systems. + +All files must include the copyright header. We don't use the +current year as this implies yearly updates we don't want. + +Depending on the file type, this must be a comment. + +```cpp +/* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */ +``` + +```bash +# Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ +``` + +#### Code Formatting + +**Tabs instead of spaces.** Inside Visual Studio, choose to keep tabs instead of +spaces. Tabs should use 4 spaces indent by default, depending on your likings. + +We follow the clang format, with some exceptions. + +- Curly braces for functions and classes always start at a new line. + +```cpp +String ConfigObjectUtility::EscapeName(const String& name) +{ +//... +} + +String ConfigObjectUtility::CreateObjectConfig(const Type::Ptr& type, const String& fullName, + bool ignoreOnError, const Array::Ptr& templates, const Dictionary::Ptr& attrs) +{ +//... +} +``` + +- Too long lines break at a parameter, the new line needs a tab indent. + +```cpp + static String CreateObjectConfig(const Type::Ptr& type, const String& fullName, + bool ignoreOnError, const Array::Ptr& templates, const Dictionary::Ptr& attrs); +``` + +- Conditions require curly braces if it is not a single if with just one line. + + +```cpp + if (s == "OK") { + //... + } else { + //... 
+ } + + if (!n) + return; +``` + +- There's a space between `if` and the opening brace `(`. Also after the closing brace `)` and opening curly brace `{`. +- Negation with `!` doesn't need an extra space. +- Else branches always start in the same line after the closing curly brace. + + +#### Code Comments + +Add comments wherever you think that another developer will have a hard +time to understand the complex algorithm. Or you might have forgotten +it in a year and struggle again. Also use comments to highlight specific +stages in a function. Generally speaking, make things easier for the +team and external contributors. + +Comments can also be used to mark additional references and TODOs. +If there is a specific GitHub issue or discussion going on, +use that information as a summary and link over to it on purpose. + +- Single line comments may use `//` or `/* ... */` +- Multi line comments must use this format: + +```cpp +/* Ensure to check for XY + * This relies on the fact that ABC has been set before. + */ +``` + +#### Function Docs + +Function header documentation must be added. The current code basis +needs rework, future functions must provide this. + +Editors like CLion or Visual Studio allow you to type `/**` followed +by Enter and generate the skeleton from the implemented function. + +Add a short summary in the first line about the function's purpose. +Edit the param section with short description on their intention. +The `return` value should describe the value type and additional details. + +Example: + +```cpp +/** + * Reads a message from the connected peer. + * + * @param stream ASIO TLS Stream + * @param yc Yield Context for ASIO + * @param maxMessageLength maximum size of bytes read. 
+ * + * @return A JSON string + */ +String JsonRpc::ReadMessage(const std::shared_ptr<AsioTlsStream>& stream, boost::asio::yield_context yc, ssize_t maxMessageLength) +``` + +While we can generate code docs from it, the main idea behind it is +to provide on-point docs to fully understand all parameters and the +function's purpose in the same spot. + + +#### Header + +Only include other headers which are mandatory for the header definitions. +If the source file requires additional headers, add them there to avoid +include loops. + +The included header order is important. + +- First, include the library header `i2-<libraryname>.hpp`, e.g. `i2-base.hpp`. +- Second, include all headers from Icinga itself, e.g. `remote/apilistener.hpp`. `base` before `icinga` before `remote`, etc. +- Third, include third-party and external library headers, e.g. openssl and boost. +- Fourth, include STL headers. + +#### Source + +The included header order is important. + +- First, include the header whose methods are implemented. +- Second, include all headers from Icinga itself, e.g. `remote/apilistener.hpp`. `base` before `icinga` before `remote`, etc. +- Third, include third-party and external library headers, e.g. openssl and boost. +- Fourth, include STL headers. + +Always use an empty line after the header include parts. + +#### Namespace + +The icinga namespace is used globally, as otherwise we would need to write `icinga::Utility::FormatDateTime()`. + +```cpp +using namespace icinga; +``` + +Other namespaces must be declared in the scope they are used. Typically +this is inside the function where `boost::asio` and variants would +complicate the code. + +```cpp + namespace ssl = boost::asio::ssl; + + auto context (std::make_shared<ssl::context>(ssl::context::sslv23)); +``` + +#### Functions + +Ensure to pass values and pointers as const reference. By default, all +values will be copied into the function scope, and we want to avoid this +wherever possible. 
+ +```cpp +std::vector EventQueue::GetQueuesForType(const String& type) +``` + +C++ only allows to return a single value. This can be abstracted with +returning a specific class object, or with using a map/set. Array and +Dictionary objects increase the memory footprint, use them only where needed. + +A common use case for Icinga value types is where a function can return +different values - an object, an array, a boolean, etc. This happens in the +inner parts of the config compiler expressions, or config validation. + +The function caller is responsible to determine the correct value type +and handle possible errors. + +Specific algorithms may require to populate a list, which can be passed +by reference to the function. The inner function can then append values. +Do not use a global shared resource here, unless this is locked by the caller. + + +#### Conditions and Cases + +Prefer if-else-if-else branches. When integers are involved, +switch-case statements increase readability. Don't forget about `break` though! + +Avoid using ternary operators where possible. Putting a condition +after an assignment complicates reading the source. The compiler +optimizes this anyways. + +Wrong: + +```cpp + int res = s == "OK" ? 0 : s == "WARNING" ? 1; + + return res; +``` + +Better: + +```cpp + int res = 3; + + if (s == "OK") { + res = 0; + } else if (s == "WARNING") { + res = 1; + } +``` + +Even better: Create a lookup map instead of if branches. The complexity +is reduced to O(log(n)). + +```cpp + std::map stateMap = { + { "OK", 1 }, + { "WARNING", 2 } + } + + auto it = stateMap.find(s); + + if (it == stateMap.end()) { + return 3 + } + + return it.second; +``` + +The code is not as short as with a ternary operator, but one can re-use +this design pattern for other generic definitions with e.g. moving the +lookup into a utility class. + +Once a unit test is written, everything works as expected in the future. 
+ +#### Locks and Guards + +Lock access to resources where multiple threads can read and write. +Icinga objects can be locked with the `ObjectLock` class. + +Object locks and guards must be limited to the scope where they are needed. Otherwise we could create dead locks. + +```cpp + { + ObjectLock olock(frame.Locals); + for (const Dictionary::Pair& kv : frame.Locals) { + AddSuggestion(matches, word, kv.first); + } + } +``` + +#### Objects and Pointers + +Use shared pointers for objects. Icinga objects implement the `Ptr` +typedef returning an `intrusive_ptr` for the class object (object.hpp). +This also ensures reference counting for the object's lifetime. + +Use raw pointers with care! + +Some methods and classes require specific shared pointers, especially +when interacting with the Boost library. + +#### Value Types + +Icinga has its own value types. These provide methods to allow +generic serialization into JSON for example, and other type methods +which are made available in the DSL too. + +- Always use `String` instead of `std::string`. If you need a C-string, use the `CStr()` method. +- Avoid casts and rather use the `Convert` class methods. + +```cpp + double s = static_cast(v); //Wrong + + double s = Convert::ToDouble(v); //Correct, ToDouble also provides overloads with different value types +``` + +- Prefer STL containers for internal non-user interfaces. Icinga value types add a small overhead which may decrease performance if e.g. the function is called 100k times. +- `Array::FromVector` and variants implement conversions, use them. + +#### Utilities + +Don't re-invent the wheel. The `Utility` class provides +many helper functions which allow you e.g. to format unix timestamps, +search in filesystem paths. + +Also inspect the Icinga objects, they also provide helper functions +for formatting, splitting strings, joining arrays into strings, etc. + +#### Libraries + +2.11 depends on [Boost 1.66](https://www.boost.org/doc/libs/1_66_0/). 
+Use the existing libraries and header-only includes +for this specific version. + +Note: Prefer C++11 features where possible, e.g. std::atomic and lambda functions. + +General: + +- [exception](https://www.boost.org/doc/libs/1_66_0/libs/exception/doc/boost-exception.html) (header only) +- [algorithm](https://www.boost.org/doc/libs/1_66_0/libs/algorithm/doc/html/index.html) (header only) +- [lexical_cast](https://www.boost.org/doc/libs/1_66_0/doc/html/boost_lexical_cast.html) (header only) +- [regex](https://www.boost.org/doc/libs/1_66_0/libs/regex/doc/html/index.html) +- [uuid](https://www.boost.org/doc/libs/1_66_0/libs/uuid/doc/uuid.html) (header only) +- [range](https://www.boost.org/doc/libs/1_66_0/libs/range/doc/html/index.html) (header only) +- [variant](https://www.boost.org/doc/libs/1_66_0/doc/html/variant.html) (header only) +- [multi_index](https://www.boost.org/doc/libs/1_66_0/libs/multi_index/doc/index.html) (header only) +- [function_types](https://www.boost.org/doc/libs/1_66_0/libs/function_types/doc/html/index.html) (header only) +- [circular_buffer](https://www.boost.org/doc/libs/1_66_0/doc/html/circular_buffer.html) (header only) +- [math](https://www.boost.org/doc/libs/1_66_0/libs/math/doc/html/index.html) (header only) +- [stacktrace](https://www.boost.org/doc/libs/1_66_0/doc/html/stacktrace.html) (header only) + +Events and Runtime: + +- [system](https://www.boost.org/doc/libs/1_66_0/libs/system/doc/index.html) +- [thread](https://www.boost.org/doc/libs/1_66_0/doc/html/thread.html) +- [signals2](https://www.boost.org/doc/libs/1_66_0/doc/html/signals2.html) (header only) +- [program_options](https://www.boost.org/doc/libs/1_66_0/doc/html/program_options.html) +- [date_time](https://www.boost.org/doc/libs/1_66_0/doc/html/date_time.html) +- [filesystem](https://www.boost.org/doc/libs/1_66_0/libs/filesystem/doc/index.htm) + +Network I/O: + +- [asio](https://www.boost.org/doc/libs/1_66_0/doc/html/boost_asio.html) (header only) +- 
[beast](https://www.boost.org/doc/libs/1_66_0/libs/beast/doc/html/index.html) (header only) +- [coroutine](https://www.boost.org/doc/libs/1_66_0/libs/coroutine/doc/html/index.html) +- [context](https://www.boost.org/doc/libs/1_66_0/libs/context/doc/html/index.html) + +Consider abstracting their usage into `*utility.{c,h}pp` files with +wrapping existing Icinga types. That also allows later changes without +rewriting large code parts. + +> **Note** +> +> A new Boost library should be explained in a PR and discussed with the team. +> +> This requires package dependency changes. + +If you consider an external library or code to be included with Icinga, the following +requirements must be fulfilled: + +- License is compatible with GPLv2+. Boost license, MIT works, Apache is not. +- C++11 is supported, C++14 or later doesn't work +- Header only implementations are preferred, external libraries require packages on every distribution. +- No additional frameworks, Boost is the only allowed. +- The code is proven to be robust and the GitHub repository is alive, or has 1k+ stars. Good libraries also provide a user list, if e.g. Ceph is using it, this is a good candidate. + + +#### Log + +Icinga allows the user to configure logging backends, e.g. syslog or file. + +Any log message inside the code must use the `Log()` function. + +- The first parameter is the severity level, use them with care. +- The second parameter defines the location/scope where the log +happened. Typically we use the class name here, to better analyse +the logs the user provide in GitHub issues and on the community +channels. +- The third parameter takes a log message string + +If the message string needs to be computed from existing values, +everything must be converted to the String type beforehand. +This conversion for every value is very expensive which is why +we try to avoid it. 
+ +Instead, use Log() with the shift operator where everything is written +on the stream and conversions are explicitly done with templates +in the background. + +The trick here is that the Log object is destroyed immediately +after being constructed once. The destructor actually +evaluates the values and sends it to registered loggers. + +Since flushing the stream every time a log entry occurs is +very expensive, a timer takes care of flushing the stream +every second. + +> **Tip** +> +> If logging stopped, the flush timer thread may be dead. +> Inspect that with gdb/lldb. + +Avoid log messages which could irritate the user. During +implementation, developers can change log levels to better +see what's going on, but remember to change this back to `debug` +or remove it entirely. + + +#### Goto + +Avoid using `goto` statements. There are rare occasions where +they are allowed: + +- The code would become overly complicated within nested loops and conditions. +- Event processing and C interfaces. +- Question/Answer loops within interactive CLI commands. + +#### Typedef and Auto Keywords + +Typedefs allow developers to use shorter names for specific types, +classes and structs. + +```cpp + typedef std::map >::iterator Iterator; +``` + +These typedefs should be part of the Class definition in the header, +or may be defined in the source scope where they are needed. + +Avoid declaring global typedefs, unless necessary. + +Using the `auto` keyword allows to ignore a specific value type. +This comes in handy with maps/sets where no specific access +is required. + +The following example iterates over a map returned from `GetTypes()`. + +```cpp + for (const auto& kv : GetTypes()) { + result.insert(kv.second); + } +``` + +The long example would require us to define a map iterator, and a slightly +different algorithm. 
+ +```cpp + typedef std::map TypeMap; + typedef std::map::const_iterator TypeMapIterator; + + TypeMap types = GetTypes(); + + for (TypeMapIterator it = types.begin(); it != types.end(); it++) { + result.insert(it.second); + } +``` + +We could also use a pair here, but requiring to know +the specific types of the map keys and values. + +```cpp + typedef std::pair kv_pair; + + for (const kv_pair& kv : GetTypes()) { + result.insert(kv.second); + } +``` + +After all, `auto` shortens the code and one does not always need to know +about the specific types. Function documentation for `GetTypes()` is +required though. + + + +#### Whitespace Cleanup + +Patches must be cleaned up and follow the indent style (tabs instead of spaces). +You should also remove any trailing whitespaces. + +`git diff` allows to highlight such. + +``` +vim $HOME/.gitconfig + +[color "diff"] + whitespace = red reverse +[core] + whitespace=fix,-indent-with-non-tab,trailing-space,cr-at-eol +``` + +`vim` also can match these and visually alert you to remove them. + +``` +vim $HOME/.vimrc + +highlight ExtraWhitespace ctermbg=red guibg=red +match ExtraWhitespace /\s\+$/ +autocmd BufWinEnter * match ExtraWhitespace /\s\+$/ +autocmd InsertEnter * match ExtraWhitespace /\s\+\%#\@<!$/ +autocmd InsertLeave * match ExtraWhitespace /\s\+$/ +autocmd BufWinLeave * call clearmatches() +``` + + +## Development Environment + +### Linux Dev Environment + +Based on CentOS 7, we have an early draft available inside the Icinga Vagrant boxes: +[centos7-dev](https://github.com/Icinga/icinga-vagrant/tree/master/centos7-dev). + +If you're compiling Icinga 2 natively without any virtualization layer in between, +this usually is faster. This is also the reason why developers on macOS prefer native builds +over Linux or Windows VMs. Don't forget to test the actual code on Linux later! Socket specific +stuff like `epoll` is not available on Unix kernels. + +Depending on your workstation and environment, you may either develop and run locally, +use a container deployment pipeline or put everything in a high end resource remote VM. 
+ +Fork https://github.com/Icinga/icinga2 into your own repository, e.g. `https://github.com/dnsmichi/icinga2`. + +Create two build directories for different binary builds. + +* `debug` contains the debug build binaries. They contain more debug information and run tremendously slower than release builds from packages. Don't use them for benchmarks. +* `release` contains the release build binaries, as you would install them on a live system. This helps comparing specific scenarios for race conditions and more. + +```bash +mkdir -p release debug +``` + +Proceed with the specific distribution examples below. Keep in mind that these instructions +are best effort and sometimes out-of-date. Git Master may contain updates. + +* [CentOS 7](21-development.md#development-linux-dev-env-centos) +* [Debian 10 Buster](21-development.md#development-linux-dev-env-debian) +* [Ubuntu 18 Bionic](21-development.md#development-linux-dev-env-ubuntu) + + +#### CentOS 7 + +```bash +yum -y install gdb vim git bash-completion htop + +yum -y install rpmdevtools ccache \ + cmake make gcc-c++ flex bison \ + openssl-devel boost169-devel systemd-devel \ + mysql-devel postgresql-devel libedit-devel \ + libstdc++-devel + +groupadd icinga +groupadd icingacmd +useradd -c "icinga" -s /sbin/nologin -G icingacmd -g icinga icinga + +ln -s /bin/ccache /usr/local/bin/gcc +ln -s /bin/ccache /usr/local/bin/g++ + +git clone https://github.com/icinga/icinga2.git && cd icinga2 +``` + +The debug build binaries contain specific code which runs +slower but allows for better debugging insights. + +For benchmarks, change `CMAKE_BUILD_TYPE` to `RelWithDebInfo` and +build inside the `release` directory. + +First off, export some generics for Boost. + +```bash +export I2_BOOST="-DBoost_NO_BOOST_CMAKE=TRUE -DBoost_NO_SYSTEM_PATHS=TRUE -DBOOST_LIBRARYDIR=/usr/lib64/boost169 -DBOOST_INCLUDEDIR=/usr/include/boost169 -DBoost_ADDITIONAL_VERSIONS='1.69;1.69.0'" +``` + +Second, add the prefix path to it. 
+ +```bash +export I2_GENERIC="$I2_BOOST -DCMAKE_INSTALL_PREFIX=/usr/local/icinga2" +``` + +Third, define the two build types with their specific CMake variables. + +```bash +export I2_DEBUG="-DCMAKE_BUILD_TYPE=Debug -DICINGA2_UNITY_BUILD=OFF $I2_GENERIC" +export I2_RELEASE="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DICINGA2_WITH_TESTS=ON -DICINGA2_UNITY_BUILD=ON $I2_GENERIC" +``` + +Fourth, depending on your likings, you may add a bash alias for building, +or invoke the commands inside: + +```bash +alias i2_debug="cd /root/icinga2; mkdir -p debug; cd debug; cmake $I2_DEBUG ..; make -j2; sudo make -j2 install; cd .." +alias i2_release="cd /root/icinga2; mkdir -p release; cd release; cmake $I2_RELEASE ..; make -j2; sudo make -j2 install; cd .." +``` + +This is taken from the [centos7-dev](https://github.com/Icinga/icinga-vagrant/tree/master/centos7-dev) Vagrant box. + + +The source installation doesn't set proper permissions, this is +handled in the package builds which are officially supported. + +```bash +chown -R icinga:icinga /usr/local/icinga2/var/ + +/usr/local/icinga2/lib/icinga2/prepare-dirs /usr/local/icinga2/etc/sysconfig/icinga2 +/usr/local/icinga2/sbin/icinga2 api setup +vim /usr/local/icinga2/etc/icinga2/conf.d/api-users.conf + +/usr/local/icinga2/lib/icinga2/sbin/icinga2 daemon +``` + +#### Debian 10 + +Debian Buster doesn't need updated Boost packages from packages.icinga.com, +the distribution already provides 1.66+. For older versions such as Stretch, +include the release repository for packages.icinga.com as shown in the [setup instructions](02-installation.md). 
+ +```bash +docker run -ti debian:buster bash + +apt-get update +apt-get -y install apt-transport-https wget gnupg + +apt-get -y install gdb vim git cmake make ccache build-essential libssl-dev bison flex default-libmysqlclient-dev libpq-dev libedit-dev monitoring-plugins +apt-get -y install libboost-all-dev +``` + +```bash +ln -s /usr/bin/ccache /usr/local/bin/gcc +ln -s /usr/bin/ccache /usr/local/bin/g++ + +groupadd icinga +groupadd icingacmd +useradd -c "icinga" -s /sbin/nologin -G icingacmd -g icinga icinga + +git clone https://github.com/icinga/icinga2.git && cd icinga2 + +mkdir debug release + +export I2_DEB="-DBoost_NO_BOOST_CMAKE=TRUE -DBoost_NO_SYSTEM_PATHS=TRUE -DBOOST_LIBRARYDIR=/usr/lib/x86_64-linux-gnu -DBOOST_INCLUDEDIR=/usr/include -DCMAKE_INSTALL_RPATH=/usr/lib/x86_64-linux-gnu" +export I2_GENERIC="-DCMAKE_INSTALL_PREFIX=/usr/local/icinga2 -DICINGA2_PLUGINDIR=/usr/local/sbin" +export I2_DEBUG="$I2_DEB $I2_GENERIC -DCMAKE_BUILD_TYPE=Debug -DICINGA2_UNITY_BUILD=OFF" + +cd debug +cmake .. $I2_DEBUG +cd .. + +make -j2 install -C debug +``` + + +The source installation doesn't set proper permissions, this is +handled in the package builds which are officially supported. + +```bash +chown -R icinga:icinga /usr/local/icinga2/var/ + +/usr/local/icinga2/lib/icinga2/prepare-dirs /usr/local/icinga2/etc/sysconfig/icinga2 +/usr/local/icinga2/sbin/icinga2 api setup +vim /usr/local/icinga2/etc/icinga2/conf.d/api-users.conf + +/usr/local/icinga2/lib/icinga2/sbin/icinga2 daemon +``` + + +#### Ubuntu 18 Bionic + +Requires Boost packages from packages.icinga.com. + +```bash +docker run -ti ubuntu:bionic bash + +apt-get update +apt-get -y install apt-transport-https wget gnupg + +wget -O - https://packages.icinga.com/icinga.key | apt-key add - + +. /etc/os-release; if [ ! 
-z ${UBUNTU_CODENAME+x} ]; then DIST="${UBUNTU_CODENAME}"; else DIST="$(lsb_release -c| awk '{print $2}')"; fi; \ + echo "deb https://packages.icinga.com/ubuntu icinga-${DIST} main" > \ + /etc/apt/sources.list.d/${DIST}-icinga.list + echo "deb-src https://packages.icinga.com/ubuntu icinga-${DIST} main" >> \ + /etc/apt/sources.list.d/${DIST}-icinga.list + +apt-get update +``` + +```bash +apt-get -y install gdb vim git cmake make ccache build-essential libssl-dev bison flex default-libmysqlclient-dev libpq-dev libedit-dev monitoring-plugins + +apt-get install -y libboost1.67-icinga-all-dev + +ln -s /usr/bin/ccache /usr/local/bin/gcc +ln -s /usr/bin/ccache /usr/local/bin/g++ + +groupadd icinga +groupadd icingacmd +useradd -c "icinga" -s /sbin/nologin -G icingacmd -g icinga icinga + +git clone https://github.com/icinga/icinga2.git && cd icinga2 + +mkdir debug release + +export I2_DEB="-DBoost_NO_BOOST_CMAKE=TRUE -DBoost_NO_SYSTEM_PATHS=TRUE -DBOOST_LIBRARYDIR=/usr/lib/x86_64-linux-gnu/icinga-boost -DBOOST_INCLUDEDIR=/usr/include/icinga-boost -DCMAKE_INSTALL_RPATH=/usr/lib/x86_64-linux-gnu/icinga-boost" +export I2_GENERIC="-DCMAKE_INSTALL_PREFIX=/usr/local/icinga2 -DICINGA2_PLUGINDIR=/usr/local/sbin" +export I2_DEBUG="$I2_DEB $I2_GENERIC -DCMAKE_BUILD_TYPE=Debug -DICINGA2_UNITY_BUILD=OFF" + +cd debug +cmake .. $I2_DEBUG +cd .. +``` + +```bash +make -j2 install -C debug +``` + +The source installation doesn't set proper permissions, this is +handled in the package builds which are officially supported. + +```bash +chown -R icinga:icinga /usr/local/icinga2/var/ + +/usr/local/icinga2/lib/icinga2/prepare-dirs /usr/local/icinga2/etc/sysconfig/icinga2 +/usr/local/icinga2/sbin/icinga2 api setup +vim /usr/local/icinga2/etc/icinga2/conf.d/api-users.conf + +/usr/local/icinga2/lib/icinga2/sbin/icinga2 daemon +``` + +### macOS Dev Environment + +It is advised to use Homebrew to install required build dependencies. 
+Macports have been reported to work as well, typically you'll get more help +with Homebrew from Icinga developers. + +The idea is to run Icinga with the current user, avoiding root permissions. +This requires at least v2.11. + +> **Note** +> +> This is a pure development setup for Icinga developers reducing the compile +> time in contrast to VMs. There are no packages, startup scripts or dependency management involved. +> +> **macOS agents are not officially supported.** +> +> macOS uses its own TLS implementation, Icinga relies on extra OpenSSL packages +> requiring updates apart from vendor security updates. + +#### Requirements + +Explicitly use OpenSSL 1.1.x, older versions are out of support. + +```bash +brew install ccache boost cmake bison flex openssl@1.1 mysql-connector-c++ postgresql libpq +``` + +##### ccache + +```bash +sudo mkdir /opt/ccache + +sudo ln -s `which ccache` /opt/ccache/clang +sudo ln -s `which ccache` /opt/ccache/clang++ + +vim $HOME/.bash_profile + +# ccache is managed with symlinks to avoid collision with cgo +export PATH="/opt/ccache:$PATH" + +source $HOME/.bash_profile +``` + +#### Builds + +Icinga is built as release (optimized build for packages) and debug (more symbols and details for debugging). Debug builds +typically run slower than release builds and must not be used for performance benchmarks. + +The preferred installation prefix is `/usr/local/icinga/icinga2`. This allows to put e.g. Icinga Web 2 into the `/usr/local/icinga` directory as well. 
+ +```bash +mkdir -p release debug + +export I2_USER=$(id -u -n) +export I2_GROUP=$(id -g -n) +export I2_GENERIC="-DCMAKE_INSTALL_PREFIX=/usr/local/icinga/icinga2 -DICINGA2_USER=$I2_USER -DICINGA2_GROUP=$I2_GROUP -DOPENSSL_INCLUDE_DIR=/usr/local/opt/openssl@1.1/include -DOPENSSL_SSL_LIBRARY=/usr/local/opt/openssl@1.1/lib/libssl.dylib -DOPENSSL_CRYPTO_LIBRARY=/usr/local/opt/openssl@1.1/lib/libcrypto.dylib -DICINGA2_PLUGINDIR=/usr/local/sbin -DICINGA2_WITH_PGSQL=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +export I2_DEBUG="-DCMAKE_BUILD_TYPE=Debug -DICINGA2_UNITY_BUILD=OFF $I2_GENERIC" +export I2_RELEASE="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DICINGA2_WITH_TESTS=ON -DICINGA2_UNITY_BUILD=ON $I2_GENERIC" + +cd debug +cmake $I2_DEBUG .. +cd .. + +make -j4 -C debug +make -j4 install -C debug +``` + +In order to run Icinga without any path prefix, and also use Bash completion it is advised to source additional +things into the local dev environment. + +```bash +export PATH=/usr/local/icinga/icinga2/sbin/:$PATH + +test -f /usr/local/icinga/icinga2/etc/bash_completion.d/icinga2 && source /usr/local/icinga/icinga2/etc/bash_completion.d/icinga2 +``` + +##### Build Aliases + +This is derived from [dnsmichi's flavour](https://github.com/dnsmichi/dotfiles) and not generally best practice. 
+ +```bash +vim $HOME/.bash_profile + +export I2_USER=$(id -u -n) +export I2_GROUP=$(id -g -n) +export I2_GENERIC="-DCMAKE_INSTALL_PREFIX=/usr/local/icinga/icinga2 -DICINGA2_USER=$I2_USER -DICINGA2_GROUP=$I2_GROUP -DOPENSSL_INCLUDE_DIR=/usr/local/opt/openssl@1.1/include -DOPENSSL_SSL_LIBRARY=/usr/local/opt/openssl@1.1/lib/libssl.dylib -DOPENSSL_CRYPTO_LIBRARY=/usr/local/opt/openssl@1.1/lib/libcrypto.dylib -DICINGA2_PLUGINDIR=/usr/local/sbin -DICINGA2_WITH_PGSQL=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" + +export I2_DEBUG="-DCMAKE_BUILD_TYPE=Debug -DICINGA2_UNITY_BUILD=OFF $I2_GENERIC" +export I2_RELEASE="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DICINGA2_WITH_TESTS=ON -DICINGA2_UNITY_BUILD=ON $I2_GENERIC" + +alias i2_debug="mkdir -p debug; cd debug; cmake $I2_DEBUG ..; make -j4; make -j4 install; cd .." +alias i2_release="mkdir -p release; cd release; cmake $I2_RELEASE ..; make -j4; make -j4 install; cd .." + +export PATH=/usr/local/icinga/icinga2/sbin/:$PATH +test -f /usr/local/icinga/icinga2/etc/bash_completion.d/icinga2 && source /usr/local/icinga/icinga2/etc/bash_completion.d/icinga2 + + +source $HOME/.bash_profile +``` + +#### Permissions + +`make install` doesn't set all required permissions, override this. + +```bash +chown -R $I2_USER:$I2_GROUP /usr/local/icinga/icinga2 +``` + +#### Run + +Start Icinga in foreground. + +```bash +icinga2 daemon +``` + +Reloads triggered with HUP or cluster syncs just put the process into background. 
+ +#### Plugins + +```bash +brew install monitoring-plugins + +sudo vim /usr/local/icinga/icinga2/etc/icinga2/constants.conf +``` + +``` +const PluginDir = "/usr/local/sbin" +``` + +#### Backends: Redis + +```bash +brew install redis +brew services start redis +``` + +#### Databases: MariaDB + +```bash +brew install mariadb +mkdir -p /usr/local/etc/my.cnf.d +brew services start mariadb + +mysql_secure_installation +``` + +``` +vim $HOME/.my.cnf + +[client] +user = root +password = supersecurerootpassword + +sudo -i +ln -s /Users/michi/.my.cnf $HOME/.my.cnf +exit +``` + +```bash +mysql -e 'create database icinga;' +mysql -e "grant all on icinga.* to 'icinga'@'localhost' identified by 'icinga';" +mysql icinga < $HOME/dev/icinga/icinga2/lib/db_ido_mysql/schema/mysql.sql +``` + +#### API + +```bash +icinga2 api setup +cd /usr/local/icinga/icinga2/var/lib/icinga2/certs +HOST_NAME=mbpmif.int.netways.de +icinga2 pki new-cert --cn ${HOST_NAME} --csr ${HOST_NAME}.csr --key ${HOST_NAME}.key +icinga2 pki sign-csr --csr ${HOST_NAME}.csr --cert ${HOST_NAME}.crt +echo "const NodeName = \"${HOST_NAME}\"" >> /usr/local/icinga/icinga2/etc/icinga2/constants.conf +``` + +#### Web + +While it is recommended to use Docker or the Icinga Web 2 development VM pointing to the shared IDO database resource/REST API, you can also install it locally on macOS. + +The required steps are described in [this script](https://github.com/dnsmichi/dotfiles/blob/master/icingaweb2.sh). + + + +### Windows Dev Environment + +The following sections explain how to setup the required build tools +and how to run and debug the code. + +#### TL;DR + +If you're going to setup a dev environment on a fresh Windows machine +and don't care for the details, + +1. ensure there are 35 GB free space on C: +2. run the following in an administrative Powershell: + 1. `Enable-WindowsOptionalFeature -FeatureName "NetFx3" -Online` + (reboot when asked!) + 2. 
`powershell -NoProfile -ExecutionPolicy Bypass -Command "Invoke-Expression (New-Object Net.WebClient).DownloadString('https://raw.githubusercontent.com/Icinga/icinga2/master/doc/win-dev.ps1')"` + (will take some time) + +This installs everything needed for cloning and building Icinga 2 +on the command line (Powershell) as follows: + +(Don't forget to open a new Powershell window +to be able to use the newly installed Git.) + +``` +git clone https://github.com/Icinga/icinga2.git +cd .\icinga2\ +mkdir build +cd .\build\ + +& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" ` + -DBoost_INCLUDE_DIR=C:\local\boost_1_80_0-Win64 ` + -DBISON_EXECUTABLE=C:\ProgramData\chocolatey\lib\winflexbison3\tools\win_bison.exe ` + -DFLEX_EXECUTABLE=C:\ProgramData\chocolatey\lib\winflexbison3\tools\win_flex.exe ` + -DICINGA2_WITH_MYSQL=OFF -DICINGA2_WITH_PGSQL=OFF .. + +& "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe" .\icinga2.sln +``` + +Building icinga2.sln via Visual Studio itself seems to require a reboot +after installing the build tools and building once via command line. 
+ +#### Chocolatey + +Open an administrative command prompt (Win key, type “cmd”, right-click and “run as administrator”) and paste the following instructions: + +``` +@powershell -NoProfile -ExecutionPolicy Bypass -Command "iex ((new-object net.webclient).DownloadString('https://chocolatey.org/install.ps1'))" && SET PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin +``` + +#### Git, Posh and Vim + +In case you are used to `vim`, start a new administrative Powershell: + +``` +choco install -y vim +``` + +The same applies for Git integration in Powershell: + +``` +choco install -y poshgit +``` + +![Powershell Posh Git](images/development/windows_powershell_posh_git.png) + +In order to fix the colors for commands like `git status` or `git diff`, +edit `$HOME/.gitconfig` in your Powershell and add the following lines: + +``` +vim $HOME/.gitconfig + +[color "status"] + changed = cyan bold + untracked = yellow bold + added = green bold + branch = cyan bold + unmerged = red bold + +[color "diff"] + frag = cyan + new = green bold + commit = yellow + old = red white + +[color "branch"] + current = yellow reverse + local = yellow + remote = green bold + remote = red bold +``` + +#### Visual Studio + +Thanks to Microsoft they’ll now provide their Professional Edition of Visual Studio +as community version, free for use for open source projects such as Icinga. +The installation requires ~9GB disk space. [Download](https://www.visualstudio.com/downloads/) +the web installer and start the installation. + +Note: Only Visual Studio 2019 is covered here. Older versions are not supported. + +You need a free Microsoft account to download and also store your preferences. 
+ +Install the following complete workloads: + +* C++ Desktop Development +* .NET Desktop Development + +In addition also choose these individual components on Visual Studio: + +* .NET + * .NET Framework 4.x targeting packs + * .NET Framework 4.x.y SDKs +* Code tools + * Git for Windows + * GitHub Extension for Visual Studio + * NuGet package manager +* Compilers, build tools and runtimes + * C# and Visual Basic Roslyn compilers + * C++ 2019 Redistributable Update + * C++ CMake tools for Windows + * C++/CLI Support for v142 build tools (14.22) + * MSBuild + * MSVC v142 - VS 2019 C++ x64/x86 build tools (v14.22) +* Debugging and testing + * .NET profiling tools + * C++ profiling tools + * Just-in-Time debugger +* Development activities + * C# and Visual Basic + * C++ core features + * IntelliCode + * Live Share +* Games and Graphics + * Graphics debugger and GPU profiler for DirectX (required by C++ profiling tools) +* SDKs, libraries and frameworks + * Windows 10 SDK (10.0.18362.0 or later) + * Windows Universal C Runtime + +![Visual Studio Installer](images/development/windows_visual_studio_installer_01.png) +![Visual Studio Installer](images/development/windows_visual_studio_installer_02.png) +![Visual Studio Installer](images/development/windows_visual_studio_installer_03.png) + +After a while, Visual Studio will be ready. + +##### Style Guide for Visual Studio + +Navigate into `Tools > Options > Text Editor` and repeat the following for + +- C++ +- C# + +Navigate into `Tabs` and set: + +- Indenting: Smart (default) +- Tab size: 4 +- Indent size: 4 +- Keep tabs (instead of spaces) + +![Visual Studio Tabs](images/development/windows_visual_studio_tabs_c++.png) + + +#### Flex and Bison + +Install it using [chocolatey](https://www.wireshark.org/docs/wsdg_html_chunked/ChSetupWin32.html): + +``` +choco install -y winflexbison +``` + +Chocolatey installs these tools into the hidden directory `C:\ProgramData\chocolatey\lib\winflexbison\tools`. 
+ +#### OpenSSL + +Icinga 2 requires the OpenSSL library. [Download](https://slproweb.com/products/Win32OpenSSL.html) the Win64 package +and install it into `c:\local\OpenSSL-Win64`. + +Once asked for `Copy OpenSSLs DLLs to` select `The Windows system directory`. That way CMake/Visual Studio +will automatically detect them for builds and packaging. + +> **Note** +> +> We cannot use the chocolatey package as this one does not provide any development headers. +> +> Choose 1.1.1 LTS from manual downloads for best compatibility. + +#### Boost + +Icinga needs the development header and library files from the Boost library. + +Visual Studio translates into the following compiler versions: + +- `msvc-14.2` = Visual Studio 2019 + +##### Pre-built Binaries + +Prefer the pre-built package over self-compiling, if the newest version already exists. + +Download the [boost-binaries](https://sourceforge.net/projects/boost/files/boost-binaries/) for + +- msvc-14.2 is Visual Studio 2019 +- 64 for 64 bit builds + +``` +https://sourceforge.net/projects/boost/files/boost-binaries/1.80.0/boost_1_80_0-msvc-14.2-64.exe/download +``` + +Run the installer and leave the default installation path in `C:\local\boost_1_80_0`. + + +##### Source & Compile + +In order to use the boost development header and library files you need to [download](https://www.boost.org/users/download/) +Boost and then extract it to e.g. `C:\local\boost_1_80_0`. + +> **Note** +> +> Just use `C:\local`, the zip file already contains the sub folder. Extraction takes a while, +> the archive contains more than 70k files. + +In order to integrate Boost into Visual Studio, open the `Developer Command Prompt` from the start menu, +and navigate to `C:\local\boost_1_80_0`. + +Execute `bootstrap.bat` first. + +``` +cd C:\local\boost_1_80_0 +bootstrap.bat +``` + +Once finished, specify the required `toolset` to compile boost against Visual Studio. +This takes quite some time in a Windows VM. 
Boost Context uses Assembler code, +which isn't treated as exception safe by the VS compiler. Therefore set the +additional compilation flag according to [this entry](https://lists.boost.org/Archives/boost/2015/08/224570.php). + +``` +b2 --toolset=msvc-14.2 link=static threading=multi runtime-link=static address-model=64 asmflags=\safeseh +``` + +![Windows Boost Build in VS Development Console](images/development/windows_boost_build_dev_cmd.png) + +#### TortoiseGit + +TortoiseGit provides a graphical integration into the Windows explorer. This makes it easier to checkout, commit +and whatnot. + +[Download](https://tortoisegit.org/download/) TortoiseGit on your system. + +In order to clone via Git SSH you also need to create a new directory called `.ssh` +inside your user's home directory. +Therefore open a command prompt (win key, type `cmd`, enter) and run `mkdir .ssh`. +Add your `id_rsa` private key and `id_rsa.pub` public key files into that directory. + +Start the setup routine and choose `OpenSSH` as default secure transport when asked. + +Open a Windows Explorer window and navigate into + +``` +cd %HOMEPATH%\source\repos +``` + +Right click and select `Git Clone` from the context menu. + +Use `ssh://git@github.com/icinga/icinga2.git` for SSH clones, `https://github.com/icinga/icinga2.git` otherwise. + +#### Packages + +CMake uses CPack and NSIS to create the setup executable including all binaries and libraries +in addition to setup dialogues and configuration. Therefore we’ll need to install [NSIS](http://nsis.sourceforge.net/Download) +first. + +We also need to install the Windows Installer XML (WIX) toolset. This has .NET 3.5 as a dependency which might need a +reboot of the system which is not handled properly by Chocolatey. Therefore install it first and reboot when asked. + +``` +Enable-WindowsOptionalFeature -FeatureName "NetFx3" -Online +choco install -y wixtoolset +``` + +#### CMake + +Icinga 2 uses CMake to manage the build environment. 
You can generate the Visual Studio project files +using CMake. [Download](https://cmake.org/download/) and install CMake. Select to add it to PATH for all users +when asked. + +> **Note** +> +> In order to properly detect the Boost libraries and VS 2019, install CMake 3.15.2+. +> +> **Tip** +> +> Cheatsheet: https://www.brianlheim.com/2018/04/09/cmake-cheat-sheet.html + +Once setup is completed, open a command prompt and navigate to + +``` +cd %HOMEPATH%\source\repos +``` + +Build Icinga with specific CMake variables. This generates a new Visual Studio project file called `icinga2.sln`. + +Visual Studio translates into the following: + +- `msvc-14.2` = Visual Studio 2019 + +You need to specify the previously installed component paths. + +Variable | Value | Description +----------------------|----------------------------------------------------------------------|------------------------------------------------------- +`BOOST_ROOT` | `C:\local\boost_1_80_0` | Root path where you've extracted and compiled Boost. +`BOOST_LIBRARYDIR` | Binary: `C:\local\boost_1_80_0\lib64-msvc-14.2`, Source: `C:\local\boost_1_80_0\stage` | Path to the static compiled Boost libraries, directory must contain `lib`. +`BISON_EXECUTABLE` | `C:\ProgramData\chocolatey\lib\winflexbison\tools\win_bison.exe` | Path to the Bison executable. +`FLEX_EXECUTABLE` | `C:\ProgramData\chocolatey\lib\winflexbison\tools\win_flex.exe` | Path to the Flex executable. +`ICINGA2_WITH_MYSQL` | OFF | Requires extra setup for MySQL if set to `ON`. Not supported for client setups. +`ICINGA2_WITH_PGSQL` | OFF | Requires extra setup for PgSQL if set to `ON`. Not supported for client setups. +`ICINGA2_UNITY_BUILD` | OFF | Disable unity builds for development environments. + +Tip: If you have previously opened a terminal, run `refreshenv` to re-read updated PATH variables. + +##### Build Scripts + +Icinga provides the build scripts inside the Git repository. 
+ +Open a new Powershell and navigate into the cloned Git repository. Set +specific environment variables and run the build scripts. + +``` +cd %HOMEPATH%\source\repos\icinga2 + +.\tools\win32\configure-dev.ps1 +.\tools\win32\build.ps1 +.\tools\win32\test.ps1 +``` + +The debug MSI package is located in the `debug` directory. + +If you did not follow the above steps with Boost binaries and OpenSSL +paths, you can still modify the environment variables. + +``` +$env:CMAKE_GENERATOR='Visual Studio 16 2019' +$env:CMAKE_GENERATOR_PLATFORM='x64' + +$env:ICINGA2_INSTALLPATH = 'C:\Program Files\Icinga2-debug' +$env:ICINGA2_BUILDPATH='debug' +$env:CMAKE_BUILD_TYPE='Debug' +$env:OPENSSL_ROOT_DIR='C:\OpenSSL-Win64' +$env:BOOST_ROOT='C:\local\boost_1_80_0' +$env:BOOST_LIBRARYDIR='C:\local\boost_1_80_0\lib64-msvc-14.2' +``` + +#### Icinga 2 in Visual Studio + +This requires running the configure script once. + +Navigate to + +``` +cd %HOMEPATH%\source\repos\icinga2\debug +``` + +Open `icinga2.sln`. Log into Visual Studio when asked. + +On the right panel, select to build the `Bin/icinga-app` solution. + +The executable binaries are located in `Bin\Release\Debug` in your `icinga2` +project directory. + +Navigate there and run `icinga2.exe --version`. + +``` +cd %HOMEPATH%\source\repos\icinga2\Bin\Release\Debug +icinga2.exe --version +``` + + +#### Release Package + +This is part of the build process script. Override the build type and pick a different +build directory. + +``` +cd %HOMEPATH%\source\repos\icinga2 + +$env:ICINGA2_BUILDPATH='release' +$env:CMAKE_BUILD_TYPE='RelWithDebInfo' + +.\tools\win32\configure-dev.ps1 +.\tools\win32\build.ps1 +.\tools\win32\test.ps1 +``` + +The release MSI package is located in the `release` directory. + + +### Embedded Dev Env: Pi + +> **Note** +> +> This isn't officially supported yet, just a few hints how you can do it yourself. + +The following examples source from armhf on Raspberry Pi. 
+ +#### ccache + +```bash +apt install -y ccache + +/usr/sbin/update-ccache-symlinks + +echo 'export PATH="/usr/lib/ccache:$PATH"' | tee -a ~/.bashrc + +source ~/.bashrc && echo $PATH +``` + +#### Build + +Copy the icinga2 source code into `$HOME/icinga2`. Clone the `deb-icinga2` repository into `debian/`. + +```bash +git clone https://github.com/Icinga/icinga2 $HOME/icinga2 +git clone https://github.com/Icinga/deb-icinga2 $HOME/icinga2/debian +``` + +Then build a Debian package and install it like normal. + +```bash +dpkg-buildpackage -uc -us +``` + +## Package Builds + +This documentation is explicitly meant for packagers and the Icinga +build infrastructure. + +The following requirements need to be fulfilled in order to build the +Icinga application using a dist tarball (including notes for distributions): + +* cmake >= 2.6 +* GNU make (make) or ninja-build +* C++ compiler which supports C++11 + * RHEL/Fedora/SUSE: gcc-c++ >= 4.7 (extra Developer Tools on RHEL5/6 see below) + * Debian/Ubuntu: build-essential + * Alpine: build-base + * you can also use clang++ +* pkg-config +* OpenSSL library and header files >= 1.0.1 + * RHEL/Fedora: openssl-devel + * SUSE: libopenssl-devel + * Debian/Ubuntu: libssl-dev + * Alpine: libressl-dev +* Boost library and header files >= 1.66.0 + * RHEL/Fedora: boost166-devel + * Debian/Ubuntu: libboost-all-dev + * Alpine: boost-dev +* GNU bison (bison) +* GNU flex (flex) >= 2.5.35 +* systemd headers + * Only required when using systemd + * Debian/Ubuntu: libsystemd-dev + * RHEL/Fedora: systemd-devel + +### Optional features + +* MySQL (disable with CMake variable `ICINGA2_WITH_MYSQL` to `OFF`) + * RHEL/Fedora: mysql-devel + * SUSE: libmysqlclient-devel + * Debian/Ubuntu: default-libmysqlclient-dev | libmysqlclient-dev + * Alpine: mariadb-dev +* PostgreSQL (disable with CMake variable `ICINGA2_WITH_PGSQL` to `OFF`) + * RHEL/Fedora: postgresql-devel + * Debian/Ubuntu: libpq-dev + * postgresql-dev on Alpine +* libedit (CLI console) + * 
RHEL/Fedora: libedit-devel on CentOS (RHEL requires rhel-7-server-optional-rpms) + * Debian/Ubuntu/Alpine: libedit-dev +* Termcap (only required if libedit doesn't already link against termcap/ncurses) + * RHEL/Fedora: libtermcap-devel + * Debian/Ubuntu: (not necessary) + +### Special requirements + +**FreeBSD**: libexecinfo (automatically used when Icinga 2 is installed via port or package) + +**RHEL6**: Requires a newer boost version which is available on packages.icinga.com +with a version suffixed name. + +### Runtime user environment + +By default Icinga will run as user `icinga` and group `icinga`. Additionally the +external command pipe and livestatus features require a dedicated command group +`icingacmd`. You can choose your own user/group names and pass them to CMake +using the `ICINGA2_USER`, `ICINGA2_GROUP` and `ICINGA2_COMMAND_GROUP` variables. + +```bash +groupadd icinga +groupadd icingacmd +useradd -c "icinga" -s /sbin/nologin -G icingacmd -g icinga icinga +``` + +On Alpine (which uses ash busybox) you can run: + +```bash +addgroup -S icinga +addgroup -S icingacmd +adduser -S -D -H -h /var/spool/icinga2 -s /sbin/nologin -G icinga -g icinga icinga +adduser icinga icingacmd +``` + +Add the web server user to the icingacmd group in order to grant it write +permissions to the external command pipe and livestatus socket: + +```bash +usermod -a -G icingacmd www-data +``` + +Make sure to replace "www-data" with the name of the user your web server +is running as. + +### Building Icinga 2: Example + +Once you have installed all the necessary build requirements you can build +Icinga 2 using the following commands: + +```bash +mkdir release && cd release +cmake .. +cd .. +make -C release +make install -C release +``` + +You can specify an alternative installation prefix using `-DCMAKE_INSTALL_PREFIX`: + +```bash +cmake .. 
-DCMAKE_INSTALL_PREFIX=/tmp/icinga2 +``` + +### CMake Variables + +In addition to `CMAKE_INSTALL_PREFIX` here are most of the supported Icinga-specific cmake variables. + +For all variables regarding default paths in CMake, see +[GNUInstallDirs](https://cmake.org/cmake/help/latest/module/GNUInstallDirs.html). + +Also see `CMakeLists.txt` for details. + +#### System Environment + +* `CMAKE_INSTALL_SYSCONFDIR`: The configuration directory; defaults to `CMAKE_INSTALL_PREFIX/etc` +* `CMAKE_INSTALL_LOCALSTATEDIR`: The state directory; defaults to `CMAKE_INSTALL_PREFIX/var` +* `ICINGA2_CONFIGDIR`: Main config directory; defaults to `CMAKE_INSTALL_SYSCONFDIR/icinga2` usually `/etc/icinga2` +* `ICINGA2_CACHEDIR`: Directory for cache files; defaults to `CMAKE_INSTALL_LOCALSTATEDIR/cache/icinga2` usually `/var/cache/icinga2` +* `ICINGA2_DATADIR`: Data directory for the daemon; defaults to `CMAKE_INSTALL_LOCALSTATEDIR/lib/icinga2` usually `/var/lib/icinga2` +* `ICINGA2_LOGDIR`: Logfiles of the daemon; defaults to `CMAKE_INSTALL_LOCALSTATEDIR/log/icinga2` usually `/var/log/icinga2` +* `ICINGA2_SPOOLDIR`: Spooling directory; defaults to `CMAKE_INSTALL_LOCALSTATEDIR/spool/icinga2` usually `/var/spool/icinga2` +* `ICINGA2_INITRUNDIR`: Runtime data for the init system; defaults to `CMAKE_INSTALL_LOCALSTATEDIR/run/icinga2` usually `/run/icinga2` +* `ICINGA2_GIT_VERSION_INFO`: Whether to use Git to determine the version number; defaults to `ON` +* `ICINGA2_USER`: The user Icinga 2 should run as; defaults to `icinga` +* `ICINGA2_GROUP`: The group Icinga 2 should run as; defaults to `icinga` +* `ICINGA2_COMMAND_GROUP`: The command group Icinga 2 should use; defaults to `icingacmd` +* `ICINGA2_SYSCONFIGFILE`: Where to put the config file the initscript/systemd pulls its dirs from; +  defaults to `CMAKE_INSTALL_PREFIX/etc/sysconfig/icinga2` +* `ICINGA2_PLUGINDIR`: The path for the Monitoring Plugins project binaries; defaults to `/usr/lib/nagios/plugins` + +#### Build Optimization 
+ +* `ICINGA2_UNITY_BUILD`: Whether to perform a unity build; defaults to `ON`. Note: This requires additional memory and is not advised for building VMs, Docker for Mac and embedded hardware. +* `ICINGA2_LTO_BUILD`: Whether to use link time optimization (LTO); defaults to `OFF` + +#### Init System + +* `USE_SYSTEMD=ON|OFF`: Use systemd or a classic SysV initscript; defaults to `OFF` +* `INSTALL_SYSTEMD_SERVICE_AND_INITSCRIPT=ON|OFF`: Force install both the systemd service definition file + and the SysV initscript in parallel, regardless of how `USE_SYSTEMD` is set. + Only use this for special packaging purposes and if you know what you are doing. + Defaults to `OFF`. + +#### Features + +* `ICINGA2_WITH_CHECKER`: Determines whether the checker module is built; defaults to `ON` +* `ICINGA2_WITH_COMPAT`: Determines whether the compat module is built; defaults to `ON` +* `ICINGA2_WITH_LIVESTATUS`: Determines whether the Livestatus module is built; defaults to `ON` +* `ICINGA2_WITH_NOTIFICATION`: Determines whether the notification module is built; defaults to `ON` +* `ICINGA2_WITH_PERFDATA`: Determines whether the perfdata module is built; defaults to `ON` +* `ICINGA2_WITH_TESTS`: Determines whether the unit tests are built; defaults to `ON` + +#### MySQL or MariaDB + +The following settings can be tuned for the MySQL / MariaDB IDO feature. + +* `ICINGA2_WITH_MYSQL`: Determines whether the MySQL IDO module is built; defaults to `ON` +* `MYSQL_CLIENT_LIBS`: Client implementation used (mysqlclient / mariadbclient); by default searches for `mysqlclient` and `mariadbclient` +* `MYSQL_INCLUDE_DIR`: Directory containing include files for the mysqlclient; empty by default, + checking multiple paths like `/usr/include/mysql` + +See [FindMySQL.cmake](https://github.com/Icinga/icinga2/blob/master/third-party/cmake/FindMySQL.cmake) +for implementation details. + +#### PostgreSQL + +The following settings can be tuned for the PostgreSQL IDO feature. 
+ +* `ICINGA2_WITH_PGSQL`: Determines whether the PostgreSQL IDO module is built; defaults to `ON` +* `PostgreSQL_INCLUDE_DIR`: Top-level directory containing the PostgreSQL include directories +* `PostgreSQL_LIBRARY`: File path to PostgreSQL library : libpq.so (or libpq.so.[ver] file) + +See [FindPostgreSQL.cmake](https://github.com/Icinga/icinga2/blob/master/third-party/cmake/FindPostgreSQL.cmake) +for implementation details. + +#### Version detection + +CMake determines the Icinga 2 version number using `git describe` if the +source directory is contained in a Git repository. Otherwise the version number +is extracted from the [ICINGA2_VERSION](ICINGA2_VERSION) file. This behavior can be +overridden by creating a file called `icinga-version.h.force` in the source +directory. Alternatively the `-DICINGA2_GIT_VERSION_INFO=OFF` option for CMake +can be used to disable the usage of `git describe`. + + +### Building RPMs + +#### Build Environment on RHEL, CentOS, Fedora, Amazon Linux + +Setup your build environment: + +```bash +yum -y install rpmdevtools +``` + +#### Build Environment on SuSE/SLES + +SLES: + +```bash +zypper addrepo http://download.opensuse.org/repositories/devel:tools/SLE_12_SP4/devel:tools.repo +zypper refresh +zypper install rpmdevtools spectool +``` + +OpenSuSE: + +```bash +zypper addrepo http://download.opensuse.org/repositories/devel:tools/openSUSE_Leap_15.0/devel:tools.repo +zypper refresh +zypper install rpmdevtools spectool +``` + +#### Package Builds + +Prepare the rpmbuild directory tree: + +```bash +cd $HOME +rpmdev-setuptree +``` + +Snapshot builds: + +```bash +curl https://raw.githubusercontent.com/Icinga/rpm-icinga2/master/icinga2.spec -o $HOME/rpmbuild/SPECS/icinga2.spec +``` + +> **Note** +> +> The above command builds snapshot packages. Change to the `release` branch +> for release package builds. + +Copy the tarball to `rpmbuild/SOURCES` e.g. 
by using the `spectool` binary +provided with `rpmdevtools`: + +```bash +cd $HOME/rpmbuild/SOURCES +spectool -g ../SPECS/icinga2.spec + +cd $HOME/rpmbuild +``` + +Install the build dependencies. Example for CentOS 7: + +```bash +yum -y install libedit-devel ncurses-devel gcc-c++ libstdc++-devel openssl-devel \ +cmake flex bison boost-devel systemd mysql-devel postgresql-devel httpd \ +selinux-policy-devel checkpolicy selinux-policy selinux-policy-doc +``` + +Note: If you are using Amazon Linux, systemd is not required. + +A shorter way is available using the `yum-builddep` command on RHEL based systems: + +```bash +yum-builddep SPECS/icinga2.spec +``` + +Build the RPM: + +```bash +rpmbuild -ba SPECS/icinga2.spec +``` + +#### Additional Hints + +##### SELinux policy module + +The following packages are required to build the SELinux policy module: + +* checkpolicy +* selinux-policy (selinux-policy on CentOS 6, selinux-policy-devel on CentOS 7) +* selinux-policy-doc + +##### RHEL/CentOS 6 + +The RedHat Developer Toolset is required for building Icinga 2 beforehand. +This contains a modern version of flex and a C++ compiler which supports +C++11 features. + +```bash +cat >/etc/yum.repos.d/devtools-2.repo <$HOME/.rpmmacros < + +Setup your build environment on Debian/Ubuntu, copy the 'debian' directory from +the Debian packaging Git repository (https://github.com/Icinga/deb-icinga2) +into your source tree and run the following command: + +```bash +dpkg-buildpackage -uc -us +``` + +### Build Alpine Linux packages + +A simple way to setup a build environment is installing Alpine in a chroot. +In this way, you can set up an Alpine build environment in a chroot under a +different Linux distro. +There is a script that simplifies these steps with just two commands, and +can be found [here](https://github.com/alpinelinux/alpine-chroot-install). 
+ +Once the build environment is installed, you can set up the system to build +the packages by following [this document](https://wiki.alpinelinux.org/wiki/Creating_an_Alpine_package). + +### Build Post Install Tasks + +After building Icinga 2 yourself, your package build system should at least run the following post +install requirements: + +* enable the `checker`, `notification` and `mainlog` features by default +* run `icinga2 api setup` in order to enable the `api` feature and generate TLS certificates for the node + +### Run Icinga 2 + +Icinga 2 comes with a binary that takes care of loading all the relevant +components (e.g. for check execution, notifications, etc.): + +``` +icinga2 daemon + +[2016-12-08 16:44:24 +0100] information/cli: Icinga application loader (version: v2.5.4-231-gb10a6b7; debug) +[2016-12-08 16:44:24 +0100] information/cli: Loading configuration file(s). +[2016-12-08 16:44:25 +0100] information/ConfigItem: Committing config item(s). +... +``` + +#### Init Script + +Icinga 2 can be started as a daemon using the provided init script: + +``` +/etc/init.d/icinga2 +Usage: /etc/init.d/icinga2 {start|stop|restart|reload|checkconfig|status} +``` + +#### Systemd + +If your distribution uses systemd: + +``` +systemctl {start|stop|reload|status|enable|disable} icinga2 +``` + +In case the distribution is running systemd >227, you'll also +need to package and install the `etc/initsystem/icinga2.service.limits.conf` +file into `/etc/systemd/system/icinga2.service.d`. + +#### openrc + +Or if your distribution uses openrc (like Alpine): + +``` +rc-service icinga2 +Usage: /etc/init.d/icinga2 {start|stop|restart|reload|checkconfig|status} +``` + +Note: The openrc init script is not shipped by default. +A working openrc init script can be found here: <https://git.alpinelinux.org/cgit/aports/plain/community/icinga2/icinga2.initd>. If you have customized any paths, edit the file and adjust it according to your setup. 
+Follow these steps to install it: + +```bash +wget https://git.alpinelinux.org/cgit/aports/plain/community/icinga2/icinga2.initd +mv icinga2.initd /etc/init.d/icinga2 +chmod +x /etc/init.d/icinga2 +``` + +Icinga 2 reads a single configuration file which is used to specify all +configuration settings (global settings, hosts, services, etc.). The +configuration format is explained in detail in the [doc/](doc/) directory. + +By default `make install` installs example configuration files in +`/usr/local/etc/icinga2` unless you have specified a different prefix or +sysconfdir. + + +### Windows Builds + +The Windows MSI packages are located at https://packages.icinga.com/windows/ + +The build infrastructure is based on GitLab CI and an Ansible-provisioned +Windows VM running in OpenStack. + +The runner uses the scripts located in `tools/win32` to configure, build +and test the packages. Uploading them to the package repository is a +separate step. For manual package creation, please refer to [this chapter](21-development.md#development-windows-dev-env). + +![Windows build pipeline in GitLab](images/development/windows_builds_gitlab_pipeline.png) + + +## Continuous Integration + +Icinga uses the integrated CI capabilities on GitHub in the development workflow. +This ensures that incoming pull requests and branches are built on create/push events. +Contributors and developers can immediately see whether builds fail or succeed, +which helps with the final reviews. + +* For Linux, we are currently using Travis CI. +* For Windows, AppVeyor has been integrated. + +Future plans involve making use of GitHub Actions. + +In addition to our development platform on GitHub, +we are using GitLab's CI platform to build binary packages for +all supported operating systems and distributions. +These CI pipelines provide even more detailed insights into +specific platform failures and developers can react faster. 
+ +### CI: Travis CI + +[Travis CI](https://travis-ci.org/Icinga/icinga2) provides Ubuntu as base +distribution where Icinga is compiled from sources followed by running the +unit tests and a config validation check. + +For details, please refer to the [.travis.yml](https://github.com/Icinga/icinga2/blob/master/.travis.yml) +configuration file. + +### CI: AppVeyor + +[AppVeyor](https://ci.appveyor.com/project/icinga/icinga2) provides Windows +as platform where Visual Studio and Boost libraries come pre-installed. + +Icinga is built using the Powershell scripts located in `tools/win32`. +In addition to that, the unit tests are run. + +Please check the [appveyor.yml](https://github.com/Icinga/icinga2/blob/master/appveyor.yml) configuration +file for details. + + +## Advanced Development Tips + +### GDB Pretty Printers + +Install the `boost`, `python` and `icinga2` pretty printers. Absolute paths are required, +so please make sure to update the installation paths accordingly (`pwd`). + +```bash +mkdir -p ~/.gdb_printers && cd ~/.gdb_printers +``` + +Boost Pretty Printers compatible with Python 3: + +``` +$ git clone https://github.com/mateidavid/Boost-Pretty-Printer.git && cd Boost-Pretty-Printer +$ git checkout python-3 +$ pwd +/home/michi/.gdb_printers/Boost-Pretty-Printer +``` + +Python Pretty Printers: + +```bash +cd ~/.gdb_printers +svn co svn://gcc.gnu.org/svn/gcc/trunk/libstdc++-v3/python +``` + +Icinga 2 Pretty Printers: + +```bash +mkdir -p ~/.gdb_printers/icinga2 && cd ~/.gdb_printers/icinga2 +wget https://raw.githubusercontent.com/Icinga/icinga2/master/tools/debug/gdb/icingadbg.py +``` + +Now you'll need to modify/setup your `~/.gdbinit` configuration file. +You can download the one from Icinga 2 and modify all paths. 
+ +Example on Fedora 22: + +``` +$ wget https://raw.githubusercontent.com/Icinga/icinga2/master/tools/debug/gdb/gdbinit -O ~/.gdbinit +$ vim ~/.gdbinit + +set print pretty on + +python +import sys +sys.path.insert(0, '/home/michi/.gdb_printers/icinga2') +from icingadbg import register_icinga_printers +register_icinga_printers() +end + +python +import sys +sys.path.insert(0, '/home/michi/.gdb_printers/python') +from libstdcxx.v6.printers import register_libstdcxx_printers +try: + register_libstdcxx_printers(None) +except: + pass +end + +python +import sys +sys.path.insert(0, '/home/michi/.gdb_printers/Boost-Pretty-Printer') +import boost_print +boost_print.register_printers() +end +``` + +If you are getting the following error when running gdb, the `libstdcxx` +printers are already preloaded in your environment and you can remove +the duplicate import in your `~/.gdbinit` file. + +``` +RuntimeError: pretty-printer already registered: libstdc++-v6 +``` diff --git a/doc/22-selinux.md b/doc/22-selinux.md new file mode 100644 index 0000000..6c64c6f --- /dev/null +++ b/doc/22-selinux.md @@ -0,0 +1,312 @@ +# SELinux + +## Introduction + +SELinux is a mandatory access control (MAC) system on Linux which adds a fine-grained permission system for access to all system resources such as files, devices, networks and inter-process communication. + +The most important questions are answered briefly in the [FAQ of the SELinux Project](https://selinuxproject.org/page/FAQ). For more details on SELinux and how to actually use and administrate it on your system have a look at [Red Hat Enterprise Linux 7 - SELinux User's and Administrator's Guide](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/SELinux_Users_and_Administrators_Guide/index.html). For a simplified (and funny) introduction download the [SELinux Coloring Book](https://github.com/mairin/selinux-coloring-book). 
+ +This documentation will use a format similar to the SELinux User's and Administrator's Guide. + +### Policy + +Icinga 2 provides its own SELinux policy. Development target is a policy package for Red Hat Enterprise Linux 7 and derivatives running the targeted policy which confines Icinga 2 with all features and all checks executed. All other distributions will require some tweaks. + +### Installation + +There are two ways of installing the SELinux Policy for Icinga 2 on Enterprise Linux 7. The preferred way is to install the package. The other option involves installing the SELinux policy manually which might be necessary if you need some fixes which haven't made their way into a release yet. + +If the system runs in enforcing mode and you encounter problems you can set Icinga 2's domain to permissive mode. + +``` +# sestatus +SELinux status: enabled +SELinuxfs mount: /sys/fs/selinux +SELinux root directory: /etc/selinux +Loaded policy name: targeted +Current mode: enforcing +Mode from config file: enforcing +Policy MLS status: enabled +Policy deny_unknown status: allowed +Max kernel policy version: 28 +``` + +You can change the configured mode by editing `/etc/selinux/config` and the current mode by executing `setenforce 0`. + +#### Package installation + +Simply add the `icinga2-selinux` package to your installation. + +```bash +yum install icinga2-selinux +``` + +Ensure that the `icinga2` process is running in its own `icinga2_t` domain after installing the policy package: + +``` +# systemctl restart icinga2.service +# ps -eZ | grep icinga2 +system_u:system_r:icinga2_t:s0 2825 ? 00:00:00 icinga2 +``` + +#### Manual installation + +This section describes the installation to support development and testing. It assumes that Icinga 2 is already installed from packages and running on the system. + +As a prerequisite install the `git`, `selinux-policy-devel` and `audit` packages. 
Enable and start the audit daemon afterwards: + +```bash +yum install git selinux-policy-devel audit +systemctl enable auditd.service +systemctl start auditd.service +``` + +After that clone the icinga2 git repository: + +```bash +git clone https://github.com/icinga/icinga2 +``` + +To create and install the policy package run the installation script which also labels the resources. (The script assumes Icinga 2 was started once after system startup, the labeling of the port will only happen once and fail later on.) + +```bash +cd tools/selinux/ +./icinga.sh +``` + +After that restart Icinga 2 and verify it running in its own domain `icinga2_t`. + +``` +# systemctl restart icinga2.service +# ps -eZ | grep icinga2 +system_u:system_r:icinga2_t:s0 2825 ? 00:00:00 icinga2 +``` + +### General + +When the SELinux policy package for Icinga 2 is installed, the Icinga 2 daemon (icinga2) runs in its own domain `icinga2_t` and is separated from other confined services. + +Files have to be labeled correctly in order for Icinga 2 to be able to access them. For example the Icinga 2 log files have to have the `icinga2_log_t` label. Also the API port is labeled with `icinga_port_t`. Furthermore Icinga 2 can open high ports and UNIX sockets to connect to databases and features like Graphite. It executes the Nagios plugins and transitions to their context if those are labeled for example `nagios_services_plugin_exec_t` or `nagios_system_plugin_exec_t`. + +Additionally the Apache web server is allowed to connect to Icinga 2's command pipe in order to allow web interfaces to send commands to icinga2. This will perhaps change later on while investigating Icinga Web 2 for SELinux! + +### Types + +The command pipe is labeled `icinga2_command_t` and other services can request access to it by using the interface `icinga2_send_commands`. + +The nagios plugins use their own contexts and icinga2 will transition to it. 
This means plugins have to be labeled correctly for their required permissions. The plugins installed from package should have set their permissions by the corresponding policy module and you can restore them using `restorecon -R -v /usr/lib64/nagios/plugins/`. To label your own plugins use `chcon -t type /path/to/plugin`, for the type have a look at table below. + +Type | Domain | Use case | Provided by policy package +----------------------------------|------------------------------|------------------------------------------------------------------|--------------------------- +nagios_admin_plugin_exec_t | nagios_admin_plugin_t | Plugins which require require read access on all file attributes | nagios +nagios_checkdisk_plugin_exec_t | nagios_checkdisk_plugin_t | Plugins which require read access to all filesystem attributes | nagios +nagios_mail_plugin_exec_t | nagios_mail_plugin_t | Plugins which access the local mail service | nagios +nagios_services_plugin_exec_t | nagios_services_plugin_t | Plugins monitoring network services | nagios +nagios_system_plugin_exec_t | nagios_system_plugin_t | Plugins checking local system state | nagios +nagios_unconfined_plugin_exec_t | nagios_unconfined_plugin_t | Plugins running without confinement | nagios +nagios_eventhandler_plugin_exec_t | nagios_eventhandler_plugin_t | Eventhandler (actually running unconfined) | nagios +nagios_openshift_plugin_exec_t | nagios_openshift_plugin_t | Plugins monitoring openshift | nagios +nagios_notification_plugin_exec_t | nagios_notification_plugin_t | Notification commands | icinga (will be moved later) + +If one of those plugin domains causes problems you can set it to permissive by executing `semanage permissive -a domain`. + +The policy provides a role `icinga2adm_r` for confining an user which enables an administrative user managing only Icinga 2 on the system. 
This user will also execute the plugins in their domain instead of the users one, so you can verify their execution with the same restrictions like they have when executed by icinga2. + +### Booleans + +SELinux is based on the least level of access required for a service to run. Using booleans you can grant more access in a defined way. The Icinga 2 policy package provides the following booleans. + +**icinga2_can_connect_all** + +Having this boolean enabled allows icinga2 to connect to all ports. This can be necessary if you use features which connect to unconfined services, for example the [influxdb writer](14-features.md#influxdb-writer). + +**icinga2_run_sudo** + +To allow Icinga 2 executing plugins via sudo you can toogle this boolean. It is disabled by default, resulting in error messages like `execvpe(sudo) failed: Permission denied`. + +**httpd_can_write_icinga2_command** + +To allow httpd to write to the command pipe of icinga2 this boolean has to be enabled. This is enabled by default, if not needed you can disable it for more security. + +**httpd_can_connect_icinga2_api** + +Enabling this boolean allows httpd to connect to the API of icinga2 (Ports labeled `icinga2_port_t`). This is enabled by default, if not needed you can disable it for more security. + +### Configuration Examples + +#### Run the icinga2 service permissive + +If problems occur while running the system in enforcing mode and those problems are only caused by the policy of the icinga2 domain, you can set this domain to permissive instead of the complete system. This can be done by executing `semanage permissive -a icinga2_t`. + +Make sure to report the bugs in the policy afterwards. + +#### Confining a plugin + +Download and install a plugin, for example check_mysql_health. 
+ +```bash +wget https://labs.consol.de/download/shinken-nagios-plugins/check_mysql_health-2.1.9.2.tar.gz +tar xvzf check_mysql_health-2.1.9.2.tar.gz +cd check_mysql_health-2.1.9.2/ +./configure --libexecdir /usr/lib64/nagios/plugins +make +make install +``` + +It is labeled `nagios_unconfined_plugins_exec_t` by default, so it runs without restrictions. + +``` +# ls -lZ /usr/lib64/nagios/plugins/check_mysql_health +-rwxr-xr-x. root root system_u:object_r:nagios_unconfined_plugin_exec_t:s0 /usr/lib64/nagios/plugins/check_mysql_health +``` + +In this case the plugin is monitoring a service, so it should be labeled `nagios_services_plugin_exec_t` to restrict its permissions. + +``` +# chcon -t nagios_services_plugin_exec_t /usr/lib64/nagios/plugins/check_mysql_health +# ls -lZ /usr/lib64/nagios/plugins/check_mysql_health +-rwxr-xr-x. root root system_u:object_r:nagios_services_plugin_exec_t:s0 /usr/lib64/nagios/plugins/check_mysql_health +``` + +The plugin still runs fine but if someone changes the script to do weird stuff it will fail to do so. + +#### Allow icinga to connect to all ports. + +You are running graphite on a different port than `2003` and want `icinga2` to connect to it. + +Change the port value for the graphite feature according to your graphite installation before enabling it. + +``` +# cat /etc/icinga2/features-enabled/graphite.conf +/** + * The GraphiteWriter type writes check result metrics and + * performance data to a graphite tcp socket. + */ + +library "perfdata" + +object GraphiteWriter "graphite" { + //host = "127.0.0.1" + //port = 2003 + port = 2004 +} +# icinga2 feature enable graphite +``` + +Before you restart the icinga2 service allow it to connect to all ports by enabling the boolean `icinga2_can_connect_all` (now and permanent). + +```bash +setsebool icinga2_can_connect_all true +setsebool -P icinga2_can_connect_all true +``` + +If you restart the daemon now it will successfully connect to graphite. 
+ +#### Running plugins requiring sudo + +Some plugins require privileged access to the system and are designied to be executed via `sudo` to get these privileges. + +In this case it is the CheckCommand [running_kernel](10-icinga-template-library.md#plugin-contrib-command-running_kernel) which is set to use `sudo`. + + # cat /etc/icinga2/conf.d/services.conf + apply Service "kernel" { + import "generic-service" + + check_command = "running_kernel" + + vars.running_kernel_use_sudo = true + + assign where host.name == NodeName + } + +Having this Service defined will result in a UNKNOWN state and the error message `execvpe(sudo) failed: Permission denied` because SELinux dening the execution. + +Switching the boolean `icinga2_run_sudo` to allow the execution will result in the check executed successfully. + + # setsebool icinga2_run_sudo true + # setsebool -P icinga2_run_sudo true + +#### Confining a user + +If you want to have an administrative account capable of only managing icinga2 and not the complete system, you can restrict the privileges by confining +this user. This is completly optional! + +Start by adding the Icinga 2 administrator role `icinga2adm_r` to the administrative SELinux user `staff_u`. + +```bash +semanage user -m -R "staff_r sysadm_r system_r unconfined_r icinga2adm_r" staff_u +``` + +Confine your user login and create a sudo rule. + +```bash +semanage login -a dirk -s staff_u +echo "dirk ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/dirk +``` + +Login to the system using ssh and verify your id. + +``` +$ id -Z +staff_u:staff_r:staff_t:s0-s0:c0.c1023 +``` + +Try to execute some commands as root using sudo. + +``` +$ sudo id -Z +staff_u:staff_r:staff_t:s0-s0:c0.c1023 +$ sudo vi /etc/icinga2/icinga2.conf +"/etc/icinga2/icinga2.conf" [Permission Denied] +$ sudo cat /var/log/icinga2/icinga2.log +cat: /var/log/icinga2/icinga2.log: Permission denied +$ sudo systemctl reload icinga2.service +Failed to get D-Bus connection: No connection to service manager. 
+``` + +Those commands fail because you only switch to root but do not change your SELinux role. Try again but tell sudo also to switch the SELinux role and type. + +``` +$ sudo -r icinga2adm_r -t icinga2adm_t id -Z +staff_u:icinga2adm_r:icinga2adm_t:s0-s0:c0.c1023 +$ sudo -r icinga2adm_r -t icinga2adm_t vi /etc/icinga2/icinga2.conf +"/etc/icinga2/icinga2.conf" +$ sudo -r icinga2adm_r -t icinga2adm_t cat /var/log/icinga2/icinga2.log +[2015-03-26 20:48:14 +0000] information/DynamicObject: Dumping program state to file '/var/lib/icinga2/icinga2.state' +$ sudo -r icinga2adm_r -t icinga2adm_t systemctl reload icinga2.service +``` + +Now the commands will work, but you have always to remember to add the arguments, so change the sudo rule to set it by default. + +```bash +echo "dirk ALL=(ALL) ROLE=icinga2adm_r TYPE=icinga2adm_t NOPASSWD: ALL" > /etc/sudoers.d/dirk +``` + +Now try the commands again without providing the role and type and they will work, but if you try to read apache logs or restart apache for example it will still fail. + +``` +$ sudo cat /var/log/httpd/error_log +/bin/cat: /var/log/httpd/error_log: Keine Berechtigung +$ sudo systemctl reload httpd.service +Failed to issue method call: Access denied +``` + +## Bugreports + +If you experience any problems while running in enforcing mode try to reproduce it in permissive mode. If the problem persists it is not related to SELinux because in permissive mode SELinux will not deny anything. + +After some feedback Icinga 2 is now running in a enforced domain, but still adds also some rules for other necessary services so no problems should occure at all. But you can help to enhance the policy by testing Icinga 2 running confined by SELinux. + +Please add the following information to [bug reports](https://icinga.com/community/): + +* Versions, configuration snippets, etc. 
+* Output of `semodule -l | grep -e icinga2 -e nagios -e apache` +* Output of `ps -eZ | grep icinga2` +* Output of `semanage port -l | grep icinga2` +* Output of `audit2allow -li /var/log/audit/audit.log` + +If access to a file is blocked and you can tell which one please provided the output of `ls -lZ /path/to/file` (and perhaps the directory above). + +If asked for full audit.log add `-w /etc/shadow -p w` to `/etc/audit/rules.d/audit.rules`, restart the audit daemon, reproduce the problem and add `/var/log/audit/audit.log` to the bug report. With the added audit rule it will include the path of files access was denied to. + +If asked to provide full audit log with dontaudit rules disabled executed `semodule -DB` before reproducing the problem. After that enable the rules again to prevent auditd spamming your logfile by executing `semodule -B`. diff --git a/doc/23-migrating-from-icinga-1x.md b/doc/23-migrating-from-icinga-1x.md new file mode 100644 index 0000000..c9cfa50 --- /dev/null +++ b/doc/23-migrating-from-icinga-1x.md @@ -0,0 +1,1585 @@ +# Migration from Icinga 1.x + +## Configuration Migration + +The Icinga 2 configuration format introduces plenty of behavioural changes. In +order to ease migration from Icinga 1.x, this section provides hints and tips +on your migration requirements. + + +### Automated Config Migration + +Depending on your previous setup, you may have already used different sources +for generating the 1.x configuration files. If this is the case, +we strongly recommend to use these sources in combination with +the [Icinga Director](https://icinga.com/docs/director/latest/doc/01-Introduction/). + +This can be for example: + +* A CMDB or RDBMS which provides host details and facts +* PuppetDB +* CSV/XSL/JSON files +* Cloud resources (AWS, etc.) + +In case you have been using Icinga Web 1.x or an addon requiring +the underlying IDO database, you can use this as database resource +to import the host details. 
+ +Talks: + +* [This talk from OSMC 2016](https://www.youtube.com/watch?v=T6GBsfeXIZI) shares more insights (German). +* [Automated Monitoring in heterogeneous environments](https://www.youtube.com/watch?v=bkUlS5rlHzM&list=PLeoxx10paaAn_xHJ5wBhnBJyW_d5G7-Bl&index=8) + +Continue reading more about [Import Sources](https://icinga.com/docs/director/latest/doc/70-Import-and-Sync/) +for the Icinga Director. + +### Manual Config Migration + +For a long-term migration of your configuration you should consider re-creating +your configuration based on the proposed Icinga 2 configuration paradigm. + +Please read the [next chapter](23-migrating-from-icinga-1x.md#differences-1x-2) to find out more about the differences +between 1.x and 2. + +### Manual Config Migration Hints + +These hints should provide you with enough details for manually migrating your configuration, +or to adapt your configuration export tool to dump Icinga 2 configuration instead of +Icinga 1.x configuration. + +The examples are taken from Icinga 1.x test and production environments and converted +straight into a possible Icinga 2 format. If you found a different strategy, please +let us know! + +If you require in-depth explanations, please check the [next chapter](23-migrating-from-icinga-1x.md#differences-1x-2). + +#### Manual Config Migration Hints for Intervals + +By default all intervals without any duration literal are interpreted as seconds. Therefore +all existing Icinga 1.x `*_interval` attributes require an additional `m` duration literal. 
+ +Icinga 1.x: + +``` +define service { + service_description service1 + host_name localhost1 + check_command test_customvar + use generic-service + check_interval 5 + retry_interval 1 +} +``` + +Icinga 2: + +``` +object Service "service1" { + import "generic-service" + host_name = "localhost1" + check_command = "test_customvar" + check_interval = 5m + retry_interval = 1m +} +``` + +#### Manual Config Migration Hints for Services + +If you have used the `host_name` attribute in Icinga 1.x with one or more host names this service +belongs to, you can migrate this to the [apply rules](03-monitoring-basics.md#using-apply) syntax. + +Icinga 1.x: + +``` +define service { + service_description service1 + host_name localhost1,localhost2 + check_command test_check + use generic-service +} +``` + +Icinga 2: + +``` +apply Service "service1" { + import "generic-service" + check_command = "test_check" + + assign where host.name in [ "localhost1", "localhost2" ] +} +``` + +In Icinga 1.x you would have organized your services with hostgroups using the `hostgroup_name` attribute +like the following example: + +``` +define service { + service_description servicewithhostgroups + hostgroup_name hostgroup1,hostgroup3 + check_command test_check + use generic-service +} +``` + +Using Icinga 2 you can migrate this to the [apply rules](03-monitoring-basics.md#using-apply) syntax: + +``` +apply Service "servicewithhostgroups" { + import "generic-service" + check_command = "test_check" + + assign where "hostgroup1" in host.groups + assign where "hostgroup3" in host.groups +} +``` + +#### Manual Config Migration Hints for Group Members + +The Icinga 1.x hostgroup `hg1` has two members `host1` and `host2`. The hostgroup `hg2` has `host3` as +a member and includes all members of the `hg1` hostgroup. 
+ +``` +define hostgroup { + hostgroup_name hg1 + members host1,host2 +} + +define hostgroup { + hostgroup_name hg2 + members host3 + hostgroup_members hg1 +} +``` + +This can be migrated to Icinga 2 and [using group assign](17-language-reference.md#group-assign). The additional nested hostgroup +`hg1` is included into `hg2` with the `groups` attribute. + +``` +object HostGroup "hg1" { + groups = [ "hg2" ] + assign where host.name in [ "host1", "host2" ] +} + +object HostGroup "hg2" { + assign where host.name == "host3" +} +``` + +These assign rules can be applied for all groups: `HostGroup`, `ServiceGroup` and `UserGroup` +(requires renaming from `contactgroup`). + +> **Tip** +> +> Define custom variables and assign/ignore members based on these attribute pattern matches. + + + +#### Manual Config Migration Hints for Check Command Arguments + +Host and service check command arguments are separated by a `!` in Icinga 1.x. Their order is important and they +are referenced as `$ARGn$` where `n` is the argument counter. 
+ +``` +define command { + command_name my-ping + command_line $USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5 +} + +define service { + use generic-service + host_name my-server + service_description my-ping + check_command my-ping-check!100.0,20%!500.0,60% +} +``` + +While you could manually migrate this like (please note the new generic command arguments and default argument values!): + +``` +object CheckCommand "my-ping-check" { + command = [ + PluginDir + "/check_ping", "-4" + ] + + arguments = { + "-H" = "$ping_address$" + "-w" = "$ping_wrta$,$ping_wpl$%" + "-c" = "$ping_crta$,$ping_cpl$%" + "-p" = "$ping_packets$" + "-t" = "$ping_timeout$" + } + + vars.ping_address = "$address$" + vars.ping_wrta = 100 + vars.ping_wpl = 5 + vars.ping_crta = 200 + vars.ping_cpl = 15 +} + +object Service "my-ping" { + import "generic-service" + host_name = "my-server" + check_command = "my-ping-check" + + vars.ping_wrta = 100 + vars.ping_wpl = 20 + vars.ping_crta = 500 + vars.ping_cpl = 60 +} +``` + +#### Manual Config Migration Hints for Runtime Macros + +Runtime macros have been renamed. A detailed comparison table can be found [here](23-migrating-from-icinga-1x.md#differences-1x-2-runtime-macros). + +For example, accessing the service check output looks like the following in Icinga 1.x: + +``` +$SERVICEOUTPUT$ +``` + +In Icinga 2 you will need to write: + +``` +$service.output$ +``` + +Another example referencing the host's address attribute in Icinga 1.x: + +``` +$HOSTADDRESS$ +``` + +In Icinga 2 you'd just use the following macro to access all `address` attributes (even overridden from the service objects): + +``` +$address$ +``` + +#### Manual Config Migration Hints for Runtime Custom Variables + +Custom variables from Icinga 1.x are available as Icinga 2 custom variables. 
+ +``` +define command { + command_name test_customvar + command_line echo "Host CV: $_HOSTCVTEST$ Service CV: $_SERVICECVTEST$\n" +} + +define host { + host_name localhost1 + check_command test_customvar + use generic-host + _CVTEST host cv value +} + +define service { + service_description service1 + host_name localhost1 + check_command test_customvar + use generic-service + _CVTEST service cv value +} +``` + +Can be written as the following in Icinga 2: + +``` +object CheckCommand "test_customvar" { + command = "echo "Host CV: $host.vars.CVTEST$ Service CV: $service.vars.CVTEST$\n"" +} + +object Host "localhost1" { + import "generic-host" + check_command = "test_customvar" + vars.CVTEST = "host cv value" +} + +object Service "service1" { + host_name = "localhost1" + check_command = "test_customvar" + vars.CVTEST = "service cv value" +} +``` + +If you are just defining `$CVTEST$` in your command definition, its value depends on the +execution scope -- the host check command will fetch the host attribute value of `vars.CVTEST` +while the service check command resolves its value to the service attribute attribute `vars.CVTEST`. + +> **Note** +> +> Custom variables in Icinga 2 are case-sensitive. `vars.CVTEST` is not the same as `vars.CvTest`. + +#### Manual Config Migration Hints for Contacts (Users) + +Contacts in Icinga 1.x act as users in Icinga 2, but do not have any notification commands specified. +This migration part is explained in the [next chapter](23-migrating-from-icinga-1x.md#manual-config-migration-hints-notifications). + +``` +define contact{ + contact_name testconfig-user + use generic-user + alias Icinga Test User + service_notification_options c,f,s,u + email icinga@localhost +} +``` + +The `service_notification_options` can be [mapped](23-migrating-from-icinga-1x.md#manual-config-migration-hints-notification-filters) +into generic `state` and `type` filters, if additional notification filtering is required. 
`alias` gets +renamed to `display_name`. + +``` +object User "testconfig-user" { + import "generic-user" + display_name = "Icinga Test User" + email = "icinga@localhost" +} +``` + +This user can be put into usergroups (former contactgroups) or referenced in newly migration notification +objects. + +#### Manual Config Migration Hints for Notifications + +If you are migrating a host or service notification, you'll need to extract the following information from +your existing Icinga 1.x configuration objects + +* host/service attribute `contacts` and `contact_groups` +* host/service attribute `notification_options` +* host/service attribute `notification_period` +* host/service attribute `notification_interval` + +The clean approach is to refactor your current contacts and their notification command methods into a +generic strategy + +* host or service has a notification type (for example mail) +* which contacts (users) are notified by mail? +* do the notification filters, periods, intervals still apply for them? (do a cleanup during migration) +* assign users and groups to these notifications +* Redesign the notifications into generic [apply rules](03-monitoring-basics.md#using-apply-notifications) + + +The ugly workaround solution could look like this: + +Extract all contacts from the remaining groups, and create a unique list. This is required for determining +the host and service notification commands involved. + +* contact attributes `host_notification_commands` and `service_notification_commands` (can be a comma separated list) +* get the command line for each notification command and store them for later +* create a new notification name and command name + +Generate a new notification object based on these values. Import the generic template based on the type (`host` or `service`). +Assign it to the host or service and set the newly generated notification command name as `command` attribute. 
object Notification "<notificationname>" { + import "mail-host-notification" + host_name = "<thishostname>" + command = "<notificationcommandname>" +``` + +Convert the `notification_options` attribute from Icinga 1.x to Icinga 2 `states` and `types`. Details +[here](23-migrating-from-icinga-1x.md#manual-config-migration-hints-notification-filters). Add the notification period. + +``` + states = [ OK, Warning, Critical ] + types = [ Recovery, Problem, Custom ] + period = "24x7" +``` + +The current contact acts as `users` attribute. + +``` + users = [ "<contactwithnotificationcommand>" ] +}
+ +The following example applies a service escalation to the service `dep_svc01` and all hosts in the `hg_svcdep2` +hostgroup. The default `notification_interval` is set to `10` minutes notifying the `cg_admin` contact. +After 20 minutes (`10*2`, notification_interval * first_notification) the notification is escalated to the +`cg_ops` contactgroup until 60 minutes (`10*6`) have passed. + +``` +define service { + service_description dep_svc01 + host_name dep_hostsvc01,dep_hostsvc03 + check_command test2 + use generic-service + notification_interval 10 + contact_groups cg_admin +} + +define hostgroup { + hostgroup_name hg_svcdep2 + members dep_hostsvc03 +} + +# with hostgroup_name and service_description +define serviceescalation { + hostgroup_name hg_svcdep2 + service_description dep_svc01 + first_notification 2 + last_notification 6 + contact_groups cg_ops +} +``` + +In Icinga 2 the service and hostgroup definition will look quite the same. Save the `notification_interval` +and `contact_groups` attribute for an additional notification. + +``` +apply Service "dep_svc01" { + import "generic-service" + + check_command = "test2" + + assign where host.name == "dep_hostsvc01" + assign where host.name == "dep_hostsvc03" +} + +object HostGroup "hg_svcdep2" { + assign where host.name == "dep_hostsvc03" +} + +apply Notification "email" to Service { + import "service-mail-notification" + + interval = 10m + user_groups = [ "cg_admin" ] + + assign where service.name == "dep_svc01" && (host.name == "dep_hostsvc01" || host.name == "dep_hostsvc03") +} +``` + +Calculate the begin and end time for the newly created escalation notification: + +* begin = first_notification * notification_interval = 2 * 10m = 20m +* end = last_notification * notification_interval = 6 * 10m = 60m = 1h + +Assign the notification escalation to the service `dep_svc01` on all hosts in the hostgroup `hg_svcdep2`. 
+ +``` +apply Notification "email-escalation" to Service { + import "service-mail-notification" + + interval = 10m + user_groups = [ "cg_ops" ] + + times = { + begin = 20m + end = 1h + } + + assign where service.name == "dep_svc01" && "hg_svcdep2" in host.groups +} +``` + +The assign rule could be made more generic and the notification be applied to more than +just this service belonging to hosts in the matched hostgroup. + + +> **Note** +> +> When the notification is escalated, Icinga 1.x suppresses notifications to the default contacts. +> In Icinga 2 an escalation is an additional notification with a defined begin and end time. The +> `email` notification will continue as normal. + + + +#### Manual Config Migration Hints for Dependencies + +There are some dependency examples already in the [basics chapter](03-monitoring-basics.md#dependencies). Dependencies in +Icinga 1.x can be confusing in terms of which host/service is the parent and which host/service acts +as the child. + +While Icinga 1.x defines `notification_failure_criteria` and `execution_failure_criteria` as dependency +filters, this behaviour has changed in Icinga 2. There is no 1:1 migration but generally speaking +the state filter defined in the `execution_failure_criteria` defines the Icinga 2 `state` attribute. +If the state filter matches, you can define whether to disable checks and notifications or not. + +The following example describes service dependencies. If you migrate from Icinga 1.x, you will only +want to use the classic `Host-to-Host` and `Service-to-Service` dependency relationships. 
+ +``` +define service { + service_description dep_svc01 + hostgroup_name hg_svcdep1 + check_command test2 + use generic-service +} + +define service { + service_description dep_svc02 + hostgroup_name hg_svcdep2 + check_command test2 + use generic-service +} + +define hostgroup { + hostgroup_name hg_svcdep2 + members host2 +} + +define host{ + use linux-server-template + host_name host1 + address 192.168.1.10 +} + +# with hostgroup_name and service_description +define servicedependency { + host_name host1 + dependent_hostgroup_name hg_svcdep2 + service_description dep_svc01 + dependent_service_description * + execution_failure_criteria u,c + notification_failure_criteria w,u,c + inherits_parent 1 +} +``` + +Map the dependency attributes accordingly. + + Icinga 1.x | Icinga 2 + ----------------------|--------------------- + host_name | parent_host_name + dependent_host_name | child_host_name (used in assign/ignore) + dependent_hostgroup_name | all child hosts in group (used in assign/ignore) + service_description | parent_service_name + dependent_service_description | child_service_name (used in assign/ignore) + +And migrate the host and services. + +``` +object Host "host1" { + import "linux-server-template" + address = "192.168.1.10" +} + +object HostGroup "hg_svcdep2" { + assign where host.name == "host2" +} + +apply Service "dep_svc01" { + import "generic-service" + check_command = "test2" + + assign where "hg_svcdep1" in host.groups +} + +apply Service "dep_svc02" { + import "generic-service" + check_command = "test2" + + assign where "hg_svcdep2" in host.groups +} +``` + +When it comes to the `execution_failure_criteria` and `notification_failure_criteria` attribute migration, +you will need to map the most common values, in this example `u,c` (`Unknown` and `Critical` will cause the +dependency to fail). Therefore the `Dependency` should be ok on Ok and Warning. `inherits_parent` is always +enabled.
+ +``` +apply Dependency "all-svc-for-hg-hg_svcdep2-on-host1-dep_svc01" to Service { + parent_host_name = "host1" + parent_service_name = "dep_svc01" + + states = [ Ok, Warning ] + disable_checks = true + disable_notifications = true + + assign where "hg_svcdep2" in host.groups +} +``` + +Host dependencies are explained in the [next chapter](23-migrating-from-icinga-1x.md#manual-config-migration-hints-host-parents). + + + +#### Manual Config Migration Hints for Host Parents + +Host parents from Icinga 1.x are migrated into `Host-to-Host` dependencies in Icinga 2. + +The following example defines the `vmware-master` host as parent host for the guest +virtual machines `vmware-vm1` and `vmware-vm2`. + +By default all hosts in the hostgroup `vmware` should get the parent assigned. This isn't really +solvable with Icinga 1.x parents, but only with host dependencies. + +``` +define host{ + use linux-server-template + host_name vmware-master + hostgroups vmware + address 192.168.1.10 +} + +define host{ + use linux-server-template + host_name vmware-vm1 + hostgroups vmware + address 192.168.27.1 + parents vmware-master +} + +define host{ + use linux-server-template + host_name vmware-vm2 + hostgroups vmware + address 192.168.28.1 + parents vmware-master +} +``` + +By default all hosts in the hostgroup `vmware` should get the parent assigned (but not the `vmware-master` +host itself). This isn't really solvable with Icinga 1.x parents, but only with host dependencies as shown +below: + +``` +define hostdependency { + dependent_hostgroup_name vmware + dependent_host_name !vmware-master + host_name vmware-master + inherits_parent 1 + notification_failure_criteria d,u + execution_failure_criteria d,u + dependency_period testconfig-24x7 +} +``` + +When migrating to Icinga 2, the parents must be changed to a newly created host dependency. 
+ + +Map the following attributes + + Icinga 1.x | Icinga 2 + ----------------------|--------------------- + host_name | parent_host_name + dependent_host_name | child_host_name (used in assign/ignore) + dependent_hostgroup_name | all child hosts in group (used in assign/ignore) + +The Icinga 2 configuration looks like this: + +``` +object Host "vmware-master" { + import "linux-server-template" + groups += [ "vmware" ] + address = "192.168.1.10" + vars.is_vmware_master = true +} + +object Host "vmware-vm1" { + import "linux-server-template" + groups += [ "vmware" ] + address = "192.168.27.1" +} + +object Host "vmware-vm2" { + import "linux-server-template" + groups += [ "vmware" ] + address = "192.168.28.1" +} + +apply Dependency "vmware-master" to Host { + parent_host_name = "vmware-master" + + assign where "vmware" in host.groups + ignore where host.vars.is_vmware_master + ignore where host.name == "vmware-master" +} +``` + +For easier identification you could add the `vars.is_vmware_master` attribute to the `vmware-master` +host and let the dependency ignore that instead of the hardcoded host name. That's different +to the Icinga 1.x example and a best practice hint only. + + +Another way to express the same configuration would be something like: + +``` +object Host "vmware-master" { + import "linux-server-template" + groups += [ "vmware" ] + address = "192.168.1.10" +} + +object Host "vmware-vm1" { + import "linux-server-template" + groups += [ "vmware" ] + address = "192.168.27.1" + vars.parents = [ "vmware-master" ] +} + +object Host "vmware-vm2" { + import "linux-server-template" + groups += [ "vmware" ] + address = "192.168.28.1" + vars.parents = [ "vmware-master" ] +} + +apply Dependency "host-to-parent-" for (parent in host.vars.parents) to Host { + parent_host_name = parent +} +``` + +This example allows finer grained host-to-host dependency, as well as multiple dependency support. 
+ +#### Manual Config Migration Hints for Distributed Setups + +* Icinga 2 does not use active/passive instances calling OCSP commands and requiring the NSCA +daemon for passing check results between instances. +* Icinga 2 does not support any 1.x NEB addons for check load distribution. + +* If your current setup consists of instances distributing the check load, you should consider +building a [load distribution](06-distributed-monitoring.md#distributed-monitoring-scenarios) setup with Icinga 2. +* If your current setup includes active/passive clustering with external tools like Pacemaker/DRBD, +consider the [High Availability](06-distributed-monitoring.md#distributed-monitoring-scenarios) setup. +* If you have built your own custom configuration deployment and check result collecting mechanism, +you should re-design your setup and re-evaluate your requirements, and how they may be fulfilled +using the Icinga 2 cluster capabilities. + + +## Differences between Icinga 1.x and 2 + +### Configuration Format + +Icinga 1.x supports two configuration formats: key-value-based settings in the +`icinga.cfg` configuration file and object-based in included files (`cfg_dir`, +`cfg_file`). The path to the `icinga.cfg` configuration file must be passed to +the Icinga daemon at startup. + +icinga.cfg: + +``` +enable_notifications=1 +``` + +objects.cfg: + +``` +define service { + notifications_enabled 0 +} +``` + +Icinga 2 supports objects and (global) variables, but does not make a difference +between the main configuration file or any other included file. + +icinga2.conf: + +``` +const EnableNotifications = true + +object Service "test" { + enable_notifications = false +} +``` + +#### Sample Configuration and ITL + +While Icinga 1.x ships sample configuration and templates spread in various +object files, Icinga 2 moves all templates into the Icinga Template Library (ITL) +and includes them in the sample configuration.
+ +Additional plugin check commands are shipped with Icinga 2 as well. + +The ITL will be updated on every release and must not be edited by the user. + +There are still generic templates available for your convenience which may or may +not be re-used in your configuration. For instance, `generic-service` includes +all required attributes except `check_command` for a service. + +Sample configuration files are located in the `conf.d/` directory which is +included in `icinga2.conf` by default. + +> **Note** +> +> Add your own custom templates in the `conf.d/` directory as well, e.g. inside +> the [templates.conf](04-configuration.md#templates-conf) file. + +### Main Config File + +In Icinga 1.x there are many global configuration settings available in `icinga.cfg`. +Icinga 2 only uses a small set of [global constants](17-language-reference.md#constants) allowing +you to specify certain different setting such as the `NodeName` in a cluster scenario. + +Aside from that, the [icinga2.conf](04-configuration.md#icinga2-conf) should take care of including +global constants, enabled [features](11-cli-commands.md#enable-features) and the object configuration. + +### Include Files and Directories + +In Icinga 1.x the `icinga.cfg` file contains `cfg_file` and `cfg_dir` +directives. The `cfg_dir` directive recursively includes all files with a `.cfg` +suffix in the given directory. Only absolute paths may be used. The `cfg_file` +and `cfg_dir` directives can include the same file twice which leads to +configuration errors in Icinga 1.x. + +``` +cfg_file=/etc/icinga/objects/commands.cfg +cfg_dir=/etc/icinga/objects +``` + +Icinga 2 supports wildcard includes and relative paths, e.g. for including +`conf.d/*.conf` in the same directory. + +``` +include "conf.d/*.conf" +``` + +If you want to include files and directories recursively, you need to define +a separate option and add the directory and an optional pattern. 
+ +``` +include_recursive "conf.d" +``` + +A global search path for includes is available for advanced features like +the Icinga Template Library (ITL) or additional monitoring plugins check +command configuration. + +``` +include <itl> +include <plugins> +``` + +By convention the `.conf` suffix is used for Icinga 2 configuration files. + +### Resource File and Global Macros + +Global macros such as for the plugin directory, usernames and passwords can be +set in the `resource.cfg` configuration file in Icinga 1.x. By convention the +`USER1` macro is used to define the directory for the plugins. + +Icinga 2 uses global constants instead. In the default config these are +set in the `constants.conf` configuration file: + +``` +/** + * This file defines global constants which can be used in + * the other configuration files. At a minimum the + * PluginDir constant should be defined. + */ + +const PluginDir = "/usr/lib/nagios/plugins" +``` + +[Global constants](17-language-reference.md#constants) can only be defined once. Trying to modify a +global constant will result in an error. + +### Configuration Comments + +In Icinga 1.x comments are made using a leading hash (`#`) or a semi-colon (`;`) +for inline comments. + +In Icinga 2 comments can either be encapsulated by `/*` and `*/` (allowing for +multi-line comments) or starting with two slashes (`//`). A leading hash (`#`) +could also be used. + +### Object Names + +Object names must not contain an exclamation mark (`!`). Use the `display_name` attribute +to specify user-friendly names which should be shown in UIs (supported by +Icinga Web 2 for example). + +Object names are not specified using attributes (e.g. `service_description` for +services) like in Icinga 1.x but directly after their type definition. + +``` +define service { + host_name localhost + service_description ping4 +} + +object Service "ping4" { + host_name = "localhost" +} +``` + +### Templates + +In Icinga 1.x templates are identified using the `register 0` setting.
Icinga 2 +uses the `template` identifier: + +``` +template Service "ping4-template" { } +``` + +Icinga 1.x objects inherit from templates using the `use` attribute. +Icinga 2 uses the keyword `import` with template names in double quotes. + +``` +define service { + service_description testservice + use tmpl1,tmpl2,tmpl3 +} + +object Service "testservice" { + import "tmpl1" + import "tmpl2" + import "tmpl3" +} +``` + +The last template overrides previously set values. + +### Object attributes + +Icinga 1.x separates attribute and value pairs with whitespaces/tabs. Icinga 2 +requires an equal sign (=) between them. + +``` +define service { + check_interval 5 +} + +object Service "test" { + check_interval = 5m +} +``` + +Please note that the default time value is seconds if no duration literal +is given. `check_interval = 5` behaves the same as `check_interval = 5s`. + +All strings require double quotes in Icinga 2. Therefore a double quote +must be escaped by a backslash (e.g. in command line). +If an attribute identifier starts with a number, it must be enclosed +in double quotes as well. + +#### Alias vs. Display Name + +In Icinga 1.x a host can have an `alias` and a `display_name` attribute used +for a more descriptive name. A service only can have a `display_name` attribute. +The `alias` is used for group, timeperiod, etc. objects too. +Icinga 2 only supports the `display_name` attribute which is also taken into +account by Icinga web interfaces. + +### Custom Variables + +Icinga 2 allows you to define custom variables in the `vars` dictionary. +The `notes`, `notes_url`, `action_url`, `icon_image`, `icon_image_alt` +attributes for host and service objects are still available in Icinga 2. + +`2d_coords` and `statusmap_image` are not supported in Icinga 2. + +Icinga 1.x custom variable attributes must be prefixed using an underscore (`_`). +In Icinga 2 these attributes must be added to the `vars` dictionary as custom variables. 
+ +``` +vars.dn = "cn=icinga2-dev-host,ou=icinga,ou=main,ou=IcingaConfig,ou=LConf,dc=icinga,dc=org" +vars.cv = "my custom cmdb description" +``` + +These custom variables are also used as [command parameters](03-monitoring-basics.md#command-passing-parameters). + +While Icinga 1.x only supports numbers and strings as custom variable values, +Icinga 2 extends that to arrays and (nested) dictionaries. For more details +look [here](03-monitoring-basics.md#custom-variables). + +### Host Service Relation + +In Icinga 1.x a service object is associated with a host by defining the +`host_name` attribute in the service definition. Alternate methods refer +to `hostgroup_name` or behaviour changing regular expression. + +The preferred way of associating hosts with services in Icinga 2 is by +using the [apply](03-monitoring-basics.md#using-apply) keyword. + +Direct object relations between a service and a host still allow you to use +the `host_name` [Service](09-object-types.md#objecttype-service) object attribute. + +### Users + +Contacts have been renamed to users (same for groups). A contact does not +only provide (custom) attributes and notification commands used for notifications, +but is also used for authorization checks in Icinga 1.x. + +Icinga 2 changes that behavior and makes the user an attribute provider only. +These attributes can be accessed using [runtime macros](03-monitoring-basics.md#runtime-macros) +inside notification command definitions. + +In Icinga 2 notification commands are not directly associated with users. +Instead the notification command is specified inside `Notification` objects next to +user and user group relations. + +The `StatusDataWriter`, `IdoMysqlConnection` and `LivestatusListener` types will +provide the contact and contactgroups attributes for services for compatibility +reasons. These values are calculated from all services, their notifications, +and their users.
+ +### Macros + +Various object attributes and runtime variables can be accessed as macros in +commands in Icinga 1.x -- Icinga 2 supports all required [custom variables](03-monitoring-basics.md#custom-variables). + +#### Command Arguments + +If you have previously used Icinga 1.x, you may already be familiar with +user and argument definitions (e.g., `USER1` or `ARG1`). Unlike in Icinga 1.x +the Icinga 2 custom variables may have arbitrary names and arguments are no +longer specified in the `check_command` setting. + +In Icinga 1.x arguments are specified in the `check_command` attribute and +are separated from the command name using an exclamation mark (`!`). + +Please check the migration hints for a detailed +[migration example](23-migrating-from-icinga-1x.md#manual-config-migration-hints-check-command-arguments). + +> **Note** +> +> The Icinga 1.x feature named `Command Expander` does not work with Icinga 2. + +#### Environment Macros + +The global configuration setting `enable_environment_macros` does not exist in +Icinga 2. + +Macros exported into the [environment](03-monitoring-basics.md#command-environment-variables) +can be set using the `env` attribute in command objects. + +#### Runtime Macros + +Icinga 2 requires an object specific namespace when accessing configuration +and stateful runtime macros. Custom variables can be accessed directly. + +If a runtime macro from Icinga 1.x is not listed here, it is not supported +by Icinga 2. + +Changes to user (contact) runtime macros + + Icinga 1.x | Icinga 2 + -----------------------|---------------------- + CONTACTNAME | user.name + CONTACTALIAS | user.display_name + CONTACTEMAIL | user.email + CONTACTPAGER | user.pager + +`CONTACTADDRESS*` is not supported but can be accessed as `$user.vars.address1$` +if set. 
+ +Changes to service runtime macros + + Icinga 1.x | Icinga 2 + -----------------------|---------------------- + SERVICEDESC | service.name + SERVICEDISPLAYNAME | service.display_name + SERVICECHECKCOMMAND | service.check_command + SERVICESTATE | service.state + SERVICESTATEID | service.state_id + SERVICESTATETYPE | service.state_type + SERVICEATTEMPT | service.check_attempt + MAXSERVICEATTEMPT | service.max_check_attempts + LASTSERVICESTATE | service.last_state + LASTSERVICESTATEID | service.last_state_id + LASTSERVICESTATETYPE | service.last_state_type + LASTSERVICESTATECHANGE | service.last_state_change + SERVICEDOWNTIME | service.downtime_depth + SERVICEDURATIONSEC | service.duration_sec + SERVICELATENCY | service.latency + SERVICEEXECUTIONTIME | service.execution_time + SERVICEOUTPUT | service.output + SERVICEPERFDATA | service.perfdata + LASTSERVICECHECK | service.last_check + SERVICENOTES | service.notes + SERVICENOTESURL | service.notes_url + SERVICEACTIONURL | service.action_url + + +Changes to host runtime macros + + Icinga 1.x | Icinga 2 + -----------------------|---------------------- + HOSTNAME | host.name + HOSTADDRESS | host.address + HOSTADDRESS6 | host.address6 + HOSTDISPLAYNAME | host.display_name + HOSTALIAS | (use `host.display_name` instead) + HOSTCHECKCOMMAND | host.check_command + HOSTSTATE | host.state + HOSTSTATEID | host.state_id + HOSTSTATETYPE | host.state_type + HOSTATTEMPT | host.check_attempt + MAXHOSTATTEMPT | host.max_check_attempts + LASTHOSTSTATE | host.last_state + LASTHOSTSTATEID | host.last_state_id + LASTHOSTSTATETYPE | host.last_state_type + LASTHOSTSTATECHANGE | host.last_state_change + HOSTDOWNTIME | host.downtime_depth + HOSTDURATIONSEC | host.duration_sec + HOSTLATENCY | host.latency + HOSTEXECUTIONTIME | host.execution_time + HOSTOUTPUT | host.output + HOSTPERFDATA | host.perfdata + LASTHOSTCHECK | host.last_check + HOSTNOTES | host.notes + HOSTNOTESURL | host.notes_url + HOSTACTIONURL | host.action_url + TOTALSERVICES 
| host.num_services + TOTALSERVICESOK | host.num_services_ok + TOTALSERVICESWARNING | host.num_services_warning + TOTALSERVICESUNKNOWN | host.num_services_unknown + TOTALSERVICESCRITICAL | host.num_services_critical + +Changes to command runtime macros + + Icinga 1.x | Icinga 2 + -----------------------|---------------------- + COMMANDNAME | command.name + +Changes to notification runtime macros + + Icinga 1.x | Icinga 2 + -----------------------|---------------------- + NOTIFICATIONTYPE | notification.type + NOTIFICATIONAUTHOR | notification.author + NOTIFICATIONCOMMENT | notification.comment + NOTIFICATIONAUTHORNAME | (use `notification.author`) + NOTIFICATIONAUTHORALIAS | (use `notification.author`) + + +Changes to global runtime macros: + + Icinga 1.x | Icinga 2 + -----------------------|---------------------- + TIMET | icinga.timet + LONGDATETIME | icinga.long_date_time + SHORTDATETIME | icinga.short_date_time + DATE | icinga.date + TIME | icinga.time + PROCESSSTARTTIME | icinga.uptime + +Changes to global statistic macros: + + Icinga 1.x | Icinga 2 + ----------------------------------|---------------------- + TOTALHOSTSUP | icinga.num_hosts_up + TOTALHOSTSDOWN | icinga.num_hosts_down + TOTALHOSTSUNREACHABLE | icinga.num_hosts_unreachable + TOTALHOSTSDOWNUNHANDLED | -- + TOTALHOSTSUNREACHABLEUNHANDLED | -- + TOTALHOSTPROBLEMS | down + TOTALHOSTPROBLEMSUNHANDLED | down-(downtime+acknowledged) + TOTALSERVICESOK | icinga.num_services_ok + TOTALSERVICESWARNING | icinga.num_services_warning + TOTALSERVICESCRITICAL | icinga.num_services_critical + TOTALSERVICESUNKNOWN | icinga.num_services_unknown + TOTALSERVICESWARNINGUNHANDLED | -- + TOTALSERVICESCRITICALUNHANDLED | -- + TOTALSERVICESUNKNOWNUNHANDLED | -- + TOTALSERVICEPROBLEMS | ok+warning+critical+unknown + TOTALSERVICEPROBLEMSUNHANDLED | warning+critical+unknown-(downtime+acknowledged) + + + + +### External Commands + +`CHANGE_CUSTOM_CONTACT_VAR` was renamed to `CHANGE_CUSTOM_USER_VAR`. 
+ +The following external commands are not supported: + +``` +CHANGE_*MODATTR +CHANGE_CONTACT_HOST_NOTIFICATION_TIMEPERIOD +CHANGE_HOST_NOTIFICATION_TIMEPERIOD +CHANGE_SVC_NOTIFICATION_TIMEPERIOD +DEL_DOWNTIME_BY_HOSTGROUP_NAME +DEL_DOWNTIME_BY_START_TIME_COMMENT +DISABLE_ALL_NOTIFICATIONS_BEYOND_HOST +DISABLE_CONTACT_HOST_NOTIFICATIONS +DISABLE_CONTACT_SVC_NOTIFICATIONS +DISABLE_CONTACTGROUP_HOST_NOTIFICATIONS +DISABLE_CONTACTGROUP_SVC_NOTIFICATIONS +DISABLE_FAILURE_PREDICTION +DISABLE_HOST_AND_CHILD_NOTIFICATIONS +DISABLE_HOST_FRESHNESS_CHECKS +DISABLE_NOTIFICATIONS_EXPIRE_TIME +DISABLE_SERVICE_FRESHNESS_CHECKS +ENABLE_ALL_NOTIFICATIONS_BEYOND_HOST +ENABLE_CONTACT_HOST_NOTIFICATIONS +ENABLE_CONTACT_SVC_NOTIFICATIONS +ENABLE_CONTACTGROUP_HOST_NOTIFICATIONS +ENABLE_CONTACTGROUP_SVC_NOTIFICATIONS +ENABLE_FAILURE_PREDICTION +ENABLE_HOST_AND_CHILD_NOTIFICATIONS +ENABLE_HOST_FRESHNESS_CHECKS +ENABLE_SERVICE_FRESHNESS_CHECKS +READ_STATE_INFORMATION +SAVE_STATE_INFORMATION +SET_HOST_NOTIFICATION_NUMBER +SET_SVC_NOTIFICATION_NUMBER +START_ACCEPTING_PASSIVE_HOST_CHECKS +START_ACCEPTING_PASSIVE_SVC_CHECKS +START_OBSESSING_OVER_HOST +START_OBSESSING_OVER_HOST_CHECKS +START_OBSESSING_OVER_SVC +START_OBSESSING_OVER_SVC_CHECKS +STOP_ACCEPTING_PASSIVE_HOST_CHECKS +STOP_ACCEPTING_PASSIVE_SVC_CHECKS +STOP_OBSESSING_OVER_HOST +STOP_OBSESSING_OVER_HOST_CHECKS +STOP_OBSESSING_OVER_SVC +STOP_OBSESSING_OVER_SVC_CHECKS +``` + +### Asynchronous Event Execution + +Unlike Icinga 1.x, Icinga 2 does not block when it's waiting for a command +being executed -- whether if it's a check, a notification, an event +handler, a performance data writing update, etc. That way you'll +recognize low to zero (check) latencies with Icinga 2. + +### Checks + +#### Check Output + +Icinga 2 does not make a difference between `output` (first line) and +`long_output` (remaining lines) like in Icinga 1.x. Performance Data is +provided separately. 
+ +There is no output length restriction as known from Icinga 1.x using an +[8KB static buffer](https://docs.icinga.com/latest/en/pluginapi.html#outputlengthrestrictions). + +The `StatusDataWriter`, `IdoMysqlConnection` and `LivestatusListener` types +split the raw output into `output` (first line) and `long_output` (remaining +lines) for compatibility reasons. + +#### Initial State + +Icinga 1.x uses the `max_service_check_spread` setting to specify a timerange +where the initial state checks must have happened. Icinga 2 will use the +`retry_interval` setting instead and `check_interval` divided by 5 if +`retry_interval` is not defined. + +### Comments + +Icinga 2 doesn't support non-persistent comments. + +### Commands + +Unlike in Icinga 1.x there are three different command types in Icinga 2: +`CheckCommand`, `NotificationCommand`, and `EventCommand`. + +For example in Icinga 1.x it is possible to accidentally use a notification +command as an event handler which might cause problems depending on which +runtime macros are used in the notification command. + +In Icinga 2 these command types are separated and will generate an error on +configuration validation if used in the wrong context. + +While Icinga 2 still supports the complete command line in command objects, it's +recommended to use [command arguments](03-monitoring-basics.md#command-arguments) +with optional and conditional command line parameters instead. + +It's also possible to define default argument values for the command itself +which can be overridden by the host or service then. + +#### Command Timeouts + +In Icinga 1.x there were two global options defining a host and service check +timeout. This was essentially bad when there only was a couple of check plugins +requiring some command timeouts to be extended. + +Icinga 2 allows you to specify the command timeout directly on the command. 
So, +if your VMware check plugin takes 15 minutes, [increase the timeout](09-object-types.md#objecttype-checkcommand) +accordingly. + + +### Groups + +In Icinga 2 hosts, services, and users are added to groups using the `groups` +attribute in the object. The old way of listing all group members in the group's +`members` attribute is available through `assign where` and `ignore where` +expressions by using [group assign](03-monitoring-basics.md#group-assign-intro). + +``` +object Host "web-dev" { + import "generic-host" +} + +object HostGroup "dev-hosts" { + display_name = "Dev Hosts" + assign where match("*-dev", host.name) +} +``` + +#### Add Service to Hostgroup where Host is Member + +In order to associate a service with all hosts in a host group the [apply](03-monitoring-basics.md#using-apply) +keyword can be used: + +``` +apply Service "ping4" { + import "generic-service" + + check_command = "ping4" + + assign where "dev-hosts" in host.groups +} +``` + +### Notifications + +Notifications are a new object type in Icinga 2. Imagine the following +notification configuration problem in Icinga 1.x: + +* Service A should notify contact X via SMS +* Service B should notify contact X via Mail +* Service C should notify contact Y via Mail and SMS +* Contact X and Y should also be used for authorization + +The only way of achieving a semi-clean solution is to + +* Create contact X-sms, set service_notification_command for sms, assign contact + to service A +* Create contact X-mail, set service_notification_command for mail, assign + contact to service B +* Create contact Y, set service_notification_command for sms and mail, assign + contact to service C +* Create contact X without notification commands, assign to service A and B + +Basically you are required to create duplicated contacts for either each +notification method or used for authorization only.
+ +Icinga 2 attempts to solve that problem in this way + +* Create user X, set SMS and Mail attributes, used for authorization +* Create user Y, set SMS and Mail attributes, used for authorization +* Create notification A-SMS, set command for sms, add user X, + assign notification A-SMS to service A +* Create notification B-Mail, set command for mail, add user X, + assign notification Mail to service B +* Create notification C-SMS, set command for sms, add user Y, + assign notification C-SMS to service C +* Create notification C-Mail, set command for mail, add user Y, + assign notification C-Mail to service C + +Previously in Icinga 1.x it looked like this: + +``` +service -> (contact, contactgroup) -> notification command +``` + +In Icinga 2 it will look like this: + +``` +Service -> Notification -> NotificationCommand + -> User, UserGroup +``` + +#### Escalations + +Escalations in Icinga 1.x require a separated object matching on existing +objects. Escalations happen between a defined start and end time which is +calculated from the notification_interval: + +``` +start = notification start + (notification_interval * first_notification) +end = notification start + (notification_interval * last_notification) +``` + +In theory first_notification and last_notification can be set to readable +numbers. In practice users are manipulating those attributes in combination +with notification_interval in order to get a start and end time. + +In Icinga 2 the notification object can be used as notification escalation +if the start and end times are defined within the 'times' attribute using +duration literals (e.g. 30m). + +The Icinga 2 escalation does not replace the current running notification. +In Icinga 1.x it's required to copy the contacts from the service notification +to the escalation to guarantee the normal notifications once an escalation +happens. +That's not necessary with Icinga 2 only requiring an additional notification +object for the escalation itself. 
+ +#### Notification Options + +Unlike Icinga 1.x with the 'notification_options' attribute with comma-separated +state and type filters, Icinga 2 uses two configuration attributes for that. +All state and type filter use long names OR'd with a pipe together + +``` +notification_options w,u,c,r,f,s + +states = [ Warning, Unknown, Critical ] +types = [ Problem, Recovery, FlappingStart, FlappingEnd, DowntimeStart, DowntimeEnd, DowntimeRemoved ] +``` + +Icinga 2 adds more fine-grained type filters for acknowledgements, downtime, +and flapping type (start, end, ...). + +### Dependencies and Parents + +In Icinga 1.x it's possible to define host parents to determine network reachability +and keep a host's state unreachable rather than down. +Furthermore there are host and service dependencies preventing unnecessary checks and +notifications. A host must not depend on a service, and vice versa. All dependencies +are configured as separate objects and cannot be set directly on the host or service +object. + +A service can now depend on a host, and vice versa. A service has an implicit dependency +(parent) to its host. A host to host dependency acts implicitly as host parent relation. + +The former `host_name` and `dependent_host_name` have been renamed to `parent_host_name` +and `child_host_name` (same for the service attribute). When using apply rules the +child attributes may be omitted. + +For detailed examples on how to use the dependencies please check the [dependencies](03-monitoring-basics.md#dependencies) +chapter. + +Dependencies can be applied to hosts or services using the [apply rules](17-language-reference.md#apply). + +The `StatusDataWriter`, `IdoMysqlConnection` and `LivestatusListener` types +support the Icinga 1.x schema with dependencies and parent attributes for +compatibility reasons. + +### Flapping + +The Icinga 1.x flapping detection uses the last 21 states of a service. This +value is hardcoded and cannot be changed. 
The algorithm on determining a flapping state +is as follows: + +``` +flapping value = (number of actual state changes / number of possible state changes) +``` + +The flapping value is then compared to the low and high flapping thresholds. + +The algorithm used in Icinga 2 does not store the past states but calculates the flapping +threshold from a single value based on counters and half-life values. Icinga 2 compares +the value with a single flapping threshold configuration attribute. + +### Check Result Freshness + +Freshness of check results must be enabled explicitly in Icinga 1.x. The attribute +`freshness_threshold` defines the threshold in seconds. Once the threshold is triggered, an +active freshness check is executed defined by the `check_command` attribute. Both check +methods (active and passive) use the same freshness check method. + +In Icinga 2 active check freshness is determined by the `check_interval` attribute and no +incoming check results in that period of time (last check + check interval). Passive check +freshness is calculated from the `check_interval` attribute if set. There is no extra +`freshness_threshold` attribute in Icinga 2. If the freshness checks are invalid, a new +service check is forced. + +### Real Reload + +In Nagios / Icinga 1.x a daemon reload does the following: + +* receive reload signal SIGHUP +* stop all events (checks, notifications, etc.) 
+* read the configuration from disk and validate all config objects in a single threaded fashion +* validation NOT ok: stop the daemon (cannot restore old config state) +* validation ok: start with new objects, dump status.dat / ido + +Unlike Icinga 1.x the Icinga 2 daemon reload does not block any event +execution during config validation: + +* receive reload signal SIGHUP +* fork a child process, start configuration validation in parallel work queues +* parent process continues with old configuration objects and the event scheduling +(doing checks, replicating cluster events, triggering alert notifications, etc.) +* validation NOT ok: child process terminates, parent process continues with old configuration state +(this is **essential** for the [cluster config synchronisation](06-distributed-monitoring.md#distributed-monitoring-top-down-config-sync)) +* validation ok: child process signals parent process to terminate and save its current state +(all events until now) into the icinga2 state file +* parent process shuts down writing icinga2.state file +* child process waits for parent process gone, reads the icinga2 state file and synchronizes all historical and status data +* child becomes the new session leader + +The DB IDO configuration dump and status/historical event updates use a queue +not blocking event execution. Same goes for any other enabled feature. +The configuration validation itself runs in parallel allowing fast verification checks. + +That way your monitoring does not stop during a configuration reload. + + +### State Retention + +Icinga 1.x uses the `retention.dat` file to save its state in order to be able +to reload it after a restart. In Icinga 2 this file is called `icinga2.state`. + +The format is **not** compatible with Icinga 1.x. + +### Logging + +Icinga 1.x supports syslog facilities and writes its own `icinga.log` log file +and archives. These logs are used in Icinga 1.x to generate +historical reports. 
+ +Icinga 2 compat library provides the CompatLogger object which writes the icinga.log and archive +in Icinga 1.x format in order to stay compatible with addons. + +The native Icinga 2 logging facilities are split into three configuration objects: SyslogLogger, +FileLogger, StreamLogger. Each of them has their own severity and target configuration. + +The Icinga 2 daemon log does not log any alerts but is considered an application log only. + +### Broker Modules and Features + +Icinga 1.x broker modules are incompatible with Icinga 2. + +In order to provide compatibility with Icinga 1.x the functionality of several +popular broker modules was implemented for Icinga 2: + +* IDOUtils +* Livestatus +* Cluster (allows for high availability and load balancing) + + +### Distributed Monitoring + +Icinga 1.x uses the native "obsess over host/service" method which requires the NSCA addon +passing the slave's check results passively onto the master's external command pipe. +While this method may be used for check load distribution, it does not provide any configuration +distribution out-of-the-box. Furthermore comments, downtimes, and other stateful runtime data is +not synced between the master and slave nodes. There are addons available solving the check +and configuration distribution problems Icinga 1.x distributed monitoring currently suffers from. + +Icinga 2 implements a new built-in +[distributed monitoring architecture](06-distributed-monitoring.md#distributed-monitoring-scenarios), +including config and check distribution, IPv4/IPv6 support, TLS certificates and zone support for DMZ. +High Availability and load balancing are also part of the Icinga 2 Cluster feature, next to local replay +logs on connection loss ensuring that the event history is kept in sync. 
diff --git a/doc/24-appendix.md b/doc/24-appendix.md new file mode 100644 index 0000000..e0f0b2f --- /dev/null +++ b/doc/24-appendix.md @@ -0,0 +1,695 @@ +# Appendix + +## External Commands List + +Additional details can be found in the [Icinga 1.x Documentation](https://docs.icinga.com/latest/en/extcommands2.html) + + Command name | Parameters | Description + ------------------------------------------|-----------------------------------|-------------------------- + PROCESS_HOST_CHECK_RESULT | ;<host_name>;<status_code>;<plugin_output> (3) | - + PROCESS_SERVICE_CHECK_RESULT | ;<host_name>;<service_name>;<return_code>;<plugin_output> (4) | - + SCHEDULE_HOST_CHECK | ;<host_name>;<check_time> (2) | - + SCHEDULE_FORCED_HOST_CHECK | ;<host_name>;<check_time> (2) | - + SCHEDULE_SVC_CHECK | ;<host_name>;<service_name>;<check_time> (3) | - + SCHEDULE_FORCED_SVC_CHECK | ;<host_name>;<service_name>;<check_time> (3) | - + ENABLE_HOST_CHECK | ;<host_name> (1) | - + DISABLE_HOST_CHECK | ;<host_name> (1) | - + ENABLE_SVC_CHECK | ;<host_name>;<service_name> (2) | - + DISABLE_SVC_CHECK | ;<host_name>;<service_name> (2) | - + SHUTDOWN_PROCESS | - | - + RESTART_PROCESS | - | - + SCHEDULE_FORCED_HOST_SVC_CHECKS | ;<host_name>;<check_time> (2) | - + SCHEDULE_HOST_SVC_CHECKS | ;<host_name>;<check_time> (2) | - + ENABLE_HOST_SVC_CHECKS | ;<host_name> (1) | - + DISABLE_HOST_SVC_CHECKS | ;<host_name> (1) | - + ACKNOWLEDGE_SVC_PROBLEM | ;<host_name>;<service_name>;<sticky>;<notify>;<persistent>;<author>;<comment> (7) | Note: Icinga 2 treats all comments as persistent. + ACKNOWLEDGE_SVC_PROBLEM_EXPIRE | ;<host_name>;<service_name>;<sticky>;<notify>;<persistent>;<timestamp>;<author>;<comment> (8) | Note: Icinga 2 treats all comments as persistent. + REMOVE_SVC_ACKNOWLEDGEMENT | ;<host_name>;<service_name> (2) | - + ACKNOWLEDGE_HOST_PROBLEM | ;<host_name>;<sticky>;<notify>;<persistent>;<author>;<comment> (6) | Note: Icinga 2 treats all comments as persistent. 
+ ACKNOWLEDGE_HOST_PROBLEM_EXPIRE | ;<host_name>;<sticky>;<notify>;<persistent>;<timestamp>;<author>;<comment> (7) | Note: Icinga 2 treats all comments as persistent. + REMOVE_HOST_ACKNOWLEDGEMENT | ;<host_name> (1) | - + DISABLE_HOST_FLAP_DETECTION | ;<host_name> (1) | - + ENABLE_HOST_FLAP_DETECTION | ;<host_name> (1) | - + DISABLE_SVC_FLAP_DETECTION | ;<host_name>;<service_name> (2) | - + ENABLE_SVC_FLAP_DETECTION | ;<host_name>;<service_name> (2) | - + ENABLE_HOSTGROUP_SVC_CHECKS | ;<hostgroup_name> (1) | - + DISABLE_HOSTGROUP_SVC_CHECKS | ;<hostgroup_name> (1) | - + ENABLE_SERVICEGROUP_SVC_CHECKS | ;<servicegroup_name> (1) | - + DISABLE_SERVICEGROUP_SVC_CHECKS | ;<servicegroup_name> (1) | - + ENABLE_PASSIVE_HOST_CHECKS | ;<host_name> (1) | - + DISABLE_PASSIVE_HOST_CHECKS | ;<host_name> (1) | - + ENABLE_PASSIVE_SVC_CHECKS | ;<host_name>;<service_name> (2) | - + DISABLE_PASSIVE_SVC_CHECKS | ;<host_name>;<service_name> (2) | - + ENABLE_SERVICEGROUP_PASSIVE_SVC_CHECKS | ;<servicegroup_name> (1) | - + DISABLE_SERVICEGROUP_PASSIVE_SVC_CHECKS | ;<servicegroup_name> (1) | - + ENABLE_HOSTGROUP_PASSIVE_SVC_CHECKS | ;<hostgroup_name> (1) | - + DISABLE_HOSTGROUP_PASSIVE_SVC_CHECKS | ;<hostgroup_name> (1) | - + PROCESS_FILE | ;<file_name>;<delete> (2) | - + SCHEDULE_SVC_DOWNTIME | ;<host_name>;<service_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (9) | - + DEL_SVC_DOWNTIME | ;<downtime_id> (1) | - + SCHEDULE_AND_PROPAGATE_HOST_DOWNTIME | ;<host_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + SCHEDULE_AND_PROPAGATE_TRIGGERED_HOST_DOWNTIME | ;<host_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + SCHEDULE_HOST_DOWNTIME | ;<host_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + DEL_HOST_DOWNTIME | ;<downtime_id> (1) | - + DEL_DOWNTIME_BY_HOST_NAME | ;<host_name>[;<service_name;>[;<start_time;>[;<comment_text;>]]] (1) 
| - + SCHEDULE_HOST_SVC_DOWNTIME | ;<host_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + SCHEDULE_HOSTGROUP_HOST_DOWNTIME | ;<hostgroup_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + SCHEDULE_HOSTGROUP_SVC_DOWNTIME | ;<hostgroup_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + SCHEDULE_SERVICEGROUP_HOST_DOWNTIME | ;<servicegroup_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + SCHEDULE_SERVICEGROUP_SVC_DOWNTIME | ;<servicegroup_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment> (8) | - + ADD_HOST_COMMENT | ;<host_name>;<persistent>;<author>;<comment> (4) | Note: Icinga 2 treats all comments as persistent. + DEL_HOST_COMMENT | ;<comment_id> (1) | - + ADD_SVC_COMMENT | ;<host_name>;<service_name>;<persistent>;<author>;<comment> (5) | Note: Icinga 2 treats all comments as persistent. + DEL_SVC_COMMENT | ;<comment_id> (1) | - + DEL_ALL_HOST_COMMENTS | ;<host_name> (1) | - + DEL_ALL_SVC_COMMENTS | ;<host_name>;<service_name> (2) | - + SEND_CUSTOM_HOST_NOTIFICATION | ;<host_name>;<options>;<author>;<comment> (4) | - + SEND_CUSTOM_SVC_NOTIFICATION | ;<host_name>;<service_name>;<options>;<author>;<comment> (5) | - + DELAY_HOST_NOTIFICATION | ;<host_name>;<notification_time> (2) | - + DELAY_SVC_NOTIFICATION | ;<host_name>;<service_name>;<notification_time> (3) | - + ENABLE_HOST_NOTIFICATIONS | ;<host_name> (1) | - + DISABLE_HOST_NOTIFICATIONS | ;<host_name> (1) | - + ENABLE_SVC_NOTIFICATIONS | ;<host_name>;<service_name> (2) | - + DISABLE_SVC_NOTIFICATIONS | ;<host_name>;<service_name> (2) | - + ENABLE_HOST_SVC_NOTIFICATIONS | ;<host_name> (1) | - + DISABLE_HOST_SVC_NOTIFICATIONS | ;<host_name> (1) | - + DISABLE_HOSTGROUP_HOST_CHECKS | ;<hostgroup_name> (1) | - + DISABLE_HOSTGROUP_PASSIVE_HOST_CHECKS | ;<hostgroup_name> (1) | - + DISABLE_SERVICEGROUP_HOST_CHECKS | 
;<servicegroup_name> (1) | - + DISABLE_SERVICEGROUP_PASSIVE_HOST_CHECKS | ;<servicegroup_name> (1) | - + ENABLE_HOSTGROUP_HOST_CHECKS | ;<hostgroup_name> (1) | - + ENABLE_HOSTGROUP_PASSIVE_HOST_CHECKS | ;<hostgroup_name> (1) | - + ENABLE_SERVICEGROUP_HOST_CHECKS | ;<servicegroup_name> (1) | - + ENABLE_SERVICEGROUP_PASSIVE_HOST_CHECKS | ;<servicegroup_name> (1) | - + ENABLE_NOTIFICATIONS | - | - + DISABLE_NOTIFICATIONS | - | - + ENABLE_FLAP_DETECTION | - | - + DISABLE_FLAP_DETECTION | - | - + ENABLE_EVENT_HANDLERS | - | - + DISABLE_EVENT_HANDLERS | - | - + ENABLE_PERFORMANCE_DATA | - | - + DISABLE_PERFORMANCE_DATA | - | - + START_EXECUTING_HOST_CHECKS | - | - + STOP_EXECUTING_HOST_CHECKS | - | - + START_EXECUTING_SVC_CHECKS | - | - + STOP_EXECUTING_SVC_CHECKS | - | - + CHANGE_NORMAL_SVC_CHECK_INTERVAL | ;<host_name>;<service_name>;<check_interval> (3) | - + CHANGE_NORMAL_HOST_CHECK_INTERVAL | ;<host_name>;<check_interval> (2) | - + CHANGE_RETRY_SVC_CHECK_INTERVAL | ;<host_name>;<service_name>;<check_interval> (3) | - + CHANGE_RETRY_HOST_CHECK_INTERVAL | ;<host_name>;<check_interval> (2) | - + ENABLE_HOST_EVENT_HANDLER | ;<host_name> (1) | - + DISABLE_HOST_EVENT_HANDLER | ;<host_name> (1) | - + ENABLE_SVC_EVENT_HANDLER | ;<host_name>;<service_name> (2) | - + DISABLE_SVC_EVENT_HANDLER | ;<host_name>;<service_name> (2) | - + CHANGE_HOST_EVENT_HANDLER | ;<host_name>;<event_command_name> (2) | - + CHANGE_SVC_EVENT_HANDLER | ;<host_name>;<service_name>;<event_command_name> (3) | - + CHANGE_HOST_CHECK_COMMAND | ;<host_name>;<check_command_name> (2) | - + CHANGE_SVC_CHECK_COMMAND | ;<host_name>;<service_name>;<check_command_name> (3) | - + CHANGE_MAX_HOST_CHECK_ATTEMPTS | ;<host_name>;<check_attempts> (2) | - + CHANGE_MAX_SVC_CHECK_ATTEMPTS | ;<host_name>;<service_name>;<check_attempts> (3) | - + CHANGE_HOST_CHECK_TIMEPERIOD | ;<host_name>;<timeperiod_name> (2) | - + CHANGE_SVC_CHECK_TIMEPERIOD | ;<host_name>;<service_name>;<timeperiod_name> | - + CHANGE_CUSTOM_HOST_VAR | 
;<host_name>;<var_name>;<var_value> (3) | - + CHANGE_CUSTOM_SVC_VAR | ;<host_name>;<service_name>;<var_name>;<var_value> (4) | - + CHANGE_CUSTOM_USER_VAR | ;<user_name>;<var_name>;<var_value> (3) | - + CHANGE_CUSTOM_CHECKCOMMAND_VAR | ;<check_command_name>;<var_name>;<var_value> (3) | - + CHANGE_CUSTOM_EVENTCOMMAND_VAR | ;<event_command_name>;<var_name>;<var_value> (3) | - + CHANGE_CUSTOM_NOTIFICATIONCOMMAND_VAR | ;<notification_command_name>;<var_name>;<var_value> (3) | - + ENABLE_HOSTGROUP_HOST_NOTIFICATIONS | ;<hostgroup_name> (1) | - + ENABLE_HOSTGROUP_SVC_NOTIFICATIONS | ;<hostgroup_name> (1) | - + DISABLE_HOSTGROUP_HOST_NOTIFICATIONS | ;<hostgroup_name> (1) | - + DISABLE_HOSTGROUP_SVC_NOTIFICATIONS | ;<hostgroup_name> (1) | - + ENABLE_SERVICEGROUP_HOST_NOTIFICATIONS | ;<servicegroup_name> (1) | - + DISABLE_SERVICEGROUP_HOST_NOTIFICATIONS | ;<servicegroup_name> (1) | - + ENABLE_SERVICEGROUP_SVC_NOTIFICATIONS | ;<servicegroup_name> (1) | - + DISABLE_SERVICEGROUP_SVC_NOTIFICATIONS | ;<servicegroup_name> (1) | - + + +## Schemas + +By convention `CheckCommand`, `EventCommand`, and `NotificationCommand` objects +are exported using a prefix. This is mandatory for unique objects in the +command tables. 
+ +Object | Prefix +------------------------|------------------------ +CheckCommand | check\_ +EventCommand | event\_ +NotificationCommand | notification\_ + +### DB IDO Schema + +Detailed documentation for the Icinga IDOUtils 1.x +database schema is available at [https://docs.icinga.com/latest/en/db_model.html](https://docs.icinga.com/latest/en/db_model.html). + +#### DB IDO Schema Extensions + +Icinga 2 specific extensions are shown below: + +New table: `endpoints` + + Table | Column | Type | Default | Description + --------------------|--------------------|----------|---------|------------- + endpoints | endpoint_object_id | bigint | NULL | FK: objects table + endpoints | identity | TEXT | NULL | endpoint name + endpoints | node | TEXT | NULL | local node name + endpoints | zone_object_id | bigint | NULL | zone object where this endpoint is a member of + +New table: `endpointstatus` + + Table | Column | Type | Default | Description + --------------------|--------------------|----------|---------|------------- + endpointstatus | endpoint_object_id | bigint | NULL | FK: objects table + endpointstatus | identity | TEXT | NULL | endpoint name + endpointstatus | node | TEXT | NULL | local node name + endpointstatus | is_connected | smallint | 0 | update on endpoint connect/disconnect + endpointstatus | zone_object_id | bigint | NULL | zone object where this endpoint is a member of + +New tables: `zones` and `zonestatus`: + + Table | Column | Type | Default | Description + --------------------|--------------------|----------|---------|------------- + zones | zone_object_id | bigint | NULL | FK: objects table + zones | parent_zone_object_id | bigint | NULL | FK: zones table + zones | is_global | smallint | 0 | zone is global + + +New columns: + + Table | Column | Type | Default | Description + --------------------|-------------------------|----------|---------|------------- + all status/history | endpoint_object_id | bigint | NULL | FK: objects table + servicestatus | check_source | TEXT | NULL | node
name where check was executed + hoststatus | check_source | TEXT | NULL | node name where check was executed + statehistory | check_source | TEXT | NULL | node name where check was executed + servicestatus | is_reachable | integer | NULL | object reachability + hoststatus | is_reachable | integer | NULL | object reachability + logentries | object_id | bigint | NULL | FK: objects table (service associated with column) + {host,service}group | notes | TEXT | NULL | - + {host,service}group | notes_url | TEXT | NULL | - + {host,service}group | action_url | TEXT | NULL | - + customvariable* | is_json | integer | 0 | Defines whether `varvalue` is a json encoded string from custom variables, or not + servicestatus | original_attributes | TEXT | NULL | JSON encoded dictionary of original attributes if modified at runtime. + hoststatus | original_attributes | TEXT | NULL | JSON encoded dictionary of original attributes if modified at runtime. + +Additional command custom variables populated from 'vars' dictionary. +Additional global custom variables populated from 'Vars' constant (object_id is NULL). 
+ +### Livestatus Schema + +#### Livestatus Schema Extensions + +Icinga 2 specific extensions are shown below: + +New table: `endpoints`: + + Table | Column + ----------|-------------- + endpoints | name + endpoints | identity + endpoints | node + endpoints | is_connected + endpoints | zone + +New table: `zones`: + + Table | Column + ----------|-------------- + zones | name + zones | endpoints + zones | parent + zones | global + +New columns: + + Table | Column + ----------|-------------- + hosts | is_reachable + services | is_reachable + hosts | cv_is_json + services | cv_is_json + contacts | cv_is_json + hosts | check_source + services | check_source + downtimes | triggers + downtimes | trigger_time + commands | custom_variable_names + commands | custom_variable_values + commands | custom_variables + commands | modified_attributes + commands | modified_attributes_list + status | custom_variable_names + status | custom_variable_values + status | custom_variables + hosts | original_attributes + services | original_attributes + +Command custom variables reflect the local 'vars' dictionary. +Status custom variables reflect the global 'Vars' constant. + +#### Livestatus Hosts Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + name | string | . + display_name | string | . + alias | string | same as display_name. + address | string | . + address6 | string | NEW in Icinga. + check_command | string | . + check_command_expanded | string | . + event_handler | string | . + notification_period | string | host with notifications: period. + check_period | string | . + notes | string | . + notes_expanded | string | . + notes_url | string | . + notes_url_expanded | string | . + action_url | string | . + action_url_expanded | string | . + plugin_output | string | . + perf_data | string | . + icon_image | string | . + icon_image_expanded | string | . + icon_image_alt | string | . + statusmap_image | string | .
+ long_plugin_output | string | . + max_check_attempts | int | . + flap_detection_enabled | int | . + check_freshness | int | . + process_performance_data | int | . + accept_passive_checks | int | . + event_handler_enabled | int | . + acknowledgement_type | int | Only 0 or 1. + check_type | int | . + last_state | int | . + last_hard_state | int | . + current_attempt | int | . + last_notification | int | host with notifications: last notification. + next_notification | int | host with notifications: next notification. + next_check | int | . + last_hard_state_change | int | . + has_been_checked | int | . + current_notification_number | int | host with notifications: number. + total_services | int | . + checks_enabled | int | . + notifications_enabled | int | . + acknowledged | int | . + state | int | . + state_type | int | . + no_more_notifications | int | notification_interval == 0 && volatile == false. + last_check | int | . + last_state_change | int | . + last_time_up | int | . + last_time_down | int | . + last_time_unreachable | int | . + is_flapping | int | . + scheduled_downtime_depth | int | . + active_checks_enabled | int | . + modified_attributes | array | . + modified_attributes_list | array | . + check_interval | double | . + retry_interval | double | . + notification_interval | double | host with notifications: smallest interval. + low_flap_threshold | double | flapping_threshold + high_flap_threshold | double | flapping_threshold + latency | double | . + execution_time | double | . + percent_state_change | double | flapping. + in_notification_period | int | host with notifications: matching period. + in_check_period | int | . + contacts | array | host with notifications, users and user groups. + downtimes | array | id. + downtimes_with_info | array | id+author+comment. + comments | array | id. + comments_with_info | array | id+author+comment. + comments_with_extra_info | array | id+author+comment+entry_type+entry_time. + custom_variable_names | array | . 
+ custom_variable_values | array | . + custom_variables | array | Array of custom variable array pair. + parents | array | Direct host parents. + childs | array | Direct host children (Note: `childs` is inherited from the origin MK_Livestatus protocol). + num_services | int | . + worst_service_state | int | All services and their worst state. + num_services_ok | int | All services with Ok state. + num_services_warn | int | All services with Warning state. + num_services_crit | int | All services with Critical state. + num_services_unknown | int | All services with Unknown state. + worst_service_hard_state | int | All services and their worst hard state. + num_services_hard_ok | int | All services in a hard state with Ok state. + num_services_hard_warn | int | All services in a hard state with Warning state. + num_services_hard_crit | int | All services in a hard state with Critical state. + num_services_hard_unknown | int | All services in a hard state with Unknown state. + hard_state | int | Returns OK if state is OK. Returns current state if now a hard state type. Returns last hard state otherwise. + staleness | int | Indicates time since last check normalized onto the check_interval. + groups | array | All hostgroups this host is a member of. + contact_groups | array | All usergroups associated with this host through notifications. + services | array | All services associated with this host. + services_with_state | array | All services associated with this host with state and hasbeenchecked. + services_with_info | array | All services associated with this host with state, hasbeenchecked and output. + +Not supported: `initial_state`, `pending_flex_downtime`, `check_flapping_recovery_notification`, +`is_executing`, `check_options`, `obsess_over_host`, `first_notification_delay`, `x_3d`, `y_3d`, `z_3d`, +`x_2d`, `y_2d`, `filename`, `pnpgraph_present`. 
+ +#### Livestatus Hostgroups Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + name | string | . + alias | string | `display_name` attribute. + notes | string | . + notes_url | string | . + action_url | string | . + members | array | . + members_with_state | array | Host name and state. + worst_host_state | int | Of all group members. + num_hosts | int | In this group. + num_hosts_pending | int | . + num_hosts_up | int | . + num_hosts_down | int | . + num_hosts_unreach | int | . + num_services | int | Number of services associated with hosts in this hostgroup. + worst_services_state | int | . + num_services_pending | int | . + num_services_ok | int | . + num_services_warn | int | . + num_services_crit | int | . + num_services_unknown | int | . + worst_service_hard_state | int | . + num_services_hard_ok | int | . + num_services_hard_warn | int | . + num_services_hard_crit | int | . + num_services_hard_unknown | int | . + +#### Livestatus Services Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + description | string | . + display_name | string | . + alias | string | same as display_name. + check_command | string | . + check_command_expanded | string | . + event_handler | string | . + notification_period | string | service with notifications: period. + check_period | string | . + notes | string | . + notes_expanded | string | . + notes_url | string | . + notes_url_expanded | string | . + action_url | string | . + action_url_expanded | string | . + plugin_output | string | . + perf_data | string | . + icon_image | string | . + icon_image_expanded | string | . + icon_image_alt | string | . + statusmap_image | string | . + long_plugin_output | string | . + max_check_attempts | int | . + flap_detection_enabled | int | . + check_freshness | int | . + process_performance_data | int | . + accept_passive_checks | int | . + event_handler_enabled | int | .
+ acknowledgement_type | int | Only 0 or 1. + check_type | int | . + last_state | int | . + last_hard_state | int | . + current_attempt | int | . + last_notification | int | service with notifications: last notification. + next_notification | int | service with notifications: next notification. + next_check | int | . + last_hard_state_change | int | . + has_been_checked | int | . + current_notification_number | int | service with notifications: number. + checks_enabled | int | . + notifications_enabled | int | . + acknowledged | int | . + state | int | . + state_type | int | . + no_more_notifications | int | notification_interval == 0 && volatile == false. + last_check | int | . + last_state_change | int | . + last_time_ok | int | . + last_time_warning | int | . + last_time_critical | int | . + last_time_unknown | int | . + is_flapping | int | . + scheduled_downtime_depth | int | . + active_checks_enabled | int | . + modified_attributes | array | . + modified_attributes_list | array | . + check_interval | double | . + retry_interval | double | . + notification_interval | double | service with notifications: smallest interval. + low_flap_threshold | double | flapping_threshold + high_flap_threshold | double | flapping_threshold + latency | double | . + execution_time | double | . + percent_state_change | double | flapping. + in_notification_period | int | service with notifications: matching period. + in_check_period | int | . + contacts | array | service with notifications, users and user groups. + downtimes | array | id. + downtimes_with_info | array | id+author+comment. + comments | array | id. + comments_with_info | array | id+author+comment. + comments_with_extra_info | array | id+author+comment+entry_type+entry_time. + custom_variable_names | array | . + custom_variable_values | array | . + custom_variables | array | Array of custom variable array pair. + hard_state | int | Returns OK if state is OK. Returns current state if now a hard state type. 
Returns last hard state otherwise. + staleness | int | Indicates time since last check normalized onto the check_interval. + groups | array | All servicegroups this service is a member of. + contact_groups | array | All usergroups associated with this service through notifications. + host_ | join | Prefix for attributes from implicit join with hosts table. + +Not supported: `initial_state`, `is_executing`, `check_options`, `obsess_over_service`, `first_notification_delay`, +`pnpgraph_present`. + +#### Livestatus Servicegroups Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + name | string | . + alias | string | `display_name` attribute. + notes | string | . + notes_url | string | . + action_url | string | . + members | array | CSV format uses `host|service` syntax. + members_with_state | array | Host, service, hoststate, servicestate. + worst_service_state | int | . + num_services | int | . + num_services_pending | int | . + num_services_ok | int | . + num_services_warn | int | . + num_services_crit | int | . + num_services_unknown | int | . + num_services_hard_ok | int | . + num_services_hard_warn | int | . + num_services_hard_crit | int | . + num_services_hard_unknown | int | . + +#### Livestatus Contacts Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + name | string | . + alias | string | `display_name` attribute. + email | string | . + pager | string | . + host_notification_period | string | . + service_notification_period | string | . + host_notifications_enabled | int | . + service_notifications_enabled | int | . + in_host_notification_period | int | . + in_service_notification_period | int | . + custom_variable_names | array | . + custom_variable_values | array | . + custom_variables | array | Array of custom variable array pairs. + modified_attributes | array | . + modified_attributes_list | array | . + + +Not supported: `can_submit_commands`.
+ +#### Livestatus Contactgroups Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + name | string | . + alias | string | `display_name` attribute. + members | array | . + + +#### Livestatus Commands Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + name | string | 3 types of commands in Icinga 2. + line | string | . + + +#### Livestatus Status Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + connections | int | Since application start. + connections_rate | double | . + service_checks | int | Since application start. + service_checks_rate | double | . + host_checks | int | Since application start. + host_checks_rate | double | . + external_commands | int | Since application start. + external_commands_rate | double | . + nagios_pid | string | Application PID. + enable_notifications | int | . + execute_service_checks | int | . + accept_passive_service_checks | int | . + execute_host_checks | int | . + accept_passive_host_checks | int | . + enable_event_handlers | int | . + check_service_freshness | int | . + check_host_freshness | int | . + enable_flap_detection | int | . + process_performance_data | int | . + check_external_commands | int | Always enabled. + program_start | int | In seconds. + last_command_check | int | Always. + interval_length | int | Compatibility mode: 60. + num_hosts | int | . + num_services | int | . + program_version | string | 2.0. + livestatus_active_connections | string | . 
+ +Not supported: `neb_callbacks`, `neb_callbacks_rate`, `requests`, `requests_rate`, `forks`, `forks_rate`, +`log_messages`, `log_messages_rate`, `livechecks`, `livechecks_rate`, `livecheck_overflows`, +`livecheck_overflows_rate`, `obsess_over_services`, `obsess_over_hosts`, `last_log_rotation`, +`external_command_buffer_slots`, `external_command_buffer_usage`, `external_command_buffer_max`, +`cached_log_messages`, `livestatus_queued_connections`, `livestatus_threads`. + + +#### Livestatus Comments Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + author | string | . + comment | string | . + id | int | legacy_id. + entry_time | string | Seconds. + type | int | 1=host, 2=service. + is_service | int | . + persistent | int | Always. + source | string | Always external (1). + entry_type | int | . + expires | int | . + expire_time | string | Seconds. + service_ | join | Prefix for attributes from implicit join with services table. + host_ | join | Prefix for attributes from implicit join with hosts table. + + +#### Livestatus Downtimes Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + author | string | . + comment | string | . + id | int | legacy_id. + entry_time | string | Seconds. + type | int | 1=active, 0=pending. + is_service | int | . + start_time | string | Seconds. + end_time | string | Seconds. + fixed | int | 0=flexible, 1=fixed. + duration | int | . + triggered_by | int | legacy_id. + triggers | int | NEW in Icinga 2. + trigger_time | string | NEW in Icinga 2. + service_ | join | Prefix for attributes from implicit join with services table. + host_ | join | Prefix for attributes from implicit join with hosts table. + + +#### Livestatus Timeperiods Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + name | string | . + alias | string | `display_name` attribute. 
+ in | int | Current time is in timeperiod or not. + +#### Livestatus Log Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + time | int | Time of log event (unix timestamp). + lineno | int | Line number in `CompatLogger` log file. + class | int | Log message class: 0=info, 1=state, 2=program, 3=notification, 4=passive, 5=command. + message | string | Complete message line. + type | string | Text before the colon `:`. + options | string | Text after the colon `:`. + comment | string | Comment if available. + plugin_output | string | Check output if available. + state | int | Host or service state. + state_type | int | State type if available. + attempt | int | Current check attempt. + service_description | string | . + host_name | string | . + contact_name | string | . + command_name | string | . + current_service_ | join | Prefix for attributes from implicit join with services table. + current_host_ | join | Prefix for attributes from implicit join with hosts table. + current_contact_ | join | Prefix for attributes from implicit join with contacts table. + current_command_ | join | Prefix for attributes from implicit join with commands table. + +#### Livestatus Statehist Table Attributes + + Key | Type | Note + ----------------------|-----------|------------------------- + time | int | Time of log event (unix timestamp). + lineno | int | Line number in `CompatLogger` log file. + from | int | Start timestamp (unix timestamp). + until | int | End timestamp (unix timestamp). + duration | int | until-from. + duration_part | double | duration / query_part. + state | int | State: 0=ok, 1=warn, 2=crit, 3=unknown, -1=notmonitored. + host_down | int | Host associated with the service is down or not. + in_downtime | int | Host/service is in downtime. + in_host_downtime | int | Host associated with the service is in a downtime or not. + is_flapping | int | Host/service is flapping. 
+ in_notification_period | int | Host/service notification periods match or not. + notification_period | string | Host/service notification period. + host_name | string | . + service_description | string | . + log_output | string | Log file output for this state. + duration_ok | int | until-from for OK state. + duration_part_ok | double | . + duration_warning | int | until-from for Warning state. + duration_part_warning | double | . + duration_critical | int | until-from for Critical state. + duration_part_critical | double | . + duration_unknown | int | until-from for Unknown state. + duration_part_unknown | double | . + duration_unmonitored | int | until-from for Not-Monitored state. + duration_part_unmonitored | double | . + current_service_ | join | Prefix for attributes from implicit join with services table. + current_host_ | join | Prefix for attributes from implicit join with hosts table. + +Not supported: `debug_info`. + +#### Livestatus Hostsbygroup Table Attributes + +All [hosts](24-appendix.md#schema-livestatus-hosts-table-attributes) table attributes grouped with +the [hostgroups](24-appendix.md#schema-livestatus-hostgroups-table-attributes) table prefixed with `hostgroup_`. + +#### Livestatus Servicesbygroup Table Attributes + +All [services](24-appendix.md#schema-livestatus-services-table-attributes) table attributes grouped with +the [servicegroups](24-appendix.md#schema-livestatus-servicegroups-table-attributes) table prefixed with `servicegroup_`. + +#### Livestatus Servicesbyhostgroup Table Attributes + +All [services](24-appendix.md#schema-livestatus-services-table-attributes) table attributes grouped with +the [hostgroups](24-appendix.md#schema-livestatus-hostgroups-table-attributes) table prefixed with `hostgroup_`. 
+ diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt new file mode 100644 index 0000000..3be5b58 --- /dev/null +++ b/doc/CMakeLists.txt @@ -0,0 +1,20 @@ +# Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ + +file(GLOB DOCSRCS "*.md") + +if(UNIX OR CYGWIN) + install( + FILES icinga2.8 + DESTINATION ${CMAKE_INSTALL_MANDIR}/man8 + ) +endif() + +install( + FILES ${DOCSRCS} + DESTINATION ${CMAKE_INSTALL_DOCDIR}/markdown +) + +install( + DIRECTORY images + DESTINATION ${CMAKE_INSTALL_DOCDIR}/markdown +) diff --git a/doc/icinga2.8 b/doc/icinga2.8 new file mode 100644 index 0000000..dfc062f --- /dev/null +++ b/doc/icinga2.8 @@ -0,0 +1,99 @@ +.TH ICINGA2 "8" "October 2015" "icinga2 - The Icinga 2 network monitoring daemon" +.SH NAME +icinga2 \- The Icinga 2 network monitoring daemon + +.SH SYNOPSIS +.B icinga2 +.I command +[ +.I command options +][ +.I global options +] + +.I command +:= [ +.B api | ca | console | daemon | feature | node | object | pki | variable +] +.B --help + +.SH DESCRIPTION + +Icinga 2 is an open source monitoring system which checks the availability of your network resources, notifies users of outages, and generates performance data for reporting. + +Scalable and extensible, Icinga 2 can monitor large, complex environments across multiple locations. + +.SH OPTIONS +Details for specific command options can be viewed by invoking the command name with +.B --help +parameter. + +.SS Global options +.TP +.B -h,--help +Show this help message. +.TP +.B -V,--version +Show version information. +.TP +.B --color +Use VT100 color codes even when stdout is not a terminal. +.TP +.BI "-D, --define" " arg" +Define a constant. +.TP +.BI "-l, --library" " arg" +Load a library. +.TP +.BI "-I, --include" " arg" +Add include search directory. +.TP +.BI "-x, --log-level" " [ debug | notice | information | warning | critical ]" +Specify the log level for the console log, default is +.B information. +.TP +.BI "-X, --script-debugger" +Enables the script debugger. 
When an exception occurs or the 'debugger' keyword +is encountered in a user script, Icinga 2 launches the script debugger that +allows the user to debug the script. + +.SS daemon options +The CLI command daemon provides the functionality to start/stop Icinga 2. +Furthermore it provides the configuration validation. + +.TP +.BI "-c, --config" " arg" +Using this option you can specify one or more configuration files. +Config files are processed in the order they are specified on the command-line. + +When no configuration file is specified and the +.B --no-config +option is not used, Icinga 2 automatically falls back to using the configuration file +.B ConfigDir + "/icinga2.conf" +(where ConfigDir is usually +.BI "/etc/icinga2" ")." + +.TP +.B "-z, --no-config" +Start without a configuration file. +.TP +.B "-C, --validate" +This option can be used to check if your configuration files contain errors. +If any errors are found the exit status is 1, otherwise 0 is returned. +.TP +.BI "-e, --errorlog" " arg" +Log fatal errors to the specified log file (only works in combination with +.BR "--daemonize" ")." +.TP +.B "-d, --daemonize" +Detach from the controlling terminal. +.SH "REPORTING BUGS" +Report bugs at <https://github.com/Icinga/icinga2> +.br +Icinga home page: <https://icinga.com/> +.SH COPYRIGHT +Copyright \(co 2012 Icinga GmbH +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl2.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. 
diff --git a/doc/images/addons/dashing_icinga2.png b/doc/images/addons/dashing_icinga2.png new file mode 100644 index 0000000..d3e0e42 Binary files /dev/null and b/doc/images/addons/dashing_icinga2.png differ diff --git a/doc/images/addons/icinga_certificate_monitoring.png b/doc/images/addons/icinga_certificate_monitoring.png new file mode 100644 index 0000000..d1be34b Binary files /dev/null and b/doc/images/addons/icinga_certificate_monitoring.png differ diff --git a/doc/images/addons/icinga_reporting.png b/doc/images/addons/icinga_reporting.png new file mode 100644 index 0000000..4b561a3 Binary files /dev/null and b/doc/images/addons/icinga_reporting.png differ diff --git a/doc/images/addons/icingaweb2_businessprocess.png b/doc/images/addons/icingaweb2_businessprocess.png new file mode 100644 index 0000000..7824ded Binary files /dev/null and b/doc/images/addons/icingaweb2_businessprocess.png differ diff --git a/doc/images/addons/icingaweb2_grafana.png b/doc/images/addons/icingaweb2_grafana.png new file mode 100644 index 0000000..0861543 Binary files /dev/null and b/doc/images/addons/icingaweb2_grafana.png differ diff --git a/doc/images/addons/icingaweb2_graphite.png b/doc/images/addons/icingaweb2_graphite.png new file mode 100644 index 0000000..4147ba5 Binary files /dev/null and b/doc/images/addons/icingaweb2_graphite.png differ diff --git a/doc/images/addons/icingaweb2_maps.png b/doc/images/addons/icingaweb2_maps.png new file mode 100644 index 0000000..5564eda Binary files /dev/null and b/doc/images/addons/icingaweb2_maps.png differ diff --git a/doc/images/addons/nano-syntax.png b/doc/images/addons/nano-syntax.png new file mode 100644 index 0000000..d89b2af Binary files /dev/null and b/doc/images/addons/nano-syntax.png differ diff --git a/doc/images/addons/vim-syntax.png b/doc/images/addons/vim-syntax.png new file mode 100644 index 0000000..ebe116f Binary files /dev/null and b/doc/images/addons/vim-syntax.png differ diff --git 
a/doc/images/advanced-topics/flapping-state-graph.png b/doc/images/advanced-topics/flapping-state-graph.png new file mode 100644 index 0000000..2f78057 Binary files /dev/null and b/doc/images/advanced-topics/flapping-state-graph.png differ diff --git a/doc/images/advanced-topics/icinga2_external_checks_freshness_icingaweb2.png b/doc/images/advanced-topics/icinga2_external_checks_freshness_icingaweb2.png new file mode 100644 index 0000000..b46db68 Binary files /dev/null and b/doc/images/advanced-topics/icinga2_external_checks_freshness_icingaweb2.png differ diff --git a/doc/images/advanced-topics/icingaweb2_downtime_handled.png b/doc/images/advanced-topics/icingaweb2_downtime_handled.png new file mode 100644 index 0000000..3fe2690 Binary files /dev/null and b/doc/images/advanced-topics/icingaweb2_downtime_handled.png differ diff --git a/doc/images/api/icinga2_api_powershell_ise.png b/doc/images/api/icinga2_api_powershell_ise.png new file mode 100644 index 0000000..41acbdd Binary files /dev/null and b/doc/images/api/icinga2_api_powershell_ise.png differ diff --git a/doc/images/configuration/icinga_web_local_server.png b/doc/images/configuration/icinga_web_local_server.png new file mode 100644 index 0000000..3dac92d Binary files /dev/null and b/doc/images/configuration/icinga_web_local_server.png differ diff --git a/doc/images/development/windows_boost_build_dev_cmd.png b/doc/images/development/windows_boost_build_dev_cmd.png new file mode 100644 index 0000000..1a3c30c Binary files /dev/null and b/doc/images/development/windows_boost_build_dev_cmd.png differ diff --git a/doc/images/development/windows_builds_gitlab_pipeline.png b/doc/images/development/windows_builds_gitlab_pipeline.png new file mode 100644 index 0000000..8110c53 Binary files /dev/null and b/doc/images/development/windows_builds_gitlab_pipeline.png differ diff --git a/doc/images/development/windows_powershell_posh_git.png b/doc/images/development/windows_powershell_posh_git.png new file mode 100644 
index 0000000..48014a4 Binary files /dev/null and b/doc/images/development/windows_powershell_posh_git.png differ diff --git a/doc/images/development/windows_visual_studio_installer_01.png b/doc/images/development/windows_visual_studio_installer_01.png new file mode 100644 index 0000000..a8cb449 Binary files /dev/null and b/doc/images/development/windows_visual_studio_installer_01.png differ diff --git a/doc/images/development/windows_visual_studio_installer_02.png b/doc/images/development/windows_visual_studio_installer_02.png new file mode 100644 index 0000000..0369970 Binary files /dev/null and b/doc/images/development/windows_visual_studio_installer_02.png differ diff --git a/doc/images/development/windows_visual_studio_installer_03.png b/doc/images/development/windows_visual_studio_installer_03.png new file mode 100644 index 0000000..c29f57f Binary files /dev/null and b/doc/images/development/windows_visual_studio_installer_03.png differ diff --git a/doc/images/development/windows_visual_studio_tabs_c++.png b/doc/images/development/windows_visual_studio_tabs_c++.png new file mode 100644 index 0000000..d511469 Binary files /dev/null and b/doc/images/development/windows_visual_studio_tabs_c++.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_agent_checks_command_endpoint.png b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_agent_checks_command_endpoint.png new file mode 100644 index 0000000..d55278e Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_agent_checks_command_endpoint.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_endpoints.png b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_endpoints.png new file mode 100644 index 0000000..aa37f60 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_endpoints.png differ diff --git 
a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_roles.png b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_roles.png new file mode 100644 index 0000000..d9018f8 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_roles.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_satellite_config_sync.png b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_satellite_config_sync.png new file mode 100644 index 0000000..92dcda9 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_satellite_config_sync.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenario_ha_masters_with_agents.png b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenario_ha_masters_with_agents.png new file mode 100644 index 0000000..c45df2c Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenario_ha_masters_with_agents.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_satellites_agents.png b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_satellites_agents.png new file mode 100644 index 0000000..8535993 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_satellites_agents.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_with_agents.png b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_with_agents.png new file mode 100644 index 0000000..fe7ac4d Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_scenarios_master_with_agents.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_monitoring_zones.png 
b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_zones.png new file mode 100644 index 0000000..84b42f8 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_monitoring_zones.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_windows_client_disk_icingaweb2.png b/doc/images/distributed-monitoring/icinga2_distributed_windows_client_disk_icingaweb2.png new file mode 100644 index 0000000..de13ad7 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_windows_client_disk_icingaweb2.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_api_drivesize_icingaweb2.png b/doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_api_drivesize_icingaweb2.png new file mode 100644 index 0000000..9409025 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_api_drivesize_icingaweb2.png differ diff --git a/doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_counter_icingaweb2.png b/doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_counter_icingaweb2.png new file mode 100644 index 0000000..a7383e2 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_distributed_windows_nscp_counter_icingaweb2.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_running_service.png b/doc/images/distributed-monitoring/icinga2_windows_running_service.png new file mode 100644 index 0000000..53b851b Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_running_service.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_installer_01.png b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_01.png new file mode 100644 index 0000000..8460dc6 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_01.png differ diff --git 
a/doc/images/distributed-monitoring/icinga2_windows_setup_installer_02.png b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_02.png new file mode 100644 index 0000000..476b6d2 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_02.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_installer_03.png b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_03.png new file mode 100644 index 0000000..35aad83 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_03.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_installer_04.png b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_04.png new file mode 100644 index 0000000..4d314e6 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_04.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_installer_05.png b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_05.png new file mode 100644 index 0000000..7b2c3d8 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_installer_05.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_01.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_01.png new file mode 100644 index 0000000..c74857a Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_01.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02.png new file mode 100644 index 0000000..7084985 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02_global_zone.png 
b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02_global_zone.png new file mode 100644 index 0000000..c0bc4e0 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_02_global_zone.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_03.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_03.png new file mode 100644 index 0000000..0d246f4 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_03.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_04.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_04.png new file mode 100644 index 0000000..9f1a5d0 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_04.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_01.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_01.png new file mode 100644 index 0000000..8f9df0d Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_01.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_02.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_02.png new file mode 100644 index 0000000..1867db4 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_02.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_03.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_03.png new file mode 100644 index 0000000..0775e29 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_03.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_04.png 
b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_04.png new file mode 100644 index 0000000..dd0d2fd Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_04.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_05.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_05.png new file mode 100644 index 0000000..bacd069 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_05.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_06.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_06.png new file mode 100644 index 0000000..fa8331f Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_06.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_07.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_07.png new file mode 100644 index 0000000..58b5699 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_05_nsclient_07.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_no_ticket.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_no_ticket.png new file mode 100644 index 0000000..1c91f2d Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_no_ticket.png differ diff --git a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_with_ticket.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_with_ticket.png new file mode 100644 index 0000000..3d60237 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_06_finish_with_ticket.png differ diff --git 
a/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_examine_config.png b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_examine_config.png new file mode 100644 index 0000000..f568776 Binary files /dev/null and b/doc/images/distributed-monitoring/icinga2_windows_setup_wizard_examine_config.png differ diff --git a/doc/images/icingadb/icingadb-architecture.png b/doc/images/icingadb/icingadb-architecture.png new file mode 100644 index 0000000..3d55ff7 Binary files /dev/null and b/doc/images/icingadb/icingadb-architecture.png differ diff --git a/doc/images/icingadb/icingadb-daemon.png b/doc/images/icingadb/icingadb-daemon.png new file mode 100644 index 0000000..de3f4c7 Binary files /dev/null and b/doc/images/icingadb/icingadb-daemon.png differ diff --git a/doc/images/icingadb/icingadb-icinga2.png b/doc/images/icingadb/icingadb-icinga2.png new file mode 100644 index 0000000..7b7aafa Binary files /dev/null and b/doc/images/icingadb/icingadb-icinga2.png differ diff --git a/doc/images/icingadb/icingadb-redis.png b/doc/images/icingadb/icingadb-redis.png new file mode 100644 index 0000000..d6eafab Binary files /dev/null and b/doc/images/icingadb/icingadb-redis.png differ diff --git a/doc/scroll.js b/doc/scroll.js new file mode 100644 index 0000000..bad2ef6 --- /dev/null +++ b/doc/scroll.js @@ -0,0 +1,16 @@ +$(document).ready(function() { + + $('a[href^="#"]').on('click',function (e) { + e.preventDefault(); + + var target = this.hash; + var $target = $(target); + + $('html, body').stop().animate({ + 'scrollTop': $target.offset().top + }, 900, 'swing', function () { + window.location.hash = target; + }); + }); + +}); diff --git a/doc/update-links.py b/doc/update-links.py new file mode 100755 index 0000000..765d4a0 --- /dev/null +++ b/doc/update-links.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ + +import os +import sys +import re + +if len(sys.argv) < 2: + print "Syntax: %s " % sys.argv[0] + print "" + print 
"Updates inter-chapter links in the specified Markdown files." + sys.exit(1) + +anchors = {} + +for file in sys.argv[1:]: + text = open(file).read() + for match in re.finditer(r"<a id=\"(?P<id>.*?)\">", text): + id = match.group("id") + + if id in anchors: + print "Error: Anchor '%s' is used multiple times: in %s and %s" % (id, file, anchors[id]) + + anchors[match.group("id")] = file + +def update_anchor(match): + id = match.group("id") + + try: + file = os.path.basename(anchors[id]) + except KeyError: + print "Error: Unmatched anchor: %s" % (id) + file = "" + + return "[%s](%s#%s)" % (match.group("text"), file, id) + +for file in sys.argv[1:]: + text = open(file).read() + print "> Processing file '%s'..." % (file) + new_text = re.sub(r"\[(?P<text>.*?)\]\((?P<file>[0-9-a-z\.]+)?#(?P<id>[^#\)]+)\)", update_anchor, text) + open(file, "w").write(new_text) diff --git a/doc/win-dev.ps1 b/doc/win-dev.ps1 new file mode 100644 index 0000000..d2fc8be --- /dev/null +++ b/doc/win-dev.ps1 @@ -0,0 +1,97 @@ +Set-PSDebug -Trace 1 + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' +$PSDefaultParameterValues['*:ErrorAction'] = 'Stop' + +function ThrowOnNativeFailure { + if (-not $?) 
{ + throw 'Native failure' + } +} + + +$VsVersion = 2019 +$MsvcVersion = '14.2' +$BoostVersion = @(1, 80, 0) +$OpensslVersion = '1_1_1s' + +switch ($Env:BITS) { + 32 { } + 64 { } + default { + $Env:BITS = 64 + } +} + + +function Install-Exe { + param ( + [string]$Url, + [string]$Dir + ) + + $TempDir = Join-Path ([System.IO.Path]::GetTempPath()) ([System.Guid]::NewGuid().Guid) + $ExeFile = Join-Path $TempDir inst.exe + + New-Item -ItemType Directory -Path $TempDir + + for ($trial = 1;; ++$trial) { + try { + Invoke-WebRequest -Uri $Url -OutFile $ExeFile -UseBasicParsing + } catch { + if ($trial -ge 2) { + throw + } + + continue + } + + break + } + + Start-Process -Wait -FilePath $ExeFile -ArgumentList @('/VERYSILENT', '/INSTALL', '/PASSIVE', '/NORESTART', "/DIR=${Dir}") + ThrowOnNativeFailure + + Remove-Item -Recurse -Path $TempDir +} + + +try { + Get-Command choco +} catch { + Invoke-Expression (New-Object Net.WebClient).DownloadString('https://chocolatey.org/install.ps1') + ThrowOnNativeFailure + + $RegEnv = 'Registry::HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\Session Manager\Environment' + $ChocoPath = ";$(Join-Path $Env:AllUsersProfile chocolatey\bin)" + + Set-ItemProperty -Path $RegEnv -Name Path -Value ((Get-ItemProperty -Path $RegEnv -Name Path).Path + $ChocoPath) + $Env:Path += $ChocoPath +} + +# GitHub Actions uses an image that comes with most dependencies preinstalled. Don't install them twice. 
+if (-not $Env:GITHUB_ACTIONS) { + choco install -y ` + "visualstudio${VsVersion}community" ` + "visualstudio${VsVersion}-workload-netcoretools" ` + "visualstudio${VsVersion}-workload-vctools" ` + "visualstudio${VsVersion}-workload-manageddesktop" ` + "visualstudio${VsVersion}-workload-nativedesktop" ` + "visualstudio${VsVersion}-workload-universal" ` + "visualstudio${VsVersion}buildtools" ` + git ` + cmake ` + winflexbison3 ` + windows-sdk-8.1 ` + wixtoolset + ThrowOnNativeFailure +} else { + choco install -y winflexbison3 + ThrowOnNativeFailure +} + + +Install-Exe -Url "https://packages.icinga.com/windows/dependencies/boost_$($BoostVersion -join '_')-msvc-${MsvcVersion}-${Env:BITS}.exe" -Dir "C:\local\boost_$($BoostVersion -join '_')-Win${Env:BITS}" + +Install-Exe -Url "https://packages.icinga.com/windows/dependencies/Win${Env:BITS}OpenSSL-${OpensslVersion}.exe" -Dir "C:\local\OpenSSL_${OpensslVersion}-Win${Env:BITS}" -- cgit v1.2.3