summaryrefslogtreecommitdiffstats
path: root/conf.d
diff options
context:
space:
mode:
Diffstat (limited to 'conf.d')
-rw-r--r--conf.d/Makefile.am35
-rw-r--r--conf.d/Makefile.in85
-rw-r--r--conf.d/health.d/cpu.conf8
-rw-r--r--conf.d/health.d/disks.conf16
-rw-r--r--conf.d/health.d/entropy.conf2
-rw-r--r--conf.d/health.d/ipc.conf6
-rw-r--r--conf.d/health.d/memory.conf8
-rw-r--r--conf.d/health.d/net.conf28
-rw-r--r--conf.d/health.d/netfilter.conf6
-rw-r--r--conf.d/health.d/qos.conf4
-rw-r--r--conf.d/health.d/ram.conf6
-rw-r--r--conf.d/health.d/softnet.conf7
-rw-r--r--conf.d/health.d/swap.conf8
-rw-r--r--conf.d/health.d/tcp_resets.conf13
-rw-r--r--conf.d/health.d/udp_errors.conf9
-rw-r--r--conf.d/health_alarm_notify.conf16
-rw-r--r--conf.d/node.d/fronius.conf.md2
-rw-r--r--conf.d/node.d/stiebeleltron.conf.md453
-rw-r--r--conf.d/python.d.conf1
-rw-r--r--conf.d/python.d/chrony.conf72
-rw-r--r--conf.d/python.d/postgres.conf3
-rw-r--r--conf.d/python.d/tomcat.conf5
-rw-r--r--conf.d/python.d/web_log.conf2
23 files changed, 724 insertions, 71 deletions
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am
index 4cbecb56a..7a0786678 100644
--- a/conf.d/Makefile.am
+++ b/conf.d/Makefile.am
@@ -21,6 +21,7 @@ dist_nodeconfig_DATA = \
node.d/named.conf.md \
node.d/sma_webbox.conf.md \
node.d/snmp.conf.md \
+ node.d/stiebeleltron.conf.md \
$(NULL)
pythonconfigdir=$(configdir)/python.d
@@ -28,6 +29,7 @@ dist_pythonconfig_DATA = \
python.d/apache.conf \
python.d/apache_cache.conf \
python.d/bind_rndc.conf \
+ python.d/chrony.conf \
python.d/cpufreq.conf \
python.d/dns_query_time.conf \
python.d/dovecot.conf \
@@ -69,45 +71,40 @@ dist_healthconfig_DATA = \
health.d/apache.conf \
health.d/backend.conf \
health.d/bind_rndc.conf \
+ health.d/cpu.conf \
+ health.d/disks.conf \
health.d/elasticsearch.conf \
+ health.d/entropy.conf \
health.d/fping.conf \
health.d/haproxy.conf \
+ health.d/ipc.conf \
health.d/ipfs.conf \
health.d/ipmi.conf \
health.d/isc_dhcpd.conf \
health.d/lighttpd.conf \
health.d/mdstat.conf \
health.d/memcached.conf \
+ health.d/memory.conf \
+ health.d/mongodb.conf \
health.d/mysql.conf \
health.d/named.conf \
- health.d/mongodb.conf \
- health.d/nginx.conf \
- health.d/postgres.conf \
- health.d/redis.conf \
- health.d/retroshare.conf \
- health.d/squid.conf \
- health.d/varnish.conf \
- health.d/web_log.conf \
- health.d/zfs.conf \
- $(NULL)
-
-if LINUX
-dist_healthconfig_DATA += \
- health.d/cpu.conf \
- health.d/disks.conf \
- health.d/entropy.conf \
- health.d/ipc.conf \
- health.d/memory.conf \
health.d/net.conf \
health.d/netfilter.conf \
+ health.d/nginx.conf \
+ health.d/postgres.conf \
health.d/qos.conf \
health.d/ram.conf \
+ health.d/redis.conf \
+ health.d/retroshare.conf \
health.d/softnet.conf \
+ health.d/squid.conf \
health.d/swap.conf \
health.d/tcp_resets.conf \
health.d/udp_errors.conf \
+ health.d/varnish.conf \
+ health.d/web_log.conf \
+ health.d/zfs.conf \
$(NULL)
-endif LINUX
chartsconfigdir=$(configdir)/charts.d
dist_chartsconfig_DATA = \
diff --git a/conf.d/Makefile.in b/conf.d/Makefile.in
index 7a1e300e0..3d7084ed1 100644
--- a/conf.d/Makefile.in
+++ b/conf.d/Makefile.in
@@ -78,26 +78,10 @@ PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-@LINUX_TRUE@am__append_1 = \
-@LINUX_TRUE@ health.d/cpu.conf \
-@LINUX_TRUE@ health.d/disks.conf \
-@LINUX_TRUE@ health.d/entropy.conf \
-@LINUX_TRUE@ health.d/ipc.conf \
-@LINUX_TRUE@ health.d/memory.conf \
-@LINUX_TRUE@ health.d/net.conf \
-@LINUX_TRUE@ health.d/netfilter.conf \
-@LINUX_TRUE@ health.d/qos.conf \
-@LINUX_TRUE@ health.d/ram.conf \
-@LINUX_TRUE@ health.d/softnet.conf \
-@LINUX_TRUE@ health.d/swap.conf \
-@LINUX_TRUE@ health.d/tcp_resets.conf \
-@LINUX_TRUE@ health.d/udp_errors.conf \
-@LINUX_TRUE@ $(NULL)
-
subdir = conf.d
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(dist_chartsconfig_DATA) $(dist_config_DATA) \
- $(am__dist_healthconfig_DATA_DIST) $(dist_nodeconfig_DATA) \
+ $(dist_healthconfig_DATA) $(dist_nodeconfig_DATA) \
$(dist_pythonconfig_DATA) $(dist_statsdconfig_DATA)
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \
@@ -164,21 +148,6 @@ am__installdirs = "$(DESTDIR)$(chartsconfigdir)" \
"$(DESTDIR)$(configdir)" "$(DESTDIR)$(healthconfigdir)" \
"$(DESTDIR)$(nodeconfigdir)" "$(DESTDIR)$(pythonconfigdir)" \
"$(DESTDIR)$(statsdconfigdir)"
-am__dist_healthconfig_DATA_DIST = health.d/apache.conf \
- health.d/backend.conf health.d/bind_rndc.conf \
- health.d/elasticsearch.conf health.d/fping.conf \
- health.d/haproxy.conf health.d/ipfs.conf health.d/ipmi.conf \
- health.d/isc_dhcpd.conf health.d/lighttpd.conf \
- health.d/mdstat.conf health.d/memcached.conf \
- health.d/mysql.conf health.d/named.conf health.d/mongodb.conf \
- health.d/nginx.conf health.d/postgres.conf health.d/redis.conf \
- health.d/retroshare.conf health.d/squid.conf \
- health.d/varnish.conf health.d/web_log.conf health.d/zfs.conf \
- health.d/cpu.conf health.d/disks.conf health.d/entropy.conf \
- health.d/ipc.conf health.d/memory.conf health.d/net.conf \
- health.d/netfilter.conf health.d/qos.conf health.d/ram.conf \
- health.d/softnet.conf health.d/swap.conf \
- health.d/tcp_resets.conf health.d/udp_errors.conf
DATA = $(dist_chartsconfig_DATA) $(dist_config_DATA) \
$(dist_healthconfig_DATA) $(dist_nodeconfig_DATA) \
$(dist_pythonconfig_DATA) $(dist_statsdconfig_DATA)
@@ -351,6 +320,7 @@ dist_nodeconfig_DATA = \
node.d/named.conf.md \
node.d/sma_webbox.conf.md \
node.d/snmp.conf.md \
+ node.d/stiebeleltron.conf.md \
$(NULL)
pythonconfigdir = $(configdir)/python.d
@@ -358,6 +328,7 @@ dist_pythonconfig_DATA = \
python.d/apache.conf \
python.d/apache_cache.conf \
python.d/bind_rndc.conf \
+ python.d/chrony.conf \
python.d/cpufreq.conf \
python.d/dns_query_time.conf \
python.d/dovecot.conf \
@@ -394,17 +365,45 @@ dist_pythonconfig_DATA = \
$(NULL)
healthconfigdir = $(configdir)/health.d
-dist_healthconfig_DATA = health.d/apache.conf health.d/backend.conf \
- health.d/bind_rndc.conf health.d/elasticsearch.conf \
- health.d/fping.conf health.d/haproxy.conf health.d/ipfs.conf \
- health.d/ipmi.conf health.d/isc_dhcpd.conf \
- health.d/lighttpd.conf health.d/mdstat.conf \
- health.d/memcached.conf health.d/mysql.conf \
- health.d/named.conf health.d/mongodb.conf health.d/nginx.conf \
- health.d/postgres.conf health.d/redis.conf \
- health.d/retroshare.conf health.d/squid.conf \
- health.d/varnish.conf health.d/web_log.conf health.d/zfs.conf \
- $(NULL) $(am__append_1)
+dist_healthconfig_DATA = \
+ health.d/apache.conf \
+ health.d/backend.conf \
+ health.d/bind_rndc.conf \
+ health.d/cpu.conf \
+ health.d/disks.conf \
+ health.d/elasticsearch.conf \
+ health.d/entropy.conf \
+ health.d/fping.conf \
+ health.d/haproxy.conf \
+ health.d/ipc.conf \
+ health.d/ipfs.conf \
+ health.d/ipmi.conf \
+ health.d/isc_dhcpd.conf \
+ health.d/lighttpd.conf \
+ health.d/mdstat.conf \
+ health.d/memcached.conf \
+ health.d/memory.conf \
+ health.d/mongodb.conf \
+ health.d/mysql.conf \
+ health.d/named.conf \
+ health.d/net.conf \
+ health.d/netfilter.conf \
+ health.d/nginx.conf \
+ health.d/postgres.conf \
+ health.d/qos.conf \
+ health.d/ram.conf \
+ health.d/redis.conf \
+ health.d/retroshare.conf \
+ health.d/softnet.conf \
+ health.d/squid.conf \
+ health.d/swap.conf \
+ health.d/tcp_resets.conf \
+ health.d/udp_errors.conf \
+ health.d/varnish.conf \
+ health.d/web_log.conf \
+ health.d/zfs.conf \
+ $(NULL)
+
chartsconfigdir = $(configdir)/charts.d
dist_chartsconfig_DATA = \
charts.d/apache.conf \
diff --git a/conf.d/health.d/cpu.conf b/conf.d/health.d/cpu.conf
index 30a714097..db6285561 100644
--- a/conf.d/health.d/cpu.conf
+++ b/conf.d/health.d/cpu.conf
@@ -1,6 +1,10 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
template: 10min_cpu_usage
on: system.cpu
+ os: linux
+ hosts: *
lookup: average -10m unaligned of user,system,softirq,irq,guest
units: %
every: 1m
@@ -12,6 +16,8 @@ template: 10min_cpu_usage
template: 10min_cpu_iowait
on: system.cpu
+ os: linux
+ hosts: *
lookup: average -10m unaligned of iowait
units: %
every: 1m
@@ -23,6 +29,8 @@ template: 10min_cpu_iowait
template: 20min_steal_cpu
on: system.cpu
+ os: linux
+ hosts: *
lookup: average -20m unaligned of steal
units: %
every: 5m
diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf
index 9548f9ee0..63053491e 100644
--- a/conf.d/health.d/disks.conf
+++ b/conf.d/health.d/disks.conf
@@ -1,3 +1,7 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+
# -----------------------------------------------------------------------------
# low disk space
@@ -7,6 +11,8 @@
template: disk_space_usage
on: disk.space
+ os: linux
+ hosts: *
families: *
calc: $used * 100 / ($avail + $used)
units: %
@@ -19,6 +25,8 @@ families: *
template: disk_inode_usage
on: disk.inodes
+ os: linux
+ hosts: *
families: *
calc: $used * 100 / ($avail + $used)
units: %
@@ -43,6 +51,8 @@ families: *
template: disk_fill_rate
on: disk.space
+ os: linux
+ hosts: *
families: *
lookup: min -10m at -50m unaligned of avail
calc: ($this - $avail) / (($now - $after) / 3600)
@@ -57,6 +67,8 @@ families: *
template: out_of_disk_space_time
on: disk.space
+ os: linux
+ hosts: *
families: *
calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
units: hours
@@ -77,6 +89,8 @@ families: *
template: 10min_disk_utilization
on: disk.util
+ os: linux
+ hosts: *
families: *
lookup: average -10m unaligned
units: %
@@ -97,6 +111,8 @@ families: *
template: 10min_disk_backlog
on: disk.backlog
+ os: linux
+ hosts: *
families: *
lookup: average -10m unaligned
units: ms
diff --git a/conf.d/health.d/entropy.conf b/conf.d/health.d/entropy.conf
index 5dd8af502..66d44ec13 100644
--- a/conf.d/health.d/entropy.conf
+++ b/conf.d/health.d/entropy.conf
@@ -5,6 +5,8 @@
alarm: lowest_entropy
on: system.entropy
+ os: linux
+ hosts: *
lookup: min -10m unaligned
units: entries
every: 5m
diff --git a/conf.d/health.d/ipc.conf b/conf.d/health.d/ipc.conf
index ee7c4badd..03cf264d8 100644
--- a/conf.d/health.d/ipc.conf
+++ b/conf.d/health.d/ipc.conf
@@ -1,6 +1,10 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
alarm: semaphores_used
on: system.ipc_semaphores
+ os: linux
+ hosts: *
calc: $semaphores * 100 / $ipc.semaphores.max
units: %
every: 10s
@@ -12,6 +16,8 @@
alarm: semaphore_arrays_used
on: system.ipc_semaphore_arrays
+ os: linux
+ hosts: *
calc: $arrays * 100 / $ipc.semaphores.arrays.max
units: %
every: 10s
diff --git a/conf.d/health.d/memory.conf b/conf.d/health.d/memory.conf
index 3c904f6b1..4a0e6e522 100644
--- a/conf.d/health.d/memory.conf
+++ b/conf.d/health.d/memory.conf
@@ -1,6 +1,10 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
alarm: 1hour_ecc_memory_correctable
on: mem.ecc_ce
+ os: linux
+ hosts: *
lookup: sum -10m unaligned
units: errors
every: 1m
@@ -11,6 +15,8 @@
alarm: 1hour_ecc_memory_uncorrectable
on: mem.ecc_ue
+ os: linux
+ hosts: *
lookup: sum -10m unaligned
units: errors
every: 1m
@@ -21,6 +27,8 @@
alarm: 1hour_memory_hw_corrupted
on: mem.hwcorrupt
+ os: linux
+ hosts: *
calc: $HardwareCorrupted
units: MB
every: 10s
diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf
index bd288817b..00a198612 100644
--- a/conf.d/health.d/net.conf
+++ b/conf.d/health.d/net.conf
@@ -1,4 +1,6 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
# -----------------------------------------------------------------------------
# dropped packets
@@ -8,48 +10,56 @@
template: inbound_packets_dropped
on: net.drops
+ os: linux
+ hosts: *
families: *
lookup: sum -10m unaligned absolute of inbound
units: packets
every: 1m
- warn: $this > 0
+ warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: interface inbound dropped packets in the last 10 minutes
to: sysadmin
template: outbound_packets_dropped
on: net.drops
+ os: linux
+ hosts: *
families: *
lookup: sum -10m unaligned absolute of outbound
units: packets
every: 1m
- warn: $this > 0
+ warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: interface outbound dropped packets in the last 10 minutes
to: sysadmin
template: inbound_packets_dropped_ratio
on: net.packets
+ os: linux
+ hosts: *
families: *
lookup: sum -10m unaligned absolute of received
calc: (($inbound_packets_dropped != nan AND $this > 0) ? ($inbound_packets_dropped * 100 / $this) : (0))
units: %
every: 1m
- warn: $this > 0.5
- crit: $this > 3
+ warn: $this >= 0.1
+ crit: $this >= 2
delay: down 1h multiplier 1.5 max 2h
info: the ratio of inbound dropped packets vs the total number of received packets of the network interface, during the last 10 minutes
to: sysadmin
template: outbound_packets_dropped_ratio
on: net.packets
+ os: linux
+ hosts: *
families: *
lookup: sum -10m unaligned absolute of sent
calc: (($outbound_packets_dropped != nan AND $this > 0) ? ($outbound_packets_dropped * 100 / $this) : (0))
units: %
every: 1m
- warn: $this > 0.5
- crit: $this > 3
+ warn: $this >= 0.1
+ crit: $this >= 2
delay: down 1h multiplier 1.5 max 2h
info: the ratio of outbound dropped packets vs the total number of sent packets of the network interface, during the last 10 minutes
to: sysadmin
@@ -65,6 +75,8 @@ families: *
template: 10min_fifo_errors
on: net.fifo
+ os: linux
+ hosts: *
families: *
lookup: sum -10m unaligned absolute
units: errors
@@ -86,6 +98,8 @@ families: *
template: 1m_received_packets_rate
on: net.packets
+ os: linux
+ hosts: *
families: *
lookup: average -1m of received
units: packets
@@ -94,6 +108,8 @@ families: *
template: 10s_received_packets_storm
on: net.packets
+ os: linux
+ hosts: *
families: *
lookup: average -10s of received
calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
diff --git a/conf.d/health.d/netfilter.conf b/conf.d/health.d/netfilter.conf
index 3dd6a67b3..fa1732b33 100644
--- a/conf.d/health.d/netfilter.conf
+++ b/conf.d/health.d/netfilter.conf
@@ -1,6 +1,10 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
alarm: netfilter_last_collected_secs
on: netfilter.conntrack_sockets
+ os: linux
+ hosts: *
calc: $now - $last_collected_t
units: seconds ago
every: 10s
@@ -12,6 +16,8 @@
alarm: netfilter_conntrack_full
on: netfilter.conntrack_sockets
+ os: linux
+ hosts: *
lookup: max -10s unaligned of connections
calc: $this * 100 / $netfilter.conntrack.max
units: %
diff --git a/conf.d/health.d/qos.conf b/conf.d/health.d/qos.conf
index 9e5939fdc..7290d15ff 100644
--- a/conf.d/health.d/qos.conf
+++ b/conf.d/health.d/qos.conf
@@ -1,10 +1,14 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
# check if a QoS class is dropping packets
# the alarm is checked every 10 seconds
# and examines the last minute of data
#template: 10min_qos_packet_drops
# on: tc.qos_dropped
+# os: linux
+# hosts: *
# lookup: sum -10m unaligned absolute
# every: 30s
# warn: $this > 0
diff --git a/conf.d/health.d/ram.conf b/conf.d/health.d/ram.conf
index b99e5e226..8d0e8838d 100644
--- a/conf.d/health.d/ram.conf
+++ b/conf.d/health.d/ram.conf
@@ -1,12 +1,18 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
alarm: used_ram_to_ignore
on: system.ram
+ os: linux
+ hosts: *
calc: ($zfs.arc_size.arcsz = nan)?(0):($zfs.arc_size.arcsz)
every: 10s
info: the amount of memory that is reported as used, but it is actually capable for resizing itself based on the system needs (eg. ZFS ARC)
alarm: ram_in_use
on: system.ram
+ os: linux
+ hosts: *
# calc: $used * 100 / ($used + $cached + $free)
calc: ($used - $used_ram_to_ignore) * 100 / ($used - $used_ram_to_ignore + $cached + $free)
units: %
diff --git a/conf.d/health.d/softnet.conf b/conf.d/health.d/softnet.conf
index 5faf9a9ee..64e1c6784 100644
--- a/conf.d/health.d/softnet.conf
+++ b/conf.d/health.d/softnet.conf
@@ -1,7 +1,12 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
# check for common /proc/net/softnet_stat errors
alarm: 10min_netdev_backlog_exceeded
on: system.softnet_stat
+ os: linux
+ hosts: *
lookup: sum -10m unaligned absolute of dropped
units: packets
every: 1m
@@ -12,6 +17,8 @@
alarm: 10min_netdev_budget_ran_outs
on: system.softnet_stat
+ os: linux
+ hosts: *
lookup: sum -10m unaligned absolute of squeezed
units: events
every: 1m
diff --git a/conf.d/health.d/swap.conf b/conf.d/health.d/swap.conf
index 7f57560e2..830a9af95 100644
--- a/conf.d/health.d/swap.conf
+++ b/conf.d/health.d/swap.conf
@@ -1,6 +1,10 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
alarm: 30min_ram_swapped_out
on: system.swapio
+ os: linux
+ hosts: *
lookup: sum -30m unaligned absolute of out
# we have to convert KB to MB by dividing $this (i.e. the result of the lookup) with 1024
calc: $this / 1024 * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free )
@@ -14,6 +18,8 @@
alarm: ram_in_swap
on: system.swap
+ os: linux
+ hosts: *
calc: $used * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free )
units: % of RAM
every: 10s
@@ -25,6 +31,8 @@
alarm: used_swap
on: system.swap
+ os: linux
+ hosts: *
calc: $used * 100 / ( $used + $free )
units: %
every: 10s
diff --git a/conf.d/health.d/tcp_resets.conf b/conf.d/health.d/tcp_resets.conf
index 803c88a81..fec124ac7 100644
--- a/conf.d/health.d/tcp_resets.conf
+++ b/conf.d/health.d/tcp_resets.conf
@@ -1,7 +1,12 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
# -----------------------------------------------------------------------------
alarm: ipv4_tcphandshake_last_collected_secs
on: ipv4.tcphandshake
+ os: linux
+ hosts: *
calc: $now - $last_collected_t
units: seconds ago
every: 10s
@@ -16,6 +21,8 @@
alarm: 1m_ipv4_tcp_resets_sent
on: ipv4.tcphandshake
+ os: linux
+ hosts: *
lookup: average -1m at -10s unaligned absolute of OutRsts
units: tcp resets/s
every: 10s
@@ -23,6 +30,8 @@
alarm: 10s_ipv4_tcp_resets_sent
on: ipv4.tcphandshake
+ os: linux
+ hosts: *
lookup: average -10s unaligned absolute of OutRsts
units: tcp resets/s
every: 10s
@@ -37,6 +46,8 @@ options: no-clear-notification
alarm: 1m_ipv4_tcp_resets_received
on: ipv4.tcphandshake
+ os: linux
+ hosts: *
lookup: average -1m at -10s unaligned absolute of AttemptFails
units: tcp resets/s
every: 10s
@@ -44,6 +55,8 @@ options: no-clear-notification
alarm: 10s_ipv4_tcp_resets_received
on: ipv4.tcphandshake
+ os: linux
+ hosts: *
lookup: average -10s unaligned absolute of AttemptFails
units: tcp resets/s
every: 10s
diff --git a/conf.d/health.d/udp_errors.conf b/conf.d/health.d/udp_errors.conf
index 98e955c02..33338b83e 100644
--- a/conf.d/health.d/udp_errors.conf
+++ b/conf.d/health.d/udp_errors.conf
@@ -1,7 +1,12 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
# -----------------------------------------------------------------------------
alarm: ipv4_udperrors_last_collected_secs
on: ipv4.udperrors
+ os: linux
+ hosts: *
calc: $now - $last_collected_t
units: seconds ago
every: 10s
@@ -16,6 +21,8 @@
alarm: 1m_ipv4_udp_receive_buffer_errors
on: ipv4.udperrors
+ os: linux
+ hosts: *
lookup: sum -1m unaligned absolute of RcvbufErrors
units: errors
every: 10s
@@ -30,6 +37,8 @@
alarm: 1m_ipv4_udp_send_buffer_errors
on: ipv4.udperrors
+ os: linux
+ hosts: *
lookup: sum -1m unaligned absolute of SndbufErrors
units: errors
every: 10s
diff --git a/conf.d/health_alarm_notify.conf b/conf.d/health_alarm_notify.conf
index 4d8444ed5..641272ced 100644
--- a/conf.d/health_alarm_notify.conf
+++ b/conf.d/health_alarm_notify.conf
@@ -94,6 +94,15 @@ curl=""
# multiple recipients can be given like this:
# "admin1@example.com admin2@example.com ..."
+# the email address sending email notifications
+# the default is the system user netdata runs as (usually: netdata)
+# The following formats are supported:
+# EMAIL_SENDER="user@domain"
+# EMAIL_SENDER="User Name <user@domain>"
+# EMAIL_SENDER="'User Name' <user@domain>"
+# EMAIL_SENDER="\"User Name\" <user@domain>"
+EMAIL_SENDER=""
+
# enable/disable sending emails
SEND_EMAIL="YES"
@@ -101,6 +110,13 @@ SEND_EMAIL="YES"
DEFAULT_RECIPIENT_EMAIL="root"
# to receive only critical alarms, set it to "root|critical"
+# Optionally specify the encoding to list in the Content-Type header.
+# This doesn't change what encoding the e-mail is sent with, just what
+# the headers say it was encoded as.
+# This shouldn't need to be changed as it will almost always be
+# autodetected from the environment.
+#EMAIL_CHARSET="UTF-8"
+
#------------------------------------------------------------------------------
# pushover (pushover.net) global notification options
diff --git a/conf.d/node.d/fronius.conf.md b/conf.d/node.d/fronius.conf.md
index c80afa0b5..622086b27 100644
--- a/conf.d/node.d/fronius.conf.md
+++ b/conf.d/node.d/fronius.conf.md
@@ -16,7 +16,7 @@ The module supports any number of servers. Sometimes there is a lag when collect
"update_every": 5,
"servers": [
{
- "name": "Solar",
+ "name": "solar",
"hostname": "symo.ip.or.dns",
"update_every": 5,
"api_path": "/solar_api/v1/GetPowerFlowRealtimeData.fcgi"
diff --git a/conf.d/node.d/stiebeleltron.conf.md b/conf.d/node.d/stiebeleltron.conf.md
new file mode 100644
index 000000000..6ae5aa1c7
--- /dev/null
+++ b/conf.d/node.d/stiebeleltron.conf.md
@@ -0,0 +1,453 @@
+[Stiebel Eltron Heat pump system with ISG](https://www.stiebel-eltron.com/en/home/products-solutions/renewables/controller_energymanagement/internet_servicegateway/isg_web.html)
+
+Original author: BrainDoctor (github)
+
+The module supports any metrics that are parseable with RegEx. There is no API that gives direct access to the values (AFAIK), so the "workaround" is to parse the HTML output of the ISG.
+
+### Testing
+This plugin has been tested within the following environment:
+ * ISG version: 8.5.6
+ * MFG version: 12
+ * Controller version: 9
+ * July (summer time, not much activity)
+ * Interface language: English
+ * login- and password-less ISG web access (without HTTPS it's useless anyway)
+ * Heatpump model: WPL 25 I-2
+ * Hot water boiler model: 820 WT 1
+
+So, if the language is set to English, copy the following configuration into `/etc/netdata/node.d/stiebeleltron.conf` and change the `url`s.
+
+In my case, the ISG is relatively slow to respond (at least 1s, and sometimes up to 4s). Collecting metrics every 10s is more than enough for me.
+
+### How to update the config
+
+* The dimensions support a configurable number of digits; the default is `1`. Most of the values printed by ISG use 1 digit, some use 2.
+* The dimensions also support the `multiplier` and `divisor` attributes, however the divisor gets overridden by `digits`, if specified. Default is `1`.
+* The test string for the regex is always the whole HTML output from the url. For each parameter you need to have a regular expression that extracts the value from the HTML source in the first capture group.
+ Recommended: [regexr.com](https://regexr.com) for testing and matching, [freeformatter.com](https://www.freeformatter.com/json-escape.html) for escaping the newly created regex for the JSON config.
+
+The charts are being generated using the configuration below. So if your installation is in another language or has other metrics, just adapt the structure or regexes.
+### Configuration template
+```json
+{
+ "enable_autodetect": false,
+ "update_every": 10,
+ "pages": [
+ {
+ "name": "System",
+ "id": "system",
+ "url": "http://machine.ip.or.dns/?s=1,0",
+ "update_every": 10,
+ "categories": [
+ {
+ "id": "eletricreheating",
+ "name": "electric reheating",
+ "charts": [
+ {
+ "title": "Dual Mode Reheating Temperature",
+ "id": "reheatingtemp",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Heating",
+ "id": "dualmodeheatingtemp",
+ "regex": "DUAL MODE TEMP HEATING<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ },
+ {
+ "name": "Hot Water",
+ "id" : "dualmodehotwatertemp",
+ "regex": "DUAL MODE TEMP DHW<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "roomtemp",
+ "name": "room temperature",
+ "charts": [
+ {
+ "title": "Heat Circuit 1",
+ "id": "hc1",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Actual",
+ "id": "actual",
+ "regex": "<tr class=\"even\">\\s*<td.*>ACTUAL TEMPERATURE HC 1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ },
+ {
+ "name": "Set",
+ "id" : "set",
+ "regex": "<tr class=\"odd\">\\s*<td.*>SET TEMPERATURE HC 1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ }
+ ]
+ },
+ {
+ "title": "Heat Circuit 2",
+ "id": "hc2",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 2,
+ "dimensions": [
+ {
+ "name": "Actual",
+ "id": "actual",
+ "regex": "<tr class=\"even\">\\s*<td.*>ACTUAL TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ },
+ {
+ "name": "Set",
+ "id" : "set",
+ "regex": "<tr class=\"odd\">\\s*<td.*>SET TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "heating",
+ "name": "heating",
+ "charts": [
+ {
+ "title": "Heat Circuit 1",
+ "id": "hc1",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Actual",
+ "id": "actual",
+ "regex": "<tr class=\"odd\">\\s*<td.*>ACTUAL TEMPERATURE HC 1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ },
+ {
+ "name": "Set",
+ "id" : "set",
+ "regex": "<tr class=\"even\">\\s*<td.*>SET TEMPERATURE HC 1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ }
+ ]
+ },
+ {
+ "title": "Heat Circuit 2",
+ "id": "hc2",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 2,
+ "dimensions": [
+ {
+ "name": "Actual",
+ "id": "actual",
+ "regex": "<tr class=\"odd\">\\s*<td.*>ACTUAL TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ },
+ {
+ "name": "Set",
+ "id" : "set",
+ "regex": "<tr class=\"even\">\\s*<td.*>SET TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ }
+ ]
+ },
+ {
+ "title": "Flow Temperature",
+ "id": "flowtemp",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 3,
+ "dimensions": [
+ {
+ "name": "Heating",
+ "id": "heating",
+ "regex": "ACTUAL FLOW TEMPERATURE WP<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ },
+ {
+ "name": "Reheating",
+ "id" : "reheating",
+ "regex": "ACTUAL FLOW TEMPERATURE NHZ<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ },
+ {
+ "title": "Buffer Temperature",
+ "id": "buffertemp",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 4,
+ "dimensions": [
+ {
+ "name": "Actual",
+ "id": "actual",
+ "regex": "ACTUAL BUFFER TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ },
+ {
+ "name": "Set",
+ "id" : "set",
+ "regex": "SET BUFFER TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ },
+ {
+ "title": "Fixed Temperature",
+ "id": "fixedtemp",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 5,
+ "dimensions": [
+ {
+ "name": "Set",
+ "id" : "setfixed",
+ "regex": "SET FIXED TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ },
+ {
+ "title": "Pre-flow Temperature",
+ "id": "preflowtemp",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 6,
+ "dimensions": [
+ {
+ "name": "Actual",
+ "id": "actualreturn",
+ "regex": "ACTUAL RETURN TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "hotwater",
+ "name": "hot water",
+ "charts": [
+ {
+ "title": "Hot Water Temperature",
+ "id": "hotwatertemp",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Actual",
+ "id": "actual",
+ "regex": "ACTUAL TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ },
+ {
+ "name": "Set",
+ "id" : "set",
+ "regex": "SET TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "general",
+ "name": "general",
+ "charts": [
+ {
+ "title": "Outside Temperature",
+ "id": "outside",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Outside temperature",
+ "id": "outsidetemp",
+ "regex": "OUTSIDE TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>"
+ }
+ ]
+ },
+ {
+ "title": "Condenser Temperature",
+ "id": "condenser",
+ "unit": "Celsius",
+ "type": "line",
+ "prio": 2,
+ "dimensions": [
+ {
+ "name": "Condenser",
+ "id": "condenser",
+ "regex": "CONDENSER TEMP\\.<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ },
+ {
+ "title": "Heating Circuit Pressure",
+ "id": "heatingcircuit",
+ "unit": "bar",
+ "type": "line",
+ "prio": 3,
+ "dimensions": [
+ {
+ "name": "Heating Circuit",
+ "id": "heatingcircuit",
+ "digits": 2,
+ "regex": "PRESSURE HTG CIRC<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]*).*<\\\/td>"
+ }
+ ]
+ },
+ {
+ "title": "Flow Rate",
+ "id": "flowrate",
+ "unit": "liters/min",
+ "type": "line",
+ "prio": 4,
+ "dimensions": [
+ {
+ "name": "Flow Rate",
+ "id": "flowrate",
+ "digits": 2,
+ "regex": "FLOW RATE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>"
+ }
+ ]
+ },
+ {
+ "title": "Output",
+ "id": "output",
+ "unit": "%",
+ "type": "line",
+ "prio": 5,
+ "dimensions": [
+ {
+ "name": "Heat Pump",
+ "id": "outputheatpump",
+ "regex": "OUTPUT HP<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*).*<\\\/td>"
+ },
+ {
+ "name": "Water Pump",
+ "id": "intpumprate",
+ "regex": "INT PUMP RATE<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*).*<\\\/td>"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "Heat Pump",
+ "id": "heatpump",
+ "url": "http://machine.ip.or.dns/?s=1,1",
+ "update_every": 10,
+ "categories": [
+ {
+ "id": "runtime",
+ "name": "runtime",
+ "charts": [
+ {
+ "title": "Compressor",
+ "id": "compressor",
+ "unit": "h",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Heating",
+ "id": "heating",
+ "regex": "RNT COMP 1 HEA<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ },
+ {
+ "name": "Hot Water",
+ "id" : "hotwater",
+ "regex": "RNT COMP 1 DHW<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ }
+ ]
+ },
+ {
+ "title": "Reheating",
+ "id": "reheating",
+ "unit": "h",
+ "type": "line",
+ "prio": 2,
+ "dimensions": [
+ {
+ "name": "Reheating 1",
+ "id": "rh1",
+ "regex": "BH 1<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ },
+ {
+ "name": "Reheating 2",
+ "id" : "rh2",
+ "regex": "BH 2<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "processdata",
+ "name": "process data",
+ "charts": [
+ {
+ "title": "Remaining Compressor Rest Time",
+ "id": "remaincomp",
+ "unit": "s",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Timer",
+ "id": "timer",
+ "regex": "COMP DLAY CNTR<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "id": "energy",
+ "name": "energy",
+ "charts": [
+ {
+ "title": "Compressor Today",
+ "id": "compressorday",
+ "unit": "kWh",
+ "type": "line",
+ "prio": 1,
+ "dimensions": [
+ {
+ "name": "Heating",
+ "id": "heating",
+ "digits": 3,
+ "regex": "COMPRESSOR HEATING DAY<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ },
+ {
+ "name": "Hot Water",
+ "id": "hotwater",
+ "digits": 3,
+ "regex": "COMPRESSOR DHW DAY<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ }
+ ]
+ },
+ {
+ "title": "Compressor Total",
+ "id": "compressortotal",
+ "unit": "MWh",
+ "type": "line",
+ "prio": 2,
+ "dimensions": [
+ {
+ "name": "Heating",
+ "id": "heating",
+ "digits": 3,
+ "regex": "COMPRESSOR HEATING TOTAL<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ },
+ {
+ "name": "Hot Water",
+ "id": "hotwater",
+ "digits": 3,
+ "regex": "COMPRESSOR DHW TOTAL<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+}
+```
diff --git a/conf.d/python.d.conf b/conf.d/python.d.conf
index 0a37e40ae..741d49914 100644
--- a/conf.d/python.d.conf
+++ b/conf.d/python.d.conf
@@ -30,6 +30,7 @@ log_interval: 3600
apache_cache: no
# apache: yes
# bind_rndc: yes
+# chrony: yes
# cpufreq: yes
# cpuidle: yes
# dns_query_time: yes
diff --git a/conf.d/python.d/chrony.conf b/conf.d/python.d/chrony.conf
new file mode 100644
index 000000000..46229687b
--- /dev/null
+++ b/conf.d/python.d/chrony.conf
@@ -0,0 +1,72 @@
+# netdata python.d.plugin configuration for chrony
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+update_every: 5
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed to run at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# In addition to the above, chrony also supports the following:
+#
+# command: 'chronyc -n tracking' # the command to run
+#
+
+# ----------------------------------------------------------------------
+# REQUIRED chrony CONFIGURATION
+#
+# netdata will query chrony as user netdata.
+# Verify that user netdata is allowed to call 'chronyc tracking'
+# (check the cmdallow directive in chrony.conf).
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+
+local:
+ command: 'chronyc -n tracking'
diff --git a/conf.d/python.d/postgres.conf b/conf.d/python.d/postgres.conf
index 12dddae67..1dbb64f40 100644
--- a/conf.d/python.d/postgres.conf
+++ b/conf.d/python.d/postgres.conf
@@ -75,6 +75,9 @@
# a postgres user for netdata and add its password below to allow
# netdata connect.
#
+# Without superuser access, netdata won't be able to generate the
+# write-ahead log and the background writer charts.
+#
# ----------------------------------------------------------------------
socket:
diff --git a/conf.d/python.d/tomcat.conf b/conf.d/python.d/tomcat.conf
index aef9631b9..ce89175f6 100644
--- a/conf.d/python.d/tomcat.conf
+++ b/conf.d/python.d/tomcat.conf
@@ -63,7 +63,10 @@
# user: 'username'
# pass: 'password'
#
-
+# If you have multiple connectors, the following option is also supported:
+#
+# connector_name: 'ajp-bio-8009' # default is null, which uses the first connector in the status XML
+#
# ----------------------------------------------------------------------
# AUTO-DETECTION JOBS
# only one of them will run (they have the same name)
diff --git a/conf.d/python.d/web_log.conf b/conf.d/python.d/web_log.conf
index e51b565d6..cd1f1af00 100644
--- a/conf.d/python.d/web_log.conf
+++ b/conf.d/python.d/web_log.conf
@@ -76,7 +76,7 @@
# observium: 'observium.*' # name(dimension): REGEX to match
# stub_status: 'stub_status' # name(dimension): REGEX to match
# custom_log_format: # define a custom log format
-# pattern: '(?P<address>[\da-f.:]+) -.*?"(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+) (?P<resp_length>\d+) (?P<resp_time>\d\.\d+) '
+# pattern: '(?P<address>[\da-f.:]+) -.*?"(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+) (?P<resp_length>\d+) (?P<resp_time>\d+\.\d+) '
# time_multiplier: 1000000 # type <int> - convert time to microseconds
# ----------------------------------------------------------------------