diff options
Diffstat (limited to '')
-rw-r--r-- | conf.d/Makefile.am | 35 | ||||
-rw-r--r-- | conf.d/Makefile.in | 85 | ||||
-rw-r--r-- | conf.d/health.d/cpu.conf | 8 | ||||
-rw-r--r-- | conf.d/health.d/disks.conf | 16 | ||||
-rw-r--r-- | conf.d/health.d/entropy.conf | 2 | ||||
-rw-r--r-- | conf.d/health.d/ipc.conf | 6 | ||||
-rw-r--r-- | conf.d/health.d/memory.conf | 8 | ||||
-rw-r--r-- | conf.d/health.d/net.conf | 28 | ||||
-rw-r--r-- | conf.d/health.d/netfilter.conf | 6 | ||||
-rw-r--r-- | conf.d/health.d/qos.conf | 4 | ||||
-rw-r--r-- | conf.d/health.d/ram.conf | 6 | ||||
-rw-r--r-- | conf.d/health.d/softnet.conf | 7 | ||||
-rw-r--r-- | conf.d/health.d/swap.conf | 8 | ||||
-rw-r--r-- | conf.d/health.d/tcp_resets.conf | 13 | ||||
-rw-r--r-- | conf.d/health.d/udp_errors.conf | 9 | ||||
-rw-r--r-- | conf.d/health_alarm_notify.conf | 16 | ||||
-rw-r--r-- | conf.d/node.d/fronius.conf.md | 2 | ||||
-rw-r--r-- | conf.d/node.d/stiebeleltron.conf.md | 453 | ||||
-rw-r--r-- | conf.d/python.d.conf | 1 | ||||
-rw-r--r-- | conf.d/python.d/chrony.conf | 72 | ||||
-rw-r--r-- | conf.d/python.d/postgres.conf | 3 | ||||
-rw-r--r-- | conf.d/python.d/tomcat.conf | 5 | ||||
-rw-r--r-- | conf.d/python.d/web_log.conf | 2 |
23 files changed, 724 insertions, 71 deletions
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am index 4cbecb56a..7a0786678 100644 --- a/conf.d/Makefile.am +++ b/conf.d/Makefile.am @@ -21,6 +21,7 @@ dist_nodeconfig_DATA = \ node.d/named.conf.md \ node.d/sma_webbox.conf.md \ node.d/snmp.conf.md \ + node.d/stiebeleltron.conf.md \ $(NULL) pythonconfigdir=$(configdir)/python.d @@ -28,6 +29,7 @@ dist_pythonconfig_DATA = \ python.d/apache.conf \ python.d/apache_cache.conf \ python.d/bind_rndc.conf \ + python.d/chrony.conf \ python.d/cpufreq.conf \ python.d/dns_query_time.conf \ python.d/dovecot.conf \ @@ -69,45 +71,40 @@ dist_healthconfig_DATA = \ health.d/apache.conf \ health.d/backend.conf \ health.d/bind_rndc.conf \ + health.d/cpu.conf \ + health.d/disks.conf \ health.d/elasticsearch.conf \ + health.d/entropy.conf \ health.d/fping.conf \ health.d/haproxy.conf \ + health.d/ipc.conf \ health.d/ipfs.conf \ health.d/ipmi.conf \ health.d/isc_dhcpd.conf \ health.d/lighttpd.conf \ health.d/mdstat.conf \ health.d/memcached.conf \ + health.d/memory.conf \ + health.d/mongodb.conf \ health.d/mysql.conf \ health.d/named.conf \ - health.d/mongodb.conf \ - health.d/nginx.conf \ - health.d/postgres.conf \ - health.d/redis.conf \ - health.d/retroshare.conf \ - health.d/squid.conf \ - health.d/varnish.conf \ - health.d/web_log.conf \ - health.d/zfs.conf \ - $(NULL) - -if LINUX -dist_healthconfig_DATA += \ - health.d/cpu.conf \ - health.d/disks.conf \ - health.d/entropy.conf \ - health.d/ipc.conf \ - health.d/memory.conf \ health.d/net.conf \ health.d/netfilter.conf \ + health.d/nginx.conf \ + health.d/postgres.conf \ health.d/qos.conf \ health.d/ram.conf \ + health.d/redis.conf \ + health.d/retroshare.conf \ health.d/softnet.conf \ + health.d/squid.conf \ health.d/swap.conf \ health.d/tcp_resets.conf \ health.d/udp_errors.conf \ + health.d/varnish.conf \ + health.d/web_log.conf \ + health.d/zfs.conf \ $(NULL) -endif LINUX chartsconfigdir=$(configdir)/charts.d dist_chartsconfig_DATA = \ diff --git a/conf.d/Makefile.in 
b/conf.d/Makefile.in index 7a1e300e0..3d7084ed1 100644 --- a/conf.d/Makefile.in +++ b/conf.d/Makefile.in @@ -78,26 +78,10 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ -@LINUX_TRUE@am__append_1 = \ -@LINUX_TRUE@ health.d/cpu.conf \ -@LINUX_TRUE@ health.d/disks.conf \ -@LINUX_TRUE@ health.d/entropy.conf \ -@LINUX_TRUE@ health.d/ipc.conf \ -@LINUX_TRUE@ health.d/memory.conf \ -@LINUX_TRUE@ health.d/net.conf \ -@LINUX_TRUE@ health.d/netfilter.conf \ -@LINUX_TRUE@ health.d/qos.conf \ -@LINUX_TRUE@ health.d/ram.conf \ -@LINUX_TRUE@ health.d/softnet.conf \ -@LINUX_TRUE@ health.d/swap.conf \ -@LINUX_TRUE@ health.d/tcp_resets.conf \ -@LINUX_TRUE@ health.d/udp_errors.conf \ -@LINUX_TRUE@ $(NULL) - subdir = conf.d DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(dist_chartsconfig_DATA) $(dist_config_DATA) \ - $(am__dist_healthconfig_DATA_DIST) $(dist_nodeconfig_DATA) \ + $(dist_healthconfig_DATA) $(dist_nodeconfig_DATA) \ $(dist_pythonconfig_DATA) $(dist_statsdconfig_DATA) ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \ @@ -164,21 +148,6 @@ am__installdirs = "$(DESTDIR)$(chartsconfigdir)" \ "$(DESTDIR)$(configdir)" "$(DESTDIR)$(healthconfigdir)" \ "$(DESTDIR)$(nodeconfigdir)" "$(DESTDIR)$(pythonconfigdir)" \ "$(DESTDIR)$(statsdconfigdir)" -am__dist_healthconfig_DATA_DIST = health.d/apache.conf \ - health.d/backend.conf health.d/bind_rndc.conf \ - health.d/elasticsearch.conf health.d/fping.conf \ - health.d/haproxy.conf health.d/ipfs.conf health.d/ipmi.conf \ - health.d/isc_dhcpd.conf health.d/lighttpd.conf \ - health.d/mdstat.conf health.d/memcached.conf \ - health.d/mysql.conf health.d/named.conf health.d/mongodb.conf \ - health.d/nginx.conf health.d/postgres.conf health.d/redis.conf \ - health.d/retroshare.conf health.d/squid.conf \ - health.d/varnish.conf health.d/web_log.conf health.d/zfs.conf \ - health.d/cpu.conf health.d/disks.conf health.d/entropy.conf \ - 
health.d/ipc.conf health.d/memory.conf health.d/net.conf \ - health.d/netfilter.conf health.d/qos.conf health.d/ram.conf \ - health.d/softnet.conf health.d/swap.conf \ - health.d/tcp_resets.conf health.d/udp_errors.conf DATA = $(dist_chartsconfig_DATA) $(dist_config_DATA) \ $(dist_healthconfig_DATA) $(dist_nodeconfig_DATA) \ $(dist_pythonconfig_DATA) $(dist_statsdconfig_DATA) @@ -351,6 +320,7 @@ dist_nodeconfig_DATA = \ node.d/named.conf.md \ node.d/sma_webbox.conf.md \ node.d/snmp.conf.md \ + node.d/stiebeleltron.conf.md \ $(NULL) pythonconfigdir = $(configdir)/python.d @@ -358,6 +328,7 @@ dist_pythonconfig_DATA = \ python.d/apache.conf \ python.d/apache_cache.conf \ python.d/bind_rndc.conf \ + python.d/chrony.conf \ python.d/cpufreq.conf \ python.d/dns_query_time.conf \ python.d/dovecot.conf \ @@ -394,17 +365,45 @@ dist_pythonconfig_DATA = \ $(NULL) healthconfigdir = $(configdir)/health.d -dist_healthconfig_DATA = health.d/apache.conf health.d/backend.conf \ - health.d/bind_rndc.conf health.d/elasticsearch.conf \ - health.d/fping.conf health.d/haproxy.conf health.d/ipfs.conf \ - health.d/ipmi.conf health.d/isc_dhcpd.conf \ - health.d/lighttpd.conf health.d/mdstat.conf \ - health.d/memcached.conf health.d/mysql.conf \ - health.d/named.conf health.d/mongodb.conf health.d/nginx.conf \ - health.d/postgres.conf health.d/redis.conf \ - health.d/retroshare.conf health.d/squid.conf \ - health.d/varnish.conf health.d/web_log.conf health.d/zfs.conf \ - $(NULL) $(am__append_1) +dist_healthconfig_DATA = \ + health.d/apache.conf \ + health.d/backend.conf \ + health.d/bind_rndc.conf \ + health.d/cpu.conf \ + health.d/disks.conf \ + health.d/elasticsearch.conf \ + health.d/entropy.conf \ + health.d/fping.conf \ + health.d/haproxy.conf \ + health.d/ipc.conf \ + health.d/ipfs.conf \ + health.d/ipmi.conf \ + health.d/isc_dhcpd.conf \ + health.d/lighttpd.conf \ + health.d/mdstat.conf \ + health.d/memcached.conf \ + health.d/memory.conf \ + health.d/mongodb.conf \ + 
health.d/mysql.conf \ + health.d/named.conf \ + health.d/net.conf \ + health.d/netfilter.conf \ + health.d/nginx.conf \ + health.d/postgres.conf \ + health.d/qos.conf \ + health.d/ram.conf \ + health.d/redis.conf \ + health.d/retroshare.conf \ + health.d/softnet.conf \ + health.d/squid.conf \ + health.d/swap.conf \ + health.d/tcp_resets.conf \ + health.d/udp_errors.conf \ + health.d/varnish.conf \ + health.d/web_log.conf \ + health.d/zfs.conf \ + $(NULL) + chartsconfigdir = $(configdir)/charts.d dist_chartsconfig_DATA = \ charts.d/apache.conf \ diff --git a/conf.d/health.d/cpu.conf b/conf.d/health.d/cpu.conf index 30a714097..db6285561 100644 --- a/conf.d/health.d/cpu.conf +++ b/conf.d/health.d/cpu.conf @@ -1,6 +1,10 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + template: 10min_cpu_usage on: system.cpu + os: linux + hosts: * lookup: average -10m unaligned of user,system,softirq,irq,guest units: % every: 1m @@ -12,6 +16,8 @@ template: 10min_cpu_usage template: 10min_cpu_iowait on: system.cpu + os: linux + hosts: * lookup: average -10m unaligned of iowait units: % every: 1m @@ -23,6 +29,8 @@ template: 10min_cpu_iowait template: 20min_steal_cpu on: system.cpu + os: linux + hosts: * lookup: average -20m unaligned of steal units: % every: 5m diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf index 9548f9ee0..63053491e 100644 --- a/conf.d/health.d/disks.conf +++ b/conf.d/health.d/disks.conf @@ -1,3 +1,7 @@ + +# you can disable an alarm notification by setting the 'to' line to: silent + + # ----------------------------------------------------------------------------- # low disk space @@ -7,6 +11,8 @@ template: disk_space_usage on: disk.space + os: linux + hosts: * families: * calc: $used * 100 / ($avail + $used) units: % @@ -19,6 +25,8 @@ families: * template: disk_inode_usage on: disk.inodes + os: linux + hosts: * families: * calc: $used * 100 / ($avail + $used) units: % @@ -43,6 +51,8 @@ families: * template: 
disk_fill_rate on: disk.space + os: linux + hosts: * families: * lookup: min -10m at -50m unaligned of avail calc: ($this - $avail) / (($now - $after) / 3600) @@ -57,6 +67,8 @@ families: * template: out_of_disk_space_time on: disk.space + os: linux + hosts: * families: * calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf) units: hours @@ -77,6 +89,8 @@ families: * template: 10min_disk_utilization on: disk.util + os: linux + hosts: * families: * lookup: average -10m unaligned units: % @@ -97,6 +111,8 @@ families: * template: 10min_disk_backlog on: disk.backlog + os: linux + hosts: * families: * lookup: average -10m unaligned units: ms diff --git a/conf.d/health.d/entropy.conf b/conf.d/health.d/entropy.conf index 5dd8af502..66d44ec13 100644 --- a/conf.d/health.d/entropy.conf +++ b/conf.d/health.d/entropy.conf @@ -5,6 +5,8 @@ alarm: lowest_entropy on: system.entropy + os: linux + hosts: * lookup: min -10m unaligned units: entries every: 5m diff --git a/conf.d/health.d/ipc.conf b/conf.d/health.d/ipc.conf index ee7c4badd..03cf264d8 100644 --- a/conf.d/health.d/ipc.conf +++ b/conf.d/health.d/ipc.conf @@ -1,6 +1,10 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + alarm: semaphores_used on: system.ipc_semaphores + os: linux + hosts: * calc: $semaphores * 100 / $ipc.semaphores.max units: % every: 10s @@ -12,6 +16,8 @@ alarm: semaphore_arrays_used on: system.ipc_semaphore_arrays + os: linux + hosts: * calc: $arrays * 100 / $ipc.semaphores.arrays.max units: % every: 10s diff --git a/conf.d/health.d/memory.conf b/conf.d/health.d/memory.conf index 3c904f6b1..4a0e6e522 100644 --- a/conf.d/health.d/memory.conf +++ b/conf.d/health.d/memory.conf @@ -1,6 +1,10 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + alarm: 1hour_ecc_memory_correctable on: mem.ecc_ce + os: linux + hosts: * lookup: sum -10m unaligned units: errors every: 1m @@ -11,6 +15,8 @@ alarm: 1hour_ecc_memory_uncorrectable on: mem.ecc_ue + 
os: linux + hosts: * lookup: sum -10m unaligned units: errors every: 1m @@ -21,6 +27,8 @@ alarm: 1hour_memory_hw_corrupted on: mem.hwcorrupt + os: linux + hosts: * calc: $HardwareCorrupted units: MB every: 10s diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf index bd288817b..00a198612 100644 --- a/conf.d/health.d/net.conf +++ b/conf.d/health.d/net.conf @@ -1,4 +1,6 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + # ----------------------------------------------------------------------------- # dropped packets @@ -8,48 +10,56 @@ template: inbound_packets_dropped on: net.drops + os: linux + hosts: * families: * lookup: sum -10m unaligned absolute of inbound units: packets every: 1m - warn: $this > 0 + warn: $this >= 5 delay: down 1h multiplier 1.5 max 2h info: interface inbound dropped packets in the last 10 minutes to: sysadmin template: outbound_packets_dropped on: net.drops + os: linux + hosts: * families: * lookup: sum -10m unaligned absolute of outbound units: packets every: 1m - warn: $this > 0 + warn: $this >= 5 delay: down 1h multiplier 1.5 max 2h info: interface outbound dropped packets in the last 10 minutes to: sysadmin template: inbound_packets_dropped_ratio on: net.packets + os: linux + hosts: * families: * lookup: sum -10m unaligned absolute of received calc: (($inbound_packets_dropped != nan AND $this > 0) ? ($inbound_packets_dropped * 100 / $this) : (0)) units: % every: 1m - warn: $this > 0.5 - crit: $this > 3 + warn: $this >= 0.1 + crit: $this >= 2 delay: down 1h multiplier 1.5 max 2h info: the ratio of inbound dropped packets vs the total number of received packets of the network interface, during the last 10 minutes to: sysadmin template: outbound_packets_dropped_ratio on: net.packets + os: linux + hosts: * families: * lookup: sum -10m unaligned absolute of sent calc: (($outbound_packets_dropped != nan AND $this > 0) ? 
($outbound_packets_dropped * 100 / $this) : (0)) units: % every: 1m - warn: $this > 0.5 - crit: $this > 3 + warn: $this >= 0.1 + crit: $this >= 2 delay: down 1h multiplier 1.5 max 2h info: the ratio of outbound dropped packets vs the total number of sent packets of the network interface, during the last 10 minutes to: sysadmin @@ -65,6 +75,8 @@ families: * template: 10min_fifo_errors on: net.fifo + os: linux + hosts: * families: * lookup: sum -10m unaligned absolute units: errors @@ -86,6 +98,8 @@ families: * template: 1m_received_packets_rate on: net.packets + os: linux + hosts: * families: * lookup: average -1m of received units: packets @@ -94,6 +108,8 @@ families: * template: 10s_received_packets_storm on: net.packets + os: linux + hosts: * families: * lookup: average -10s of received calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate)) diff --git a/conf.d/health.d/netfilter.conf b/conf.d/health.d/netfilter.conf index 3dd6a67b3..fa1732b33 100644 --- a/conf.d/health.d/netfilter.conf +++ b/conf.d/health.d/netfilter.conf @@ -1,6 +1,10 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + alarm: netfilter_last_collected_secs on: netfilter.conntrack_sockets + os: linux + hosts: * calc: $now - $last_collected_t units: seconds ago every: 10s @@ -12,6 +16,8 @@ alarm: netfilter_conntrack_full on: netfilter.conntrack_sockets + os: linux + hosts: * lookup: max -10s unaligned of connections calc: $this * 100 / $netfilter.conntrack.max units: % diff --git a/conf.d/health.d/qos.conf b/conf.d/health.d/qos.conf index 9e5939fdc..7290d15ff 100644 --- a/conf.d/health.d/qos.conf +++ b/conf.d/health.d/qos.conf @@ -1,10 +1,14 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + # check if a QoS class is dropping packets # the alarm is checked every 10 seconds # and examines the last minute of data #template: 10min_qos_packet_drops # on: tc.qos_dropped +# os: linux +# hosts: * # lookup: 
sum -10m unaligned absolute # every: 30s # warn: $this > 0 diff --git a/conf.d/health.d/ram.conf b/conf.d/health.d/ram.conf index b99e5e226..8d0e8838d 100644 --- a/conf.d/health.d/ram.conf +++ b/conf.d/health.d/ram.conf @@ -1,12 +1,18 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + alarm: used_ram_to_ignore on: system.ram + os: linux + hosts: * calc: ($zfs.arc_size.arcsz = nan)?(0):($zfs.arc_size.arcsz) every: 10s info: the amount of memory that is reported as used, but it is actually capable for resizing itself based on the system needs (eg. ZFS ARC) alarm: ram_in_use on: system.ram + os: linux + hosts: * # calc: $used * 100 / ($used + $cached + $free) calc: ($used - $used_ram_to_ignore) * 100 / ($used - $used_ram_to_ignore + $cached + $free) units: % diff --git a/conf.d/health.d/softnet.conf b/conf.d/health.d/softnet.conf index 5faf9a9ee..64e1c6784 100644 --- a/conf.d/health.d/softnet.conf +++ b/conf.d/health.d/softnet.conf @@ -1,7 +1,12 @@ + +# you can disable an alarm notification by setting the 'to' line to: silent + # check for common /proc/net/softnet_stat errors alarm: 10min_netdev_backlog_exceeded on: system.softnet_stat + os: linux + hosts: * lookup: sum -10m unaligned absolute of dropped units: packets every: 1m @@ -12,6 +17,8 @@ alarm: 10min_netdev_budget_ran_outs on: system.softnet_stat + os: linux + hosts: * lookup: sum -10m unaligned absolute of squeezed units: events every: 1m diff --git a/conf.d/health.d/swap.conf b/conf.d/health.d/swap.conf index 7f57560e2..830a9af95 100644 --- a/conf.d/health.d/swap.conf +++ b/conf.d/health.d/swap.conf @@ -1,6 +1,10 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + alarm: 30min_ram_swapped_out on: system.swapio + os: linux + hosts: * lookup: sum -30m unaligned absolute of out # we have to convert KB to MB by dividing $this (i.e. 
the result of the lookup) with 1024 calc: $this / 1024 * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free ) @@ -14,6 +18,8 @@ alarm: ram_in_swap on: system.swap + os: linux + hosts: * calc: $used * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free ) units: % of RAM every: 10s @@ -25,6 +31,8 @@ alarm: used_swap on: system.swap + os: linux + hosts: * calc: $used * 100 / ( $used + $free ) units: % every: 10s diff --git a/conf.d/health.d/tcp_resets.conf b/conf.d/health.d/tcp_resets.conf index 803c88a81..fec124ac7 100644 --- a/conf.d/health.d/tcp_resets.conf +++ b/conf.d/health.d/tcp_resets.conf @@ -1,7 +1,12 @@ + +# you can disable an alarm notification by setting the 'to' line to: silent + # ----------------------------------------------------------------------------- alarm: ipv4_tcphandshake_last_collected_secs on: ipv4.tcphandshake + os: linux + hosts: * calc: $now - $last_collected_t units: seconds ago every: 10s @@ -16,6 +21,8 @@ alarm: 1m_ipv4_tcp_resets_sent on: ipv4.tcphandshake + os: linux + hosts: * lookup: average -1m at -10s unaligned absolute of OutRsts units: tcp resets/s every: 10s @@ -23,6 +30,8 @@ alarm: 10s_ipv4_tcp_resets_sent on: ipv4.tcphandshake + os: linux + hosts: * lookup: average -10s unaligned absolute of OutRsts units: tcp resets/s every: 10s @@ -37,6 +46,8 @@ options: no-clear-notification alarm: 1m_ipv4_tcp_resets_received on: ipv4.tcphandshake + os: linux + hosts: * lookup: average -1m at -10s unaligned absolute of AttemptFails units: tcp resets/s every: 10s @@ -44,6 +55,8 @@ options: no-clear-notification alarm: 10s_ipv4_tcp_resets_received on: ipv4.tcphandshake + os: linux + hosts: * lookup: average -10s unaligned absolute of AttemptFails units: tcp resets/s every: 10s diff --git a/conf.d/health.d/udp_errors.conf b/conf.d/health.d/udp_errors.conf index 98e955c02..33338b83e 100644 --- a/conf.d/health.d/udp_errors.conf +++ b/conf.d/health.d/udp_errors.conf @@ -1,7 +1,12 @@ + +# you can disable an alarm 
notification by setting the 'to' line to: silent + # ----------------------------------------------------------------------------- alarm: ipv4_udperrors_last_collected_secs on: ipv4.udperrors + os: linux + hosts: * calc: $now - $last_collected_t units: seconds ago every: 10s @@ -16,6 +21,8 @@ alarm: 1m_ipv4_udp_receive_buffer_errors on: ipv4.udperrors + os: linux + hosts: * lookup: sum -1m unaligned absolute of RcvbufErrors units: errors every: 10s @@ -30,6 +37,8 @@ alarm: 1m_ipv4_udp_send_buffer_errors on: ipv4.udperrors + os: linux + hosts: * lookup: sum -1m unaligned absolute of SndbufErrors units: errors every: 10s diff --git a/conf.d/health_alarm_notify.conf b/conf.d/health_alarm_notify.conf index 4d8444ed5..641272ced 100644 --- a/conf.d/health_alarm_notify.conf +++ b/conf.d/health_alarm_notify.conf @@ -94,6 +94,15 @@ curl="" # multiple recipients can be given like this: # "admin1@example.com admin2@example.com ..." +# the email address sending email notifications +# the default is the system user netdata runs as (usually: netdata) +# The following formats are supported: +# EMAIL_SENDER="user@domain" +# EMAIL_SENDER="User Name <user@domain>" +# EMAIL_SENDER="'User Name' <user@domain>" +# EMAIL_SENDER="\"User Name\" <user@domain>" +EMAIL_SENDER="" + # enable/disable sending emails SEND_EMAIL="YES" @@ -101,6 +110,13 @@ SEND_EMAIL="YES" DEFAULT_RECIPIENT_EMAIL="root" # to receive only critical alarms, set it to "root|critical" +# Optionally specify the encoding to list in the Content-Type header. +# This doesn't change what encoding the e-mail is sent with, just what +# the headers say it was encoded as. +# This shouldn't need to be changed as it will almost always be +# autodetected from the environment. 
+#EMAIL_CHARSET="UTF-8" + #------------------------------------------------------------------------------ # pushover (pushover.net) global notification options diff --git a/conf.d/node.d/fronius.conf.md b/conf.d/node.d/fronius.conf.md index c80afa0b5..622086b27 100644 --- a/conf.d/node.d/fronius.conf.md +++ b/conf.d/node.d/fronius.conf.md @@ -16,7 +16,7 @@ The module supports any number of servers. Sometimes there is a lag when collect "update_every": 5, "servers": [ { - "name": "Solar", + "name": "solar", "hostname": "symo.ip.or.dns", "update_every": 5, "api_path": "/solar_api/v1/GetPowerFlowRealtimeData.fcgi" diff --git a/conf.d/node.d/stiebeleltron.conf.md b/conf.d/node.d/stiebeleltron.conf.md new file mode 100644 index 000000000..6ae5aa1c7 --- /dev/null +++ b/conf.d/node.d/stiebeleltron.conf.md @@ -0,0 +1,453 @@ +[Stiebel Eltron Heat pump system with ISG](https://www.stiebel-eltron.com/en/home/products-solutions/renewables/controller_energymanagement/internet_servicegateway/isg_web.html) + +Original author: BrainDoctor (github) + +The module supports any metrics that are parseable with RegEx. There is no API that gives direct access to the values (AFAIK), so the "workaround" is to parse the HTML output of the ISG. + +### Testing +This plugin has been tested within the following environment: + * ISG version: 8.5.6 + * MFG version: 12 + * Controller version: 9 + * July (summer time, not much activity) + * Interface language: English + * login- and password-less ISG web access (without HTTPS it's useless anyway) + * Heatpump model: WPL 25 I-2 + * Hot water boiler model: 820 WT 1 + +So, if the language is set to English, copy the following configuration into `/etc/netdata/node.d/stiebeleltron.conf` and change the `url`s. + +In my case, the ISG is relatively slow to respond (at least 1s, but also up to 4s). Collecting metrics every 10s is more than enough for me. + +### How to update the config + +* The dimensions support variable digits, the default is `1`. 
Most of the values printed by ISG are using 1 digit, some use 2. +* The dimensions also support the `multiplier` and `divisor` attributes, however the divisor gets overridden by `digits`, if specified. Default is `1`. +* The test string for the regex is always the whole HTML output from the url. For each parameter you need to have a regular expression that extracts the value from the HTML source in the first capture group. + Recommended: [regexr.com](https://regexr.com) for testing and matching, [freeformatter.com](https://www.freeformatter.com/json-escape.html) for escaping the newly created regex for the JSON config. + +The charts are being generated using the configuration below. So if your installation is in another language or has other metrics, just adapt the structure or regexes. +### Configuration template +```json +{ + "enable_autodetect": false, + "update_every": 10, + "pages": [ + { + "name": "System", + "id": "system", + "url": "http://machine.ip.or.dns/?s=1,0", + "update_every": 10, + "categories": [ + { + "id": "eletricreheating", + "name": "electric reheating", + "charts": [ + { + "title": "Dual Mode Reheating Temperature", + "id": "reheatingtemp", + "unit": "Celsius", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Heating", + "id": "dualmodeheatingtemp", + "regex": "DUAL MODE TEMP HEATING<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + }, + { + "name": "Hot Water", + "id" : "dualmodehotwatertemp", + "regex": "DUAL MODE TEMP DHW<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + } + ] + }, + { + "id": "roomtemp", + "name": "room temperature", + "charts": [ + { + "title": "Heat Circuit 1", + "id": "hc1", + "unit": "Celsius", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Actual", + "id": "actual", + "regex": "<tr class=\"even\">\\s*<td.*>ACTUAL TEMPERATURE HC 1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + }, + { + "name": "Set", + "id" : "set", + "regex": "<tr class=\"odd\">\\s*<td.*>SET TEMPERATURE HC 
1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + } + ] + }, + { + "title": "Heat Circuit 2", + "id": "hc2", + "unit": "Celsius", + "type": "line", + "prio": 2, + "dimensions": [ + { + "name": "Actual", + "id": "actual", + "regex": "<tr class=\"even\">\\s*<td.*>ACTUAL TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + }, + { + "name": "Set", + "id" : "set", + "regex": "<tr class=\"odd\">\\s*<td.*>SET TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + } + ] + } + ] + }, + { + "id": "heating", + "name": "heating", + "charts": [ + { + "title": "Heat Circuit 1", + "id": "hc1", + "unit": "Celsius", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Actual", + "id": "actual", + "regex": "<tr class=\"odd\">\\s*<td.*>ACTUAL TEMPERATURE HC 1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + }, + { + "name": "Set", + "id" : "set", + "regex": "<tr class=\"even\">\\s*<td.*>SET TEMPERATURE HC 1<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + } + ] + }, + { + "title": "Heat Circuit 2", + "id": "hc2", + "unit": "Celsius", + "type": "line", + "prio": 2, + "dimensions": [ + { + "name": "Actual", + "id": "actual", + "regex": "<tr class=\"odd\">\\s*<td.*>ACTUAL TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + }, + { + "name": "Set", + "id" : "set", + "regex": "<tr class=\"even\">\\s*<td.*>SET TEMPERATURE HC 2<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + } + ] + }, + { + "title": "Flow Temperature", + "id": "flowtemp", + "unit": "Celsius", + "type": "line", + "prio": 3, + "dimensions": [ + { + "name": "Heating", + "id": "heating", + "regex": "ACTUAL FLOW TEMPERATURE WP<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + }, + { + "name": "Reheating", + "id" : "reheating", + "regex": "ACTUAL FLOW TEMPERATURE NHZ<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + }, + { + "title": "Buffer Temperature", + "id": "buffertemp", + "unit": "Celsius", + 
"type": "line", + "prio": 4, + "dimensions": [ + { + "name": "Actual", + "id": "actual", + "regex": "ACTUAL BUFFER TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + }, + { + "name": "Set", + "id" : "set", + "regex": "SET BUFFER TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + }, + { + "title": "Fixed Temperature", + "id": "fixedtemp", + "unit": "Celsius", + "type": "line", + "prio": 5, + "dimensions": [ + { + "name": "Set", + "id" : "setfixed", + "regex": "SET FIXED TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + }, + { + "title": "Pre-flow Temperature", + "id": "preflowtemp", + "unit": "Celsius", + "type": "line", + "prio": 6, + "dimensions": [ + { + "name": "Actual", + "id": "actualreturn", + "regex": "ACTUAL RETURN TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + } + ] + }, + { + "id": "hotwater", + "name": "hot water", + "charts": [ + { + "title": "Hot Water Temperature", + "id": "hotwatertemp", + "unit": "Celsius", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Actual", + "id": "actual", + "regex": "ACTUAL TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + }, + { + "name": "Set", + "id" : "set", + "regex": "SET TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + } + ] + }, + { + "id": "general", + "name": "general", + "charts": [ + { + "title": "Outside Temperature", + "id": "outside", + "unit": "Celsius", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Outside temperature", + "id": "outsidetemp", + "regex": "OUTSIDE TEMPERATURE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>\\s*<\\\/tr>" + } + ] + }, + { + "title": "Condenser Temperature", + "id": "condenser", + "unit": "Celsius", + "type": "line", + "prio": 2, + "dimensions": [ + { + "name": "Condenser", + "id": "condenser", + "regex": "CONDENSER TEMP\\.<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + }, + { + "title": "Heating Circuit Pressure", + "id": "heatingcircuit", + 
"unit": "bar", + "type": "line", + "prio": 3, + "dimensions": [ + { + "name": "Heating Circuit", + "id": "heatingcircuit", + "digits": 2, + "regex": "PRESSURE HTG CIRC<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]*).*<\\\/td>" + } + ] + }, + { + "title": "Flow Rate", + "id": "flowrate", + "unit": "liters/min", + "type": "line", + "prio": 4, + "dimensions": [ + { + "name": "Flow Rate", + "id": "flowrate", + "digits": 2, + "regex": "FLOW RATE<\\\/td>\\s*<td.*>(-?[0-9]+,[0-9]+).*<\\\/td>" + } + ] + }, + { + "title": "Output", + "id": "output", + "unit": "%", + "type": "line", + "prio": 5, + "dimensions": [ + { + "name": "Heat Pump", + "id": "outputheatpump", + "regex": "OUTPUT HP<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*).*<\\\/td>" + }, + { + "name": "Water Pump", + "id": "intpumprate", + "regex": "INT PUMP RATE<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*).*<\\\/td>" + } + ] + } + ] + } + ] + }, + { + "name": "Heat Pump", + "id": "heatpump", + "url": "http://machine.ip.or.dns/?s=1,1", + "update_every": 10, + "categories": [ + { + "id": "runtime", + "name": "runtime", + "charts": [ + { + "title": "Compressor", + "id": "compressor", + "unit": "h", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Heating", + "id": "heating", + "regex": "RNT COMP 1 HEA<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + }, + { + "name": "Hot Water", + "id" : "hotwater", + "regex": "RNT COMP 1 DHW<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + } + ] + }, + { + "title": "Reheating", + "id": "reheating", + "unit": "h", + "type": "line", + "prio": 2, + "dimensions": [ + { + "name": "Reheating 1", + "id": "rh1", + "regex": "BH 1<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + }, + { + "name": "Reheating 2", + "id" : "rh2", + "regex": "BH 2<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + } + ] + } + ] + }, + { + "id": "processdata", + "name": "process data", + "charts": [ + { + "title": "Remaining Compressor Rest Time", + "id": "remaincomp", + "unit": "s", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Timer", + "id": "timer", + 
"regex": "COMP DLAY CNTR<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + } + ] + } + ] + }, + { + "id": "energy", + "name": "energy", + "charts": [ + { + "title": "Compressor Today", + "id": "compressorday", + "unit": "kWh", + "type": "line", + "prio": 1, + "dimensions": [ + { + "name": "Heating", + "id": "heating", + "digits": 3, + "regex": "COMPRESSOR HEATING DAY<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + }, + { + "name": "Hot Water", + "id": "hotwater", + "digits": 3, + "regex": "COMPRESSOR DHW DAY<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + } + ] + }, + { + "title": "Compressor Total", + "id": "compressortotal", + "unit": "MWh", + "type": "line", + "prio": 2, + "dimensions": [ + { + "name": "Heating", + "id": "heating", + "digits": 3, + "regex": "COMPRESSOR HEATING TOTAL<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + }, + { + "name": "Hot Water", + "id": "hotwater", + "digits": 3, + "regex": "COMPRESSOR DHW TOTAL<\\\/td>\\s*<td.*>(-?[0-9]+,?[0-9]*)" + } + ] + } + ] + } + ] + } + ] +} +``` diff --git a/conf.d/python.d.conf b/conf.d/python.d.conf index 0a37e40ae..741d49914 100644 --- a/conf.d/python.d.conf +++ b/conf.d/python.d.conf @@ -30,6 +30,7 @@ log_interval: 3600 apache_cache: no # apache: yes # bind_rndc: yes +# chrony: yes # cpufreq: yes # cpuidle: yes # dns_query_time: yes diff --git a/conf.d/python.d/chrony.conf b/conf.d/python.d/chrony.conf new file mode 100644 index 000000000..46229687b --- /dev/null +++ b/conf.d/python.d/chrony.conf @@ -0,0 +1,72 @@ +# netdata python.d.plugin configuration for chrony +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). 
+ +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +update_every: 5 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 5 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 5 # the JOB's number of restoration attempts +# +# In addition to the above, chrony also supports the following: +# +# command: 'chronyc -n tracking' # the command to run +# + +# ---------------------------------------------------------------------- +# REQUIRED chrony CONFIGURATION +# +# netdata will query chrony as user netdata. 
+# Verify that user netdata is allowed to call 'chronyc tracking'. +# Check cmdallow in chrony.conf +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS + +local: + command: 'chronyc -n tracking' diff --git a/conf.d/python.d/postgres.conf b/conf.d/python.d/postgres.conf index 12dddae67..1dbb64f40 100644 --- a/conf.d/python.d/postgres.conf +++ b/conf.d/python.d/postgres.conf @@ -75,6 +75,9 @@ # a postgres user for netdata and add its password below to allow # netdata connect. # +# Without superuser access, netdata won't be able to generate the write +# ahead log and the background writer charts. +# # ---------------------------------------------------------------------- socket: diff --git a/conf.d/python.d/tomcat.conf b/conf.d/python.d/tomcat.conf index aef9631b9..ce89175f6 100644 --- a/conf.d/python.d/tomcat.conf +++ b/conf.d/python.d/tomcat.conf @@ -63,7 +63,10 @@ # user: 'username' # pass: 'password' # - +# If you have multiple connectors, the following is supported: +# +# connector_name: 'ajp-bio-8009' # default is null, which uses the first connector in the status XML +# # ---------------------------------------------------------------------- # AUTO-DETECTION JOBS # only one of them will run (they have the same name) diff --git a/conf.d/python.d/web_log.conf b/conf.d/python.d/web_log.conf index e51b565d6..cd1f1af00 100644 --- a/conf.d/python.d/web_log.conf +++ b/conf.d/python.d/web_log.conf @@ -76,7 +76,7 @@ # observium: 'observium.*' # name(dimension): REGEX to match # stub_status: 'stub_status' # name(dimension): REGEX to match # custom_log_format: # define a custom log format -# pattern: '(?P<address>[\da-f.:]+) -.*?"(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+) (?P<resp_length>\d+) (?P<resp_time>\d\.\d+) ' +# pattern: '(?P<address>[\da-f.:]+) -.*?"(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+) (?P<resp_length>\d+) (?P<resp_time>\d+\.\d+) ' # time_multiplier: 1000000 # 
type <int> - convert time to microseconds # ---------------------------------------------------------------------- |