diff options
Diffstat (limited to '')
-rw-r--r-- | health/health.d/boinc.conf | 4 | ||||
-rw-r--r-- | health/health.d/btrfs.conf | 9 | ||||
-rw-r--r-- | health/health.d/cockroachdb.conf | 10 | ||||
-rw-r--r-- | health/health.d/disks.conf | 10 | ||||
-rw-r--r-- | health/health.d/exporting.conf | 2 | ||||
-rw-r--r-- | health/health.d/httpcheck.conf | 5 | ||||
-rw-r--r-- | health/health.d/ioping.conf | 1 | ||||
-rw-r--r-- | health/health.d/mdstat.conf | 2 | ||||
-rw-r--r-- | health/health.d/net.conf | 18 | ||||
-rw-r--r-- | health/health.d/nvme.conf | 1 | ||||
-rw-r--r-- | health/health.d/ping.conf | 3 | ||||
-rw-r--r-- | health/health.d/plugin.conf | 11 | ||||
-rw-r--r-- | health/health.d/portcheck.conf | 3 | ||||
-rw-r--r-- | health/health.d/redis.conf | 4 | ||||
-rw-r--r-- | health/health.d/vsphere.conf | 8 | ||||
-rw-r--r-- | health/health.d/web_log.conf | 12 | ||||
-rw-r--r-- | health/health.d/windows.conf | 4 |
17 files changed, 23 insertions, 84 deletions
diff --git a/health/health.d/boinc.conf b/health/health.d/boinc.conf index 7d7a4fdae..6f37787d7 100644 --- a/health/health.d/boinc.conf +++ b/health/health.d/boinc.conf @@ -8,7 +8,6 @@ component: BOINC os: * hosts: * - families: * lookup: average -10m unaligned of comperror units: tasks every: 1m @@ -26,7 +25,6 @@ component: BOINC component: BOINC os: * hosts: * - families: * lookup: average -10m unaligned of upload_failed units: tasks every: 1m @@ -44,7 +42,6 @@ component: BOINC component: BOINC os: * hosts: * - families: * lookup: average -10m unaligned of total units: tasks every: 1m @@ -62,7 +59,6 @@ component: BOINC component: BOINC os: * hosts: * - families: * lookup: average -10m unaligned of active calc: ($boinc_total_tasks >= 1) ? ($this) : (inf) units: tasks diff --git a/health/health.d/btrfs.conf b/health/health.d/btrfs.conf index ab63ff28d..97b7a3a94 100644 --- a/health/health.d/btrfs.conf +++ b/health/health.d/btrfs.conf @@ -6,7 +6,6 @@ component: File system os: * hosts: * - families: * calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free)) units: % every: 10s @@ -23,7 +22,6 @@ component: File system component: File system os: * hosts: * - families: * calc: $used * 100 / ($used + $free) units: % every: 10s @@ -40,7 +38,6 @@ component: File system component: File system os: * hosts: * - families: * calc: ($used + $reserved) * 100 / ($used + $free + $reserved) units: % every: 10s @@ -57,7 +54,6 @@ component: File system component: File system os: * hosts: * - families: * calc: $used * 100 / ($used + $free) units: % every: 10s @@ -74,7 +70,6 @@ component: File system component: File system os: * hosts: * - families: * units: errors lookup: max -10m every 1m of read_errs warn: $this > 0 @@ -89,7 +84,6 @@ component: File system component: File system os: * hosts: * - families: * units: errors lookup: max -10m every 1m of write_errs warn: $this > 0 @@ -104,7 +98,6 @@ component: File system component: File system os: * hosts: * - families: * units: errors lookup: max -10m every 1m of flush_errs warn: $this > 0 @@ -119,7 +112,6 @@ component: File system component: File system os: * hosts: * - families: * units: errors lookup: max -10m every 1m of corruption_errs warn: $this > 0 @@ -134,7 +126,6 @@ component: File system component: File system os: * hosts: * - families: * units: errors lookup: max -10m every 1m of generation_errs warn: $this > 0 diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf index 1f227841e..09e4f9d40 100644 --- a/health/health.d/cockroachdb.conf +++ b/health/health.d/cockroachdb.conf @@ -6,7 +6,7 @@ class: Utilization type: Database component: CockroachDB - calc: $capacity_used_percent + calc: $total units: % every: 10s warn: $this > (($status >= $WARNING) ? (80) : (85)) @@ -20,7 +20,7 @@ component: CockroachDB class: Utilization type: Database component: CockroachDB - calc: $capacity_usable_used_percent + calc: $usable units: % every: 10s warn: $this > (($status >= $WARNING) ? (80) : (85)) @@ -36,7 +36,7 @@ component: CockroachDB class: Errors type: Database component: CockroachDB - calc: $ranges_unavailable + calc: $unavailable units: num every: 10s warn: $this > 0 @@ -49,7 +49,7 @@ component: CockroachDB class: Errors type: Database component: CockroachDB - calc: $ranges_underreplicated + calc: $under_replicated units: num every: 10s warn: $this > 0 @@ -64,7 +64,7 @@ component: CockroachDB class: Utilization type: Database component: CockroachDB - calc: $sys_fd_open/$sys_fd_softlimit * 100 + calc: $open/$sys_fd_softlimit * 100 units: % every: 10s warn: $this > 80 diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf index fd207fbc1..7bd4f120c 100644 --- a/health/health.d/disks.conf +++ b/health/health.d/disks.conf @@ -16,7 +16,7 @@ component: Disk os: linux freebsd hosts: * - families: !/dev !/dev/* !/run !/run/* * +chart labels: mount_point=!/dev !/dev/* !/run !/run/* * calc: $used * 100 / ($avail + $used) units: % every: 1m @@ -33,7 +33,7 @@ component: Disk component: Disk os: linux freebsd hosts: * - families: !/dev !/dev/* !/run !/run/* * +chart labels: mount_point=!/dev !/dev/* !/run !/run/* * calc: $used * 100 / ($avail + $used) units: % every: 1m @@ -59,7 +59,6 @@ component: Disk # on: disk.space # os: linux freebsd # hosts: * -# families: * # lookup: min -10m at -50m unaligned of avail # calc: ($this - $avail) / (($now - $after) / 3600) # every: 1m @@ -75,7 +74,6 @@ component: Disk # on: disk.space # os: linux freebsd # hosts: * -# families: * # calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf) # units: hours # every: 10s @@ -101,7 +99,6 @@ component: Disk # on: disk.inodes # os: linux freebsd # hosts: * -# families: * # lookup: min -10m at -50m unaligned of avail # calc: ($this - $avail) / (($now - $after) / 3600) # every: 1m @@ -116,7 +113,6 @@ component: Disk # on: disk.inodes # os: linux freebsd # hosts: * -# families: * # calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf) # units: hours # every: 10s @@ -141,7 +137,6 @@ component: Disk component: Disk os: linux freebsd hosts: * - families: * lookup: average -10m unaligned units: % every: 1m @@ -163,7 +158,6 @@ component: Disk component: Disk os: linux hosts: * - families: * lookup: average -10m unaligned units: ms every: 1m diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf index 06f398c6e..f1030a317 100644 --- a/health/health.d/exporting.conf +++ b/health/health.d/exporting.conf @@ -1,6 +1,5 @@ template: exporting_last_buffering - families: * on: exporting_data_size class: Latency type: Netdata @@ -15,7 +14,6 @@ component: Exporting engine to: dba template: exporting_metrics_sent - families: * on: exporting_data_size class: Workload type: Netdata diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf index 2008b000d..81748b9e0 100644 --- a/health/health.d/httpcheck.conf +++ b/health/health.d/httpcheck.conf @@ -1,7 +1,6 @@ # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges template: httpcheck_web_service_up - families: * on: httpcheck.status class: Utilization type: Web Server @@ -14,7 +13,6 @@ component: HTTP endpoint to: silent template: httpcheck_web_service_bad_content - families: * on: httpcheck.status class: Workload type: Web Server @@ -29,7 +27,6 @@ component: HTTP endpoint to: webmaster template: httpcheck_web_service_bad_status - families: * on: httpcheck.status class: Workload type: Web Server @@ -44,7 +41,6 @@ component: HTTP endpoint to: webmaster template: httpcheck_web_service_timeouts - families: * on: httpcheck.status class: Latency type: Web Server @@ -59,7 +55,6 @@ component: HTTP endpoint to: webmaster template: httpcheck_web_service_no_connection - families: * on: httpcheck.status class: Errors type: Other diff --git a/health/health.d/ioping.conf b/health/health.d/ioping.conf index 8b498ad3c..2786cbd62 100644 --- a/health/health.d/ioping.conf +++ b/health/health.d/ioping.conf @@ -1,5 +1,4 @@ template: ioping_disk_latency - families: * on: ioping.latency class: Latency type: System diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf index ed980a26a..b90455a58 100644 --- a/health/health.d/mdstat.conf +++ b/health/health.d/mdstat.conf @@ -29,7 +29,7 @@ component: RAID class: Errors type: System component: RAID - families: !*(raid1) !*(raid10) * +chart labels: raid_level=!raid1 !raid10 * units: unsynchronized blocks calc: $count every: 60s diff --git a/health/health.d/net.conf b/health/health.d/net.conf index a0723f303..08a4eecb4 100644 --- a/health/health.d/net.conf +++ b/health/health.d/net.conf @@ -11,7 +11,6 @@ component: Network os: * hosts: * - families: * calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max) : ( nan ) units: Mbit every: 10s @@ -24,7 +23,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: average -1m unaligned absolute of received calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed)) : ( nan ) units: % @@ -41,7 +39,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: average -1m unaligned absolute of sent calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed)) : ( nan ) units: % @@ -68,7 +65,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: sum -10m unaligned absolute of inbound units: packets every: 1m @@ -81,7 +77,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: sum -10m unaligned absolute of outbound units: packets every: 1m @@ -94,7 +89,7 @@ component: Network component: Network os: linux hosts: * - families: !wl* * +chart labels: device=!wl* * lookup: sum -10m unaligned absolute of received calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0)) units: % @@ -111,7 +106,7 @@ component: Network component: Network os: linux hosts: * - families: !wl* * +chart labels: device=!wl* * lookup: sum -10m unaligned absolute of sent calc: (($outbound_packets_dropped != nan AND $this > 1000) ? ($outbound_packets_dropped * 100 / $this) : (0)) units: % @@ -128,7 +123,7 @@ component: Network component: Network os: linux hosts: * - families: wl* +chart labels: device=wl* lookup: sum -10m unaligned absolute of received calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0)) units: % @@ -145,7 +140,7 @@ component: Network component: Network os: linux hosts: * - families: wl* +chart labels: device=wl* lookup: sum -10m unaligned absolute of sent calc: (($outbound_packets_dropped != nan AND $this > 1000) ? ($outbound_packets_dropped * 100 / $this) : (0)) units: % @@ -165,7 +160,6 @@ component: Network component: Network os: freebsd hosts: * - families: * lookup: sum -10m unaligned absolute of inbound units: errors every: 1m @@ -181,7 +175,6 @@ component: Network component: Network os: freebsd hosts: * - families: * lookup: sum -10m unaligned absolute of outbound units: errors every: 1m @@ -205,7 +198,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: sum -10m unaligned absolute units: errors every: 1m @@ -230,7 +222,6 @@ component: Network component: Network os: linux freebsd hosts: * - families: * lookup: average -1m unaligned of received units: packets every: 10s @@ -243,7 +234,6 @@ component: Network component: Network os: linux freebsd hosts: * - families: * lookup: average -10s unaligned of received calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate)) every: 10s diff --git a/health/health.d/nvme.conf b/health/health.d/nvme.conf index b7c0e6fd4..742ffbc93 100644 --- a/health/health.d/nvme.conf +++ b/health/health.d/nvme.conf @@ -1,7 +1,6 @@ # you can disable an alarm notification by setting the 'to' line to: silent template: nvme_device_critical_warnings_state - families: * on: nvme.device_critical_warnings_state class: Errors type: System diff --git a/health/health.d/ping.conf b/health/health.d/ping.conf index fa8213ad3..b8d39bbad 100644 --- a/health/health.d/ping.conf +++ b/health/health.d/ping.conf @@ -1,7 +1,6 @@ # you can disable an alarm notification by setting the 'to' line to: silent template: ping_host_reachable - families: * on: ping.host_packet_loss class: Errors type: Other @@ -16,7 +15,6 @@ component: Network to: sysadmin template: ping_packet_loss - families: * on: ping.host_packet_loss class: Errors type: Other @@ -33,7 +31,6 @@ component: Network to: sysadmin template: ping_host_latency - families: * on: ping.host_rtt class: Latency type: Other diff --git a/health/health.d/plugin.conf b/health/health.d/plugin.conf new file mode 100644 index 000000000..0a891db79 --- /dev/null +++ b/health/health.d/plugin.conf @@ -0,0 +1,11 @@ + template: plugin_availability_status + on: netdata.plugin_availability_status + class: Errors + type: Netdata + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : (20 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: the amount of time that ${label:_collect_plugin} did not report its availability status + to: sysadmin diff --git a/health/health.d/portcheck.conf b/health/health.d/portcheck.conf index e8908404c..34550ea02 100644 --- a/health/health.d/portcheck.conf +++ b/health/health.d/portcheck.conf @@ -1,7 +1,6 @@ # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges template: portcheck_service_reachable - families: * on: portcheck.status class: Workload type: Other @@ -14,7 +13,6 @@ component: TCP endpoint to: silent template: portcheck_connection_timeouts - families: * on: portcheck.status class: Errors type: Other @@ -29,7 +27,6 @@ component: TCP endpoint to: sysadmin template: portcheck_connection_fails - families: * on: portcheck.status class: Errors type: Other diff --git a/health/health.d/redis.conf b/health/health.d/redis.conf index 34d00b5df..a58fa34d1 100644 --- a/health/health.d/redis.conf +++ b/health/health.d/redis.conf @@ -1,7 +1,6 @@ # you can disable an alarm notification by setting the 'to' line to: silent template: redis_connections_rejected - families: * on: redis.connections class: Errors type: KV Storage @@ -15,7 +14,6 @@ component: Redis to: dba template: redis_bgsave_broken - families: * on: redis.bgsave_health class: Errors type: KV Storage @@ -28,7 +26,6 @@ component: Redis to: dba template: redis_bgsave_slow - families: * on: redis.bgsave_now class: Latency type: KV Storage @@ -43,7 +40,6 @@ component: Redis to: dba template: redis_master_link_down - families: * on: redis.master_link_down_since_time class: Errors type: KV Storage diff --git a/health/health.d/vsphere.conf b/health/health.d/vsphere.conf index d8fc899b9..1d8be6cb5 100644 --- a/health/health.d/vsphere.conf +++ b/health/health.d/vsphere.conf @@ -43,7 +43,6 @@ component: Memory type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of rx units: packets every: 1m @@ -55,7 +54,6 @@ component: Network type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of tx units: packets every: 1m @@ -69,7 +67,6 @@ component: Network type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of rx calc: (($vsphere_inbound_packets_errors != nan AND $this > 1000) ? ($vsphere_inbound_packets_errors * 100 / $this) : (0)) units: % @@ -85,7 +82,6 @@ component: Network type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of tx calc: (($vsphere_outbound_packets_errors != nan AND $this > 1000) ? ($vsphere_outbound_packets_errors * 100 / $this) : (0)) units: % @@ -121,7 +117,6 @@ component: CPU type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of rx units: packets every: 1m @@ -133,7 +128,6 @@ component: Network type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of tx units: packets every: 1m @@ -147,7 +141,6 @@ component: Network type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of rx calc: (($vsphere_inbound_packets_dropped != nan AND $this > 1000) ? ($vsphere_inbound_packets_dropped * 100 / $this) : (0)) units: % @@ -163,7 +156,6 @@ component: Network type: Virtual Machine component: Network hosts: * - families: * lookup: sum -10m unaligned absolute match-names of tx calc: (($vsphere_outbound_packets_dropped != nan AND $this > 1000) ? ($vsphere_outbound_packets_dropped * 100 / $this) : (0)) units: % diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf index c33c4664c..3fd01831b 100644 --- a/health/health.d/web_log.conf +++ b/health/health.d/web_log.conf @@ -13,7 +13,6 @@ class: Workload type: Web Server component: Web log - families: * lookup: sum -1m unaligned calc: ($this == 0)?(1):($this) units: requests @@ -25,7 +24,6 @@ component: Web log class: Errors type: Web Server component: Web log - families: * lookup: sum -1m unaligned of unmatched calc: $this * 100 / $web_log_1m_total_requests units: % @@ -50,7 +48,6 @@ component: Web log class: Workload type: Web Server component: Web log - families: * lookup: sum -1m unaligned calc: ($this == 0)?(1):($this) units: requests @@ -62,7 +59,6 @@ component: Web log class: Workload type: Web Server component: Web log - families: * lookup: sum -1m unaligned of success calc: $this * 100 / $web_log_1m_requests units: % @@ -78,7 +74,6 @@ component: Web log class: Workload type: Web Server component: Web log - families: * lookup: sum -1m unaligned of redirect calc: $this * 100 / $web_log_1m_requests units: % @@ -93,7 +88,6 @@ component: Web log class: Errors type: Web Server component: Web log - families: * lookup: sum -1m unaligned of bad calc: $this * 100 / $web_log_1m_requests units: % @@ -108,7 +102,6 @@ component: Web log class: Errors type: Web Server component: Web log - families: * lookup: sum -1m unaligned of error calc: $this * 100 / $web_log_1m_requests units: % @@ -134,7 +127,6 @@ component: Web log class: Latency type: System component: Web log - families: * lookup: average -10m unaligned of avg units: ms every: 30s @@ -145,7 +137,6 @@ component: Web log class: Latency type: Web Server component: Web log - families: * lookup: average -1m unaligned of avg units: ms every: 10s @@ -174,7 +165,6 @@ component: Web log class: Workload type: Web Server component: Web log - families: * lookup: average -5m at -5m unaligned of success units: requests/s every: 30s @@ -185,7 +175,6 @@ component: Web log class: Workload type: Web Server component: Web log - families: * lookup: average -5m unaligned of success units: requests/s every: 30s @@ -196,7 +185,6 @@ component: Web log class: Workload type: Web Server component: Web log - families: * calc: ($web_log_5m_successful_old > 0)?($web_log_5m_successful * 100 / $web_log_5m_successful_old):(100) units: % every: 30s diff --git a/health/health.d/windows.conf b/health/health.d/windows.conf index d678ac3ae..d4bc7639c 100644 --- a/health/health.d/windows.conf +++ b/health/health.d/windows.conf @@ -62,7 +62,6 @@ component: Memory component: Network os: linux hosts: * - families: * lookup: sum -10m unaligned absolute match-names of inbound units: packets every: 1m @@ -78,7 +77,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: sum -10m unaligned absolute match-names of outbound units: packets every: 1m @@ -94,7 +92,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: sum -10m unaligned absolute match-names of inbound units: packets every: 1m @@ -110,7 +107,6 @@ component: Network component: Network os: linux hosts: * - families: * lookup: sum -10m unaligned absolute match-names of outbound units: packets every: 1m |