summary | refs | log | tree | commit | diff | stats
path: root/health/health.d
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-14 19:20:36 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-14 19:20:36 +0000
commit dd24e74edfbafc09eaeb2dde0fda7eb3e1e86d0b (patch)
tree 1e52f4dac2622ab377c7649f218fb49003b4cbb9 /health/health.d
parent Releasing debian version 1.39.1-2. (diff)
download netdata-dd24e74edfbafc09eaeb2dde0fda7eb3e1e86d0b.tar.xz
netdata-dd24e74edfbafc09eaeb2dde0fda7eb3e1e86d0b.zip
Merging upstream version 1.40.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- health/health.d/boinc.conf 4
-rw-r--r-- health/health.d/btrfs.conf 9
-rw-r--r-- health/health.d/cockroachdb.conf 10
-rw-r--r-- health/health.d/disks.conf 10
-rw-r--r-- health/health.d/exporting.conf 2
-rw-r--r-- health/health.d/httpcheck.conf 5
-rw-r--r-- health/health.d/ioping.conf 1
-rw-r--r-- health/health.d/mdstat.conf 2
-rw-r--r-- health/health.d/net.conf 18
-rw-r--r-- health/health.d/nvme.conf 1
-rw-r--r-- health/health.d/ping.conf 3
-rw-r--r-- health/health.d/plugin.conf 11
-rw-r--r-- health/health.d/portcheck.conf 3
-rw-r--r-- health/health.d/redis.conf 4
-rw-r--r-- health/health.d/vsphere.conf 8
-rw-r--r-- health/health.d/web_log.conf 12
-rw-r--r-- health/health.d/windows.conf 4
17 files changed, 23 insertions, 84 deletions
diff --git a/health/health.d/boinc.conf b/health/health.d/boinc.conf
index 7d7a4fdae..6f37787d7 100644
--- a/health/health.d/boinc.conf
+++ b/health/health.d/boinc.conf
@@ -8,7 +8,6 @@
component: BOINC
os: *
hosts: *
- families: *
lookup: average -10m unaligned of comperror
units: tasks
every: 1m
@@ -26,7 +25,6 @@ component: BOINC
component: BOINC
os: *
hosts: *
- families: *
lookup: average -10m unaligned of upload_failed
units: tasks
every: 1m
@@ -44,7 +42,6 @@ component: BOINC
component: BOINC
os: *
hosts: *
- families: *
lookup: average -10m unaligned of total
units: tasks
every: 1m
@@ -62,7 +59,6 @@ component: BOINC
component: BOINC
os: *
hosts: *
- families: *
lookup: average -10m unaligned of active
calc: ($boinc_total_tasks >= 1) ? ($this) : (inf)
units: tasks
diff --git a/health/health.d/btrfs.conf b/health/health.d/btrfs.conf
index ab63ff28d..97b7a3a94 100644
--- a/health/health.d/btrfs.conf
+++ b/health/health.d/btrfs.conf
@@ -6,7 +6,6 @@
component: File system
os: *
hosts: *
- families: *
calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free))
units: %
every: 10s
@@ -23,7 +22,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
calc: $used * 100 / ($used + $free)
units: %
every: 10s
@@ -40,7 +38,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
calc: ($used + $reserved) * 100 / ($used + $free + $reserved)
units: %
every: 10s
@@ -57,7 +54,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
calc: $used * 100 / ($used + $free)
units: %
every: 10s
@@ -74,7 +70,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
units: errors
lookup: max -10m every 1m of read_errs
warn: $this > 0
@@ -89,7 +84,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
units: errors
lookup: max -10m every 1m of write_errs
warn: $this > 0
@@ -104,7 +98,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
units: errors
lookup: max -10m every 1m of flush_errs
warn: $this > 0
@@ -119,7 +112,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
units: errors
lookup: max -10m every 1m of corruption_errs
warn: $this > 0
@@ -134,7 +126,6 @@ component: File system
component: File system
os: *
hosts: *
- families: *
units: errors
lookup: max -10m every 1m of generation_errs
warn: $this > 0
diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf
index 1f227841e..09e4f9d40 100644
--- a/health/health.d/cockroachdb.conf
+++ b/health/health.d/cockroachdb.conf
@@ -6,7 +6,7 @@
class: Utilization
type: Database
component: CockroachDB
- calc: $capacity_used_percent
+ calc: $total
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (80) : (85))
@@ -20,7 +20,7 @@ component: CockroachDB
class: Utilization
type: Database
component: CockroachDB
- calc: $capacity_usable_used_percent
+ calc: $usable
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (80) : (85))
@@ -36,7 +36,7 @@ component: CockroachDB
class: Errors
type: Database
component: CockroachDB
- calc: $ranges_unavailable
+ calc: $unavailable
units: num
every: 10s
warn: $this > 0
@@ -49,7 +49,7 @@ component: CockroachDB
class: Errors
type: Database
component: CockroachDB
- calc: $ranges_underreplicated
+ calc: $under_replicated
units: num
every: 10s
warn: $this > 0
@@ -64,7 +64,7 @@ component: CockroachDB
class: Utilization
type: Database
component: CockroachDB
- calc: $sys_fd_open/$sys_fd_softlimit * 100
+ calc: $open/$sys_fd_softlimit * 100
units: %
every: 10s
warn: $this > 80
diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf
index fd207fbc1..7bd4f120c 100644
--- a/health/health.d/disks.conf
+++ b/health/health.d/disks.conf
@@ -16,7 +16,7 @@
component: Disk
os: linux freebsd
hosts: *
- families: !/dev !/dev/* !/run !/run/* *
+chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
calc: $used * 100 / ($avail + $used)
units: %
every: 1m
@@ -33,7 +33,7 @@ component: Disk
component: Disk
os: linux freebsd
hosts: *
- families: !/dev !/dev/* !/run !/run/* *
+chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
calc: $used * 100 / ($avail + $used)
units: %
every: 1m
@@ -59,7 +59,6 @@ component: Disk
# on: disk.space
# os: linux freebsd
# hosts: *
-# families: *
# lookup: min -10m at -50m unaligned of avail
# calc: ($this - $avail) / (($now - $after) / 3600)
# every: 1m
@@ -75,7 +74,6 @@ component: Disk
# on: disk.space
# os: linux freebsd
# hosts: *
-# families: *
# calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
# units: hours
# every: 10s
@@ -101,7 +99,6 @@ component: Disk
# on: disk.inodes
# os: linux freebsd
# hosts: *
-# families: *
# lookup: min -10m at -50m unaligned of avail
# calc: ($this - $avail) / (($now - $after) / 3600)
# every: 1m
@@ -116,7 +113,6 @@ component: Disk
# on: disk.inodes
# os: linux freebsd
# hosts: *
-# families: *
# calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
# units: hours
# every: 10s
@@ -141,7 +137,6 @@ component: Disk
component: Disk
os: linux freebsd
hosts: *
- families: *
lookup: average -10m unaligned
units: %
every: 1m
@@ -163,7 +158,6 @@ component: Disk
component: Disk
os: linux
hosts: *
- families: *
lookup: average -10m unaligned
units: ms
every: 1m
diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf
index 06f398c6e..f1030a317 100644
--- a/health/health.d/exporting.conf
+++ b/health/health.d/exporting.conf
@@ -1,6 +1,5 @@
template: exporting_last_buffering
- families: *
on: exporting_data_size
class: Latency
type: Netdata
@@ -15,7 +14,6 @@ component: Exporting engine
to: dba
template: exporting_metrics_sent
- families: *
on: exporting_data_size
class: Workload
type: Netdata
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf
index 2008b000d..81748b9e0 100644
--- a/health/health.d/httpcheck.conf
+++ b/health/health.d/httpcheck.conf
@@ -1,7 +1,6 @@
# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
template: httpcheck_web_service_up
- families: *
on: httpcheck.status
class: Utilization
type: Web Server
@@ -14,7 +13,6 @@ component: HTTP endpoint
to: silent
template: httpcheck_web_service_bad_content
- families: *
on: httpcheck.status
class: Workload
type: Web Server
@@ -29,7 +27,6 @@ component: HTTP endpoint
to: webmaster
template: httpcheck_web_service_bad_status
- families: *
on: httpcheck.status
class: Workload
type: Web Server
@@ -44,7 +41,6 @@ component: HTTP endpoint
to: webmaster
template: httpcheck_web_service_timeouts
- families: *
on: httpcheck.status
class: Latency
type: Web Server
@@ -59,7 +55,6 @@ component: HTTP endpoint
to: webmaster
template: httpcheck_web_service_no_connection
- families: *
on: httpcheck.status
class: Errors
type: Other
diff --git a/health/health.d/ioping.conf b/health/health.d/ioping.conf
index 8b498ad3c..2786cbd62 100644
--- a/health/health.d/ioping.conf
+++ b/health/health.d/ioping.conf
@@ -1,5 +1,4 @@
template: ioping_disk_latency
- families: *
on: ioping.latency
class: Latency
type: System
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
index ed980a26a..b90455a58 100644
--- a/health/health.d/mdstat.conf
+++ b/health/health.d/mdstat.conf
@@ -29,7 +29,7 @@ component: RAID
class: Errors
type: System
component: RAID
- families: !*(raid1) !*(raid10) *
+chart labels: raid_level=!raid1 !raid10 *
units: unsynchronized blocks
calc: $count
every: 60s
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
index a0723f303..08a4eecb4 100644
--- a/health/health.d/net.conf
+++ b/health/health.d/net.conf
@@ -11,7 +11,6 @@
component: Network
os: *
hosts: *
- families: *
calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max) : ( nan )
units: Mbit
every: 10s
@@ -24,7 +23,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: average -1m unaligned absolute of received
calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed)) : ( nan )
units: %
@@ -41,7 +39,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: average -1m unaligned absolute of sent
calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed)) : ( nan )
units: %
@@ -68,7 +65,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: sum -10m unaligned absolute of inbound
units: packets
every: 1m
@@ -81,7 +77,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: sum -10m unaligned absolute of outbound
units: packets
every: 1m
@@ -94,7 +89,7 @@ component: Network
component: Network
os: linux
hosts: *
- families: !wl* *
+chart labels: device=!wl* *
lookup: sum -10m unaligned absolute of received
calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0))
units: %
@@ -111,7 +106,7 @@ component: Network
component: Network
os: linux
hosts: *
- families: !wl* *
+chart labels: device=!wl* *
lookup: sum -10m unaligned absolute of sent
calc: (($outbound_packets_dropped != nan AND $this > 1000) ? ($outbound_packets_dropped * 100 / $this) : (0))
units: %
@@ -128,7 +123,7 @@ component: Network
component: Network
os: linux
hosts: *
- families: wl*
+chart labels: device=wl*
lookup: sum -10m unaligned absolute of received
calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0))
units: %
@@ -145,7 +140,7 @@ component: Network
component: Network
os: linux
hosts: *
- families: wl*
+chart labels: device=wl*
lookup: sum -10m unaligned absolute of sent
calc: (($outbound_packets_dropped != nan AND $this > 1000) ? ($outbound_packets_dropped * 100 / $this) : (0))
units: %
@@ -165,7 +160,6 @@ component: Network
component: Network
os: freebsd
hosts: *
- families: *
lookup: sum -10m unaligned absolute of inbound
units: errors
every: 1m
@@ -181,7 +175,6 @@ component: Network
component: Network
os: freebsd
hosts: *
- families: *
lookup: sum -10m unaligned absolute of outbound
units: errors
every: 1m
@@ -205,7 +198,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: sum -10m unaligned absolute
units: errors
every: 1m
@@ -230,7 +222,6 @@ component: Network
component: Network
os: linux freebsd
hosts: *
- families: *
lookup: average -1m unaligned of received
units: packets
every: 10s
@@ -243,7 +234,6 @@ component: Network
component: Network
os: linux freebsd
hosts: *
- families: *
lookup: average -10s unaligned of received
calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
every: 10s
diff --git a/health/health.d/nvme.conf b/health/health.d/nvme.conf
index b7c0e6fd4..742ffbc93 100644
--- a/health/health.d/nvme.conf
+++ b/health/health.d/nvme.conf
@@ -1,7 +1,6 @@
# you can disable an alarm notification by setting the 'to' line to: silent
template: nvme_device_critical_warnings_state
- families: *
on: nvme.device_critical_warnings_state
class: Errors
type: System
diff --git a/health/health.d/ping.conf b/health/health.d/ping.conf
index fa8213ad3..b8d39bbad 100644
--- a/health/health.d/ping.conf
+++ b/health/health.d/ping.conf
@@ -1,7 +1,6 @@
# you can disable an alarm notification by setting the 'to' line to: silent
template: ping_host_reachable
- families: *
on: ping.host_packet_loss
class: Errors
type: Other
@@ -16,7 +15,6 @@ component: Network
to: sysadmin
template: ping_packet_loss
- families: *
on: ping.host_packet_loss
class: Errors
type: Other
@@ -33,7 +31,6 @@ component: Network
to: sysadmin
template: ping_host_latency
- families: *
on: ping.host_rtt
class: Latency
type: Other
diff --git a/health/health.d/plugin.conf b/health/health.d/plugin.conf
new file mode 100644
index 000000000..0a891db79
--- /dev/null
+++ b/health/health.d/plugin.conf
@@ -0,0 +1,11 @@
+ template: plugin_availability_status
+ on: netdata.plugin_availability_status
+ class: Errors
+ type: Netdata
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : (20 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: the amount of time that ${label:_collect_plugin} did not report its availability status
+ to: sysadmin
diff --git a/health/health.d/portcheck.conf b/health/health.d/portcheck.conf
index e8908404c..34550ea02 100644
--- a/health/health.d/portcheck.conf
+++ b/health/health.d/portcheck.conf
@@ -1,7 +1,6 @@
# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
template: portcheck_service_reachable
- families: *
on: portcheck.status
class: Workload
type: Other
@@ -14,7 +13,6 @@ component: TCP endpoint
to: silent
template: portcheck_connection_timeouts
- families: *
on: portcheck.status
class: Errors
type: Other
@@ -29,7 +27,6 @@ component: TCP endpoint
to: sysadmin
template: portcheck_connection_fails
- families: *
on: portcheck.status
class: Errors
type: Other
diff --git a/health/health.d/redis.conf b/health/health.d/redis.conf
index 34d00b5df..a58fa34d1 100644
--- a/health/health.d/redis.conf
+++ b/health/health.d/redis.conf
@@ -1,7 +1,6 @@
# you can disable an alarm notification by setting the 'to' line to: silent
template: redis_connections_rejected
- families: *
on: redis.connections
class: Errors
type: KV Storage
@@ -15,7 +14,6 @@ component: Redis
to: dba
template: redis_bgsave_broken
- families: *
on: redis.bgsave_health
class: Errors
type: KV Storage
@@ -28,7 +26,6 @@ component: Redis
to: dba
template: redis_bgsave_slow
- families: *
on: redis.bgsave_now
class: Latency
type: KV Storage
@@ -43,7 +40,6 @@ component: Redis
to: dba
template: redis_master_link_down
- families: *
on: redis.master_link_down_since_time
class: Errors
type: KV Storage
diff --git a/health/health.d/vsphere.conf b/health/health.d/vsphere.conf
index d8fc899b9..1d8be6cb5 100644
--- a/health/health.d/vsphere.conf
+++ b/health/health.d/vsphere.conf
@@ -43,7 +43,6 @@ component: Memory
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of rx
units: packets
every: 1m
@@ -55,7 +54,6 @@ component: Network
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of tx
units: packets
every: 1m
@@ -69,7 +67,6 @@ component: Network
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of rx
calc: (($vsphere_inbound_packets_errors != nan AND $this > 1000) ? ($vsphere_inbound_packets_errors * 100 / $this) : (0))
units: %
@@ -85,7 +82,6 @@ component: Network
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of tx
calc: (($vsphere_outbound_packets_errors != nan AND $this > 1000) ? ($vsphere_outbound_packets_errors * 100 / $this) : (0))
units: %
@@ -121,7 +117,6 @@ component: CPU
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of rx
units: packets
every: 1m
@@ -133,7 +128,6 @@ component: Network
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of tx
units: packets
every: 1m
@@ -147,7 +141,6 @@ component: Network
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of rx
calc: (($vsphere_inbound_packets_dropped != nan AND $this > 1000) ? ($vsphere_inbound_packets_dropped * 100 / $this) : (0))
units: %
@@ -163,7 +156,6 @@ component: Network
type: Virtual Machine
component: Network
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of tx
calc: (($vsphere_outbound_packets_dropped != nan AND $this > 1000) ? ($vsphere_outbound_packets_dropped * 100 / $this) : (0))
units: %
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf
index c33c4664c..3fd01831b 100644
--- a/health/health.d/web_log.conf
+++ b/health/health.d/web_log.conf
@@ -13,7 +13,6 @@
class: Workload
type: Web Server
component: Web log
- families: *
lookup: sum -1m unaligned
calc: ($this == 0)?(1):($this)
units: requests
@@ -25,7 +24,6 @@ component: Web log
class: Errors
type: Web Server
component: Web log
- families: *
lookup: sum -1m unaligned of unmatched
calc: $this * 100 / $web_log_1m_total_requests
units: %
@@ -50,7 +48,6 @@ component: Web log
class: Workload
type: Web Server
component: Web log
- families: *
lookup: sum -1m unaligned
calc: ($this == 0)?(1):($this)
units: requests
@@ -62,7 +59,6 @@ component: Web log
class: Workload
type: Web Server
component: Web log
- families: *
lookup: sum -1m unaligned of success
calc: $this * 100 / $web_log_1m_requests
units: %
@@ -78,7 +74,6 @@ component: Web log
class: Workload
type: Web Server
component: Web log
- families: *
lookup: sum -1m unaligned of redirect
calc: $this * 100 / $web_log_1m_requests
units: %
@@ -93,7 +88,6 @@ component: Web log
class: Errors
type: Web Server
component: Web log
- families: *
lookup: sum -1m unaligned of bad
calc: $this * 100 / $web_log_1m_requests
units: %
@@ -108,7 +102,6 @@ component: Web log
class: Errors
type: Web Server
component: Web log
- families: *
lookup: sum -1m unaligned of error
calc: $this * 100 / $web_log_1m_requests
units: %
@@ -134,7 +127,6 @@ component: Web log
class: Latency
type: System
component: Web log
- families: *
lookup: average -10m unaligned of avg
units: ms
every: 30s
@@ -145,7 +137,6 @@ component: Web log
class: Latency
type: Web Server
component: Web log
- families: *
lookup: average -1m unaligned of avg
units: ms
every: 10s
@@ -174,7 +165,6 @@ component: Web log
class: Workload
type: Web Server
component: Web log
- families: *
lookup: average -5m at -5m unaligned of success
units: requests/s
every: 30s
@@ -185,7 +175,6 @@ component: Web log
class: Workload
type: Web Server
component: Web log
- families: *
lookup: average -5m unaligned of success
units: requests/s
every: 30s
@@ -196,7 +185,6 @@ component: Web log
class: Workload
type: Web Server
component: Web log
- families: *
calc: ($web_log_5m_successful_old > 0)?($web_log_5m_successful * 100 / $web_log_5m_successful_old):(100)
units: %
every: 30s
diff --git a/health/health.d/windows.conf b/health/health.d/windows.conf
index d678ac3ae..d4bc7639c 100644
--- a/health/health.d/windows.conf
+++ b/health/health.d/windows.conf
@@ -62,7 +62,6 @@ component: Memory
component: Network
os: linux
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of inbound
units: packets
every: 1m
@@ -78,7 +77,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of outbound
units: packets
every: 1m
@@ -94,7 +92,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of inbound
units: packets
every: 1m
@@ -110,7 +107,6 @@ component: Network
component: Network
os: linux
hosts: *
- families: *
lookup: sum -10m unaligned absolute match-names of outbound
units: packets
every: 1m