Merging upstream version 1.46.3.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-07-24 09:54:23 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-07-24 09:54:44 +0000
commit: 836b47cb7e99a977c5a23b059ca1d0b5065d310e (patch)
tree: 1604da8f482d02effa033c94a84be42bc0c848c3 /health/health.d
parent: Releasing debian version 1.44.3-2. (diff)
download: netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.tar.xz
netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.zip
79 files changed, 0 insertions, 5137 deletions
diff --git a/health/health.d/adaptec_raid.conf b/health/health.d/adaptec_raid.conf
deleted file mode 100644
index 1f1840491..000000000
--- a/health/health.d/adaptec_raid.conf
+++ /dev/null
@@ -1,32 +0,0 @@
-
-# logical device status check
-
- template: adaptec_raid_ld_status
-       on: adaptec_raid.ld_status
-    class: Errors
-     type: System
-component: RAID
-   lookup: max -10s foreach *
-    units: bool
-    every: 10s
-     crit: $this > 0
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Adaptec RAID logical device status
-     info: Logical device status is failed or degraded
-       to: sysadmin
-
-# physical device state check
-
- template: adaptec_raid_pd_state
-       on: adaptec_raid.pd_state
-    class: Errors
-     type: System
-component: RAID
-   lookup: max -10s foreach *
-    units: bool
-    every: 10s
-     crit: $this > 0
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Adaptec RAID physical device state
-     info: Physical device state is not online
-       to: sysadmin
diff --git a/health/health.d/anomalies.conf b/health/health.d/anomalies.conf
deleted file mode 100644
index 269ae544b..000000000
--- a/health/health.d/anomalies.conf
+++ /dev/null
@@ -1,23 +0,0 @@
-# raise a warning alarm if an anomaly probability is consistently above 50%
-
- template: anomalies_anomaly_probabilities
-       on: anomalies.probability
-    class: Errors
-     type: Netdata
-component: ML
-   lookup: average -2m foreach *
-    every: 1m
-     warn: $this > 50
-     info: average anomaly probability over the last 2 minutes
-
-# raise a warning alarm if an anomaly flag is consistently firing
-
- template: anomalies_anomaly_flags
-       on: anomalies.anomaly
-    class: Errors
-     type: Netdata
-component: ML
-   lookup: sum -2m foreach *
-    every: 1m
-     warn: $this > 10
-     info: number of anomalies in the last 2 minutes
diff --git a/health/health.d/apcupsd.conf b/health/health.d/apcupsd.conf
deleted file mode 100644
index 90a72af19..000000000
--- a/health/health.d/apcupsd.conf
+++ /dev/null
@@ -1,125 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: apcupsd_10min_ups_load
-       on: apcupsd.load
-    class: Utilization
-     type: Power Supply
-component: UPS
-       os: *
-    hosts: *
-   lookup: average -10m unaligned of percentage
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (70) : (80))
-    delay: down 10m multiplier 1.5 max 1h
-  summary: APC UPS load
-     info: APC UPS average load over the last 10 minutes
-       to: sitemgr
-
-# Discussion in https://github.com/netdata/netdata/pull/3928:
-# Fire the alarm as soon as it's going on battery (99% charge) and clear only when full.
- template: apcupsd_ups_charge
-       on: apcupsd.charge
-    class: Errors
-     type: Power Supply
-component: UPS
-       os: *
-    hosts: *
-   lookup: average -60s unaligned of charge
-    units: %
-    every: 60s
-     warn: $this < 100
-     crit: $this < 40
-    delay: down 10m multiplier 1.5 max 1h
-  summary: APC UPS battery charge
-     info: APC UPS average battery charge over the last minute
-       to: sitemgr
-
- template: apcupsd_last_collected_secs
-       on: apcupsd.load
-    class: Latency
-     type: Power Supply
-component: UPS device
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: APC UPS last collection
-     info: APC UPS number of seconds since the last successful data collection
-       to: sitemgr
-
-#Send out a warning when SELFTEST code is BT or NG. Code descriptions can be found at:
-#http://www.apcupsd.org/manual/#:~:text=or%20N/A.-,SELFTEST,-The%20results%20of
- template: apcupsd_selftest_warning
-       on: apcupsd.selftest
-   lookup: max -1s unaligned match-names of BT,NG
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: up 0 down 15m multiplier 1.5 max 1h
-     info: APC UPS self-test failed due to insufficient battery capacity or due to overload.
-       to: sitemgr
-
-#Send out a warning when STATUS code is ONBATT,OVERLOAD,LOWBATT,REPLACEBATT,NOBATT,COMMLOST
-#https://man.archlinux.org/man/apcaccess.8.en#:~:text=apcupsd%20was%20started-,STATUS,-%3A%20UPS%20status.%20One
-
- template: apcupsd_status_onbatt
-       on: apcupsd.status
-   lookup: max -1s unaligned match-names of ONBATT
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: up 1m down 15m multiplier 1.5 max 1h
-     info: APC UPS has switched to battery power because the input power has failed
-       to: sitemgr
-
- template: apcupsd_status_overload
-       on: apcupsd.status
-   lookup: max -1s unaligned match-names of OVERLOAD
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: up 0 down 15m multiplier 1.5 max 1h
-     info: APC UPS is overloaded and cannot supply enough power to the load
-       to: sitemgr
-
- template: apcupsd_status_lowbatt
-       on: apcupsd.status
-   lookup: max -1s unaligned match-names of LOWBATT
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: up 0 down 15m multiplier 1.5 max 1h
-     info: APC UPS battery is low and needs to be recharged
-       to: sitemgr
-
- template: apcupsd_status_replacebatt
-       on: apcupsd.status
-   lookup: max -1s unaligned match-names of REPLACEBATT
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: up 0 down 15m multiplier 1.5 max 1h
-     info: APC UPS battery has reached the end of its lifespan and needs to be replaced
-       to: sitemgr
-
- template: apcupsd_status_nobatt
-       on: apcupsd.status
-   lookup: max -1s unaligned match-names of NOBATT
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: up 0 down 15m multiplier 1.5 max 1h
-     info: APC UPS has no battery
-       to: sitemgr
-
- template: apcupsd_status_commlost
-       on: apcupsd.status
-   lookup: max -1s unaligned match-names of COMMLOST
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: up 0 down 15m multiplier 1.5 max 1h
-     info: APC UPS communication link is lost
-       to: sitemgr
diff --git a/health/health.d/bcache.conf b/health/health.d/bcache.conf
deleted file mode 100644
index 446173428..000000000
--- a/health/health.d/bcache.conf
+++ /dev/null
@@ -1,31 +0,0 @@
-
- template: bcache_cache_errors
-       on: disk.bcache_cache_read_races
-    class: Errors
-     type: System
-component: Disk
-   lookup: sum -1m unaligned absolute
-    units: errors
-    every: 1m
-     warn: $this > 0
-    delay: up 2m down 1h multiplier 1.5 max 2h
-  summary: Bcache cache read race errors
-     info: Number of times data was read from the cache, \
-           the bucket was reused and invalidated in the last minute \
-           (when this occurs the data is reread from the backing device)
-       to: silent
-
- template: bcache_cache_dirty
-       on: disk.bcache_cache_alloc
-    class: Utilization
-     type: System
-component: Disk
-     calc: $dirty + $metadata + $undefined
-    units: %
-    every: 1m
-     warn: $this > 75
-    delay: up 1m down 1h multiplier 1.5 max 2h
-  summary: Bcache cache used space
-     info: Percentage of cache space used for dirty data and metadata \
-           (this usually means your SSD cache is too small)
-       to: silent
diff --git a/health/health.d/beanstalkd.conf b/health/health.d/beanstalkd.conf
deleted file mode 100644
index 0d37f28e0..000000000
--- a/health/health.d/beanstalkd.conf
+++ /dev/null
@@ -1,41 +0,0 @@
-# get the number of buried jobs in all queues
-
- template: beanstalk_server_buried_jobs
-       on: beanstalk.current_jobs
-    class: Workload
-     type: Messaging
-component: Beanstalk
-     calc: $buried
-    units: jobs
-    every: 10s
-     warn: $this > 3
-    delay: up 0 down 5m multiplier 1.2 max 1h
-  summary: Beanstalk buried jobs
-     info: Number of buried jobs across all tubes. \
-           You need to manually kick them so they can be processed. \
-           Presence of buried jobs in a tube does not affect new jobs.
-       to: sysadmin
-      
-# get the number of buried jobs per queue
-
-#template: beanstalk_tube_buried_jobs
-#      on: beanstalk.jobs
-#    calc: $buried
-#   units: jobs
-#   every: 10s
-#    warn: $this > 0
-#    crit: $this > 10
-#   delay: up 0 down 5m multiplier 1.2 max 1h
-#    info: the number of jobs buried per tube
-#      to: sysadmin
-
-# get the current number of tubes
-
-#template: beanstalk_number_of_tubes
-#      on: beanstalk.current_tubes
-#    calc: $tubes
-#   every: 10s
-#    warn: $this < 5
-#   delay: up 0 down 5m multiplier 1.2 max 1h
-#    info: the current number of tubes on the server
-#      to: sysadmin
diff --git a/health/health.d/bind_rndc.conf b/health/health.d/bind_rndc.conf
deleted file mode 100644
index b1c271df9..000000000
--- a/health/health.d/bind_rndc.conf
+++ /dev/null
@@ -1,12 +0,0 @@
- template: bind_rndc_stats_file_size
-       on: bind_rndc.stats_size
-    class: Utilization
-     type: DNS
-component: BIND
-    units: megabytes
-    every: 60
-     calc: $stats_size
-     warn: $this > 512
-  summary: BIND statistics file size
-     info: BIND statistics-file size
-       to: sysadmin
diff --git a/health/health.d/boinc.conf b/health/health.d/boinc.conf
deleted file mode 100644
index 092a56845..000000000
--- a/health/health.d/boinc.conf
+++ /dev/null
@@ -1,70 +0,0 @@
-# Alarms for various BOINC issues.
-
-# Warn on any compute errors encountered.
- template: boinc_compute_errors
-       on: boinc.states
-    class: Errors
-     type: Computing
-component: BOINC
-       os: *
-    hosts: *
-   lookup: average -10m unaligned of comperror
-    units: tasks
-    every: 1m
-     warn: $this > 0
-    delay: up 1m down 5m multiplier 1.5 max 1h
-  summary: BOINC compute errors
-     info: Average number of compute errors over the last 10 minutes
-       to: sysadmin
-
-# Warn on any failed uploads
- template: boinc_upload_errors
-       on: boinc.states
-    class: Errors
-     type: Computing
-component: BOINC
-       os: *
-    hosts: *
-   lookup: average -10m unaligned of upload_failed
-    units: tasks
-    every: 1m
-     warn: $this > 0
-    delay: up 1m down 5m multiplier 1.5 max 1h
-  summary: BOINC failed uploads
-     info: Average number of failed uploads over the last 10 minutes
-       to: sysadmin
-
-# Warn on the task queue being empty
- template: boinc_total_tasks
-       on: boinc.tasks
-    class: Utilization
-     type: Computing
-component: BOINC
-       os: *
-    hosts: *
-   lookup: average -10m unaligned of total
-    units: tasks
-    every: 1m
-     warn: $this < 1
-    delay: up 5m down 10m multiplier 1.5 max 1h
-  summary: BOINC total tasks
-     info: Average number of total tasks over the last 10 minutes
-       to: sysadmin
-
-# Warn on no active tasks with a non-empty queue
- template: boinc_active_tasks
-       on: boinc.tasks
-    class: Utilization
-     type: Computing
-component: BOINC
-       os: *
-    hosts: *
-   lookup: average -10m unaligned of active
-     calc: ($boinc_total_tasks >= 1) ? ($this) : (inf)
-    units: tasks
-    every: 1m
-     warn: $this < 1
-    delay: up 5m down 10m multiplier 1.5 max 1h
-  summary: BOINC active tasks
-     info: Average number of active tasks over the last 10 minutes
-       to: sysadmin
diff --git a/health/health.d/btrfs.conf b/health/health.d/btrfs.conf
deleted file mode 100644
index 1557a5941..000000000
--- a/health/health.d/btrfs.conf
+++ /dev/null
@@ -1,142 +0,0 @@
-
- template: btrfs_allocated
-       on: btrfs.disk
-    class: Utilization
-     type: System
-component: File system
-       os: *
-    hosts: *
-     calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free))
-    units: %
-    every: 10s
-     warn: $this > (($status == $CRITICAL) ? (95) : (98))
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS allocated space utilization
-     info: Percentage of allocated BTRFS physical disk space
-       to: silent
-
- template: btrfs_data
-       on: btrfs.data
-    class: Utilization
-     type: System
-component: File system
-       os: *
-    hosts: *
-     calc: $used * 100 / ($used + $free)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (90) : (95)) && $btrfs_allocated > 98
-     crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS data space utilization
-     info: Utilization of BTRFS data space
-       to: sysadmin
-
- template: btrfs_metadata
-       on: btrfs.metadata
-    class: Utilization
-     type: System
-component: File system
-       os: *
-    hosts: *
-     calc: ($used + $reserved) * 100 / ($used + $free + $reserved)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (90) : (95)) && $btrfs_allocated > 98
-     crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS metadata space utilization
-     info: Utilization of BTRFS metadata space
-       to: sysadmin
-
- template: btrfs_system
-       on: btrfs.system
-    class: Utilization
-     type: System
-component: File system
-       os: *
-    hosts: *
-     calc: $used * 100 / ($used + $free)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (90) : (95)) && $btrfs_allocated > 98
-     crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS system space utilization
-     info: Utilization of BTRFS system space
-       to: sysadmin
-
- template: btrfs_device_read_errors
-       on: btrfs.device_errors
-    class: Errors
-     type: System
-component: File system
-       os: *
-    hosts: *
-    units: errors
-   lookup: max -10m every 1m of read_errs
-     warn: $this > 0
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS device read errors
-     info: Number of encountered BTRFS read errors
-       to: sysadmin
-
- template: btrfs_device_write_errors
-       on: btrfs.device_errors
-    class: Errors
-     type: System
-component: File system
-       os: *
-    hosts: *
-    units: errors
-   lookup: max -10m every 1m of write_errs
-     crit: $this > 0
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS device write errors
-     info: Number of encountered BTRFS write errors
-       to: sysadmin
-
- template: btrfs_device_flush_errors
-       on: btrfs.device_errors
-    class: Errors
-     type: System
-component: File system
-       os: *
-    hosts: *
-    units: errors
-   lookup: max -10m every 1m of flush_errs
-     crit: $this > 0
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS device flush errors
-     info: Number of encountered BTRFS flush errors
-       to: sysadmin
-
- template: btrfs_device_corruption_errors
-       on: btrfs.device_errors
-    class: Errors
-     type: System
-component: File system
-       os: *
-    hosts: *
-    units: errors
-   lookup: max -10m every 1m of corruption_errs
-     warn: $this > 0
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS device corruption errors
-     info: Number of encountered BTRFS corruption errors
-       to: sysadmin
-
- template: btrfs_device_generation_errors
-       on: btrfs.device_errors
-    class: Errors
-     type: System
-component: File system
-       os: *
-    hosts: *
-    units: errors
-   lookup: max -10m every 1m of generation_errs
-     warn: $this > 0
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: BTRFS device generation errors
-     info: Number of encountered BTRFS generation errors
-       to: sysadmin
diff --git a/health/health.d/ceph.conf b/health/health.d/ceph.conf
deleted file mode 100644
index 44d351338..000000000
--- a/health/health.d/ceph.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-# low Ceph cluster disk space available
-
- template: ceph_cluster_space_usage
-       on: ceph.general_usage
-    class: Utilization
-     type: Storage
-component: Ceph
-     calc: $used * 100 / ($used + $avail)
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING ) ? (85) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 5m multiplier 1.2 max 1h
-  summary: Ceph cluster disk space utilization
-     info: Ceph cluster disk space utilization
-       to: sysadmin
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
deleted file mode 100644
index 9c55633ef..000000000
--- a/health/health.d/cgroups.conf
+++ /dev/null
@@ -1,72 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: cgroup_10min_cpu_usage
-       on: cgroup.cpu_limit
-    class: Utilization
-     type: Cgroups
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned
-    units: %
-    every: 1m
-     warn: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Cgroup ${label:cgroup_name} CPU utilization
-     info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes
-       to: silent
-
- template: cgroup_ram_in_use
-       on: cgroup.mem_usage
-    class: Utilization
-     type: Cgroups
-component: Memory
-       os: linux
-    hosts: *
-     calc: ($ram) * 100 / $memory_limit
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Cgroup ${label:cgroup_name} memory utilization
-     info: Cgroup ${label:cgroup_name} memory utilization
-       to: silent
-
-# ---------------------------------K8s containers--------------------------------------------
-
- template: k8s_cgroup_10min_cpu_usage
-       on: k8s.cgroup.cpu_limit
-    class: Utilization
-     type: Cgroups
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization
-     info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
-           average CPU utilization over the last 10 minutes
-       to: silent
-
- template: k8s_cgroup_ram_in_use
-       on: k8s.cgroup.mem_usage
-    class: Utilization
-     type: Cgroups
-component: Memory
-       os: linux
-    hosts: *
-     calc: ($ram) * 100 / $memory_limit
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization
-     info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
-           memory utilization
-       to: silent
diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf
deleted file mode 100644
index 60f178354..000000000
--- a/health/health.d/cockroachdb.conf
+++ /dev/null
@@ -1,78 +0,0 @@
-
-# Capacity
-
- template: cockroachdb_used_storage_capacity
-       on: cockroachdb.storage_used_capacity_percentage
-    class: Utilization
-     type: Database
-component: CockroachDB
-     calc: $total
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: CockroachDB storage space utilization
-     info: Storage capacity utilization
-       to: dba
-
- template: cockroachdb_used_usable_storage_capacity
-       on: cockroachdb.storage_used_capacity_percentage
-    class: Utilization
-     type: Database
-component: CockroachDB
-     calc: $usable
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: CockroachDB usable storage space utilization
-     info: Storage usable space utilization
-       to: dba
-
-# Replication
-
- template: cockroachdb_unavailable_ranges
-       on: cockroachdb.ranges_replication_problem
-    class: Errors
-     type: Database
-component: CockroachDB
-     calc: $unavailable
-    units: num
-    every: 10s
-     warn: $this > 0
-    delay: down 15m multiplier 1.5 max 1h
-  summary: CockroachDB unavailable ranges
-     info: Number of ranges with fewer live replicas than needed for quorum
-       to: dba
-
- template: cockroachdb_underreplicated_ranges
-       on: cockroachdb.ranges_replication_problem
-    class: Errors
-     type: Database
-component: CockroachDB
-     calc: $under_replicated
-    units: num
-    every: 10s
-     warn: $this > 0
-    delay: down 15m multiplier 1.5 max 1h
-  summary: CockroachDB under-replicated ranges
-     info: Number of ranges with fewer live replicas than the replication target
-       to: dba
-
-# FD
-
- template: cockroachdb_open_file_descriptors_limit
-       on: cockroachdb.process_file_descriptors
-    class: Utilization
-     type: Database
-component: CockroachDB
-     calc: $open/$sys_fd_softlimit * 100
-    units: %
-    every: 10s
-     warn: $this > 80
-    delay: down 15m multiplier 1.5 max 1h
-  summary: CockroachDB file descriptors utilization
-     info: Open file descriptors utilization (against softlimit)
-       to: dba
diff --git a/health/health.d/consul.conf b/health/health.d/consul.conf
deleted file mode 100644
index 8b414a26d..000000000
--- a/health/health.d/consul.conf
+++ /dev/null
@@ -1,171 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: consul_license_expiration_time
-       on: consul.license_expiration_time
-    class: Errors
-     type: ServiceMesh
-component: Consul
-     calc: $license_expiration
-    every: 60m
-    units: seconds
-     warn: $this < 14*24*60*60
-     crit: $this < 7*24*60*60
-  summary: Consul license expiration on ${label:node_name}
-     info: Consul Enterprise license expiration time on node ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: consul_autopilot_health_status
-       on: consul.autopilot_health_status
-    class: Errors
-     type: ServiceMesh
-component: Consul
-     calc: $unhealthy
-    every: 10s
-    units: status
-     warn: $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul datacenter ${label:datacenter} health
-     info: Datacenter ${label:datacenter} cluster is unhealthy as reported by server ${label:node_name}
-       to: sysadmin
-
- template: consul_autopilot_server_health_status
-       on: consul.autopilot_server_health_status
-    class: Errors
-     type: ServiceMesh
-component: Consul
-     calc: $unhealthy
-    every: 10s
-    units: status
-     warn: $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul server ${label:node_name} health
-     info: Server ${label:node_name} from datacenter ${label:datacenter} is unhealthy
-       to: sysadmin
-
- template: consul_raft_leader_last_contact_time
-       on: consul.raft_leader_last_contact_time
-    class: Errors
-     type: ServiceMesh
-component: Consul
-   lookup: average -1m unaligned of quantile_0.5
-    every: 10s
-    units: milliseconds
-     warn: $this > (($status >= $WARNING)  ? (150) : (200))
-     crit: $this > (($status == $CRITICAL) ? (200) : (500))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul leader server ${label:node_name} last contact time
-     info: Median time elapsed since leader server ${label:node_name} datacenter ${label:datacenter} was last able to contact the follower nodes
-       to: sysadmin
-
- template: consul_raft_leadership_transitions
-       on: consul.raft_leadership_transitions_rate
-    class: Errors
-     type: ServiceMesh
-component: Consul
-   lookup: sum -1m unaligned
-    every: 10s
-    units: transitions
-     warn: $this > 0
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul server ${label:node_name} leadership transitions
-     info: There has been a leadership change and server ${label:node_name} datacenter ${label:datacenter} has become the leader
-       to: sysadmin
-
- template: consul_raft_thread_main_saturation
-       on: consul.raft_thread_main_saturation_perc
-    class: Utilization
-     type: ServiceMesh
-component: Consul
-   lookup: average -1m unaligned of quantile_0.9
-    every: 10s
-    units: percentage
-     warn: $this > (($status >= $WARNING)  ? (40) : (50))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul server ${label:node_name} main Raft saturation
-     info: Average saturation of the main Raft goroutine on server ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: consul_raft_thread_fsm_saturation
-       on: consul.raft_thread_fsm_saturation_perc
-    class: Utilization
-     type: ServiceMesh
-component: Consul
-   lookup: average -1m unaligned of quantile_0.9
-    every: 10s
-    units: percentage
-     warn: $this > (($status >= $WARNING)  ? (40) : (50))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul server ${label:node_name} FSM Raft saturation
-     info: Average saturation of the FSM Raft goroutine on server ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: consul_client_rpc_requests_exceeded
-       on: consul.client_rpc_requests_exceeded_rate
-    class: Errors
-     type: ServiceMesh
-component: Consul
-   lookup: sum -1m unaligned
-    every: 10s
-    units: requests
-     warn: $this > (($status >= $WARNING)  ? (0) : (5))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul server ${label:node_name} RPC requests rate
-     info: Number of rate-limited RPC requests made by server ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: consul_client_rpc_requests_failed
-       on: consul.client_rpc_requests_failed_rate
-    class: Errors
-     type: ServiceMesh
-component: Consul
-   lookup: sum -1m unaligned
-    every: 10s
-    units: requests
-     warn: $this > (($status >= $WARNING)  ? (0) : (5))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul server ${label:node_name} failed RPC requests
-     info: Number of failed RPC requests made by server ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: consul_node_health_check_status
-       on: consul.node_health_check_status
-    class: Errors
-     type: ServiceMesh
-component: Consul
-     calc: $warning + $critical
-    every: 10s
-    units: status
-     warn: $this != nan AND $this != 0
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul node health check ${label:check_name} on ${label:node_name}
-     info: Node health check ${label:check_name} has failed on server ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: consul_service_health_check_status
-       on: consul.service_health_check_status
-    class: Errors
-     type: ServiceMesh
-component: Consul
-     calc: $warning + $critical
-    every: 10s
-    units: status
-     warn: $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul service health check ${label:check_name} service ${label:service_name} node ${label:node_name}
-     info: Service health check ${label:check_name} for service ${label:service_name} has failed on server ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: consul_gc_pause_time
-       on: consul.gc_pause_time
-    class: Errors
-     type: ServiceMesh
-component: Consul
-   lookup: sum -1m unaligned
-    every: 10s
-    units: seconds
-     warn: $this > (($status >= $WARNING)  ? (1) : (2))
-     crit: $this > (($status >= $WARNING)  ? (2) : (5))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Consul server ${label:node_name} garbage collection pauses
-     info: Time spent in stop-the-world garbage collection pauses on server ${label:node_name} datacenter ${label:datacenter}
-       to: sysadmin
diff --git a/health/health.d/cpu.conf b/health/health.d/cpu.conf
deleted file mode 100644
index 0b007d6b4..000000000
--- a/health/health.d/cpu.conf
+++ /dev/null
@@ -1,69 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: 10min_cpu_usage
-       on: system.cpu
-    class: Utilization
-     type: System
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned of user,system,softirq,irq,guest
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System CPU utilization
-     info: Average CPU utilization over the last 10 minutes (excluding iowait, nice and steal)
-       to: silent
-
- template: 10min_cpu_iowait
-       on: system.cpu
-    class: Utilization
-     type: System
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned of iowait
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (20) : (40))
-    delay: up 30m down 30m multiplier 1.5 max 2h
-  summary: System CPU iowait time
-     info: Average CPU iowait time over the last 10 minutes
-       to: silent
-
- template: 20min_steal_cpu
-       on: system.cpu
-    class: Latency
-     type: System
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -20m unaligned of steal
-    units: %
-    every: 5m
-     warn: $this > (($status >= $WARNING)  ? (5)  : (10))
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System CPU steal time
-     info: Average CPU steal time over the last 20 minutes
-       to: silent
-
-## FreeBSD
- template: 10min_cpu_usage
-       on: system.cpu
-    class: Utilization
-     type: System
-component: CPU
-       os: freebsd
-    hosts: *
-   lookup: average -10m unaligned of user,system,interrupt
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System CPU utilization
-     info: Average CPU utilization over the last 10 minutes (excluding nice)
-       to: silent
diff --git a/health/health.d/dbengine.conf b/health/health.d/dbengine.conf
deleted file mode 100644
index 0a70d2e8f..000000000
--- a/health/health.d/dbengine.conf
+++ /dev/null
@@ -1,68 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-    alarm: 10min_dbengine_global_fs_errors
-       on: netdata.dbengine_global_errors
-    class: Errors
-     type: Netdata
-component: DB engine
-       os: linux freebsd macos
-    hosts: *
-   lookup: sum -10m unaligned of fs_errors
-    units: errors
-    every: 10s
-     crit: $this > 0
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Netdata DBengine filesystem errors
-     info: Number of filesystem errors in the last 10 minutes (too many open files, wrong permissions, etc)
-       to: sysadmin
-
-    alarm: 10min_dbengine_global_io_errors
-       on: netdata.dbengine_global_errors
-    class: Errors
-     type: Netdata
-component: DB engine
-       os: linux freebsd macos
-    hosts: *
-   lookup: sum -10m unaligned of io_errors
-    units: errors
-    every: 10s
-     crit: $this > 0
-    delay: down 1h multiplier 1.5 max 3h
-  summary: Netdata DBengine IO errors
-     info: Number of IO errors in the last 10 minutes (CRC errors, out of space, bad disk, etc)
-       to: sysadmin
-
-    alarm: 10min_dbengine_global_flushing_warnings
-       on: netdata.dbengine_global_errors
-    class: Errors
-     type: Netdata
-component: DB engine
-       os: linux freebsd macos
-    hosts: *
-   lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
-    units: errors
-    every: 10s
-     warn: $this > 0
-    delay: down 1h multiplier 1.5 max 3h
-  summary: Netdata DBengine global flushing warnings
-     info: number of times when dbengine dirty pages were over 50% of the instance's page cache in the last 10 minutes. \
-           Metric data are at risk of not being stored in the database. To remedy, reduce disk load or use faster disks.
-       to: sysadmin
-
-    alarm: 10min_dbengine_global_flushing_errors
-       on: netdata.dbengine_long_term_page_stats
-    class: Errors
-     type: Netdata
-component: DB engine
-       os: linux freebsd macos
-    hosts: *
-   lookup: sum -10m unaligned of flushing_pressure_deletions
-    units: pages
-    every: 10s
-     crit: $this != 0
-    delay: down 1h multiplier 1.5 max 3h
-  summary: Netdata DBengine global flushing errors
-     info: Number of pages deleted due to failure to flush data to disk in the last 10 minutes. \
-           Metric data were lost to unblock data collection. To fix, reduce disk load or use faster disks.
-       to: sysadmin
diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf
deleted file mode 100644
index 2e417fd4a..000000000
--- a/health/health.d/disks.conf
+++ /dev/null
@@ -1,172 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-
-# -----------------------------------------------------------------------------
-# low disk space
-
-# checking the latest collected values
-# raise an alarm if the disk is low on
-# available disk space
-
- template: disk_space_usage
-       on: disk.space
-    class: Utilization
-     type: System
-component: Disk
-       os: linux freebsd
-    hosts: *
-chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
-     calc: $used * 100 / ($avail + $used)
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING ) ? (80) : (90))
-     crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: Disk ${label:mount_point} space usage
-     info: Total space utilization of disk ${label:mount_point}
-       to: sysadmin
-
- template: disk_inode_usage
-       on: disk.inodes
-    class: Utilization
-     type: System
-component: Disk
-       os: linux freebsd
-    hosts: *
-chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
-     calc: $used * 100 / ($avail + $used)
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: Disk ${label:mount_point} inode usage
-     info: Total inode utilization of disk ${label:mount_point}
-       to: sysadmin
-
-
-# -----------------------------------------------------------------------------
-# disk fill rate
-
-# calculate the rate the disk fills
-# use as base, the available space change
-# during the last hour
-
-# this is just a calculation - it has no alarm
-# we will use it in the next template to find
-# the hours remaining
-
-template: disk_fill_rate
-      on: disk.space
-      os: linux freebsd
-   hosts: *
-  lookup: min -10m at -50m unaligned of avail
-    calc: ($this - $avail) / (($now - $after) / 3600)
-   every: 1m
-   units: GB/hour
-    info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
-
-# calculate the hours remaining
-# if the disk continues to fill
-# at this rate
-
-template: out_of_disk_space_time
-      on: disk.space
-      os: linux freebsd
-   hosts: *
-    calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
-   units: hours
-   every: 10s
-    warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
-    crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
-   delay: down 15m multiplier 1.2 max 1h
- summary: Disk ${label:mount_point} estimation of lack of space
-    info: Estimated time the disk ${label:mount_point} will run out of space, if the system continues to add data with the rate of the last hour
-      to: silent
-
-
-# -----------------------------------------------------------------------------
-# disk inode fill rate
-
-# calculate the rate the disk inodes are allocated
-# use as base, the available inodes change
-# during the last hour
-
-# this is just a calculation - it has no alarm
-# we will use it in the next template to find
-# the hours remaining
-
-template: disk_inode_rate
-      on: disk.inodes
-      os: linux freebsd
-   hosts: *
-  lookup: min -10m at -50m unaligned of avail
-    calc: ($this - $avail) / (($now - $after) / 3600)
-   every: 1m
-   units: inodes/hour
-    info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
-
-# calculate the hours remaining
-# if the disk inodes are allocated
-# at this rate
-
-template: out_of_disk_inodes_time
-      on: disk.inodes
-      os: linux freebsd
-   hosts: *
-    calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
-   units: hours
-   every: 10s
-    warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
-    crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
-   delay: down 15m multiplier 1.2 max 1h
- summary: Disk ${label:mount_point} estimation of lack of inodes
-    info: Estimated time the disk ${label:mount_point} will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
-      to: silent
-
-
-# -----------------------------------------------------------------------------
-# disk congestion
-
-# raise an alarm if the disk is congested
-# by calculating the average disk utilization
-# for the last 10 minutes
-
- template: 10min_disk_utilization
-       on: disk.util
-    class: Utilization
-     type: System
-component: Disk
-       os: linux freebsd
-    hosts: *
-   lookup: average -10m unaligned
-    units: %
-    every: 1m
-     warn: $this > 98 * (($status >= $WARNING)  ? (0.7) : (1))
-    delay: down 15m multiplier 1.2 max 1h
-  summary: Disk ${label:device} utilization
-     info: Average percentage of time ${label:device} disk was busy over the last 10 minutes
-       to: silent
-
-
-# raise an alarm if the disk backlog
-# is above 5000ms (5s) per second
-# for 10 minutes
-# (i.e. the disk cannot catch up)
-
- template: 10min_disk_backlog
-       on: disk.backlog
-    class: Latency
-     type: System
-component: Disk
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned
-    units: ms
-    every: 1m
-     warn: $this > 5000 * (($status >= $WARNING)  ? (0.7) : (1))
-    delay: down 15m multiplier 1.2 max 1h
-  summary: Disk ${label:device} backlog
-     info: Average backlog size of the ${label:device} disk over the last 10 minutes
-       to: silent
diff --git a/health/health.d/dns_query.conf b/health/health.d/dns_query.conf
deleted file mode 100644
index 756c6a1b6..000000000
--- a/health/health.d/dns_query.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-# detect dns query failure
-
- template: dns_query_query_status
-       on: dns_query.query_status
-    class: Errors
-     type: DNS
-component: DNS
-     calc: $success
-    units: status
-    every: 10s
-     warn: $this != nan && $this != 1
-    delay: up 30s down 5m multiplier 1.5 max 1h
-  summary: DNS query unsuccessful requests to ${label:server}
-     info: DNS request type ${label:record_type} to server ${label:server} is unsuccessful
-       to: sysadmin
diff --git a/health/health.d/dnsmasq_dhcp.conf b/health/health.d/dnsmasq_dhcp.conf
deleted file mode 100644
index f6ef01940..000000000
--- a/health/health.d/dnsmasq_dhcp.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-# dhcp-range utilization
-
- template: dnsmasq_dhcp_dhcp_range_utilization
-       on: dnsmasq_dhcp.dhcp_range_utilization
-    class: Utilization
-     type: DHCP
-component: Dnsmasq
-    every: 10s
-    units: %
-     calc: $used
-     warn: $this > ( ($status >= $WARNING ) ? ( 80 ) : ( 90 ) )
-    delay: down 5m
-  summary: Dnsmasq DHCP range ${label:dhcp_range} utilization
-     info: DHCP range ${label:dhcp_range} utilization
-       to: sysadmin
diff --git a/health/health.d/docker.conf b/health/health.d/docker.conf
deleted file mode 100644
index 668614d4d..000000000
--- a/health/health.d/docker.conf
+++ /dev/null
@@ -1,12 +0,0 @@
- template: docker_container_unhealthy
-       on: docker.container_health_status
-    class: Errors
-     type: Containers
-component: Docker
-    units: status
-    every: 10s
-   lookup: average -10s of unhealthy
-     warn: $this > 0
-  summary: Docker container ${label:container_name} health
-     info: ${label:container_name} docker container health status is unhealthy
-       to: sysadmin
diff --git a/health/health.d/elasticsearch.conf b/health/health.d/elasticsearch.conf
deleted file mode 100644
index 600840c58..000000000
--- a/health/health.d/elasticsearch.conf
+++ /dev/null
@@ -1,78 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# 'red' is a threshold, can't lookup the 'red' dimension - using simple pattern is a workaround.
-
- template: elasticsearch_cluster_health_status_red
-       on: elasticsearch.cluster_health_status
-    class: Errors
-     type: SearchEngine
-component: Elasticsearch
-   lookup: average -5s unaligned of *ed
-    every: 10s
-    units: status
-     crit: $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Elasticsearch cluster ${label:cluster_name} status
-     info: Elasticsearch cluster ${label:cluster_name} health status is red.
-       to: sysadmin
-
-# the idea of '-10m' is to handle yellow status after node restart,
-# (usually) no action is required because Elasticsearch will automatically restore the green status.
- template: elasticsearch_cluster_health_status_yellow
-       on: elasticsearch.cluster_health_status
-    class: Errors
-     type: SearchEngine
-component: Elasticsearch
-   lookup: average -10m unaligned of yellow
-    every: 1m
-    units: status
-     warn: $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Elasticsearch cluster ${label:cluster_name} status
-     info: Elasticsearch cluster ${label:cluster_name} health status is yellow.
-       to: sysadmin
-
- template: elasticsearch_node_index_health_red
-       on: elasticsearch.node_index_health
-    class: Errors
-     type: SearchEngine
-component: Elasticsearch
-   lookup: average -5s unaligned of *ed
-    every: 10s
-    units: status
-     warn: $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Elasticsearch cluster ${label:cluster_name} index ${label:index} status
-     info: Elasticsearch cluster ${label:cluster_name} index ${label:index} health status is red.
-       to: sysadmin
-
-# don't convert 'lookup' value to seconds in 'calc' due to UI showing seconds as hh:mm:ss (0 as now).
-
- template: elasticsearch_node_indices_search_time_query
-       on: elasticsearch.node_indices_search_time
-    class: Workload
-     type: SearchEngine
-component: Elasticsearch
-   lookup: average -10m unaligned of query
-    every: 10s
-    units: milliseconds
-     warn: $this > (($status >= $WARNING)  ? (20 * 1000) : (30 * 1000))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} query performance
-     info: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} search performance is degraded, queries run slowly.
-       to: sysadmin
-
- template: elasticsearch_node_indices_search_time_fetch
-       on: elasticsearch.node_indices_search_time
-    class: Workload
-     type: SearchEngine
-component: Elasticsearch
-   lookup: average -10m unaligned of fetch
-    every: 10s
-    units: milliseconds
-     warn: $this > (($status >= $WARNING)  ? (3 * 1000) : (5 * 1000))
-     crit: $this > (($status == $CRITICAL) ? (5 * 1000) : (30 * 1000))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} fetch performance
-     info: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} search performance is degraded, fetches run slowly.
-       to: sysadmin
diff --git a/health/health.d/entropy.conf b/health/health.d/entropy.conf
deleted file mode 100644
index be8b1fe4f..000000000
--- a/health/health.d/entropy.conf
+++ /dev/null
@@ -1,20 +0,0 @@
-
-# check if entropy is too low
-# the alarm is checked every 5 minutes
-# and examines the last 5 minutes of data
-
-    alarm: lowest_entropy
-       on: system.entropy
-    class: Utilization
-     type: System
-component: Cryptography
-       os: linux
-    hosts: *
-   lookup: min -5m unaligned
-    units: entries
-    every: 5m
-     warn: $this < (($status >= $WARNING) ? (200) : (100))
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System entropy pool number of entries
-     info: Minimum number of entries in the random numbers pool in the last 5 minutes
-       to: silent
diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf
deleted file mode 100644
index c0320193c..000000000
--- a/health/health.d/exporting.conf
+++ /dev/null
@@ -1,29 +0,0 @@
-
- template: exporting_last_buffering
-       on: netdata.exporting_data_size
-    class: Latency
-     type: Netdata
-component: Exporting engine
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Netdata exporting data last successful buffering
-     info: Number of seconds since the last successful buffering of exporting data
-       to: dba
-
- template: exporting_metrics_sent
-       on: netdata.exporting_data_size
-    class: Workload
-     type: Netdata
-component: Exporting engine
-    units: %
-     calc: abs($sent) * 100 / abs($buffered)
-    every: 10s
-     warn: $this != 100
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Netdata exporting metrics sent
-     info: Percentage of metrics sent to the external database server
-       to: dba
diff --git a/health/health.d/file_descriptors.conf b/health/health.d/file_descriptors.conf
deleted file mode 100644
index 20a592d6b..000000000
--- a/health/health.d/file_descriptors.conf
+++ /dev/null
@@ -1,33 +0,0 @@
- # you can disable an alarm notification by setting the 'to' line to: silent
-
-  template: system_file_descriptors_utilization
-        on: system.file_nr_utilization
-     class: Utilization
-      type: System
- component: Processes
-     hosts: *
-    lookup: max -1m unaligned
-     units: %
-     every: 1m
-      crit: $this > 90
-     delay: down 15m multiplier 1.5 max 1h
-   summary: System open file descriptors utilization
-      info: System-wide utilization of open files
-        to: sysadmin
-
- template: apps_group_file_descriptors_utilization
-       on: app.fds_open_limit
-    class: Utilization
-     type: System
-component: Process
-       os: linux
-   module: *
-    hosts: *
-   lookup: max -10s unaligned foreach *
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: App group ${label:app_group} file descriptors utilization
-     info: Open files percentage against the processes limits, among all PIDs in application group
-       to: sysadmin
diff --git a/health/health.d/gearman.conf b/health/health.d/gearman.conf
deleted file mode 100644
index 78e1165d1..000000000
--- a/health/health.d/gearman.conf
+++ /dev/null
@@ -1,14 +0,0 @@
-
- template: gearman_workers_queued
-       on: gearman.single_job
-    class: Latency
-     type: Computing
-component: Gearman
-   lookup: average -10m unaligned match-names of Pending
-    units: workers
-    every: 10s
-     warn: $this > 30000
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Gearman queued jobs
-     info: Average number of queued jobs over the last 10 minutes
-       to: sysadmin
diff --git a/health/health.d/geth.conf b/health/health.d/geth.conf
deleted file mode 100644
index 361b6b41f..000000000
--- a/health/health.d/geth.conf
+++ /dev/null
@@ -1,11 +0,0 @@
-# chainhead_header is expected to be momentarily ahead. If it's considerably ahead (e.g. more than 5 blocks), then the node is definitely out of sync.
- template: geth_chainhead_diff_between_header_block
-       on: geth.chainhead
-    class: Workload
-     type: ethereum_node
-component: geth
-    every: 10s
-     calc: $chain_head_block -  $chain_head_header
-    units: blocks
-     warn: $this != 0
-    delay: down 1m multiplier 1.5 max 1h
diff --git a/health/health.d/go.d.plugin.conf b/health/health.d/go.d.plugin.conf
deleted file mode 100644
index 7796a1bc8..000000000
--- a/health/health.d/go.d.plugin.conf
+++ /dev/null
@@ -1,18 +0,0 @@
-
-# make sure go.d.plugin data collection job is running
-
- template: go.d_job_last_collected_secs
-       on: netdata.go_plugin_execution_time
-    class: Errors
-     type: Netdata
-component: go.d.plugin
-   module: !* *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Go.d plugin last collection
-     info: Number of seconds since the last successful data collection
-       to: webmaster
diff --git a/health/health.d/haproxy.conf b/health/health.d/haproxy.conf
deleted file mode 100644
index 66a488fa4..000000000
--- a/health/health.d/haproxy.conf
+++ /dev/null
@@ -1,25 +0,0 @@
- template: haproxy_backend_server_status
-       on: haproxy_hs.down
-    class: Errors
-     type: Web Proxy
-component: HAProxy
-    units: failed servers
-    every: 10s
-   lookup: average -10s
-     crit: $this > 0
-  summary: HAProxy server status
-     info: Average number of failed haproxy backend servers over the last 10 seconds
-       to: sysadmin
-
- template: haproxy_backend_status
-       on: haproxy_hb.down
-    class: Errors
-     type: Web Proxy
-component: HAProxy
-    units: failed backend
-    every: 10s
-   lookup: average -10s
-     crit: $this > 0
-  summary: HAProxy backend status
-     info: Average number of failed haproxy backends over the last 10 seconds
-       to: sysadmin
diff --git a/health/health.d/hdfs.conf b/health/health.d/hdfs.conf
deleted file mode 100644
index 566e815aa..000000000
--- a/health/health.d/hdfs.conf
+++ /dev/null
@@ -1,81 +0,0 @@
-
-# Common
-
- template: hdfs_capacity_usage
-       on: hdfs.capacity
-    class: Utilization
-     type: Storage
-component: HDFS
-     calc: ($used) * 100 / ($used + $remaining)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (70) : (80))
-     crit: $this > (($status == $CRITICAL) ? (80) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: HDFS datanodes space utilization
-     info: summary datanodes space capacity utilization
-       to: sysadmin
-
-
-# NameNode
-
- template: hdfs_missing_blocks
-       on: hdfs.blocks
-    class: Errors
-     type: Storage
-component: HDFS
-     calc: $missing
-    units: missing blocks
-    every: 10s
-     warn: $this > 0
-    delay: down 15m multiplier 1.5 max 1h
-  summary: HDFS missing blocks
-     info: number of missing blocks
-       to: sysadmin
-
-
- template: hdfs_stale_nodes
-       on: hdfs.data_nodes
-    class: Errors
-     type: Storage
-component: HDFS
-     calc: $stale
-    units: dead nodes
-    every: 10s
-     warn: $this > 0
-    delay: down 15m multiplier 1.5 max 1h
-  summary: HDFS stale datanodes
-     info: number of datanodes marked stale due to delayed heartbeat
-       to: sysadmin
-
-
- template: hdfs_dead_nodes
-       on: hdfs.data_nodes
-    class: Errors
-     type: Storage
-component: HDFS
-     calc: $dead
-    units: dead nodes
-    every: 10s
-     crit: $this > 0
-    delay: down 15m multiplier 1.5 max 1h
-  summary: HDFS dead datanodes
-     info: number of datanodes which are currently dead
-       to: sysadmin
-
-
-# DataNode
-
- template: hdfs_num_failed_volumes
-       on: hdfs.num_failed_volumes
-    class: Errors
-     type: Storage
-component: HDFS
-     calc: $fsds_num_failed_volumes
-    units: failed volumes
-    every: 10s
-     warn: $this > 0
-    delay: down 15m multiplier 1.5 max 1h
-  summary: HDFS failed volumes
-     info: number of failed volumes
-       to: sysadmin
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf
deleted file mode 100644
index da5dec797..000000000
--- a/health/health.d/httpcheck.conf
+++ /dev/null
@@ -1,73 +0,0 @@
-
-# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
- template: httpcheck_web_service_up
-       on: httpcheck.status
-    class: Utilization
-     type: Web Server
-component: HTTP endpoint
-   lookup: average -1m unaligned percentage of success
-     calc: ($this < 75) ? (0) : ($this)
-    every: 5s
-    units: up/down
-     info: HTTP check endpoint ${label:url} liveness status
-       to: silent
-
- template: httpcheck_web_service_bad_content
-       on: httpcheck.status
-    class: Workload
-     type: Web Server
-component: HTTP endpoint
-   lookup: average -5m unaligned percentage of bad_content
-    every: 10s
-    units: %
-     warn: $this >= 10 AND $this < 40
-     crit: $this >= 40
-    delay: down 5m multiplier 1.5 max 1h
-  summary: HTTP check for ${label:url} unexpected content
-     info: Percentage of HTTP responses from ${label:url} with unexpected content in the last 5 minutes
-       to: webmaster
-
- template: httpcheck_web_service_bad_status
-       on: httpcheck.status
-    class: Workload
-     type: Web Server
-component: HTTP endpoint
-   lookup: average -5m unaligned percentage of bad_status
-    every: 10s
-    units: %
-     warn: $this >= 10 AND $this < 40
-     crit: $this >= 40
-    delay: down 5m multiplier 1.5 max 1h
-  summary: HTTP check for ${label:url} unexpected status
-     info: Percentage of HTTP responses from ${label:url} with unexpected status in the last 5 minutes
-       to: webmaster
-
- template: httpcheck_web_service_timeouts
-       on: httpcheck.status
-    class: Latency
-     type: Web Server
-component: HTTP endpoint
-   lookup: average -5m unaligned percentage of timeout
-    every: 10s
-    units: %
-     warn: $this >= 10 AND $this < 40
-     crit: $this >= 40
-    delay: down 5m multiplier 1.5 max 1h
-  summary: HTTP check for ${label:url} timeouts
-     info: Percentage of timed-out HTTP requests to ${label:url} in the last 5 minutes
-       to: webmaster
-
- template: httpcheck_web_service_no_connection
-       on: httpcheck.status
-    class: Errors
-     type: Other
-component: HTTP endpoint
-   lookup: average -5m unaligned percentage of no_connection
-    every: 10s
-    units: %
-     warn: $this >= 10 AND $this < 40
-     crit: $this >= 40
-    delay: down 5m multiplier 1.5 max 1h
-  summary: HTTP check for ${label:url} failed requests
-     info: Percentage of failed HTTP requests to ${label:url} in the last 5 minutes
-       to: webmaster
diff --git a/health/health.d/ioping.conf b/health/health.d/ioping.conf
deleted file mode 100644
index 6d832bf00..000000000
--- a/health/health.d/ioping.conf
+++ /dev/null
@@ -1,14 +0,0 @@
- template: ioping_disk_latency
-       on: ioping.latency
-    class: Latency
-     type: System
-component: Disk
-   lookup: average -10s unaligned of latency
-    units: microseconds
-    every: 10s
-    green: 10000
-     warn: $this > $green
-    delay: down 30m multiplier 1.5 max 2h
-  summary: IO ping latency
-     info: Average I/O latency over the last 10 seconds
-       to: silent
diff --git a/health/health.d/ipc.conf b/health/health.d/ipc.conf
deleted file mode 100644
index f77f56065..000000000
--- a/health/health.d/ipc.conf
+++ /dev/null
@@ -1,34 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-    alarm: semaphores_used
-       on: system.ipc_semaphores
-    class: Utilization
-     type: System
-component: IPC
-       os: linux
-    hosts: *
-     calc: $semaphores * 100 / $ipc_semaphores_max
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (70) : (80))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: IPC semaphores used
-     info: IPC semaphore utilization
-       to: sysadmin
-
-    alarm: semaphore_arrays_used
-       on: system.ipc_semaphore_arrays
-    class: Utilization
-     type: System
-component: IPC
-       os: linux
-    hosts: *
-     calc: $arrays * 100 / $ipc_semaphores_arrays_max
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (70) : (80))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: IPC semaphore arrays used
-     info: IPC semaphore arrays utilization
-       to: sysadmin
diff --git a/health/health.d/ipfs.conf b/health/health.d/ipfs.conf
deleted file mode 100644
index 4dfee3c7f..000000000
--- a/health/health.d/ipfs.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-
- template: ipfs_datastore_usage
-       on: ipfs.repo_size
-    class: Utilization
-     type: Data Sharing
-component: IPFS
-     calc: $size * 100 / $avail
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: IPFS datastore utilization
-     info: IPFS datastore utilization
-       to: sysadmin
diff --git a/health/health.d/ipmi.conf b/health/health.d/ipmi.conf
deleted file mode 100644
index cec2320a9..000000000
--- a/health/health.d/ipmi.conf
+++ /dev/null
@@ -1,28 +0,0 @@
- template: ipmi_sensor_state
-       on: ipmi.sensor_state
-    class: Errors
-     type: System
-component: IPMI
-     calc: $warning + $critical
-    units: state
-    every: 10s
-     warn: $warning > 0
-     crit: $critical > 0
-    delay: up 5m down 15m multiplier 1.5 max 1h
-  summary: IPMI sensor ${label:sensor} state
-     info: IPMI sensor ${label:sensor} (${label:component}) state
-       to: sysadmin
-
-    alarm: ipmi_events
-       on: ipmi.events
-    class: Utilization
-     type: System
-component: IPMI
-     calc: $events
-    units: events
-    every: 30s
-     warn: $this > 0
-    delay: up 5m down 15m multiplier 1.5 max 1h
-  summary: IPMI entries in System Event Log
-     info: number of events in the IPMI System Event Log (SEL)
-       to: silent
diff --git a/health/health.d/isc_dhcpd.conf b/health/health.d/isc_dhcpd.conf
deleted file mode 100644
index d1f93969a..000000000
--- a/health/health.d/isc_dhcpd.conf
+++ /dev/null
@@ -1,10 +0,0 @@
-# template: isc_dhcpd_leases_size
-#      on: isc_dhcpd.leases_total
-#   units: KB
-#   every: 60
-#    calc: $leases_size
-#    warn: $this > 3072
-#    crit: $this > 6144
-#   delay: up 2m down 5m
-#    info: dhcpd.leases file too big! Module can slow down your server.
-#      to: sysadmin
diff --git a/health/health.d/kubelet.conf b/health/health.d/kubelet.conf
deleted file mode 100644
index 8adf5f7d4..000000000
--- a/health/health.d/kubelet.conf
+++ /dev/null
@@ -1,151 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# -----------------------------------------------------------------------------
-
-# True (1) if the node is experiencing a configuration-related error, false (0) otherwise.
-
- template: kubelet_node_config_error
-       on: k8s_kubelet.kubelet_node_config_error
-    class: Errors
-     type: Kubernetes
-component: Kubelet
-     calc: $experiencing_error
-    units: bool
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 2h
-  summary: Kubelet node config error
-     info: The node is experiencing a configuration-related error (0: false, 1: true)
-       to: sysadmin
-
-# Failed Token() requests to the alternate token source
-
- template: kubelet_token_requests
-       on: k8s_kubelet.kubelet_token_requests
-    class: Errors
-     type: Kubernetes
-component: Kubelet
-   lookup: sum -10s of failed
-    units: requests
-    every: 10s
-     warn: $this > 0
-    delay: down 1m multiplier 1.5 max 2h
-  summary: Kubelet failed token requests
-     info: Number of failed Token() requests to the alternate token source
-       to: sysadmin
-
-# Docker and runtime operation errors
-
- template: kubelet_operations_error
-       on: k8s_kubelet.kubelet_operations_errors
-    class: Errors
-     type: Kubernetes
-component: Kubelet
-   lookup: sum -1m
-    units: errors
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (0) : (20))
-    delay: up 30s down 1m multiplier 1.5 max 2h
-  summary: Kubelet runtime errors
-     info: Number of Docker or runtime operation errors
-       to: sysadmin
-
-# -----------------------------------------------------------------------------
-
-# Pod Lifecycle Event Generator Relisting Latency
-
-# 1. calculate the pleg relisting latency for 1m (quantile 0.5, quantile 0.9, quantile 0.99)
-# 2. do the same for the last 10s
-# 3. raise an alarm if the latter is:
-# - 2x the first for quantile 0.5
-# - 4x the first for quantile 0.9
-# - 8x the first for quantile 0.99
-#
-# we assume the minimum latency is 1000 microseconds
-
-# quantile 0.5
-
- template: kubelet_1m_pleg_relist_latency_quantile_05
-       on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Latency
-     type: Kubernetes
-component: Kubelet
-   lookup: average -1m unaligned of 0.5
-    units: microseconds
-    every: 10s
-     info: average Pod Lifecycle Event Generator relisting latency over the last minute (quantile 0.5)
-
- template: kubelet_10s_pleg_relist_latency_quantile_05
-       on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Latency
-     type: Kubernetes
-component: Kubelet
-   lookup: average -10s unaligned of 0.5
-     calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_05 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_05))
-    every: 10s
-    units: %
-     warn: $this > (($status >= $WARNING)?(100):(200))
-     crit: $this > (($status >= $WARNING)?(200):(400))
-    delay: down 1m multiplier 1.5 max 2h
-  summary: Kubelet relisting latency (quantile 0.5)
-     info: Ratio of average Pod Lifecycle Event Generator relisting latency over the last 10 seconds, \
-           compared to the last minute (quantile 0.5)
-       to: sysadmin
-
-# quantile 0.9
-
- template: kubelet_1m_pleg_relist_latency_quantile_09
-       on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Latency
-     type: Kubernetes
-component: Kubelet
-   lookup: average -1m unaligned of 0.9
-    units: microseconds
-    every: 10s
-     info: average Pod Lifecycle Event Generator relisting latency over the last minute (quantile 0.9)
-
- template: kubelet_10s_pleg_relist_latency_quantile_09
-       on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Latency
-     type: Kubernetes
-component: Kubelet
-   lookup: average -10s unaligned of 0.9
-     calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_09 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_09))
-    every: 10s
-    units: %
-     warn: $this > (($status >= $WARNING)?(200):(400))
-     crit: $this > (($status >= $WARNING)?(400):(800))
-    delay: down 1m multiplier 1.5 max 2h
-  summary: Kubelet relisting latency (quantile 0.9)
-     info: Ratio of average Pod Lifecycle Event Generator relisting latency over the last 10 seconds, \
-           compared to the last minute (quantile 0.9)
-       to: sysadmin
-
-# quantile 0.99
-
- template: kubelet_1m_pleg_relist_latency_quantile_099
-       on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Latency
-     type: Kubernetes
-component: Kubelet
-   lookup: average -1m unaligned of 0.99
-    units: microseconds
-    every: 10s
-     info: average Pod Lifecycle Event Generator relisting latency over the last minute (quantile 0.99)
-
- template: kubelet_10s_pleg_relist_latency_quantile_099
-       on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Latency
-     type: Kubernetes
-component: Kubelet
-   lookup: average -10s unaligned of 0.99
-     calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_099 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_099))
-    every: 10s
-    units: %
-     warn: $this > (($status >= $WARNING)?(400):(800))
-     crit: $this > (($status >= $WARNING)?(800):(1200))
-    delay: down 1m multiplier 1.5 max 2h
-  summary: Kubelet relisting latency (quantile 0.99)
-     info: Ratio of average Pod Lifecycle Event Generator relisting latency over the last 10 seconds, \
-           compared to the last minute (quantile 0.99)
-       to: sysadmin
diff --git a/health/health.d/linux_power_supply.conf b/health/health.d/linux_power_supply.conf
deleted file mode 100644
index b0d35e752..000000000
--- a/health/health.d/linux_power_supply.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-# Alert on low battery capacity.
-
- template: linux_power_supply_capacity
-       on: powersupply.capacity
-    class: Utilization
-     type: Power Supply
-component: Battery
-     calc: $capacity
-    units: %
-    every: 10s
-     warn: $this < 10
-    delay: up 30s down 5m multiplier 1.2 max 1h
-  summary: Power supply capacity
-     info: Percentage of remaining power supply capacity
-       to: silent
diff --git a/health/health.d/load.conf b/health/health.d/load.conf
deleted file mode 100644
index fd8bf9396..000000000
--- a/health/health.d/load.conf
+++ /dev/null
@@ -1,72 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# Calculate the base trigger point for the load average alarms.
-# This is the maximum number of CPUs in the system over the past 1
-# minute, with a special case for a single CPU of setting the trigger at 2.
-    alarm: load_cpu_number
-       on: system.load
-    class: Utilization
-     type: System
-component: Load
-       os: linux
-    hosts: *
-     calc: ($active_processors == nan or $active_processors == 0) ? (nan) : ( ($active_processors < 2) ? ( 2 ) : ( $active_processors ) )
-    units: cpus
-    every: 1m
-     info: Number of active CPU cores in the system
-
-# Send alarms if the load average is unusually high.
-# These intentionally _do not_ calculate the average over the sampled
-# time period because the values being checked already are averages.
-
-    alarm: load_average_15
-       on: system.load
-    class: Utilization
-     type: System
-component: Load
-       os: linux
-    hosts: *
-   lookup: max -1m unaligned of load15
-     calc: ($load_cpu_number == nan) ? (nan) : ($this)
-    units: load
-    every: 1m
-     warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 175 : 200)
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Host load average (15 minutes)
-     info: System load average for the past 15 minutes
-       to: silent
-
-    alarm: load_average_5
-       on: system.load
-    class: Utilization
-     type: System
-component: Load
-       os: linux
-    hosts: *
-   lookup: max -1m unaligned of load5
-     calc: ($load_cpu_number == nan) ? (nan) : ($this)
-    units: load
-    every: 1m
-     warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 350 : 400)
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System load average (5 minutes)
-     info: System load average for the past 5 minutes
-       to: silent
-
-    alarm: load_average_1
-       on: system.load
-    class: Utilization
-     type: System
-component: Load
-       os: linux
-    hosts: *
-   lookup: max -1m unaligned of load1
-     calc: ($load_cpu_number == nan) ? (nan) : ($this)
-    units: load
-    every: 1m
-     warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 700 : 800)
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System load average (1 minute)
-     info: System load average for the past 1 minute
-       to: silent
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
deleted file mode 100644
index 90f97d851..000000000
--- a/health/health.d/mdstat.conf
+++ /dev/null
@@ -1,43 +0,0 @@
-
- template: mdstat_disks
-       on: md.disks
-    class: Errors
-     type: System
-component: RAID
-    units: failed devices
-    every: 10s
-     calc: $down
-     warn: $this > 0
-  summary: MD array device ${label:device} down
-     info: Number of devices in the down state for the ${label:device} ${label:raid_level} array. \
-           Any number > 0 indicates that the array is degraded.
-       to: sysadmin
-
- template: mdstat_mismatch_cnt
-       on: md.mismatch_cnt
-    class: Errors
-     type: System
-component: RAID
-chart labels: raid_level=!raid1 !raid10 *
-    units: unsynchronized blocks
-     calc: $count
-    every: 60s
-     warn: $this > 1024
-    delay: up 30m
-  summary: MD array device ${label:device} unsynchronized blocks
-     info: Number of unsynchronized blocks for the ${label:device} ${label:raid_level} array
-       to: silent
-
- template: mdstat_nonredundant_last_collected
-       on: md.nonredundant
-    class: Latency
-     type: System
-component: RAID
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-  summary: MD array last collected
-     info: Number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/megacli.conf b/health/health.d/megacli.conf
deleted file mode 100644
index 118997a59..000000000
--- a/health/health.d/megacli.conf
+++ /dev/null
@@ -1,76 +0,0 @@
-
-## Adapters (controllers)
-
- template: megacli_adapter_state
-       on: megacli.adapter_degraded
-    class: Errors
-     type: System
-component: RAID
-   lookup: max -10s foreach *
-    units: boolean
-    every: 10s
-     crit: $this > 0
-    delay: down 5m multiplier 2 max 10m
-  summary: MegaCLI adapter state
-     info: Adapter is in the degraded state (0: false, 1: true)
-       to: sysadmin
-
-## Physical Disks
-
- template: megacli_pd_predictive_failures
-       on: megacli.pd_predictive_failure
-    class: Errors
-     type: System
-component: RAID
-   lookup: sum -10s foreach *
-    units: predictive failures
-    every: 10s
-     warn: $this > 0
-    delay: up 1m down 5m multiplier 2 max 10m
-  summary: MegaCLI physical drive predictive failures
-     info: Number of physical drive predictive failures
-       to: sysadmin
-
- template: megacli_pd_media_errors
-       on: megacli.pd_media_error
-    class: Errors
-     type: System
-component: RAID
-   lookup: sum -10s foreach *
-    units: media errors
-    every: 10s
-     warn: $this > 0
-    delay: up 1m down 5m multiplier 2 max 10m
-  summary: MegaCLI physical drive errors
-     info: Number of physical drive media errors
-       to: sysadmin
-
-## Battery Backup Units (BBU)
-
- template: megacli_bbu_relative_charge
-       on: megacli.bbu_relative_charge
-    class: Workload
-     type: System
-component: RAID
-   lookup: average -10s
-    units: percent
-    every: 10s
-     warn: $this <= (($status >= $WARNING)  ? (85) : (80))
-     crit: $this <= (($status == $CRITICAL)  ? (50) : (40))
-  summary: MegaCLI BBU charge state
-     info: Average battery backup unit (BBU) relative state of charge over the last 10 seconds
-       to: sysadmin
-
- template: megacli_bbu_cycle_count
-       on: megacli.bbu_cycle_count
-    class: Workload
-     type: System
-component: RAID
-   lookup: average -10s
-    units: cycles
-    every: 10s
-     warn: $this >= 100
-     crit: $this >= 500
-  summary: MegaCLI BBU cycles count
-     info: Average battery backup unit (BBU) charge cycles count over the last 10 seconds
-       to: sysadmin
diff --git a/health/health.d/memcached.conf b/health/health.d/memcached.conf
deleted file mode 100644
index 77ca0afa9..000000000
--- a/health/health.d/memcached.conf
+++ /dev/null
@@ -1,50 +0,0 @@
-
-# detect if memcached cache is full
-
- template: memcached_cache_memory_usage
-       on: memcached.cache
-    class: Utilization
-     type: KV Storage
-component: Memcached
-     calc: $used * 100 / ($used + $available)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (70) : (80))
-     crit: $this > (($status == $CRITICAL) ? (80) : (90))
-    delay: up 0 down 15m multiplier 1.5 max 1h
-  summary: Memcached memory utilization
-     info: Cache memory utilization
-       to: dba
-
-
-# find the rate memcached cache is filling
-
- template: memcached_cache_fill_rate
-       on: memcached.cache
-    class: Utilization
-     type: KV Storage
-component: Memcached
-   lookup: min -10m at -50m unaligned of available
-     calc: ($this - $available) / (($now - $after) / 3600)
-    units: KB/hour
-    every: 1m
-     info: Average rate the cache fills up (positive), or frees up (negative) space over the last hour
-
-
-# find the hours remaining until memcached cache is full
-
- template: memcached_out_of_cache_space_time
-       on: memcached.cache
-    class: Utilization
-     type: KV Storage
-component: Memcached
-     calc: ($memcached_cache_fill_rate > 0) ? ($available / $memcached_cache_fill_rate) : (inf)
-    units: hours
-    every: 10s
-     warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
-     crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Memcached estimation of lack of cache space
-     info: Estimated time the cache will run out of space \
-           if the system continues to add data at the same rate as the past hour
-       to: dba
diff --git a/health/health.d/memory.conf b/health/health.d/memory.conf
deleted file mode 100644
index 5ab3d2d92..000000000
--- a/health/health.d/memory.conf
+++ /dev/null
@@ -1,85 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-    alarm: 1hour_memory_hw_corrupted
-       on: mem.hwcorrupt
-    class: Errors
-     type: System
-component: Memory
-       os: linux
-    hosts: *
-     calc: $HardwareCorrupted
-    units: MB
-    every: 10s
-     warn: $this > 0
-    delay: down 1h multiplier 1.5 max 1h
-  summary: System corrupted memory
-     info: Amount of memory corrupted due to a hardware failure
-       to: sysadmin
-
-## ECC Controller
-
- template: ecc_memory_mc_correctable
-       on: mem.edac_mc
-    class: Errors
-     type: System
-component: Memory
-       os: linux
-    hosts: *
-   lookup: sum -10m unaligned of correctable,correctable_noinfo
-    units: errors
-    every: 1m
-     warn: $this > 0
-    delay: down 1h multiplier 1.5 max 1h
-  summary: System ECC memory ${label:controller} correctable errors
-     info: Memory controller ${label:controller} ECC correctable errors in the last 10 minutes
-       to: sysadmin
-
- template: ecc_memory_mc_uncorrectable
-       on: mem.edac_mc
-    class: Errors
-     type: System
-component: Memory
-       os: linux
-    hosts: *
-   lookup: sum -10m unaligned of uncorrectable,uncorrectable_noinfo
-    units: errors
-    every: 1m
-     crit: $this > 0
-    delay: down 1h multiplier 1.5 max 1h
-  summary: System ECC memory ${label:controller} uncorrectable errors
-     info: Memory controller ${label:controller} ECC uncorrectable errors in the last 10 minutes
-       to: sysadmin
-
-## ECC DIMM
-
- template: ecc_memory_dimm_correctable
-       on: mem.edac_mc_dimm
-    class: Errors
-     type: System
-component: Memory
-       os: linux
-    hosts: *
-   lookup: sum -10m unaligned of correctable
-    units: errors
-    every: 1m
-     warn: $this > 0
-    delay: down 1h multiplier 1.5 max 1h
-  summary: System ECC memory DIMM ${label:dimm} correctable errors
-     info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors in the last 10 minutes
-       to: sysadmin
-
- template: ecc_memory_dimm_uncorrectable
-       on: mem.edac_mc_dimm
-    class: Errors
-     type: System
-component: Memory
-       os: linux
-    hosts: *
-   lookup: sum -10m unaligned of uncorrectable
-    units: errors
-    every: 1m
-     crit: $this > 0
-    delay: down 1h multiplier 1.5 max 1h
-  summary: System ECC memory DIMM ${label:dimm} uncorrectable errors
-     info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes
-       to: sysadmin
diff --git a/health/health.d/ml.conf b/health/health.d/ml.conf
deleted file mode 100644
index aef9b0368..000000000
--- a/health/health.d/ml.conf
+++ /dev/null
@@ -1,56 +0,0 @@
-# below are some examples of using the `anomaly-bit` option to define alerts based on anomaly 
-# rates as opposed to raw metric values. You can read more about the anomaly-bit and Netdata's 
-# native anomaly detection here: 
-# https://learn.netdata.cloud/docs/agent/ml#anomaly-bit---100--anomalous-0--normal
-
-# some examples below are commented, you would need to uncomment and adjust as desired to enable them.
-
-# node level anomaly rate
-# https://learn.netdata.cloud/docs/agent/ml#node-anomaly-rate
-# if node level anomaly rate is above 1% then warning (pick your own threshold that works best via trial and error).
- template: ml_1min_node_ar
-       on: anomaly_detection.anomaly_rate
-    class: Workload
-     type: System
-component: ML
-       os: *
-    hosts: *
-   lookup: average -1m of anomaly_rate
-     calc: $this
-    units: %
-    every: 30s
-     warn: $this > 1
-  summary: ML node anomaly rate
-     info: Rolling 1min node level anomaly rate
-       to: silent
-
-# alert per dimension example
-# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via trial and error).
-# if anomaly rate is above 20% then critical (pick your own threshold that works best via trial and error).
-# template: ml_5min_cpu_dims
-#       on: system.cpu
-#       os: linux
-#    hosts: *
-#   lookup: average -5m anomaly-bit foreach *
-#     calc: $this
-#    units: %
-#    every: 30s
-#     warn: $this > (($status >= $WARNING)  ? (5) : (20))
-#     crit: $this > (($status == $CRITICAL) ? (20) : (100))
-#     info: rolling 5min anomaly rate for each system.cpu dimension
-
-# alert per chart example
-# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via trial and error).
-# if anomaly rate is above 20% then critical (pick your own threshold that works best via trial and error).
-# template: ml_5min_cpu_chart
-#       on: system.cpu
-#       os: linux
-#    hosts: *
-#   lookup: average -5m anomaly-bit of *
-#     calc: $this
-#    units: %
-#    every: 30s
-#     warn: $this > (($status >= $WARNING)  ? (5) : (20))
-#     crit: $this > (($status == $CRITICAL) ? (20) : (100))
-#     info: rolling 5min anomaly rate for system.cpu chart
-
diff --git a/health/health.d/mysql.conf b/health/health.d/mysql.conf
deleted file mode 100644
index 572560b4e..000000000
--- a/health/health.d/mysql.conf
+++ /dev/null
@@ -1,187 +0,0 @@
-
-# slow queries
-
- template: mysql_10s_slow_queries
-       on: mysql.queries
-    class: Latency
-     type: Database
-component: MySQL
-   lookup: sum -10s of slow_queries
-    units: slow queries
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (5)  : (10))
-     crit: $this > (($status == $CRITICAL) ? (10) : (20))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: MySQL slow queries
-     info: Number of slow queries in the last 10 seconds
-       to: dba
-
-
-# -----------------------------------------------------------------------------
-# lock waits
-
- template: mysql_10s_table_locks_immediate
-       on: mysql.table_locks
-    class: Utilization
-     type: Database
-component: MySQL
-   lookup: sum -10s absolute of immediate
-    units: immediate locks
-    every: 10s
-  summary: MySQL table immediate locks
-     info: Number of table immediate locks in the last 10 seconds
-       to: dba
-
- template: mysql_10s_table_locks_waited
-       on: mysql.table_locks
-    class: Latency
-     type: Database
-component: MySQL
-   lookup: sum -10s absolute of waited
-    units: waited locks
-    every: 10s
-  summary: MySQL table waited locks
-     info: Number of table waited locks in the last 10 seconds
-       to: dba
-
- template: mysql_10s_waited_locks_ratio
-       on: mysql.table_locks
-    class: Latency
-     type: Database
-component: MySQL
-     calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (10) : (25))
-     crit: $this > (($status == $CRITICAL) ? (25) : (50))
-    delay: down 30m multiplier 1.5 max 1h
-  summary: MySQL waited table locks ratio
-     info: Ratio of waited table locks over the last 10 seconds
-       to: dba
-
-
-# -----------------------------------------------------------------------------
-# connections
-
- template: mysql_connections
-       on: mysql.connections_active
-    class: Utilization
-     type: Database
-component: MySQL
-     calc: $active * 100 / $limit
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (60) : (70))
-     crit: $this > (($status == $CRITICAL) ? (80) : (90))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: MySQL connections utilization
-     info: Client connections utilization
-       to: dba
-
-
-# -----------------------------------------------------------------------------
-# replication
-
- template: mysql_replication
-       on: mysql.slave_status
-    class: Errors
-     type: Database
-component: MySQL
-     calc: ($sql_running <= 0 OR $io_running <= 0)?0:1
-    units: ok/failed
-    every: 10s
-     crit: $this == 0
-    delay: down 5m multiplier 1.5 max 1h
-  summary: MySQL replication status
-     info: Replication status (0: stopped, 1: working)
-       to: dba
-
- template: mysql_replication_lag
-       on: mysql.slave_behind
-    class: Latency
-     type: Database
-component: MySQL
-     calc: $seconds
-    units: seconds
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (5)  : (10))
-     crit: $this > (($status == $CRITICAL) ? (10) : (30))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: MySQL replication lag
-     info: Difference between the timestamp of the latest transaction processed by the SQL thread and \
-           the timestamp of the same transaction when it was processed on the master
-       to: dba
-
-
-# -----------------------------------------------------------------------------
-# galera cluster size
-
- template: mysql_galera_cluster_size_max_2m
-       on: mysql.galera_cluster_size
-    class: Utilization
-     type: Database
-component: MySQL
-   lookup: max -2m at -1m unaligned
-    units: nodes
-    every: 10s
-     info: maximum galera cluster size in the last 2 minutes starting one minute ago
-       to: dba
-
- template: mysql_galera_cluster_size
-       on: mysql.galera_cluster_size
-    class: Utilization
-     type: Database
-component: MySQL
-     calc: $nodes
-    units: nodes
-    every: 10s
-     warn: $this > $mysql_galera_cluster_size_max_2m
-     crit: $this < $mysql_galera_cluster_size_max_2m
-    delay: up 20s down 5m multiplier 1.5 max 1h
-  summary: MySQL galera cluster size
-     info: Current galera cluster size, compared to the maximum size in the last 2 minutes
-       to: dba
-
-# galera node state
-
- template: mysql_galera_cluster_state_warn
-       on: mysql.galera_cluster_state
-    class: Errors
-     type: Database
-component: MySQL
-     calc: $donor + $joined
-    every: 10s
-     warn: $this != nan AND $this != 0
-    delay: up 30s down 5m multiplier 1.5 max 1h
-  summary: MySQL galera node state
-     info: Galera node state is either Donor/Desynced or Joined.
-       to: dba
-
- template: mysql_galera_cluster_state_crit
-       on: mysql.galera_cluster_state
-    class: Errors
-     type: Database
-component: MySQL
-     calc: $undefined + $joining + $error
-    every: 10s
-     crit: $this != nan AND $this != 0
-    delay: up 30s down 5m multiplier 1.5 max 1h
-  summary: MySQL galera node state
-     info: Galera node state is either Undefined or Joining or Error.
-       to: dba
-
-# galera node status
-
- template: mysql_galera_cluster_status
-       on: mysql.galera_cluster_status
-    class: Errors
-     type: Database
-component: MySQL
-     calc: $primary
-    every: 10s
-     crit: $this != nan AND $this != 1
-    delay: up 30s down 5m multiplier 1.5 max 1h
-  summary: MySQL galera cluster status
-     info: Galera node is part of a nonoperational component. \
-           This occurs in cases of multiple membership changes that result in a loss of Quorum or in cases of split-brain situations.
-       to: dba
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
deleted file mode 100644
index 2dfe6bbaf..000000000
--- a/health/health.d/net.conf
+++ /dev/null
@@ -1,258 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# -----------------------------------------------------------------------------
-# net traffic overflow
-
- template: interface_speed
-       on: net.net
-    class: Latency
-     type: System
-component: Network
-       os: *
-    hosts: *
-     calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max / 1000) : ( nan )
-    units: Mbit
-    every: 10s
-     info: Network interface ${label:device} current speed
-
- template: 1m_received_traffic_overflow
-       on: net.net
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -1m unaligned absolute of received
-     calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan )
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (85) : (90))
-    delay: up 1m down 1m multiplier 1.5 max 1h
-  summary: System network interface ${label:device} inbound utilization
-     info: Average inbound utilization for the network interface ${label:device} over the last minute
-       to: silent
-
- template: 1m_sent_traffic_overflow
-       on: net.net
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -1m unaligned absolute of sent
-     calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan )
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (85) : (90))
-    delay: up 1m down 1m multiplier 1.5 max 1h
-  summary: System network interface ${label:device} outbound utilization
-     info: Average outbound utilization for the network interface ${label:device} over the last minute
-       to: silent
-
-# -----------------------------------------------------------------------------
-# dropped packets
-
-# check if an interface is dropping packets
-# the alarm is checked every 1 minute
-# and examines the last 10 minutes of data
-#
-# it is possible to have expected packet drops on an interface for some network configurations
-# look at the Monitoring Network Interfaces section in the proc.plugin documentation for more information
-
- template: net_interface_inbound_packets
-       on: net.packets
-    class: Workload
-     type: System
-component: Network
-       os: *
-    hosts: *
-   lookup: sum -10m unaligned absolute of received
-    units: packets
-    every: 1m
-  summary: Network interface ${label:device} received packets
-     info: Received packets for the network interface ${label:device} in the last 10 minutes
-
- template: net_interface_outbound_packets
-       on: net.packets
-    class: Workload
-     type: System
-component: Network
-       os: *
-    hosts: *
-   lookup: sum -10m unaligned absolute of sent
-    units: packets
-    every: 1m
-  summary: Network interface ${label:device} sent packets
-     info: Sent packets for the network interface ${label:device} in the last 10 minutes
-
- template: inbound_packets_dropped_ratio
-       on: net.drops
-    class: Errors
-     type: System
-component: Network
-       os: *
-    hosts: *
-chart labels: device=!wl* *
-   lookup: sum -10m unaligned absolute of inbound
-     calc: (($net_interface_inbound_packets > 10000) ? ($this * 100 / $net_interface_inbound_packets) : (0))
-    units: %
-    every: 1m
-     warn: $this >= 2
-    delay: up 1m down 1h multiplier 1.5 max 2h
-  summary: System network interface ${label:device} inbound drops
-     info: Ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
-       to: silent
-
- template: outbound_packets_dropped_ratio
-       on: net.drops
-    class: Errors
-     type: System
-component: Network
-       os: *
-    hosts: *
-chart labels: device=!wl* *
-   lookup: sum -10m unaligned absolute of outbound
-     calc: (($net_interface_outbound_packets > 1000) ? ($this * 100 / $net_interface_outbound_packets) : (0))
-    units: %
-    every: 1m
-     warn: $this >= 2
-    delay: up 1m down 1h multiplier 1.5 max 2h
-  summary: System network interface ${label:device} outbound drops
-     info: Ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
-       to: silent
-
- template: wifi_inbound_packets_dropped_ratio
-       on: net.drops
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-chart labels: device=wl*
-   lookup: sum -10m unaligned absolute of inbound
-     calc: (($net_interface_inbound_packets > 10000) ? ($this * 100 / $net_interface_inbound_packets) : (0))
-    units: %
-    every: 1m
-     warn: $this >= 10
-    delay: up 1m down 1h multiplier 1.5 max 2h
-  summary: System network interface ${label:device} inbound drops ratio
-     info: Ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
-       to: silent
-
- template: wifi_outbound_packets_dropped_ratio
-       on: net.drops
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-chart labels: device=wl*
-   lookup: sum -10m unaligned absolute of outbound
-     calc: (($net_interface_outbound_packets > 1000) ? ($this * 100 / $net_interface_outbound_packets) : (0))
-    units: %
-    every: 1m
-     warn: $this >= 10
-    delay: up 1m down 1h multiplier 1.5 max 2h
-  summary: System network interface ${label:device} outbound drops ratio
-     info: Ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
-       to: silent
-
-# -----------------------------------------------------------------------------
-# interface errors
-
- template: interface_inbound_errors
-       on: net.errors
-    class: Errors
-     type: System
-component: Network
-       os: freebsd
-    hosts: *
-   lookup: sum -10m unaligned absolute of inbound
-    units: errors
-    every: 1m
-     warn: $this >= 5
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System network interface ${label:device} inbound errors
-     info: Number of inbound errors for the network interface ${label:device} in the last 10 minutes
-       to: silent
-
- template: interface_outbound_errors
-       on: net.errors
-    class: Errors
-     type: System
-component: Network
-       os: freebsd
-    hosts: *
-   lookup: sum -10m unaligned absolute of outbound
-    units: errors
-    every: 1m
-     warn: $this >= 5
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System network interface ${label:device} outbound errors
-     info: Number of outbound errors for the network interface ${label:device} in the last 10 minutes
-       to: silent
-
-# -----------------------------------------------------------------------------
-# FIFO errors
-
-# check if an interface is having FIFO
-# buffer errors
-# the alarm is checked every 1 minute
-# and examines the last 10 minutes of data
-
- template: 10min_fifo_errors
-       on: net.fifo
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: sum -10m unaligned absolute
-    units: errors
-    every: 1m
-     warn: $this > 0
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System network interface ${label:device} FIFO errors
-     info: Number of FIFO errors for the network interface ${label:device} in the last 10 minutes
-       to: silent
-
-# -----------------------------------------------------------------------------
-# check for packet storms
-
-# 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
-# 2. do the same for the last 10s
-# 3. raise an alarm if the latter is 50x (warning) or 60x (critical) the former
-# we assume the minimum packet storm should at least have
-# 10000 packets/s, average of the last 10 seconds
-
- template: 1m_received_packets_rate
-       on: net.packets
-    class: Workload
-     type: System
-component: Network
-       os: linux freebsd
-    hosts: *
-   lookup: average -1m unaligned of received
-    units: packets
-    every: 10s
-     info: Average number of packets received by the network interface ${label:device} over the last minute
-
- template: 10s_received_packets_storm
-       on: net.packets
-    class: Workload
-     type: System
-component: Network
-       os: linux freebsd
-    hosts: *
-   lookup: average -10s unaligned of received
-     calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
-    every: 10s
-    units: %
-     warn: $this > (($status >= $WARNING)?(200):(5000))
-     crit: $this > (($status == $CRITICAL)?(5000):(6000))
-  options: no-clear-notification
-  summary: System network interface ${label:device} inbound packet storm
-     info: Ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
-           compared to the rate over the last minute
-       to: silent
diff --git a/health/health.d/netfilter.conf b/health/health.d/netfilter.conf
deleted file mode 100644
index 417105d43..000000000
--- a/health/health.d/netfilter.conf
+++ /dev/null
@@ -1,20 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-    alarm: netfilter_conntrack_full
-       on: netfilter.conntrack_sockets
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: max -10s unaligned of connections
-     calc: $this * 100 / $netfilter_conntrack_max
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (85) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (95))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: System Netfilter connection tracker utilization
-     info: Netfilter connection tracker table size utilization
-       to: sysadmin
diff --git a/health/health.d/nvme.conf b/health/health.d/nvme.conf
deleted file mode 100644
index aea402e88..000000000
--- a/health/health.d/nvme.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: nvme_device_critical_warnings_state
-       on: nvme.device_critical_warnings_state
-    class: Errors
-     type: System
-component: Disk
-   lookup: max -30s unaligned
-    units: state
-    every: 10s
-     crit: $this != nan AND $this != 0
-    delay: down 5m multiplier 1.5 max 2h
-  summary: NVMe device ${label:device} state
-     info: NVMe device ${label:device} has critical warnings
-       to: sysadmin
diff --git a/health/health.d/pihole.conf b/health/health.d/pihole.conf
deleted file mode 100644
index c4db835ce..000000000
--- a/health/health.d/pihole.conf
+++ /dev/null
@@ -1,33 +0,0 @@
-
-# Blocklist last update time.
-# Default update interval is a week.
-
- template: pihole_blocklist_last_update
-       on: pihole.blocklist_last_update
-    class: Errors
-     type: Ad Filtering
-component: Pi-hole
-    every: 10s
-    units: seconds
-     calc: $ago
-     warn: $this > 60 * 60 * 24 * 30
-  summary: Pi-hole blocklist last update
-     info: gravity.list (blocklist) file last update time
-       to: sysadmin
-
-# Pi-hole's ability to block unwanted domains.
-# Should be enabled. The whole point of Pi-hole!
-
- template: pihole_status
-       on: pihole.unwanted_domains_blocking_status
-    class: Errors
-     type: Ad Filtering
-component: Pi-hole
-    every: 10s
-    units: status
-     calc: $disabled
-     warn: $this != nan AND $this == 1
-    delay: up 2m down 5m
-  summary: Pi-hole domains blocking status
-     info: Unwanted domains blocking is disabled
-       to: sysadmin
diff --git a/health/health.d/ping.conf b/health/health.d/ping.conf
deleted file mode 100644
index 0e434420d..000000000
--- a/health/health.d/ping.conf
+++ /dev/null
@@ -1,50 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: ping_host_reachable
-       on: ping.host_packet_loss
-    class: Errors
-     type: Other
-component: Network
-   lookup: average -30s unaligned of loss
-     calc: $this != nan AND $this < 100
-    units: up/down
-    every: 10s
-     crit: $this == 0
-    delay: down 30m multiplier 1.5 max 2h
-  summary: Host ${label:host} ping status
-     info: Network host ${label:host} reachability status
-       to: sysadmin
-
- template: ping_packet_loss
-       on: ping.host_packet_loss
-    class: Errors
-     type: Other
-component: Network
-   lookup: average -10m unaligned of loss
-    green: 5
-      red: 10
-    units: %
-    every: 10s
-     warn: $this > $green
-     crit: $this > $red
-    delay: down 30m multiplier 1.5 max 2h
-  summary: Host ${label:host} ping packet loss
-     info: Packet loss percentage to the network host ${label:host} over the last 10 minutes
-       to: sysadmin
-
- template: ping_host_latency
-       on: ping.host_rtt
-    class: Latency
-     type: Other
-component: Network
-   lookup: average -10s unaligned of avg
-    units: ms
-    every: 10s
-    green: 500
-      red: 1000
-     warn: $this > $green OR $max > $red
-     crit: $this > $red
-    delay: down 30m multiplier 1.5 max 2h
-  summary: Host ${label:host} ping latency
-     info: Average latency to the network host ${label:host} over the last 10 seconds
-       to: sysadmin
diff --git a/health/health.d/plugin.conf b/health/health.d/plugin.conf
deleted file mode 100644
index 8615a0213..000000000
--- a/health/health.d/plugin.conf
+++ /dev/null
@@ -1,12 +0,0 @@
- template: plugin_availability_status
-       on: netdata.plugin_availability_status
-    class: Errors
-     type: Netdata
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : (20 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Plugin ${label:_collect_plugin} availability status
-     info: the amount of time that ${label:_collect_plugin} did not report its availability status
-       to: sysadmin
diff --git a/health/health.d/portcheck.conf b/health/health.d/portcheck.conf
deleted file mode 100644
index 281731c86..000000000
--- a/health/health.d/portcheck.conf
+++ /dev/null
@@ -1,44 +0,0 @@
-
-# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
- template: portcheck_service_reachable
-       on: portcheck.status
-    class: Workload
-     type: Other
-component: TCP endpoint
-   lookup: average -1m unaligned percentage of success
-     calc: ($this < 75) ? (0) : ($this)
-    every: 5s
-    units: up/down
-  summary: Portcheck status for ${label:host}:${label:port}
-     info: TCP host ${label:host} port ${label:port} liveness status
-       to: silent
-
- template: portcheck_connection_timeouts
-       on: portcheck.status
-    class: Errors
-     type: Other
-component: TCP endpoint
-   lookup: average -5m unaligned percentage of timeout
-    every: 10s
-    units: %
-     warn: $this >= 10 AND $this < 40
-     crit: $this >= 40
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Portcheck timeouts for ${label:host}:${label:port}
-     info: Percentage of timed-out TCP connections to host ${label:host} port ${label:port} in the last 5 minutes
-       to: sysadmin
-
- template: portcheck_connection_fails
-       on: portcheck.status
-    class: Errors
-     type: Other
-component: TCP endpoint
-   lookup: average -5m unaligned percentage of no_connection,failed
-    every: 10s
-    units: %
-     warn: $this >= 10 AND $this < 40
-     crit: $this >= 40
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Portcheck fails for ${label:host}:${label:port}
-     info: Percentage of failed TCP connections to host ${label:host} port ${label:port} in the last 5 minutes
-       to: sysadmin
diff --git a/health/health.d/postgres.conf b/health/health.d/postgres.conf
deleted file mode 100644
index de4c0078e..000000000
--- a/health/health.d/postgres.conf
+++ /dev/null
@@ -1,228 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: postgres_total_connection_utilization
-       on: postgres.connections_utilization
-    class: Utilization
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -1m unaligned of used
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (70) : (80))
-     crit: $this > (($status == $CRITICAL) ? (80) : (90))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL connection utilization
-     info: Average total connection utilization over the last minute
-       to: dba
-
- template: postgres_acquired_locks_utilization
-       on: postgres.locks_utilization
-    class: Utilization
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -1m unaligned of used
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (15) : (20))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL acquired locks utilization
-     info: Average acquired locks utilization over the last minute
-       to: dba
-
- template: postgres_txid_exhaustion_perc
-       on: postgres.txid_exhaustion_perc
-    class: Utilization
-     type: Database
-component: PostgreSQL
-    hosts: *
-     calc: $txid_exhaustion	
-    units: %
-    every: 1m
-     warn: $this > 90
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL TXID exhaustion
-     info: Percent towards TXID wraparound
-       to: dba
-
-# Database alarms
-
- template: postgres_db_cache_io_ratio
-       on: postgres.db_cache_io_ratio
-    class: Workload
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -1m unaligned of miss
-     calc: 100 - $this
-    units: %
-    every: 1m
-     warn: $this < (($status >= $WARNING)  ? (70) : (60))
-     crit: $this < (($status == $CRITICAL) ? (60) : (50))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL DB ${label:database} cache hit ratio
-     info: Average cache hit ratio in db ${label:database} over the last minute
-       to: dba
-
- template: postgres_db_transactions_rollback_ratio	
-       on: postgres.db_transactions_ratio
-    class: Workload
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -5m unaligned of rollback
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (0) : (2))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL DB ${label:database} aborted transactions
-     info: Average aborted transactions percentage in db ${label:database} over the last five minutes
-       to: dba
-
- template: postgres_db_deadlocks_rate
-       on: postgres.db_deadlocks_rate
-    class: Errors
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: sum -1m unaligned of deadlocks
-    units: deadlocks
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (0) : (10))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL DB ${label:database} deadlocks rate
-     info: Number of deadlocks detected in db ${label:database} in the last minute
-       to: dba
-
-# Table alarms
-
- template: postgres_table_cache_io_ratio
-       on: postgres.table_cache_io_ratio
-    class: Workload
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -1m unaligned of miss
-     calc: 100 - $this
-    units: %
-    every: 1m
-     warn: $this < (($status >= $WARNING)  ? (70) : (60))
-     crit: $this < (($status == $CRITICAL) ? (60) : (50))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL table ${label:table} db ${label:database} cache hit ratio
-     info: Average cache hit ratio in db ${label:database} table ${label:table} over the last minute
-       to: dba
-
- template: postgres_table_index_cache_io_ratio
-       on: postgres.table_index_cache_io_ratio
-    class: Workload
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -1m unaligned of miss
-     calc: 100 - $this
-    units: %
-    every: 1m
-     warn: $this < (($status >= $WARNING)  ? (70) : (60))
-     crit: $this < (($status == $CRITICAL) ? (60) : (50))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL table ${label:table} db ${label:database} index cache hit ratio
-     info: Average index cache hit ratio in db ${label:database} table ${label:table} over the last minute
-       to: dba
-
- template: postgres_table_toast_cache_io_ratio
-       on: postgres.table_toast_cache_io_ratio
-    class: Workload
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -1m unaligned of miss
-     calc: 100 - $this
-    units: %
-    every: 1m
-     warn: $this < (($status >= $WARNING)  ? (70) : (60))
-     crit: $this < (($status == $CRITICAL) ? (60) : (50))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL table ${label:table} db ${label:database} toast cache hit ratio
-     info: Average TOAST hit ratio in db ${label:database} table ${label:table} over the last minute
-       to: dba
-
- template: postgres_table_toast_index_cache_io_ratio
-       on: postgres.table_toast_index_cache_io_ratio
-    class: Workload
-     type: Database
-component: PostgreSQL
-    hosts: *
-   lookup: average -1m unaligned of miss
-     calc: 100 - $this
-    units: %
-    every: 1m
-     warn: $this < (($status >= $WARNING)  ? (70) : (60))
-     crit: $this < (($status == $CRITICAL) ? (60) : (50))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL table ${label:table} db ${label:database} index toast hit ratio
-     info: Average index TOAST hit ratio in db ${label:database} table ${label:table} over the last minute
-       to: dba
-
- template: postgres_table_bloat_size_perc
-       on: postgres.table_bloat_size_perc
-    class: Errors
-     type: Database
-component: PostgreSQL
-    hosts: *
-     calc: ($table_size > (1024 * 1024 * 100)) ? ($bloat) : (0)
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (60) : (70))
-     crit: $this > (($status == $CRITICAL) ? (70) : (80))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL table ${label:table} db ${label:database} bloat size
-     info: Bloat size percentage in db ${label:database} table ${label:table}
-       to: dba
-
- template: postgres_table_last_autovacuum_time
-       on: postgres.table_autovacuum_since_time
-    class: Errors
-     type: Database
-component: PostgreSQL
-    hosts: !*
-     calc: $time
-    units: seconds
-    every: 1m
-     warn: $this != nan AND $this > (60 * 60 * 24 * 7)
-  summary: PostgreSQL table ${label:table} db ${label:database} last autovacuum
-     info: Time elapsed since db ${label:database} table ${label:table} was vacuumed by the autovacuum daemon
-       to: dba
-
- template: postgres_table_last_autoanalyze_time
-       on: postgres.table_autoanalyze_since_time
-    class: Errors
-     type: Database
-component: PostgreSQL
-    hosts: !*
-     calc: $time
-    units: seconds
-    every: 1m
-     warn: $this != nan AND $this > (60 * 60 * 24 * 7)
-  summary: PostgreSQL table ${label:table} db ${label:database} last autoanalyze
-     info: Time elapsed since db ${label:database} table ${label:table} was analyzed by the autovacuum daemon
-       to: dba
-
-# Index alarms
-
- template: postgres_index_bloat_size_perc
-       on: postgres.index_bloat_size_perc
-    class: Errors
-     type: Database
-component: PostgreSQL
-    hosts: *
-     calc: ($index_size > (1024 * 1024 * 10)) ? ($bloat) : (0)
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (60) : (70))
-     crit: $this > (($status == $CRITICAL) ? (70) : (80))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: PostgreSQL table ${label:table} db ${label:database} index bloat size
-     info: Bloat size percentage in db ${label:database} table ${label:table} index ${label:index}
-       to: dba
diff --git a/health/health.d/processes.conf b/health/health.d/processes.conf
deleted file mode 100644
index 8f2e0fda5..000000000
--- a/health/health.d/processes.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-    alarm: active_processes
-       on: system.active_processes
-    class: Workload
-     type: System
-component: Processes
-    hosts: *
-     calc: $active * 100 / $pidmax
-    units: %
-    every: 5s
-     warn: $this > (($status >= $WARNING)  ? (85) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (95))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: System PIDs utilization
-     info: System process IDs (PID) space utilization
-       to: sysadmin
diff --git a/health/health.d/python.d.plugin.conf b/health/health.d/python.d.plugin.conf
deleted file mode 100644
index da27ad5b7..000000000
--- a/health/health.d/python.d.plugin.conf
+++ /dev/null
@@ -1,18 +0,0 @@
-
-# make sure python.d.plugin data collection job is running
-
- template: python.d_job_last_collected_secs
-       on: netdata.pythond_runtime
-    class: Errors
-     type: Netdata
-component: python.d.plugin
-   module: !* *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Python.d plugin last collection
-     info: Number of seconds since the last successful data collection
-       to: webmaster
diff --git a/health/health.d/qos.conf b/health/health.d/qos.conf
deleted file mode 100644
index 970ea6363..000000000
--- a/health/health.d/qos.conf
+++ /dev/null
@@ -1,18 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# check if a QoS class is dropping packets
-# the alarm is checked every 30 seconds
-# and examines the last 5 minutes of data
-
-template: 10min_qos_packet_drops
-      on: tc.qos_dropped
-      os: linux
-   hosts: *
-  lookup: sum -5m unaligned absolute
-   every: 30s
-    warn: $this > 0
-   units: packets
- summary: QOS packet drops
-    info: Dropped packets in the last 5 minutes
-      to: silent
diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf
deleted file mode 100644
index 51f307ca6..000000000
--- a/health/health.d/ram.conf
+++ /dev/null
@@ -1,82 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-    alarm: ram_in_use
-       on: system.ram
-    class: Utilization
-     type: System
-component: Memory
-       os: linux
-    hosts: *
-     calc: $used * 100 / ($used + $cached + $free + $buffers)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System memory utilization
-     info: System memory utilization
-       to: sysadmin
-
-    alarm: ram_available
-       on: mem.available
-    class: Utilization
-     type: System
-component: Memory
-       os: linux
-    hosts: *
-     calc: $avail * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
-    units: %
-    every: 10s
-     warn: $this < (($status >= $WARNING)  ? (15) : (10))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System available memory
-     info: Percentage of estimated amount of RAM available for userspace processes, without causing swapping
-       to: silent
-
-    alarm: oom_kill
-       on: mem.oom_kill
-       os: linux
-    hosts: *
-   lookup: sum -30m unaligned
-    units: kills
-    every: 5m
-     warn: $this > 0
-    delay: down 10m
-  summary: System OOM kills
-     info: Number of out of memory kills in the last 30 minutes
-       to: silent
-
-## FreeBSD
-    alarm: ram_in_use
-       on: system.ram
-    class: Utilization
-     type: System
-component: Memory
-       os: freebsd
-    hosts: *
-     calc: ($active + $wired + $laundry + $buffers) * 100 / ($active + $wired + $laundry + $buffers + $cache + $free + $inactive)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System memory utilization
-     info: System memory utilization
-       to: sysadmin
-
-    alarm: ram_available
-       on: mem.available
-    class: Utilization
-     type: System
-component: Memory
-       os: freebsd
-    hosts: *
-     calc: $avail * 100 / ($system.ram.free + $system.ram.active + $system.ram.inactive + $system.ram.wired + $system.ram.cache + $system.ram.laundry + $system.ram.buffers)
-    units: %
-    every: 10s
-     warn: $this < (($status >= $WARNING)  ? (15) : (10))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System available memory
-     info: Percentage of estimated amount of RAM available for userspace processes, without causing swapping
-       to: silent
diff --git a/health/health.d/redis.conf b/health/health.d/redis.conf
deleted file mode 100644
index 7c2945e68..000000000
--- a/health/health.d/redis.conf
+++ /dev/null
@@ -1,57 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: redis_connections_rejected
-       on: redis.connections
-    class: Errors
-     type: KV Storage
-component: Redis
-   lookup: sum -1m unaligned of rejected
-    every: 10s
-    units: connections
-     warn: $this > 0
-  summary: Redis rejected connections
-     info: Connections rejected because of maxclients limit in the last minute
-    delay: down 5m multiplier 1.5 max 1h
-       to: dba
-
- template: redis_bgsave_broken
-       on: redis.bgsave_health
-    class: Errors
-     type: KV Storage
-component: Redis
-    every: 10s
-     crit: $last_bgsave != nan AND $last_bgsave != 0
-    units: ok/failed
-  summary: Redis background save
-     info: Status of the last RDB save operation (0: ok, 1: error)
-    delay: down 5m multiplier 1.5 max 1h
-       to: dba
-
- template: redis_bgsave_slow
-       on: redis.bgsave_now
-    class: Latency
-     type: KV Storage
-component: Redis
-    every: 10s
-     calc: $current_bgsave_time
-     warn: $this > 600
-     crit: $this > 1200
-    units: seconds
-  summary: Redis slow background save
-     info: Duration of the on-going RDB save operation
-    delay: down 5m multiplier 1.5 max 1h
-       to: dba
-
- template: redis_master_link_down
-       on: redis.master_link_down_since_time
-    class: Errors
-     type: KV Storage
-component: Redis
-    every: 10s
-     calc: $time
-    units: seconds
-     crit: $this != nan AND $this > 0
-  summary: Redis master link down
-     info: Time elapsed since the link between master and slave is down
-    delay: down 5m multiplier 1.5 max 1h
-       to: dba
diff --git a/health/health.d/retroshare.conf b/health/health.d/retroshare.conf
deleted file mode 100644
index c665430fa..000000000
--- a/health/health.d/retroshare.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure the DHT is fine when active
-
- template: retroshare_dht_working
-       on: retroshare.dht
-    class: Utilization
-     type: Data Sharing
-component: Retroshare
-     calc: $dht_size_all
-    units: peers
-    every: 1m
-     warn: $this < (($status >= $WARNING)  ? (120) : (100))
-     crit: $this < (($status == $CRITICAL) ? (10)  : (1))
-    delay: up 0 down 15m multiplier 1.5 max 1h
-  summary: Retroshare DHT peers
-     info: Number of DHT peers
-       to: sysadmin
diff --git a/health/health.d/riakkv.conf b/health/health.d/riakkv.conf
deleted file mode 100644
index 677e3cb4f..000000000
--- a/health/health.d/riakkv.conf
+++ /dev/null
@@ -1,98 +0,0 @@
-
-# Warn if a list keys operation is running.
- template: riakkv_list_keys_active
-       on: riak.core.fsm_active
-    class: Utilization
-     type: Database
-component: Riak KV
-     calc: $list_fsm_active
-    units: state machines
-    every: 10s
-     warn: $list_fsm_active > 0
-  summary: Riak KV active list keys
-     info: Number of currently running list keys finite state machines
-       to: dba
-
-
-## Timing healthchecks
-# KV GET
- template: riakkv_1h_kv_get_mean_latency
-       on: riak.kv.latency.get
-    class: Latency
-     type: Database
-component: Riak KV
-     calc: $node_get_fsm_time_mean
-   lookup: average -1h unaligned of time
-    every: 30s
-    units: ms
-     info: Average time between reception of client GET request and \
-           subsequent response to the client over the last hour
-
- template: riakkv_kv_get_slow
-       on: riak.kv.latency.get
-    class: Latency
-     type: Database
-component: Riak KV
-     calc: $mean
-   lookup: average -3m unaligned of time
-    units: ms
-    every: 10s
-     warn: ($this > ($riakkv_1h_kv_get_mean_latency * 2) )
-     crit: ($this > ($riakkv_1h_kv_get_mean_latency * 3) )
-  summary: Riak KV GET latency
-     info: Average time between reception of client GET request and \
-           subsequent response to the client over the last 3 minutes, \
-           compared to the average over the last hour
-    delay: down 5m multiplier 1.5 max 1h
-       to: dba
-
-# KV PUT
- template: riakkv_1h_kv_put_mean_latency
-       on: riak.kv.latency.put
-    class: Latency
-     type: Database
-component: Riak KV
-     calc: $node_put_fsm_time_mean
-   lookup: average -1h unaligned of time
-    every: 30s
-    units: ms
-  summary: Riak KV PUT mean latency
-     info: Average time between reception of client PUT request and \
-           subsequent response to the client over the last hour
-
- template: riakkv_kv_put_slow
-       on: riak.kv.latency.put
-    class: Latency
-     type: Database
-component: Riak KV
-     calc: $mean
-   lookup: average -3m unaligned of time
-    units: ms
-    every: 10s
-     warn: ($this > ($riakkv_1h_kv_put_mean_latency * 2) )
-     crit: ($this > ($riakkv_1h_kv_put_mean_latency * 3) )
-  summary: Riak KV PUT latency
-     info: Average time between reception of client PUT request and \
-           subsequent response to the client over the last 3 minutes, \
-           compared to the average over the last hour
-    delay: down 5m multiplier 1.5 max 1h
-       to: dba
-
-
-## VM healthchecks
-
-# Default Erlang VM process limit: 262144
-# On systems observed, this is < 2000, but may grow depending on load.
- template: riakkv_vm_high_process_count
-       on: riak.vm
-    class: Utilization
-     type: Database
-component: Riak KV
-     calc: $sys_process_count
-    units: processes
-    every: 10s
-     warn: $this > 10000
-     crit: $this > 100000
-  summary: Riak KV number of processes
-     info: Number of processes running in the Erlang VM
-       to: dba
diff --git a/health/health.d/scaleio.conf b/health/health.d/scaleio.conf
deleted file mode 100644
index b089cb85e..000000000
--- a/health/health.d/scaleio.conf
+++ /dev/null
@@ -1,33 +0,0 @@
-
-# make sure Storage Pool capacity utilization is under limit
-
- template: scaleio_storage_pool_capacity_utilization
-       on: scaleio.storage_pool_capacity_utilization
-    class: Utilization
-     type: Storage
-component: ScaleIO
-     calc: $used
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (90))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: ScaleIO storage pool capacity utilization
-     info: Storage pool capacity utilization
-       to: sysadmin
-
-
-# make sure Sdc is connected to MDM
-
- template: scaleio_sdc_mdm_connection_state
-       on: scaleio.sdc_mdm_connection_state
-    class: Utilization
-     type: Storage
-component: ScaleIO
-     calc: $connected
-    every: 10s
-     warn: $this != 1
-    delay: up 30s down 5m multiplier 1.5 max 1h
-  summary: ScaleIO SDC-MDM connection state
-     info: Data Client (SDC) to Metadata Manager (MDM) connection state (0: disconnected, 1: connected)
-       to: sysadmin
diff --git a/health/health.d/softnet.conf b/health/health.d/softnet.conf
deleted file mode 100644
index 8d7ba5661..000000000
--- a/health/health.d/softnet.conf
+++ /dev/null
@@ -1,57 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# check for common /proc/net/softnet_stat errors
-
-    alarm: 1min_netdev_backlog_exceeded
-       on: system.softnet_stat
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -1m unaligned absolute of dropped
-    units: packets
-    every: 10s
-     warn: $this > (($status >= $WARNING) ? (0) : (10))
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System netdev dropped packets
-     info: Average number of dropped packets in the last minute \
-           due to exceeded net.core.netdev_max_backlog
-       to: silent
-
-    alarm: 1min_netdev_budget_ran_outs
-       on: system.softnet_stat
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -1m unaligned absolute of squeezed
-    units: events
-    every: 10s
-     warn: $this > (($status >= $WARNING) ? (0) : (10))
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System netdev budget run outs
-     info: Average number of times ksoftirq ran out of sysctl net.core.netdev_budget or \
-           net.core.netdev_budget_usecs with work remaining over the last minute \
-           (this can be a cause for dropped packets)
-       to: silent
-
-    alarm: 10min_netisr_backlog_exceeded
-       on: system.softnet_stat
-    class: Errors
-     type: System
-component: Network
-       os: freebsd
-    hosts: *
-   lookup: average -1m unaligned absolute of qdrops
-    units: packets
-    every: 10s
-     warn: $this > (($status >= $WARNING) ? (0) : (10))
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System netisr drops
-     info: Average number of drops in the last minute \
-           due to exceeded sysctl net.route.netisr_maxqlen \
-           (this can be a cause for dropped packets)
-       to: silent
diff --git a/health/health.d/swap.conf b/health/health.d/swap.conf
deleted file mode 100644
index e39733996..000000000
--- a/health/health.d/swap.conf
+++ /dev/null
@@ -1,37 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-    alarm: 30min_ram_swapped_out
-       on: mem.swapio
-    class: Workload
-     type: System
-component: Memory
-       os: linux freebsd
-    hosts: *
-   lookup: sum -30m unaligned absolute of out
-           # we have to convert KB to MB by dividing $this (i.e. the result of the lookup) by 1024
-     calc: $this / 1024 * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free )
-    units: % of RAM
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (20) : (30))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System memory swapped out
-     info: Percentage of the system RAM swapped in the last 30 minutes
-       to: silent
-
-    alarm: used_swap
-       on: mem.swap
-    class: Utilization
-     type: System
-component: Memory
-       os: linux freebsd
-    hosts: *
-     calc: (($used + $free) > 0) ? ($used * 100 / ($used + $free)) : 0
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: up 30s down 15m multiplier 1.5 max 1h
-  summary: System swap memory utilization
-     info: Swap memory utilization
-       to: sysadmin
diff --git a/health/health.d/synchronization.conf b/health/health.d/synchronization.conf
deleted file mode 100644
index 6c947d90b..000000000
--- a/health/health.d/synchronization.conf
+++ /dev/null
@@ -1,13 +0,0 @@
-   alarm: sync_freq
-      on: mem.sync
-  lookup: sum -1m of sync
-   units: calls
-  plugin: ebpf.plugin
-   every: 1m
-    warn: $this > 6
-   delay: up 1m down 10m multiplier 1.5 max 1h
- summary: Sync system call frequency
-    info: Number of sync() system calls. \
-          Every call causes all pending modifications to filesystem metadata and \
-          cached file data to be written to the underlying filesystems.
-      to: silent
diff --git a/health/health.d/systemdunits.conf b/health/health.d/systemdunits.conf
deleted file mode 100644
index ad53a0e1c..000000000
--- a/health/health.d/systemdunits.conf
+++ /dev/null
@@ -1,161 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-## Service units
- template: systemd_service_unit_failed_state
-       on: systemd.service_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd service unit in the failed state
-       to: sysadmin
-
-## Socket units
- template: systemd_socket_unit_failed_state
-       on: systemd.socket_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd socket unit in the failed state
-       to: sysadmin
-
-## Target units
- template: systemd_target_unit_failed_state
-       on: systemd.target_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd target unit in the failed state
-       to: sysadmin
-
-## Path units
- template: systemd_path_unit_failed_state
-       on: systemd.path_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd path unit in the failed state
-       to: sysadmin
-
-## Device units
- template: systemd_device_unit_failed_state
-       on: systemd.device_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd device unit in the failed state
-       to: sysadmin
-
-## Mount units
- template: systemd_mount_unit_failed_state
-       on: systemd.mount_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd mount unit in the failed state
-       to: sysadmin
-
-## Automount units
- template: systemd_automount_unit_failed_state
-       on: systemd.automount_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd automount unit in the failed state
-       to: sysadmin
-
-## Swap units
- template: systemd_swap_unit_failed_state
-       on: systemd.swap_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd swap unit in the failed state
-       to: sysadmin
-
-## Scope units
- template: systemd_scope_unit_failed_state
-       on: systemd.scope_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd scope unit in the failed state
-       to: sysadmin
-
-## Slice units
- template: systemd_slice_unit_failed_state
-       on: systemd.slice_unit_state
-    class: Errors
-     type: Linux
-component: Systemd units
-   module: !* *
-     calc: $failed
-    units: state
-    every: 10s
-     warn: $this != nan AND $this == 1
-    delay: down 5m multiplier 1.5 max 1h
-  summary: systemd unit ${label:unit_name} state
-     info: systemd slice unit in the failed state
-       to: sysadmin
diff --git a/health/health.d/tcp_conn.conf b/health/health.d/tcp_conn.conf
deleted file mode 100644
index 2b2f97406..000000000
--- a/health/health.d/tcp_conn.conf
+++ /dev/null
@@ -1,23 +0,0 @@
-
-#
-# ${tcp_max_connections} may be nan or -1 if the system
-# supports a dynamic threshold for TCP connections.
-# In that case, the calculated value of this alarm is always zero.
-#
-
-    alarm: tcp_connections
-       on: ip.tcpsock
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-     calc: (${tcp_max_connections} > 0) ? ( ${connections} * 100 / ${tcp_max_connections} ) : 0
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING ) ? ( 60 ) : ( 80 ))
-     crit: $this > (($status == $CRITICAL) ? ( 80 ) : ( 90 ))
-    delay: up 0 down 5m multiplier 1.5 max 1h
-  summary: System TCP connections utilization
-     info: IPv4 TCP connections utilization
-       to: sysadmin
diff --git a/health/health.d/tcp_listen.conf b/health/health.d/tcp_listen.conf
deleted file mode 100644
index 9d1104a51..000000000
--- a/health/health.d/tcp_listen.conf
+++ /dev/null
@@ -1,100 +0,0 @@
-#
-# There are two queues involved when incoming TCP connections are handled
-# (both at the kernel):
-#
-# SYN queue
-# The SYN queue tracks TCP handshakes until connections are fully established.
-# It overflows when too many incoming TCP connection requests hang in the
-# half-open state and the server is not configured to fall back to SYN cookies.
-# Overflows are usually caused by SYN flood DoS attacks (i.e. someone sends
-# lots of SYN packets and never completes the handshakes).
-#
-# Accept queue
-# The accept queue holds fully established TCP connections waiting to be handled
-# by the listening application. It overflows when the server application fails
-# to accept new connections at the rate they are coming in.
-#
-#
-# -----------------------------------------------------------------------------
-# tcp accept queue (at the kernel)
-
-    alarm: 1m_tcp_accept_queue_overflows
-       on: ip.tcp_accept_queue
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -60s unaligned absolute of ListenOverflows
-    units: overflows
-    every: 10s
-     warn: $this > 1
-     crit: $this > (($status == $CRITICAL) ? (1) : (5))
-    delay: up 0 down 5m multiplier 1.5 max 1h
-  summary: System TCP accept queue overflows
-     info: Average number of overflows in the TCP accept queue over the last minute
-       to: silent
-
-# THIS IS TOO GENERIC
-# CHECK: https://github.com/netdata/netdata/issues/3234#issuecomment-423935842
-    alarm: 1m_tcp_accept_queue_drops
-       on: ip.tcp_accept_queue
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -60s unaligned absolute of ListenDrops
-    units: drops
-    every: 10s
-     warn: $this > 1
-     crit: $this > (($status == $CRITICAL) ? (1) : (5))
-    delay: up 0 down 5m multiplier 1.5 max 1h
-  summary: System TCP accept queue dropped packets
-     info: Average number of dropped packets in the TCP accept queue over the last minute
-       to: silent
-
-
-# -----------------------------------------------------------------------------
-# tcp SYN queue (at the kernel)
-
-# When the SYN queue is full, either TcpExtTCPReqQFullDoCookies or
-# TcpExtTCPReqQFullDrop is incremented, depending on whether SYN cookies are
-# enabled or not. In both cases this probably indicates a SYN flood attack,
-# so a notification is likely warranted.
-
-    alarm: 1m_tcp_syn_queue_drops
-       on: ip.tcp_syn_queue
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -60s unaligned absolute of TCPReqQFullDrop
-    units: drops
-    every: 10s
-     warn: $this > 1
-     crit: $this > (($status == $CRITICAL) ? (0) : (5))
-    delay: up 10 down 5m multiplier 1.5 max 1h
-  summary: System  TCP SYN queue drops
-     info: Average number of SYN requests was dropped due to the full TCP SYN queue over the last minute \
-           (SYN cookies were not enabled)
-       to: silent
-
-    alarm: 1m_tcp_syn_queue_cookies
-       on: ip.tcp_syn_queue
-    class: Workload
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -60s unaligned absolute of TCPReqQFullDoCookies
-    units: cookies
-    every: 10s
-     warn: $this > 1
-     crit: $this > (($status == $CRITICAL) ? (0) : (5))
-    delay: up 10 down 5m multiplier 1.5 max 1h
-  summary: System TCP SYN queue cookies
-     info: Average number of sent SYN cookies due to the full TCP SYN queue over the last minute
-       to: silent
-
diff --git a/health/health.d/tcp_mem.conf b/health/health.d/tcp_mem.conf
deleted file mode 100644
index 4e422ec1c..000000000
--- a/health/health.d/tcp_mem.conf
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-# check
-# http://blog.tsunanet.net/2011/03/out-of-socket-memory.html
-#
-# We raise a warning when TCP is under memory pressure
-# and a critical alarm when TCP memory reaches 90% of its upper limit
-#
-
-    alarm: tcp_memory
-       on: ipv4.sockstat_tcp_mem
-    class: Utilization
-     type: System
-component: Network
-       os: linux
-    hosts: *
-     calc: ${mem} * 100 / ${tcp_mem_high}
-    units: %
-    every: 10s
-     warn: ${mem} > (($status >= $WARNING  ) ? ( ${tcp_mem_pressure} * 0.8 ) : ( ${tcp_mem_pressure}   ))
-     crit: ${mem} > (($status == $CRITICAL ) ? ( ${tcp_mem_pressure}       ) : ( ${tcp_mem_high} * 0.9 ))
-    delay: up 0 down 5m multiplier 1.5 max 1h
-  summary: System TCP memory utilization
-     info: TCP memory utilization
-       to: silent
diff --git a/health/health.d/tcp_orphans.conf b/health/health.d/tcp_orphans.conf
deleted file mode 100644
index 8f665d50e..000000000
--- a/health/health.d/tcp_orphans.conf
+++ /dev/null
@@ -1,25 +0,0 @@
-
-#
-# check
-# http://blog.tsunanet.net/2011/03/out-of-socket-memory.html
-#
-# The kernel may penalize orphans by 2x or even 4x,
-# so we raise a warning at 25% and a critical alarm at 50%
-#
-
-    alarm: tcp_orphans
-       on: ipv4.sockstat_tcp_sockets
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-     calc: ${orphan} * 100 / ${tcp_max_orphans}
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING ) ? ( 20 ) : ( 25 ))
-     crit: $this > (($status == $CRITICAL) ? ( 25 ) : ( 50 ))
-    delay: up 0 down 5m multiplier 1.5 max 1h
-  summary: System TCP orphan sockets utilization
-     info: Orphan IPv4 TCP sockets utilization
-       to: silent
diff --git a/health/health.d/tcp_resets.conf b/health/health.d/tcp_resets.conf
deleted file mode 100644
index 7c39db2db..000000000
--- a/health/health.d/tcp_resets.conf
+++ /dev/null
@@ -1,71 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# -----------------------------------------------------------------------------
-# tcp resets this host sends
-
-    alarm: 1m_ip_tcp_resets_sent
-       on: ip.tcphandshake
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -1m at -10s unaligned absolute of OutRsts
-    units: tcp resets/s
-    every: 10s
-     info: average number of sent TCP RESETS over the last minute
-
-    alarm: 10s_ip_tcp_resets_sent
-       on: ip.tcphandshake
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -10s unaligned absolute of OutRsts
-    units: tcp resets/s
-    every: 10s
-     warn: $netdata.uptime.uptime > (1 * 60) AND $this > ((($1m_ip_tcp_resets_sent < 5)?(5):($1m_ip_tcp_resets_sent)) * (($status >= $WARNING)  ? (1) : (10)))
-    delay: up 20s down 60m multiplier 1.2 max 2h
-  options: no-clear-notification
-  summary: System TCP outbound resets
-     info: Average number of sent TCP RESETS over the last 10 seconds. \
-           This can indicate a port scan, \
-           or that a service running on this host has crashed. \
-           Netdata will not send a clear notification for this alarm.
-       to: silent
-
-# -----------------------------------------------------------------------------
-# tcp resets this host receives
-
-    alarm: 1m_ip_tcp_resets_received
-       on: ip.tcphandshake
-    class: Errors
-     type: System
-component: Network
-       os: linux freebsd
-    hosts: *
-   lookup: average -1m at -10s unaligned absolute of AttemptFails
-    units: tcp resets/s
-    every: 10s
-     info: average number of received TCP RESETS over the last minute
-
-    alarm: 10s_ip_tcp_resets_received
-       on: ip.tcphandshake
-    class: Errors
-     type: System
-component: Network
-       os: linux freebsd
-    hosts: *
-   lookup: average -10s unaligned absolute of AttemptFails
-    units: tcp resets/s
-    every: 10s
-     warn: $netdata.uptime.uptime > (1 * 60) AND $this > ((($1m_ip_tcp_resets_received < 5)?(5):($1m_ip_tcp_resets_received)) * (($status >= $WARNING)  ? (1) : (10)))
-    delay: up 20s down 60m multiplier 1.2 max 2h
-  options: no-clear-notification
-  summary: System TCP inbound resets
-     info: average number of received TCP RESETS over the last 10 seconds. \
-           This can be an indication that a service this host needs has crashed. \
-           Netdata will not send a clear notification for this alarm.
-       to: silent
diff --git a/health/health.d/timex.conf b/health/health.d/timex.conf
deleted file mode 100644
index 65c9628b5..000000000
--- a/health/health.d/timex.conf
+++ /dev/null
@@ -1,18 +0,0 @@
-
-# It can take several minutes before ntpd selects a server to synchronize with;
-# try checking after 17 minutes (1024 seconds).
-
-    alarm: system_clock_sync_state
-       on: system.clock_sync_state
-       os: linux
-    class: Errors
-     type: System
-component: Clock
-     calc: $state
-    units: synchronization state
-    every: 10s
-     warn: $system.uptime.uptime > 17 * 60 AND $this == 0
-    delay: down 5m
-  summary: System clock sync state
-     info: When set to 0, the system kernel believes the system clock is not properly synchronized to a reliable server
-       to: silent
diff --git a/health/health.d/udp_errors.conf b/health/health.d/udp_errors.conf
deleted file mode 100644
index dc0948403..000000000
--- a/health/health.d/udp_errors.conf
+++ /dev/null
@@ -1,40 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# -----------------------------------------------------------------------------
-# UDP receive buffer errors
-
-    alarm: 1m_ipv4_udp_receive_buffer_errors
-       on: ipv4.udperrors
-    class: Errors
-     type: System
-component: Network
-       os: linux freebsd
-    hosts: *
-   lookup: average -1m unaligned absolute of RcvbufErrors
-    units: errors
-    every: 10s
-     warn: $this > (($status >= $WARNING) ? (0) : (10))
-  summary: System UDP receive buffer errors
-     info: Average number of UDP receive buffer errors over the last minute
-    delay: up 1m down 60m multiplier 1.2 max 2h
-       to: silent
-
-# -----------------------------------------------------------------------------
-# UDP send buffer errors
-
-    alarm: 1m_ipv4_udp_send_buffer_errors
-       on: ipv4.udperrors
-    class: Errors
-     type: System
-component: Network
-       os: linux
-    hosts: *
-   lookup: average -1m unaligned absolute of SndbufErrors
-    units: errors
-    every: 10s
-     warn: $this > (($status >= $WARNING) ? (0) : (10))
-  summary: System UDP send buffer errors
-     info: Average number of UDP send buffer errors over the last minute
-    delay: up 1m down 60m multiplier 1.2 max 2h
-       to: silent
diff --git a/health/health.d/unbound.conf b/health/health.d/unbound.conf
deleted file mode 100644
index 3c898f1d5..000000000
--- a/health/health.d/unbound.conf
+++ /dev/null
@@ -1,30 +0,0 @@
-
-# make sure there are no overwritten/dropped queries in the request list
-
- template: unbound_request_list_overwritten
-       on: unbound.request_list_jostle_list
-    class: Errors
-     type: DNS
-component: Unbound
-   lookup: average -60s unaligned absolute match-names of overwritten
-    units: queries
-    every: 10s
-     warn: $this > 5
-    delay: up 10 down 5m multiplier 1.5 max 1h
-  summary: Unbound overwritten queries
-     info: Number of overwritten queries in the request-list
-       to: sysadmin
-
- template: unbound_request_list_dropped
-       on: unbound.request_list_jostle_list
-    class: Errors
-     type: DNS
-component: Unbound
-   lookup: average -60s unaligned absolute match-names of dropped
-    units: queries
-    every: 10s
-     warn: $this > 0
-    delay: up 10 down 5m multiplier 1.5 max 1h
-  summary: Unbound dropped queries
-     info: Number of dropped queries in the request-list
-       to: sysadmin
diff --git a/health/health.d/upsd.conf b/health/health.d/upsd.conf
deleted file mode 100644
index 703a64881..000000000
--- a/health/health.d/upsd.conf
+++ /dev/null
@@ -1,50 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: upsd_10min_ups_load
-       on: upsd.ups_load
-    class: Utilization
-     type: Power Supply
-component: UPS
-       os: *
-    hosts: *
-   lookup: average -10m unaligned of load
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (70) : (80))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 10m multiplier 1.5 max 1h
-  summary: UPS ${label:ups_name} load
-     info: UPS ${label:ups_name} average load over the last 10 minutes
-       to: sitemgr
-
- template: upsd_ups_battery_charge
-       on: upsd.ups_battery_charge	
-    class: Errors
-     type: Power Supply
-component: UPS
-       os: *
-    hosts: *
-   lookup: average -60s unaligned of charge
-    units: %
-    every: 60s
-     warn: $this < 75
-     crit: $this < 40
-    delay: down 10m multiplier 1.5 max 1h
-  summary: UPS ${label:ups_name} battery charge
-     info: UPS ${label:ups_name} average battery charge over the last minute
-       to: sitemgr
-
- template: upsd_ups_last_collected_secs
-       on: upsd.ups_load
-    class: Latency
-     type: Power Supply
-component: UPS device
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-  summary: UPS ${label:ups_name} last collected
-     info: UPS ${label:ups_name} number of seconds since the last successful data collection
-       to: sitemgr
diff --git a/health/health.d/vcsa.conf b/health/health.d/vcsa.conf
deleted file mode 100644
index 3e20bfd1e..000000000
--- a/health/health.d/vcsa.conf
+++ /dev/null
@@ -1,230 +0,0 @@
-
-# Overall system health:
-#  - 0: all components are healthy.
-#  - 1: one or more components might become overloaded soon.
-#  - 2: one or more components in the appliance might be degraded.
-#  - 3: one or more components might be in an unusable status and the appliance might become unresponsive soon.
-#  - 4: no health data is available.
-
- template: vcsa_system_health_warn
-       on: vcsa.system_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA system status
-     info: VCSA overall system status is orange. One or more components are degraded.
-       to: sysadmin
-
- template: vcsa_system_health_crit
-       on: vcsa.system_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $red
-    units: status
-    every: 10s
-     crit: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA system status
-     info: VCSA overall system status is red. One or more components are unavailable or will stop functioning soon.
-       to: sysadmin
-
-# Components health:
-#  - 0: healthy.
-#  - 1: healthy, but may have some problems.
-#  - 2: degraded, and may have serious problems.
-#  - 3: unavailable, or will stop functioning soon.
-#  - 4: no health data is available.
-
- template: vcsa_applmgmt_health_warn
-       on: vcsa.applmgmt_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA ApplMgmt service status
-     info: VCSA ApplMgmt component status is orange. It is degraded, and may have serious problems.
-       to: silent
-
- template: vcsa_applmgmt_health_crit
-       on: vcsa.applmgmt_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $red
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA ApplMgmt service status
-     info: VCSA ApplMgmt component status is red. It is unavailable, or will stop functioning soon.
-       to: sysadmin
- 
- template: vcsa_load_health_warn
-       on: vcsa.load_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Load status
-     info: VCSA Load component status is orange. It is degraded, and may have serious problems.
-       to: silent
-
- template: vcsa_load_health_crit
-       on: vcsa.load_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $red
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Load status
-     info: VCSA Load component status is red. It is unavailable, or will stop functioning soon.
-       to: sysadmin
-
- template: vcsa_mem_health_warn
-       on: vcsa.mem_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Memory status
-     info: VCSA Memory component status is orange. It is degraded, and may have serious problems.
-       to: silent
-
- template: vcsa_mem_health_crit
-       on: vcsa.mem_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $red
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Memory status
-     info: VCSA Memory component status is red. It is unavailable, or will stop functioning soon.
-       to: sysadmin
-
- template: vcsa_swap_health_warn
-       on: vcsa.swap_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Swap status
-     info: VCSA Swap component status is orange. It is degraded, and may have serious problems.
-       to: silent
-
- template: vcsa_swap_health_crit
-       on: vcsa.swap_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $red
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Swap status
-     info: VCSA Swap component status is red. It is unavailable, or will stop functioning soon.
-       to: sysadmin
-
- template: vcsa_database_storage_health_warn
-       on: vcsa.database_storage_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Database status
-     info: VCSA Database Storage component status is orange. It is degraded, and may have serious problems.
-       to: silent
-
- template: vcsa_database_storage_health_crit
-       on: vcsa.database_storage_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $red
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Database status
-     info: VCSA Database Storage component status is red. It is unavailable, or will stop functioning soon.
-       to: sysadmin
-
- template: vcsa_storage_health_warn
-       on: vcsa.storage_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Storage status
-     info: VCSA Storage component status is orange. It is degraded, and may have serious problems.
-       to: silent
-
- template: vcsa_storage_health_crit
-       on: vcsa.storage_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $red
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA Storage status
-     info: VCSA Storage component status is red. It is unavailable, or will stop functioning soon.
-       to: sysadmin
-
-# Software updates health:
-#  - 0: no updates available.
-#  - 2: non-security updates are available.
-#  - 3: security updates are available.
-#  - 4: an error retrieving information on software updates.
-
- template: vcsa_software_packages_health_warn
-       on: vcsa.software_packages_health_status
-    class: Errors
-     type: Virtual Machine
-component: VMware vCenter
-     calc: $orange
-    units: status
-    every: 10s
-     warn: $this == 1
-    delay: down 1m multiplier 1.5 max 1h
-  summary: VCSA software status
-     info: VCSA software packages security updates are available.
-       to: silent
diff --git a/health/health.d/vernemq.conf b/health/health.d/vernemq.conf
deleted file mode 100644
index 6ea9f99dc..000000000
--- a/health/health.d/vernemq.conf
+++ /dev/null
@@ -1,391 +0,0 @@
-
-# Socket errors
-
- template: vernemq_socket_errors
-       on: vernemq.socket_errors
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: sum -1m unaligned absolute of socket_error
-    units: errors
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ socket errors
-     info: Number of socket errors in the last minute
-       to: sysadmin
-
-# Queues dropped/expired/unhandled PUBLISH messages
-
- template: vernemq_queue_message_drop
-       on: vernemq.queue_undelivered_messages
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute of queue_message_drop
-    units: dropped messages
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ dropped messages
-     info: Number of dropped messages due to full queues in the last minute
-       to: sysadmin
-
- template: vernemq_queue_message_expired
-       on: vernemq.queue_undelivered_messages
-    class: Latency
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute of queue_message_expired
-    units: expired messages
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ expired messages
-     info: number of messages which expired before delivery in the last minute
-       to: sysadmin
-
- template: vernemq_queue_message_unhandled
-       on: vernemq.queue_undelivered_messages
-    class: Latency
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute of queue_message_unhandled
-    units: unhandled messages
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unhandled messages
-     info: Number of unhandled messages (connections with clean session=true) in the last minute
-       to: sysadmin
-
-# Erlang VM
-
- template: vernemq_average_scheduler_utilization
-       on: vernemq.average_scheduler_utilization
-    class: Utilization
-     type: Messaging
-component: VerneMQ
-   lookup: average -10m unaligned
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: VerneMQ scheduler utilization
-     info: Average scheduler utilization over the last 10 minutes
-       to: sysadmin
-
-# Cluster communication and netsplits
-
- template: vernemq_cluster_dropped
-       on: vernemq.cluster_dropped
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: sum -1m unaligned
-    units: KiB
-    every: 1m
-     warn: $this > 0
-    delay: up 5m down 5m multiplier 1.5 max 1h
-  summary: VerneMQ dropped traffic
-     info: Amount of traffic dropped during communication with the cluster nodes in the last minute
-       to: sysadmin
-
- template: vernemq_netsplits
-       on: vernemq.netsplits
-    class: Workload
-     type: Messaging
-component: VerneMQ
-   lookup: sum -1m unaligned absolute of netsplit_detected
-    units: netsplits
-    every: 10s
-     warn: $this > 0
-    delay: down 5m multiplier 1.5 max 2h
-  summary: VerneMQ netsplits
-     info: Number of detected netsplits (split brain situation) in the last minute
-       to: sysadmin
-
-# Unsuccessful CONNACK
-
- template: vernemq_mqtt_connack_sent_reason_unsuccessful
-       on: vernemq.mqtt_connack_sent_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful CONNACK
-     info: Number of sent unsuccessful v3/v5 CONNACK packets in the last minute
-       to: sysadmin
-
-# Not normal DISCONNECT
-
- template: vernemq_mqtt_disconnect_received_reason_not_normal
-       on: vernemq.mqtt_disconnect_received_reason
-    class: Workload
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ received not normal DISCONNECT
-     info: Number of received not normal v5 DISCONNECT packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_disconnect_sent_reason_not_normal
-       on: vernemq.mqtt_disconnect_sent_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ sent not normal DISCONNECT
-     info: Number of sent not normal v5 DISCONNECT packets in the last minute
-       to: sysadmin
-
-# SUBSCRIBE errors and unauthorized attempts
-
- template: vernemq_mqtt_subscribe_error
-       on: vernemq.mqtt_subscribe_error
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: failed ops
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ failed SUBSCRIBE
-     info: Number of failed v3/v5 SUBSCRIBE operations in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_subscribe_auth_error
-       on: vernemq.mqtt_subscribe_auth_error
-    class: Workload
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: attempts
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unauthorized SUBSCRIBE
-     info: number of unauthorized v3/v5 SUBSCRIBE attempts in the last minute
-       to: sysadmin
-
-# UNSUBSCRIBE errors
-
- template: vernemq_mqtt_unsubscribe_error
-       on: vernemq.mqtt_unsubscribe_error
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: failed ops
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ failed UNSUBSCRIBE
-     info: Number of failed v3/v5 UNSUBSCRIBE operations in the last minute
-       to: sysadmin
-
-# PUBLISH errors and unauthorized attempts
-
- template: vernemq_mqtt_publish_errors
-       on: vernemq.mqtt_publish_errors
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: failed ops
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ failed PUBLISH
-     info: Number of failed v3/v5 PUBLISH operations in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_publish_auth_errors
-       on: vernemq.mqtt_publish_auth_errors
-    class: Workload
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: attempts
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unauthorized PUBLISH
-     info: Number of unauthorized v3/v5 PUBLISH attempts in the last minute
-       to: sysadmin
-
-# Unsuccessful and unexpected PUBACK
-
- template: vernemq_mqtt_puback_received_reason_unsuccessful
-       on: vernemq.mqtt_puback_received_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful received PUBACK
-     info: Number of received unsuccessful v5 PUBACK packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_puback_sent_reason_unsuccessful
-       on: vernemq.mqtt_puback_sent_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful sent PUBACK
-     info: Number of sent unsuccessful v5 PUBACK packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_puback_unexpected
-       on: vernemq.mqtt_puback_invalid_error
-    class: Workload
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: messages
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unexpected received PUBACK
-     info: Number of received unexpected v3/v5 PUBACK packets in the last minute
-       to: sysadmin
-
-# Unsuccessful and unexpected PUBREC
-
- template: vernemq_mqtt_pubrec_received_reason_unsuccessful
-       on: vernemq.mqtt_pubrec_received_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful received PUBREC
-     info: Number of received unsuccessful v5 PUBREC packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
-       on: vernemq.mqtt_pubrec_sent_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful sent PUBREC
-     info: Number of sent unsuccessful v5 PUBREC packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_pubrec_invalid_error
-       on: vernemq.mqtt_pubrec_invalid_error
-    class: Workload
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: messages
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ invalid received PUBREC
-     info: Number of received invalid v3 PUBREC packets in the last minute
-       to: sysadmin
-
-# Unsuccessful PUBREL
-
- template: vernemq_mqtt_pubrel_received_reason_unsuccessful
-       on: vernemq.mqtt_pubrel_received_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful received PUBREL
-     info: Number of received unsuccessful v5 PUBREL packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
-       on: vernemq.mqtt_pubrel_sent_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful sent PUBREL
-     info: number of sent unsuccessful v5 PUBREL packets in the last minute
-       to: sysadmin
-
-# Unsuccessful and unexpected PUBCOMP
-
- template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
-       on: vernemq.mqtt_pubcomp_received_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful received PUBCOMP
-     info: Number of received unsuccessful v5 PUBCOMP packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
-       on: vernemq.mqtt_pubcomp_sent_reason
-    class: Errors
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute match-names of !success,*
-    units: packets
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unsuccessful sent PUBCOMP
-     info: number of sent unsuccessful v5 PUBCOMP packets in the last minute
-       to: sysadmin
-
- template: vernemq_mqtt_pubcomp_unexpected
-       on: vernemq.mqtt_pubcomp_invalid_error
-    class: Workload
-     type: Messaging
-component: VerneMQ
-   lookup: average -1m unaligned absolute
-    units: messages
-    every: 1m
-     warn: $this > (($status >= $WARNING) ? (0) : (5))
-    delay: up 2m down 5m multiplier 1.5 max 2h
-  summary: VerneMQ unexpected received PUBCOMP
-     info: number of received unexpected v3/v5 PUBCOMP packets in the last minute
-       to: sysadmin
diff --git a/health/health.d/vsphere.conf b/health/health.d/vsphere.conf
deleted file mode 100644
index b8ad9aee4..000000000
--- a/health/health.d/vsphere.conf
+++ /dev/null
@@ -1,70 +0,0 @@
-
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-# -----------------------------------------------Virtual Machine--------------------------------------------------------
-
- template: vsphere_vm_cpu_utilization
-       on: vsphere.vm_cpu_utilization
-    class: Utilization
-     type: Virtual Machine
-component: CPU
-    hosts: *
-   lookup: average -10m unaligned match-names of used
-    units: %
-    every: 20s
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: vSphere CPU utilization for VM ${label:vm}
-     info: CPU utilization VM ${label:vm} host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
-       to: silent
-
- template: vsphere_vm_mem_utilization
-       on: vsphere.vm_mem_utilization
-    class: Utilization
-     type: Virtual Machine
-component: Memory
-    hosts: *
-     calc: $used
-    units: %
-    every: 20s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: vSphere memory utilization for VM ${label:vm}
-     info: Memory utilization VM ${label:vm} host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
-       to: silent
-
-# -----------------------------------------------ESXI host--------------------------------------------------------------
-
- template: vsphere_host_cpu_utilization
-       on: vsphere.host_cpu_utilization
-    class: Utilization
-     type: Virtual Machine
-component: CPU
-    hosts: *
-   lookup: average -10m unaligned match-names of used
-    units: %
-    every: 20s
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: vSphere ESXi CPU utilization for host ${label:host}
-     info: CPU utilization ESXi host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
-       to: sysadmin
-
- template: vsphere_host_mem_utilization
-       on: vsphere.host_mem_utilization
-    class: Utilization
-     type: Virtual Machine
-component: Memory
-    hosts: *
-     calc: $used
-    units: %
-    every: 20s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: vSphere ESXi Ram utilization for host ${label:host}
-     info: Memory utilization ESXi host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
-       to: sysadmin
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf
deleted file mode 100644
index 78f1cc7f5..000000000
--- a/health/health.d/web_log.conf
+++ /dev/null
@@ -1,205 +0,0 @@
-
-# unmatched lines
-
-# the following alarms trigger only when there are enough data.
-# we assume there are enough data when:
-#
-#  $1m_total_requests > 120
-#
-# i.e. when there are at least 120 requests during the last minute
-
- template: web_log_1m_total_requests
-       on: web_log.requests
-    class: Workload
-     type: Web Server
-component: Web log
-   lookup: sum -1m unaligned
-     calc: ($this == 0)?(1):($this)
-    units: requests
-    every: 10s
-     info: number of HTTP requests in the last minute
-
- template: web_log_1m_unmatched
-       on: web_log.excluded_requests
-    class: Errors
-     type: Web Server
-component: Web log
-   lookup: sum -1m unaligned of unmatched
-     calc: $this * 100 / $web_log_1m_total_requests
-    units: %
-    every: 10s
-     warn: ($web_log_1m_total_requests > 120) ? ($this > 1) : ( 0 )
-    delay: up 1m down 5m multiplier 1.5 max 1h
-  summary: Web log unparsed
-     info: Percentage of unparsed log lines over the last minute
-       to: webmaster
-
-# -----------------------------------------------------------------------------
-# high level response code alarms
-
-# the following alarms trigger only when there are enough data.
-# we assume there are enough data when:
-#
-#  $1m_requests > 120
-#
-# i.e. when there are at least 120 requests during the last minute
-
- template: web_log_1m_requests
-       on: web_log.type_requests
-    class: Workload
-     type: Web Server
-component: Web log
-   lookup: sum -1m unaligned
-     calc: ($this == 0)?(1):($this)
-    units: requests
-    every: 10s
-     info: number of HTTP requests in the last minute
-
- template: web_log_1m_successful
-       on: web_log.type_requests
-    class: Workload
-     type: Web Server
-component: Web log
-   lookup: sum -1m unaligned of success
-     calc: $this * 100 / $web_log_1m_requests
-    units: %
-    every: 10s
-     warn: ($web_log_1m_requests > 120) ? ($this < (($status >= $WARNING ) ? ( 95 ) : ( 85 )) ) : ( 0 )
-     crit: ($web_log_1m_requests > 120) ? ($this < (($status == $CRITICAL) ? ( 85 ) : ( 75 )) ) : ( 0 )
-    delay: up 2m down 15m multiplier 1.5 max 1h
-  summary: Web log successful
-     info: Ratio of successful HTTP requests over the last minute (1xx, 2xx, 304, 401)
-       to: webmaster
-
- template: web_log_1m_redirects
-       on: web_log.type_requests
-    class: Workload
-     type: Web Server
-component: Web log
-   lookup: sum -1m unaligned of redirect
-     calc: $this * 100 / $web_log_1m_requests
-    units: %
-    every: 10s
-     warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING ) ? (  1 ) : ( 20 )) ) : ( 0 )
-    delay: up 2m down 15m multiplier 1.5 max 1h
-  summary: Web log redirects
-     info: Ratio of redirection HTTP requests over the last minute (3xx except 304)
-       to: webmaster
-
- template: web_log_1m_bad_requests
-       on: web_log.type_requests
-    class: Errors
-     type: Web Server
-component: Web log
-   lookup: sum -1m unaligned of bad
-     calc: $this * 100 / $web_log_1m_requests
-    units: %
-    every: 10s
-     warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING)  ? ( 10 ) : ( 30 )) ) : ( 0 )
-    delay: up 2m down 15m multiplier 1.5 max 1h
-  summary: Web log bad requests
-     info: Ratio of client error HTTP requests over the last minute (4xx except 401)
-       to: webmaster
-
- template: web_log_1m_internal_errors
-       on: web_log.type_requests
-    class: Errors
-     type: Web Server
-component: Web log
-   lookup: sum -1m unaligned of error
-     calc: $this * 100 / $web_log_1m_requests
-    units: %
-    every: 10s
-     warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING)  ? ( 1 ) : ( 2 )) ) : ( 0 )
-     crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 )
-    delay: up 2m down 15m multiplier 1.5 max 1h
-  summary: Web log server errors
-     info: Ratio of server error HTTP requests over the last minute (5xx)
-       to: webmaster
-
-# -----------------------------------------------------------------------------
-# web slow
-
-# the following alarms trigger only when there are enough data.
-# we assume there are enough data when:
-#
-#  $1m_requests > 120
-#
-# i.e. when there are at least 120 requests during the last minute
-
- template: web_log_10m_response_time
-       on: web_log.request_processing_time
-    class: Latency
-     type: System
-component: Web log
-   lookup: average -10m unaligned of avg
-    units: ms
-    every: 30s
-     info: average HTTP response time over the last 10 minutes
-
- template: web_log_web_slow
-       on: web_log.request_processing_time
-    class: Latency
-     type: Web Server
-component: Web log
-   lookup: average -1m unaligned of avg
-    units: ms
-    every: 10s
-    green: 500
-      red: 1000
-     warn: ($web_log_1m_requests > 120) ? ($this > $green && $this > ($web_log_10m_response_time * 2) ) : ( 0 )
-     crit: ($web_log_1m_requests > 120) ? ($this > $red   && $this > ($web_log_10m_response_time * 4) ) : ( 0 )
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Web log processing time
-     info: Average HTTP response time over the last 1 minute
-  options: no-clear-notification
-       to: webmaster
-
-# -----------------------------------------------------------------------------
-# web too many or too few requests
-
-# the following alarms trigger only when there are enough data.
-# we assume there are enough data when:
-#
-#  $5m_successful_old > 120
-#
-# i.e. when there were at least 120 requests during the 5 minutes starting
-#      at -10m and ending at -5m
-
- template: web_log_5m_successful_old
-       on: web_log.type_requests
-    class: Workload
-     type: Web Server
-component: Web log
-   lookup: average -5m at -5m unaligned of success
-    units: requests/s
-    every: 30s
-     info: average number of successful HTTP requests for the 5 minutes starting 10 minutes ago
-
- template: web_log_5m_successful
-       on: web_log.type_requests
-    class: Workload
-     type: Web Server
-component: Web log
-   lookup: average -5m unaligned of success
-    units: requests/s
-    every: 30s
-     info: average number of successful HTTP requests over the last 5 minutes
-
- template: web_log_5m_requests_ratio
-       on: web_log.type_requests
-    class: Workload
-     type: Web Server
-component: Web log
-     calc: ($web_log_5m_successful_old > 0)?($web_log_5m_successful * 100 / $web_log_5m_successful_old):(100)
-    units: %
-    every: 30s
-     warn: ($web_log_5m_successful_old > 120) ? ($this > 200 OR $this < 50) : (0)
-     crit: ($web_log_5m_successful_old > 120) ? ($this > 400 OR $this < 25) : (0)
-    delay: down 15m multiplier 1.5 max 1h
-  options: no-clear-notification
-  summary: Web log 5 minutes requests ratio
-     info: Ratio of successful HTTP requests over over the last 5 minutes, \
-           compared with the previous 5 minutes \
-           (clear notification for this alarm will not be sent)
-       to: webmaster
diff --git a/health/health.d/whoisquery.conf b/health/health.d/whoisquery.conf
deleted file mode 100644
index 0a328b592..000000000
--- a/health/health.d/whoisquery.conf
+++ /dev/null
@@ -1,14 +0,0 @@
-
- template: whoisquery_days_until_expiration
-       on: whoisquery.time_until_expiration
-    class: Utilization
-     type: Other
-component: WHOIS
-     calc: $expiry
-    units: seconds
-    every: 60s
-     warn: $this < $days_until_expiration_warning*24*60*60
-     crit: $this < $days_until_expiration_critical*24*60*60
-  summary: Whois expiration time for domain ${label:domain}
-     info: Time until the domain name registration for ${label:domain} expires
-       to: webmaster
diff --git a/health/health.d/windows.conf b/health/health.d/windows.conf
deleted file mode 100644
index 706fcbf22..000000000
--- a/health/health.d/windows.conf
+++ /dev/null
@@ -1,126 +0,0 @@
-
-## CPU
-
- template: windows_10min_cpu_usage
-       on: windows.cpu_utilization_total
-    class: Utilization
-     type: Windows
-component: CPU
-       os: *
-    hosts: *
-   lookup: average -10m unaligned match-names of dpc,user,privileged,interrupt
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: CPU utilization
-     info: Average CPU utilization over the last 10 minutes
-       to: silent
-
-
-## Memory
-
- template: windows_ram_in_use
-       on: windows.memory_utilization
-    class: Utilization
-     type: Windows
-component: Memory
-       os: *
-    hosts: *
-     calc: ($used) * 100 / ($used + $available)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Ram utilization
-     info: Memory utilization
-       to: sysadmin
-
-
-## Network
-
- template: windows_inbound_packets_discarded
-       on: windows.net_nic_discarded
-    class: Errors
-     type: Windows
-component: Network
-       os: *
-    hosts: *
-   lookup: sum -10m unaligned absolute match-names of inbound
-    units: packets
-    every: 1m
-     warn: $this >= 5
-    delay: down 1h multiplier 1.5 max 2h
-  summary: Inbound network packets discarded
-     info: Number of inbound discarded packets for the network interface in the last 10 minutes
-       to: silent
-
- template: windows_outbound_packets_discarded
-       on: windows.net_nic_discarded
-    class: Errors
-     type: Windows
-component: Network
-       os: *
-    hosts: *
-   lookup: sum -10m unaligned absolute match-names of outbound
-    units: packets
-    every: 1m
-     warn: $this >= 5
-    delay: down 1h multiplier 1.5 max 2h
-  summary: Outbound network packets discarded
-     info: Number of outbound discarded packets for the network interface in the last 10 minutes
-       to: silent
-
- template: windows_inbound_packets_errors
-       on: windows.net_nic_errors
-    class: Errors
-     type: Windows
-component: Network
-       os: *
-    hosts: *
-   lookup: sum -10m unaligned absolute match-names of inbound
-    units: packets
-    every: 1m
-     warn: $this >= 5
-    delay: down 1h multiplier 1.5 max 2h
-  summary: Inbound network errors
-     info: Number of inbound errors for the network interface in the last 10 minutes
-       to: silent
-
- template: windows_outbound_packets_errors
-       on: windows.net_nic_errors
-    class: Errors
-     type: Windows
-component: Network
-       os: *
-    hosts: *
-   lookup: sum -10m unaligned absolute match-names of outbound
-    units: packets
-    every: 1m
-     warn: $this >= 5
-    delay: down 1h multiplier 1.5 max 2h
-  summary: Outbound network errors
-     info: Number of outbound errors for the network interface in the last 10 minutes
-       to: silent
-
-
-## Disk
-
- template: windows_disk_in_use
-       on: windows.logical_disk_space_usage
-    class: Utilization
-     type: Windows
-component: Disk
-       os: *
-    hosts: *
-     calc: ($used) * 100 / ($used + $free)
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Disk space usage
-     info: Disk space utilization
-       to: sysadmin
diff --git a/health/health.d/x509check.conf b/health/health.d/x509check.conf
deleted file mode 100644
index d05f3ef0f..000000000
--- a/health/health.d/x509check.conf
+++ /dev/null
@@ -1,26 +0,0 @@
-
- template: x509check_days_until_expiration
-       on: x509check.time_until_expiration
-    class: Latency
-     type: Certificates
-component: x509 certificates
-     calc: $expiry
-    units: seconds
-    every: 60s
-     warn: $this < $days_until_expiration_warning*24*60*60
-     crit: $this < $days_until_expiration_critical*24*60*60
-  summary: x509 certificate expiration for ${label:source}
-     info: Time until x509 certificate expires for ${label:source}
-       to: webmaster
-      
- template: x509check_revocation_status
-       on: x509check.revocation_status
-    class: Errors
-     type: Certificates
-component: x509 certificates
-     calc: $revoked
-    every: 60s
-     crit: $this != nan AND $this != 0
-  summary: x509 certificate revocation status for ${label:source}
-     info: x509 certificate revocation status (0: revoked, 1: valid) for ${label:source}
-       to: webmaster
diff --git a/health/health.d/zfs.conf b/health/health.d/zfs.conf
deleted file mode 100644
index d2a561000..000000000
--- a/health/health.d/zfs.conf
+++ /dev/null
@@ -1,44 +0,0 @@
-
-    alarm: zfs_memory_throttle
-       on: zfs.memory_ops
-    class: Utilization
-     type: System
-component: File system
-   lookup: sum -10m unaligned absolute of throttled
-    units: events
-    every: 1m
-     warn: $this > 0
-    delay: down 1h multiplier 1.5 max 2h
-  summary: ZFS ARC growth throttling
-     info: number of times ZFS had to limit the ARC growth in the last 10 minutes
-       to: silent
-
-# ZFS pool state
-
- template: zfs_pool_state_warn
-       on: zfspool.state
-    class: Errors
-     type: System
-component: File system
-     calc: $degraded
-    units: boolean
-    every: 10s
-     warn: $this > 0
-    delay: down 1m multiplier 1.5 max 1h
-  summary: ZFS pool ${label:pool} state
-     info: ZFS pool ${label:pool} state is degraded
-       to: sysadmin
-
- template: zfs_pool_state_crit
-       on: zfspool.state
-    class: Errors
-     type: System
-component: File system
-     calc: $faulted + $unavail
-    units: boolean
-    every: 10s
-     crit: $this > 0
-    delay: down 1m multiplier 1.5 max 1h
-  summary: Critical ZFS pool ${label:pool} state
-     info: ZFS pool ${label:pool} state is faulted or unavail
-       to: sysadmin
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-07-24 09:54:23 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-07-24 09:54:44 +0000
commit	836b47cb7e99a977c5a23b059ca1d0b5065d310e (patch)
tree	1604da8f482d02effa033c94a84be42bc0c848c3 /health/health.d
parent	Releasing debian version 1.44.3-2. (diff)
download	netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.tar.xz netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.zip