Merging upstream version 1.32.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-12-01 06:15:11 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-12-01 06:15:11 +0000
commit: 483926a283e118590da3f9ecfa75a8a4d62143ce (patch)
tree: cb77052778df9a128a8cd3ff5bf7645322a13bc5 /health/health.d
parent: Releasing debian version 1.31.0-4. (diff)
download: netdata-483926a283e118590da3f9ecfa75a8a4d62143ce.tar.xz
netdata-483926a283e118590da3f9ecfa75a8a4d62143ce.zip
84 files changed, 616 insertions, 1180 deletions
diff --git a/health/health.d/adaptec_raid.conf b/health/health.d/adaptec_raid.conf
index b067e1840..1d823addd 100644
--- a/health/health.d/adaptec_raid.conf
+++ b/health/health.d/adaptec_raid.conf
@@ -3,9 +3,9 @@
 
  template: adaptec_raid_ld_status
        on: adaptec_raid.ld_status
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: max -10s foreach *
     units: bool
     every: 10s
@@ -18,9 +18,9 @@ component: RAID
 
  template: adaptec_raid_pd_state
        on: adaptec_raid.pd_state
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: max -10s foreach *
     units: bool
     every: 10s
diff --git a/health/health.d/am2320.conf b/health/health.d/am2320.conf
deleted file mode 100644
index 4bac98fbb..000000000
--- a/health/health.d/am2320.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-# make sure am2320 is sending stats
-
- template: am2320_last_collected_secs
-       on: am2320.temperature
-    class: Other
-component: Sensors
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
diff --git a/health/health.d/anomalies.conf b/health/health.d/anomalies.conf
index f27e39fc1..269ae544b 100644
--- a/health/health.d/anomalies.conf
+++ b/health/health.d/anomalies.conf
@@ -2,9 +2,9 @@
 
  template: anomalies_anomaly_probabilities
        on: anomalies.probability
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: ML
-     type: Errors
    lookup: average -2m foreach *
     every: 1m
      warn: $this > 50
@@ -14,9 +14,9 @@ component: ML
 
  template: anomalies_anomaly_flags
        on: anomalies.anomaly
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: ML
-     type: Errors
    lookup: sum -2m foreach *
     every: 1m
      warn: $this > 10
diff --git a/health/health.d/apcupsd.conf b/health/health.d/apcupsd.conf
index 07b5c28c9..65f1a69ab 100644
--- a/health/health.d/apcupsd.conf
+++ b/health/health.d/apcupsd.conf
@@ -2,9 +2,9 @@
 
  template: apcupsd_10min_ups_load
        on: apcupsd.load
-    class: Power Supply
+    class: Utilization
+     type: Power Supply
 component: UPS
-     type: Utilization
        os: *
     hosts: *
    lookup: average -10m unaligned of percentage
@@ -20,9 +20,9 @@ component: UPS
 # Fire the alarm as soon as it's going on battery (99% charge) and clear only when full.
  template: apcupsd_ups_charge
        on: apcupsd.charge
-    class: Power Supply
+    class: Errors
+     type: Power Supply
 component: UPS
-     type: Errors
        os: *
     hosts: *
    lookup: average -60s unaligned of charge
@@ -36,9 +36,9 @@ component: UPS
 
  template: apcupsd_last_collected_secs
        on: apcupsd.load
-    class: Power Supply
+    class: Latency
+     type: Power Supply
 component: UPS device
-     type: Latency
      calc: $now - $last_collected_t
     every: 10s
     units: seconds ago
diff --git a/health/health.d/backend.conf b/health/health.d/backend.conf
index 948ea551a..91d469395 100644
--- a/health/health.d/backend.conf
+++ b/health/health.d/backend.conf
@@ -1,9 +1,9 @@
 # Alert that backends subsystem will be disabled soon
     alarm: backend_metrics_eol
        on: netdata.backend_metrics
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: Exporting engine
-     type: Errors
     units: boolean
      calc: $now - $last_collected_t
     every: 1m
@@ -16,9 +16,9 @@ component: Exporting engine
 
     alarm: backend_last_buffering
        on: netdata.backend_metrics
-    class: Netdata
+    class: Latency
+     type: Netdata
 component: Exporting engine
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -30,9 +30,9 @@ component: Exporting engine
 
     alarm: backend_metrics_sent
        on: netdata.backend_metrics
-    class: Netdata
+    class: Workload
+     type: Netdata
 component: Exporting engine
-     type: Workload
     units: %
      calc: abs($sent) * 100 / abs($buffered)
     every: 10s
diff --git a/health/health.d/bcache.conf b/health/health.d/bcache.conf
index d75d8e19b..49cb5ad0f 100644
--- a/health/health.d/bcache.conf
+++ b/health/health.d/bcache.conf
@@ -1,9 +1,9 @@
 
  template: bcache_cache_errors
        on: disk.bcache_cache_read_races
-    class: System
+    class: Errors
+     type: System
 component: Disk
-     type: Errors
    lookup: sum -1m unaligned absolute
     units: errors
     every: 1m
@@ -16,9 +16,9 @@ component: Disk
 
  template: bcache_cache_dirty
        on: disk.bcache_cache_alloc
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
      calc: $dirty + $metadata + $undefined
     units: %
     every: 1m
diff --git a/health/health.d/beanstalkd.conf b/health/health.d/beanstalkd.conf
index 99c754571..13ac8c182 100644
--- a/health/health.d/beanstalkd.conf
+++ b/health/health.d/beanstalkd.conf
@@ -2,9 +2,9 @@
 
  template: beanstalk_server_buried_jobs
        on: beanstalk.current_jobs
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: Beanstalk
-     type: Workload
      calc: $buried
     units: jobs
     every: 10s
diff --git a/health/health.d/bind_rndc.conf b/health/health.d/bind_rndc.conf
index e88f87a4f..7c09225ff 100644
--- a/health/health.d/bind_rndc.conf
+++ b/health/health.d/bind_rndc.conf
@@ -1,8 +1,8 @@
  template: bind_rndc_stats_file_size
        on: bind_rndc.stats_size
-    class: DNS
+    class: Utilization
+     type: DNS
 component: BIND
-     type: Utilization
     units: megabytes
     every: 60
      calc: $stats_size
diff --git a/health/health.d/boinc.conf b/health/health.d/boinc.conf
index 8604abee9..7d7a4fdae 100644
--- a/health/health.d/boinc.conf
+++ b/health/health.d/boinc.conf
@@ -3,9 +3,9 @@
 # Warn on any compute errors encountered.
  template: boinc_compute_errors
        on: boinc.states
-    class: Computing
+    class: Errors
+     type: Computing
 component: BOINC
-     type: Errors
        os: *
     hosts: *
  families: *
@@ -21,9 +21,9 @@ component: BOINC
 # Warn on lots of upload errors
  template: boinc_upload_errors
        on: boinc.states
-    class: Computing
+    class: Errors
+     type: Computing
 component: BOINC
-     type: Errors
        os: *
     hosts: *
  families: *
@@ -39,9 +39,9 @@ component: BOINC
 # Warn on the task queue being empty
  template: boinc_total_tasks
        on: boinc.tasks
-    class: Computing
+    class: Utilization
+     type: Computing
 component: BOINC
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -57,9 +57,9 @@ component: BOINC
 # Warn on no active tasks with a non-empty queue
  template: boinc_active_tasks
        on: boinc.tasks
-    class: Computing
+    class: Utilization
+     type: Computing
 component: BOINC
-     type: Utilization
        os: *
     hosts: *
  families: *
diff --git a/health/health.d/btrfs.conf b/health/health.d/btrfs.conf
index d3200a7ee..8d197aa8d 100644
--- a/health/health.d/btrfs.conf
+++ b/health/health.d/btrfs.conf
@@ -1,9 +1,9 @@
 
  template: btrfs_allocated
        on: btrfs.disk
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -18,9 +18,9 @@ component: File system
 
  template: btrfs_data
        on: btrfs.data
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -35,9 +35,9 @@ component: File system
 
  template: btrfs_metadata
        on: btrfs.metadata
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -52,9 +52,9 @@ component: File system
 
  template: btrfs_system
        on: btrfs.system
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
diff --git a/health/health.d/ceph.conf b/health/health.d/ceph.conf
index ed8f9b4b9..1f9da25c7 100644
--- a/health/health.d/ceph.conf
+++ b/health/health.d/ceph.conf
@@ -2,9 +2,9 @@
 
  template: ceph_cluster_space_usage
        on: ceph.general_usage
-    class: Storage
+    class: Utilization
+     type: Storage
 component: Ceph
-     type: Utilization
      calc: $used * 100 / ($used + $avail)
     units: %
     every: 1m
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
index 068533f10..45b34806c 100644
--- a/health/health.d/cgroups.conf
+++ b/health/health.d/cgroups.conf
@@ -3,9 +3,9 @@
 
  template: cgroup_10min_cpu_usage
        on: cgroup.cpu_limit
-    class: Cgroups
+    class: Utilization
+     type: Cgroups
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned
@@ -19,9 +19,9 @@ component: CPU
 
  template: cgroup_ram_in_use
        on: cgroup.mem_usage
-    class: Cgroups
+    class: Utilization
+     type: Cgroups
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
      calc: ($ram) * 100 / $memory_limit
diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf
index dccd2b064..1f227841e 100644
--- a/health/health.d/cockroachdb.conf
+++ b/health/health.d/cockroachdb.conf
@@ -1,27 +1,11 @@
 
-# Availability
-
- template: cockroachdb_last_collected_secs
-       on: cockroachdb.live_nodes
-    class: Database
-component: CockroachDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
 # Capacity
 
  template: cockroachdb_used_storage_capacity
        on: cockroachdb.storage_used_capacity_percentage
-    class: Database
+    class: Utilization
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $capacity_used_percent
     units: %
     every: 10s
@@ -33,9 +17,9 @@ component: CockroachDB
 
  template: cockroachdb_used_usable_storage_capacity
        on: cockroachdb.storage_used_capacity_percentage
-    class: Database
+    class: Utilization
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $capacity_usable_used_percent
     units: %
     every: 10s
@@ -49,37 +33,37 @@ component: CockroachDB
 
  template: cockroachdb_unavailable_ranges
        on: cockroachdb.ranges_replication_problem
-    class: Database
+    class: Errors
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $ranges_unavailable
     units: num
     every: 10s
      warn: $this > 0
     delay: down 15m multiplier 1.5 max 1h
-     info: number of ranges with fewer live replicas than the replication target
+     info: number of ranges with fewer live replicas than needed for quorum
        to: dba
 
- template: cockroachdb_replicas_leaders_not_leaseholders
-       on: cockroachdb.replicas_leaders
-    class: Database
+ template: cockroachdb_underreplicated_ranges
+       on: cockroachdb.ranges_replication_problem
+    class: Errors
+     type: Database
 component: CockroachDB
-     type: Utilization
-     calc: $replicas_leaders_not_leaseholders
+     calc: $ranges_underreplicated
     units: num
     every: 10s
      warn: $this > 0
     delay: down 15m multiplier 1.5 max 1h
-     info: number of replicas that are Raft leaders whose range lease is held by another store
+     info: number of ranges with fewer live replicas than the replication target
        to: dba
 
 # FD
 
  template: cockroachdb_open_file_descriptors_limit
        on: cockroachdb.process_file_descriptors
-    class: Database
+    class: Utilization
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $sys_fd_open/$sys_fd_softlimit * 100
     units: %
     every: 10s
@@ -87,29 +71,3 @@ component: CockroachDB
     delay: down 15m multiplier 1.5 max 1h
      info: open file descriptors utilization (against softlimit)
        to: dba
-
-# SQL
-
- template: cockroachdb_sql_active_connections
-       on: cockroachdb.sql_connections
-    class: Database
-component: CockroachDB
-     type: Utilization
-     calc: $sql_conns
-    units: active connections
-    every: 10s
-     info: number of active SQL connections
-       to: dba
-
- template: cockroachdb_sql_executed_statements_total_last_5m
-       on: cockroachdb.sql_statements_total
-    class: Database
-component: CockroachDB
-     type: Workload
-   lookup: sum -5m absolute of sql_query_count
-    units: statements
-    every: 10s
-     warn: $this == 0 AND $cockroachdb_sql_active_connections != 0
-    delay: down 15m up 30s multiplier 1.5 max 1h
-     info: number of executed SQL statements in the last 5 minutes
-       to: dba
diff --git a/health/health.d/couchdb.conf b/health/health.d/couchdb.conf
deleted file mode 100644
index c86c6b988..000000000
--- a/health/health.d/couchdb.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure couchdb is running
-
- template: couchdb_last_collected_secs
-       on: couchdb.request_methods
-    class: Database
-component: CouchDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/cpu.conf b/health/health.d/cpu.conf
index d11215768..ad6952825 100644
--- a/health/health.d/cpu.conf
+++ b/health/health.d/cpu.conf
@@ -3,9 +3,9 @@
 
  template: 10min_cpu_usage
        on: system.cpu
-    class: System
+    class: Utilization
+     type: System
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned of user,system,softirq,irq,guest
@@ -19,9 +19,9 @@ component: CPU
 
  template: 10min_cpu_iowait
        on: system.cpu
-    class: System
+    class: Utilization
+     type: System
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned of iowait
@@ -35,9 +35,9 @@ component: CPU
 
  template: 20min_steal_cpu
        on: system.cpu
-    class: System
+    class: Latency
+     type: System
 component: CPU
-     type: Latency
        os: linux
     hosts: *
    lookup: average -20m unaligned of steal
@@ -52,9 +52,9 @@ component: CPU
 ## FreeBSD
  template: 10min_cpu_usage
        on: system.cpu
-    class: System
+    class: Utilization
+     type: System
 component: CPU
-     type: Utilization
        os: freebsd
     hosts: *
    lookup: average -10m unaligned of user,system,interrupt
diff --git a/health/health.d/dbengine.conf b/health/health.d/dbengine.conf
index 79c156ab8..65c41b846 100644
--- a/health/health.d/dbengine.conf
+++ b/health/health.d/dbengine.conf
@@ -3,9 +3,9 @@
 
     alarm: 10min_dbengine_global_fs_errors
        on: netdata.dbengine_global_errors
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of fs_errors
@@ -18,9 +18,9 @@ component: DB engine
 
     alarm: 10min_dbengine_global_io_errors
        on: netdata.dbengine_global_errors
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of io_errors
@@ -33,9 +33,9 @@ component: DB engine
 
     alarm: 10min_dbengine_global_flushing_warnings
        on: netdata.dbengine_global_errors
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
@@ -49,9 +49,9 @@ component: DB engine
 
     alarm: 10min_dbengine_global_flushing_errors
        on: netdata.dbengine_long_term_page_stats
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of flushing_pressure_deletions
diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf
index 60f8faed9..5daff61a1 100644
--- a/health/health.d/disks.conf
+++ b/health/health.d/disks.conf
@@ -11,9 +11,9 @@
 
  template: disk_space_usage
        on: disk.space
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
        os: linux freebsd
     hosts: *
  families: !/dev !/dev/* !/run !/run/* *
@@ -28,9 +28,9 @@ component: Disk
 
  template: disk_inode_usage
        on: disk.inodes
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
        os: linux freebsd
     hosts: *
  families: !/dev !/dev/* !/run !/run/* *
@@ -136,19 +136,16 @@ component: Disk
 
  template: 10min_disk_utilization
        on: disk.util
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
        os: linux freebsd
     hosts: *
  families: *
    lookup: average -10m unaligned
     units: %
     every: 1m
-    green: 90
-      red: 98
-     warn: $this > $green * (($status >= $WARNING)  ? (0.7) : (1))
-     crit: $this > $red   * (($status == $CRITICAL) ? (0.7) : (1))
+     warn: $this > 98 * (($status >= $WARNING)  ? (0.7) : (1))
     delay: down 15m multiplier 1.2 max 1h
      info: average percentage of time $family disk was busy over the last 10 minutes
        to: silent
@@ -161,19 +158,16 @@ component: Disk
 
  template: 10min_disk_backlog
        on: disk.backlog
-    class: System
+    class: Latency
+     type: System
 component: Disk
-     type: Latency
        os: linux
     hosts: *
  families: *
    lookup: average -10m unaligned
     units: ms
     every: 1m
-    green: 2000
-      red: 5000
-     warn: $this > $green * (($status >= $WARNING)  ? (0.7) : (1))
-     crit: $this > $red   * (($status == $CRITICAL) ? (0.7) : (1))
+     warn: $this > 5000 * (($status >= $WARNING)  ? (0.7) : (1))
     delay: down 15m multiplier 1.2 max 1h
      info: average backlog size of the $family disk over the last 10 minutes
        to: silent
diff --git a/health/health.d/dns_query.conf b/health/health.d/dns_query.conf
index 1fbb2c598..ec4937c0a 100644
--- a/health/health.d/dns_query.conf
+++ b/health/health.d/dns_query.conf
@@ -3,9 +3,9 @@
 
  template: dns_query_time_query_time
        on: dns_query_time.query_time
-    class: DNS
+    class: Latency
+     type: DNS
 component: DNS
-     type: Latency
    lookup: average -10s unaligned foreach *
     units: ms
     every: 10s
diff --git a/health/health.d/dnsmasq_dhcp.conf b/health/health.d/dnsmasq_dhcp.conf
index 10d139f77..010b94599 100644
--- a/health/health.d/dnsmasq_dhcp.conf
+++ b/health/health.d/dnsmasq_dhcp.conf
@@ -2,9 +2,9 @@
 
  template: dnsmasq_dhcp_dhcp_range_utilization
        on: dnsmasq_dhcp.dhcp_range_utilization
-    class: DHCP
+    class: Utilization
+     type: DHCP
 component: Dnsmasq
-     type: Utilization
     every: 10s
     units: %
      calc: $used
diff --git a/health/health.d/dockerd.conf b/health/health.d/dockerd.conf
index ba866f81b..220ddd664 100644
--- a/health/health.d/dockerd.conf
+++ b/health/health.d/dockerd.conf
@@ -1,8 +1,8 @@
  template: docker_unhealthy_containers
        on: docker.unhealthy_containers
-    class: Containers
+    class: Errors
+     type: Containers
 component: Docker
-     type: Errors
     units: unhealthy containers
     every: 10s
    lookup: average -10s
diff --git a/health/health.d/elasticsearch.conf b/health/health.d/elasticsearch.conf
deleted file mode 100644
index 05d576c39..000000000
--- a/health/health.d/elasticsearch.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-
-# make sure elasticsearch is running
-
- template: elasticsearch_last_collected
-       on: elasticsearch.cluster_health_status
-    class: Search engine
-component: Elasticsearch
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/entropy.conf b/health/health.d/entropy.conf
index 0478fa0be..13b0fcde4 100644
--- a/health/health.d/entropy.conf
+++ b/health/health.d/entropy.conf
@@ -5,9 +5,9 @@
 
     alarm: lowest_entropy
        on: system.entropy
-    class: System
+    class: Utilization
+     type: System
 component: Cryptography
-     type: Utilization
        os: linux
     hosts: *
    lookup: min -5m unaligned
diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf
index 4430f3fd8..06f398c6e 100644
--- a/health/health.d/exporting.conf
+++ b/health/health.d/exporting.conf
@@ -1,22 +1,25 @@
 
-template: exporting_last_buffering
-families: *
-      on: exporting_data_size
-    calc: $now - $last_collected_t
-   units: seconds ago
-   every: 10s
-    warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-   delay: down 5m multiplier 1.5 max 1h
-    info: number of seconds since the last successful buffering of exporting data
-      to: dba
+ template: exporting_last_buffering
+ families: *
+       on: exporting_data_size
+    class: Latency
+     type: Netdata
+component: Exporting engine
+     calc: $now - $last_collected_t
+    units: seconds ago
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+     info: number of seconds since the last successful buffering of exporting data
+       to: dba
 
  template: exporting_metrics_sent
  families: *
        on: exporting_data_size
-    class: Netdata
+    class: Workload
+     type: Netdata
 component: Exporting engine
-     type: Workload
     units: %
      calc: abs($sent) * 100 / abs($buffered)
     every: 10s
diff --git a/health/health.d/fping.conf b/health/health.d/fping.conf
index 120fe8f28..bb22419fa 100644
--- a/health/health.d/fping.conf
+++ b/health/health.d/fping.conf
@@ -2,9 +2,9 @@
  template: fping_last_collected_secs
  families: *
        on: fping.latency
-    class: Other
+    class: Latency
+     type: Other
 component: Network
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -17,9 +17,9 @@ component: Network
  template: fping_host_reachable
  families: *
        on: fping.latency
-    class: Other
+    class: Errors
+     type: Other
 component: Network
-     type: Errors
      calc: $average != nan
     units: up/down
     every: 10s
@@ -31,9 +31,9 @@ component: Network
  template: fping_host_latency
  families: *
        on: fping.latency
-    class: Other
+    class: Latency
+     type: Other
 component: Network
-     type: Latency
    lookup: average -10s unaligned of average
     units: ms
     every: 10s
@@ -48,9 +48,9 @@ component: Network
  template: fping_packet_loss
  families: *
        on: fping.quality
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
    lookup: average -10m unaligned of returned
      calc: 100 - $this
     green: 1
diff --git a/health/health.d/fronius.conf b/health/health.d/fronius.conf
index 81aafaa60..853bd7fbc 100644
--- a/health/health.d/fronius.conf
+++ b/health/health.d/fronius.conf
@@ -1,9 +1,9 @@
  template: fronius_last_collected_secs
  families: *
        on: fronius.power
-    class: Power Supply
+    class: Latency
+     type: Power Supply
 component: Solar
-     type: Latency
      calc: $now - $last_collected_t
     every: 10s
     units: seconds ago
diff --git a/health/health.d/gearman.conf b/health/health.d/gearman.conf
index e2031bf2b..14010d445 100644
--- a/health/health.d/gearman.conf
+++ b/health/health.d/gearman.conf
@@ -1,24 +1,10 @@
-# make sure Gearman is running
- template: gearman_last_collected_secs
-       on: gearman.total_jobs
-    class: Computing
-component: Gearman
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
  template: gearman_workers_queued
        on: gearman.single_job
-    class: Computing
+    class: Latency
+     type: Computing
 component: Gearman
-     type: Latency
-   lookup: average -10m unaligned match-names of Queued
+   lookup: average -10m unaligned match-names of Pending
     units: workers
     every: 10s
      warn: $this > 30000
diff --git a/health/health.d/geth.conf b/health/health.d/geth.conf
new file mode 100644
index 000000000..dd1eb4701
--- /dev/null
+++ b/health/health.d/geth.conf
@@ -0,0 +1,12 @@
+# chainhead_header is expected to be momentarily ahead. If it's considerably ahead (e.g. more than 5 blocks), then the node is definitely out of sync.
+ template: geth_chainhead_diff_between_header_block
+       on: geth.chainhead
+    class: Workload
+     type: ethereum_node
+component: geth
+    every: 10s
+     calc: $chain_head_block -  $chain_head_header
+    units: blocks
+     warn: $this != 0
+     crit: $this > 5
+    delay: down 1m multiplier 1.5 max 1h
diff --git a/health/health.d/nginx_plus.conf b/health/health.d/go.d.plugin.conf
index 5849a9e7e..8bf84a976 100644
--- a/health/health.d/nginx_plus.conf
+++ b/health/health.d/go.d.plugin.conf
@@ -1,11 +1,12 @@
 
-# make sure nginx_plus is running
+# make sure go.d.plugin data collection job is running
 
- template: nginx_plus_last_collected_secs
-       on: nginx_plus.requests_total
-    class: Web Server
-component: NGINX Plus
-     type: Latency
+ template: go.d_job_last_collected_secs
+       on: netdata.go_plugin_execution_time
+    class: Errors
+     type: Netdata
+component: go.d.plugin
+   module: *
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -14,4 +15,3 @@ component: NGINX Plus
     delay: down 5m multiplier 1.5 max 1h
      info: number of seconds since the last successful data collection
        to: webmaster
-
diff --git a/health/health.d/haproxy.conf b/health/health.d/haproxy.conf
index 9f6b1c577..a0ab52bca 100644
--- a/health/health.d/haproxy.conf
+++ b/health/health.d/haproxy.conf
@@ -1,8 +1,8 @@
  template: haproxy_backend_server_status
        on: haproxy_hs.down
-    class: Web Proxy
+    class: Errors
+     type: Web Proxy
 component: HAProxy
-     type: Errors
     units: failed servers
     every: 10s
    lookup: average -10s
@@ -12,25 +12,12 @@ component: HAProxy
 
  template: haproxy_backend_status
        on: haproxy_hb.down
-    class: Web Proxy
+    class: Errors
+     type: Web Proxy
 component: HAProxy
-     type: Errors
     units: failed backend
     every: 10s
    lookup: average -10s
      crit: $this > 0
      info: average number of failed haproxy backends over the last 10 seconds
        to: sysadmin
-
- template: haproxy_last_collected
-       on: haproxy_hb.down
-    class: Web Proxy
-component: HAProxy
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/hdfs.conf b/health/health.d/hdfs.conf
index bd8308bed..ca8df31b9 100644
--- a/health/health.d/hdfs.conf
+++ b/health/health.d/hdfs.conf
@@ -1,28 +1,11 @@
 
-# make sure hdfs is running
-
- template: hdfs_last_collected_secs
-       on: hdfs.heap_memory
-    class: Storage
-component: HDFS
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
 # Common
 
  template: hdfs_capacity_usage
        on: hdfs.capacity
-    class: Storage
+    class: Utilization
+     type: Storage
 component: HDFS
-     type: Utilization
      calc: ($used) * 100 / ($used + $remaining)
     units: %
     every: 10s
@@ -37,9 +20,9 @@ component: HDFS
 
  template: hdfs_missing_blocks
        on: hdfs.blocks
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $missing
     units: missing blocks
     every: 10s
@@ -51,9 +34,9 @@ component: HDFS
 
  template: hdfs_stale_nodes
        on: hdfs.data_nodes
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $stale
     units: dead nodes
     every: 10s
@@ -65,9 +48,9 @@ component: HDFS
 
  template: hdfs_dead_nodes
        on: hdfs.data_nodes
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $dead
     units: dead nodes
     every: 10s
@@ -81,9 +64,9 @@ component: HDFS
 
  template: hdfs_num_failed_volumes
        on: hdfs.num_failed_volumes
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $fsds_num_failed_volumes
     units: failed volumes
     every: 10s
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf
index d4d6376a3..599c47acc 100644
--- a/health/health.d/httpcheck.conf
+++ b/health/health.d/httpcheck.conf
@@ -1,25 +1,11 @@
- template: httpcheck_last_collected_secs
- families: *
-       on: httpcheck.status
-    class: Other
-component: HTTP endpoint
-     type: Latency
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
  template: httpcheck_web_service_up
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Utilization
+     type: Web Server
 component: HTTP endpoint
-     type: Utilization
    lookup: average -1m unaligned percentage of success
      calc: ($this < 75) ? (0) : ($this)
     every: 5s
@@ -30,9 +16,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_bad_content
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: HTTP endpoint
-     type: Workload
    lookup: average -5m unaligned percentage of bad_content
     every: 10s
     units: %
@@ -46,9 +32,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_bad_status
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: HTTP endpoint
-     type: Workload
    lookup: average -5m unaligned percentage of bad_status
     every: 10s
     units: %
@@ -62,9 +48,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_timeouts
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: HTTP endpoint
-     type: Latency
    lookup: average -5m unaligned percentage of timeout
     every: 10s
     units: %
@@ -73,9 +59,9 @@ component: HTTP endpoint
  template: httpcheck_no_web_service_connections
  families: *
        on: httpcheck.status
-    class: Other
+    class: Errors
+     type: Other
 component: HTTP endpoint
-     type: Errors
    lookup: average -5m unaligned percentage of no_connection
     every: 10s
     units: %
@@ -85,9 +71,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_unreachable
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: HTTP endpoint
-     type: Errors
      calc: ($httpcheck_no_web_service_connections >= $httpcheck_web_service_timeouts) ? ($httpcheck_no_web_service_connections) : ($httpcheck_web_service_timeouts)
     units: %
     every: 10s
@@ -101,9 +87,9 @@ component: HTTP endpoint
  template: httpcheck_1h_web_service_response_time
  families: *
        on: httpcheck.responsetime
-    class: Other
+    class: Latency
+     type: Other
 component: HTTP endpoint
-     type: Latency
    lookup: average -1h unaligned of time
     every: 30s
     units: ms
@@ -112,9 +98,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_slow
  families: *
        on: httpcheck.responsetime
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: HTTP endpoint
-     type: Latency
    lookup: average -3m unaligned of time
     units: ms
     every: 10s
diff --git a/health/health.d/ioping.conf b/health/health.d/ioping.conf
index 57ce4e866..ee4befbea 100644
--- a/health/health.d/ioping.conf
+++ b/health/health.d/ioping.conf
@@ -1,9 +1,9 @@
  template: ioping_disk_latency
  families: *
        on: ioping.latency
-    class: System
+    class: Latency
+     type: System
 component: Disk
-     type: Latency
    lookup: average -10s unaligned of average
     units: ms
     every: 10s
diff --git a/health/health.d/ipc.conf b/health/health.d/ipc.conf
index 6eaf7abe9..c178a410a 100644
--- a/health/health.d/ipc.conf
+++ b/health/health.d/ipc.conf
@@ -3,9 +3,9 @@
 
     alarm: semaphores_used
        on: system.ipc_semaphores
-    class: System
+    class: Utilization
+     type: System
 component: IPC
-     type: Utilization
        os: linux
     hosts: *
      calc: $semaphores * 100 / $ipc_semaphores_max
@@ -19,9 +19,9 @@ component: IPC
 
     alarm: semaphore_arrays_used
        on: system.ipc_semaphore_arrays
-    class: System
+    class: Utilization
+     type: System
 component: IPC
-     type: Utilization
        os: linux
     hosts: *
      calc: $arrays * 100 / $ipc_semaphores_arrays_max
diff --git a/health/health.d/ipfs.conf b/health/health.d/ipfs.conf
index 6268f4092..a514ddfd0 100644
--- a/health/health.d/ipfs.conf
+++ b/health/health.d/ipfs.conf
@@ -1,9 +1,9 @@
 
  template: ipfs_datastore_usage
        on: ipfs.repo_size
-    class: Data Sharing
+    class: Utilization
+     type: Data Sharing
 component: IPFS
-     type: Utilization
      calc: $size * 100 / $avail
     units: %
     every: 10s
diff --git a/health/health.d/ipmi.conf b/health/health.d/ipmi.conf
index d4fdc6c79..feadba1b7 100644
--- a/health/health.d/ipmi.conf
+++ b/health/health.d/ipmi.conf
@@ -1,8 +1,8 @@
     alarm: ipmi_sensors_states
        on: ipmi.sensors_states
-    class: System
+    class: Errors
+     type: System
 component: IPMI
-     type: Errors
      calc: $warning + $critical
     units: sensors
     every: 10s
@@ -14,9 +14,9 @@ component: IPMI
 
     alarm: ipmi_events
        on: ipmi.events
-    class: System
+    class: Utilization
+     type: System
 component: IPMI
-     type: Utilization
      calc: $events
     units: events
     every: 10s
diff --git a/health/health.d/kubelet.conf b/health/health.d/kubelet.conf
index 4d3c45f97..c2778cc5e 100644
--- a/health/health.d/kubelet.conf
+++ b/health/health.d/kubelet.conf
@@ -6,9 +6,9 @@
 
  template: kubelet_node_config_error
        on: k8s_kubelet.kubelet_node_config_error
-    class: Kubernetes
+    class: Errors
+     type: Kubernetes
 component: Kubelet
-     type: Errors
      calc: $kubelet_node_config_error
     units: bool
     every: 10s
@@ -22,9 +22,9 @@ component: Kubelet
  template: kubelet_token_requests
    lookup: sum -10s of token_fail_count
        on: k8s_kubelet.kubelet_token_requests
-    class: Kubernetes
+    class: Errors
+     type: Kubernetes
 component: Kubelet
-     type: Errors
     units: failed requests
     every: 10s
      warn: $this > 0
@@ -37,9 +37,9 @@ component: Kubelet
  template: kubelet_operations_error
    lookup: sum -1m
        on: k8s_kubelet.kubelet_operations_errors
-    class: Kubernetes
+    class: Errors
+     type: Kubernetes
 component: Kubelet
-     type: Errors
     units: errors
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (0) : (20))
@@ -64,9 +64,9 @@ component: Kubelet
 
  template: kubelet_1m_pleg_relist_latency_quantile_05
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -1m unaligned of kubelet_pleg_relist_latency_05
     units: microseconds
     every: 10s
@@ -74,9 +74,9 @@ component: Kubelet
 
  template: kubelet_10s_pleg_relist_latency_quantile_05
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -10s unaligned of kubelet_pleg_relist_latency_05
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_05 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_05))
     every: 10s
@@ -92,9 +92,9 @@ component: Kubelet
 
  template: kubelet_1m_pleg_relist_latency_quantile_09
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -1m unaligned of kubelet_pleg_relist_latency_09
     units: microseconds
     every: 10s
@@ -102,9 +102,9 @@ component: Kubelet
 
  template: kubelet_10s_pleg_relist_latency_quantile_09
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -10s unaligned of kubelet_pleg_relist_latency_09
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_09 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_09))
     every: 10s
@@ -120,9 +120,9 @@ component: Kubelet
 
  template: kubelet_1m_pleg_relist_latency_quantile_099
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -1m unaligned of kubelet_pleg_relist_latency_099
     units: microseconds
     every: 10s
@@ -130,9 +130,9 @@ component: Kubelet
 
  template: kubelet_10s_pleg_relist_latency_quantile_099
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -10s unaligned of kubelet_pleg_relist_latency_099
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_099 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_099))
     every: 10s
diff --git a/health/health.d/lighttpd.conf b/health/health.d/lighttpd.conf
deleted file mode 100644
index 0f067549e..000000000
--- a/health/health.d/lighttpd.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure lighttpd is running
-
- template: lighttpd_last_collected_secs
-       on: lighttpd.requests
-    class: Web Server
-component: Lighttpd
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/linux_power_supply.conf b/health/health.d/linux_power_supply.conf
index e28c246a3..c0bc6de8a 100644
--- a/health/health.d/linux_power_supply.conf
+++ b/health/health.d/linux_power_supply.conf
@@ -2,9 +2,9 @@
 
  template: linux_power_supply_capacity
        on: powersupply.capacity
-    class: Power Supply
+    class: Utilization
+     type: Power Supply
 component: Battery
-     type: Utilization
      calc: $capacity
     units: %
     every: 10s
diff --git a/health/health.d/load.conf b/health/health.d/load.conf
index e811f6ee2..0bd872f85 100644
--- a/health/health.d/load.conf
+++ b/health/health.d/load.conf
@@ -6,9 +6,9 @@
 # minute, with a special case for a single CPU of setting the trigger at 2.
     alarm: load_cpu_number
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
      calc: ($active_processors == nan or $active_processors == inf or $active_processors < 2) ? ( 2 ) : ( $active_processors )
@@ -22,9 +22,9 @@ component: Load
 
     alarm: load_average_15
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
    lookup: max -1m unaligned of load15
@@ -37,9 +37,9 @@ component: Load
 
     alarm: load_average_5
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
    lookup: max -1m unaligned of load5
@@ -52,9 +52,9 @@ component: Load
 
     alarm: load_average_1
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
    lookup: max -1m unaligned of load1
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
index 67483b201..cedaa000e 100644
--- a/health/health.d/mdstat.conf
+++ b/health/health.d/mdstat.conf
@@ -1,8 +1,8 @@
  template: mdstat_last_collected
        on: md.disks
-    class: System
+    class: Latency
+     type: System
 component: RAID
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -13,9 +13,9 @@ component: RAID
 
  template: mdstat_disks
        on: md.disks
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
     units: failed devices
     every: 10s
      calc: $down
@@ -26,9 +26,9 @@ component: RAID
 
  template: mdstat_mismatch_cnt
        on: md.mismatch_cnt
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
  families: !*(raid1) !*(raid10) *
     units: unsynchronized blocks
      calc: $count
@@ -40,9 +40,9 @@ component: RAID
 
  template: mdstat_nonredundant_last_collected
        on: md.nonredundant
-    class: System
+    class: Latency
+     type: System
 component: RAID
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
diff --git a/health/health.d/megacli.conf b/health/health.d/megacli.conf
index 1b6502f62..9fbcfdb92 100644
--- a/health/health.d/megacli.conf
+++ b/health/health.d/megacli.conf
@@ -3,9 +3,9 @@
 
  template: megacli_adapter_state
        on: megacli.adapter_degraded
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: max -10s foreach *
     units: boolean
     every: 10s
@@ -18,9 +18,9 @@ component: RAID
 
  template: megacli_pd_predictive_failures
        on: megacli.pd_predictive_failure
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: sum -10s foreach *
     units: predictive failures
     every: 10s
@@ -31,9 +31,9 @@ component: RAID
 
  template: megacli_pd_media_errors
        on: megacli.pd_media_error
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: sum -10s foreach *
     units: media errors
     every: 10s
@@ -46,9 +46,9 @@ component: RAID
 
  template: megacli_bbu_relative_charge
        on: megacli.bbu_relative_charge
-    class: System
+    class: Workload
+     type: System
 component: RAID
-     type: Workload
    lookup: average -10s
     units: percent
     every: 10s
@@ -59,9 +59,9 @@ component: RAID
 
  template: megacli_bbu_cycle_count
        on: megacli.bbu_cycle_count
-    class: System
+    class: Workload
+     type: System
 component: RAID
-     type: Workload
    lookup: average -10s
     units: cycles
     every: 10s
diff --git a/health/health.d/memcached.conf b/health/health.d/memcached.conf
index f4b734c38..2a2fe4b82 100644
--- a/health/health.d/memcached.conf
+++ b/health/health.d/memcached.conf
@@ -1,28 +1,11 @@
 
-# make sure memcached is running
-
- template: memcached_last_collected_secs
-       on: memcached.cache
-    class: KV Storage
-component: Memcached
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
-
 # detect if memcached cache is full
 
  template: memcached_cache_memory_usage
        on: memcached.cache
-    class: KV Storage
+    class: Utilization
+     type: KV Storage
 component: Memcached
-     type: Utilization
      calc: $used * 100 / ($used + $available)
     units: %
     every: 10s
@@ -37,9 +20,9 @@ component: Memcached
 
  template: memcached_cache_fill_rate
        on: memcached.cache
-    class: KV Storage
+    class: Utilization
+     type: KV Storage
 component: Memcached
-     type: Utilization
    lookup: min -10m at -50m unaligned of available
      calc: ($this - $available) / (($now - $after) / 3600)
     units: KB/hour
@@ -51,9 +34,9 @@ component: Memcached
 
  template: memcached_out_of_cache_space_time
        on: memcached.cache
-    class: KV Storage
+    class: Utilization
+     type: KV Storage
 component: Memcached
-     type: Utilization
      calc: ($memcached_cache_fill_rate > 0) ? ($available / $memcached_cache_fill_rate) : (inf)
     units: hours
     every: 10s
diff --git a/health/health.d/memory.conf b/health/health.d/memory.conf
index ab651315f..010cbbd7b 100644
--- a/health/health.d/memory.conf
+++ b/health/health.d/memory.conf
@@ -3,9 +3,9 @@
 
     alarm: 1hour_ecc_memory_correctable
        on: mem.ecc_ce
-    class: System
+    class: Errors
+     type: System
 component: Memory
-     type: Errors
        os: linux
     hosts: *
    lookup: sum -10m unaligned
@@ -18,9 +18,9 @@ component: Memory
 
     alarm: 1hour_ecc_memory_uncorrectable
        on: mem.ecc_ue
-    class: System
+    class: Errors
+     type: System
 component: Memory
-     type: Errors
        os: linux
     hosts: *
    lookup: sum -10m unaligned
@@ -33,9 +33,9 @@ component: Memory
 
     alarm: 1hour_memory_hw_corrupted
        on: mem.hwcorrupt
-    class: System
+    class: Errors
+     type: System
 component: Memory
-     type: Errors
        os: linux
     hosts: *
      calc: $HardwareCorrupted
diff --git a/health/health.d/mongodb.conf b/health/health.d/mongodb.conf
deleted file mode 100644
index 8c9bdeb6f..000000000
--- a/health/health.d/mongodb.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure mongodb is running
-
- template: mongodb_last_collected_secs
-       on: mongodb.read_operations
-    class: Database
-component: MongoDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/mysql.conf b/health/health.d/mysql.conf
index 91860c4a7..34452d983 100644
--- a/health/health.d/mysql.conf
+++ b/health/health.d/mysql.conf
@@ -1,29 +1,11 @@
 
-# make sure mysql is running
-
- template: mysql_last_collected_secs
-       on: mysql.queries
-    class: Database
-component: MySQL
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
-
-# -----------------------------------------------------------------------------
 # slow queries
 
  template: mysql_10s_slow_queries
        on: mysql.queries
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Latency
    lookup: sum -10s of slow_queries
     units: slow queries
     every: 10s
@@ -39,9 +21,9 @@ component: MySQL
 
  template: mysql_10s_table_locks_immediate
        on: mysql.table_locks
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
    lookup: sum -10s absolute of immediate
     units: immediate locks
     every: 10s
@@ -50,9 +32,9 @@ component: MySQL
 
  template: mysql_10s_table_locks_waited
        on: mysql.table_locks
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Latency
    lookup: sum -10s absolute of waited
     units: waited locks
     every: 10s
@@ -61,9 +43,9 @@ component: MySQL
 
  template: mysql_10s_waited_locks_ratio
        on: mysql.table_locks
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Latency
      calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
     units: %
     every: 10s
@@ -79,9 +61,9 @@ component: MySQL
 
  template: mysql_connections
        on: mysql.connections_active
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
      calc: $active * 100 / $limit
     units: %
     every: 10s
@@ -97,9 +79,9 @@ component: MySQL
 
  template: mysql_replication
        on: mysql.slave_status
-    class: Database
+    class: Errors
+     type: Database
 component: MySQL
-     type: Errors
      calc: ($sql_running <= 0 OR $io_running <= 0)?0:1
     units: ok/failed
     every: 10s
@@ -110,9 +92,9 @@ component: MySQL
 
  template: mysql_replication_lag
        on: mysql.slave_behind
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Errors
      calc: $seconds
     units: seconds
     every: 10s
@@ -129,9 +111,9 @@ component: MySQL
 
  template: mysql_galera_cluster_size_max_2m
        on: mysql.galera_cluster_size
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
    lookup: max -2m absolute
     units: nodes
     every: 10s
@@ -140,9 +122,9 @@ component: MySQL
 
  template: mysql_galera_cluster_size
        on: mysql.galera_cluster_size
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
      calc: $nodes
     units: nodes
     every: 10s
@@ -156,9 +138,9 @@ component: MySQL
 
  template: mysql_galera_cluster_state
        on: mysql.galera_cluster_state
-    class: Database
+    class: Errors
+     type: Database
 component: MySQL
-     type: Errors
      calc: $state
     every: 10s
      warn: $this == 2 OR $this == 3
@@ -173,9 +155,9 @@ component: MySQL
 
  template: mysql_galera_cluster_status
        on: mysql.galera_cluster_status
-    class: Database
+    class: Errors
+     type: Database
 component: MySQL
-     type: Errors
      calc: $wsrep_cluster_status
     every: 10s
      crit: $mysql_galera_cluster_state != nan AND $this != 0
diff --git a/health/health.d/named.conf b/health/health.d/named.conf
deleted file mode 100644
index 90266df16..000000000
--- a/health/health.d/named.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure named is running
-
- template: named_last_collected_secs
-       on: named.global_queries
-    class: DNS
-component: BIND
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: domainadmin
-
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
index 04219e163..028ca7b81 100644
--- a/health/health.d/net.conf
+++ b/health/health.d/net.conf
@@ -6,9 +6,9 @@
 
  template: interface_speed
        on: net.net
-    class: System
+    class: Latency
+     type: System
 component: Network
-     type: Latency
        os: *
     hosts: *
  families: *
@@ -19,9 +19,9 @@ component: Network
 
  template: 1m_received_traffic_overflow
        on: net.net
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
  families: *
@@ -36,9 +36,9 @@ component: Network
 
  template: 1m_sent_traffic_overflow
        on: net.net
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
  families: *
@@ -63,9 +63,9 @@ component: Network
 
  template: inbound_packets_dropped
        on: net.drops
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* *
@@ -76,9 +76,9 @@ component: Network
 
  template: outbound_packets_dropped
        on: net.drops
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* *
@@ -89,14 +89,14 @@ component: Network
 
  template: inbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* !wl* *
    lookup: sum -10m unaligned absolute of received
-     calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0))
+     calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0))
     units: %
     every: 1m
      warn: $this >= 2
@@ -106,9 +106,9 @@ component: Network
 
  template: outbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* !wl* *
@@ -123,14 +123,14 @@ component: Network
 
  template: wifi_inbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: wl*
    lookup: sum -10m unaligned absolute of received
-     calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0))
+     calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0))
     units: %
     every: 1m
      warn: $this >= 10
@@ -140,9 +140,9 @@ component: Network
 
  template: wifi_outbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: wl*
@@ -160,9 +160,9 @@ component: Network
 
  template: interface_inbound_errors
        on: net.errors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: freebsd
     hosts: *
  families: *
@@ -176,9 +176,9 @@ component: Network
 
  template: interface_outbound_errors
        on: net.errors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: freebsd
     hosts: *
  families: *
@@ -200,9 +200,9 @@ component: Network
 
  template: 10min_fifo_errors
        on: net.fifo
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -225,9 +225,9 @@ component: Network
 
  template: 1m_received_packets_rate
        on: net.packets
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux freebsd
     hosts: *
  families: *
@@ -238,9 +238,9 @@ component: Network
 
  template: 10s_received_packets_storm
        on: net.packets
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux freebsd
     hosts: *
  families: *
diff --git a/health/health.d/netfilter.conf b/health/health.d/netfilter.conf
index 35c89caf7..7de383fa2 100644
--- a/health/health.d/netfilter.conf
+++ b/health/health.d/netfilter.conf
@@ -3,9 +3,9 @@
 
     alarm: netfilter_conntrack_full
        on: netfilter.conntrack_sockets
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: max -10s unaligned of connections
diff --git a/health/health.d/nginx.conf b/health/health.d/nginx.conf
deleted file mode 100644
index 30c738f47..000000000
--- a/health/health.d/nginx.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure nginx is running
-
- template: nginx_last_collected_secs
-       on: nginx.requests
-    class: Web Server
-component: NGINX
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/phpfpm.conf b/health/health.d/phpfpm.conf
deleted file mode 100644
index fc073a944..000000000
--- a/health/health.d/phpfpm.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure phpfpm is running
-
- template: phpfpm_last_collected_secs
-       on: phpfpm.requests
-    class: Web Server
-component: PHP-FPM
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/pihole.conf b/health/health.d/pihole.conf
index 72622caed..2e5c1cbfd 100644
--- a/health/health.d/pihole.conf
+++ b/health/health.d/pihole.conf
@@ -1,45 +1,12 @@
 
-# Make sure Pi-hole is responding.
-
- template: pihole_last_collected_secs
-       on: pihole.dns_queries_total
-    class: Ad Filtering
-component: Pi-hole
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-# Blocked DNS queries.
-
- template: pihole_blocked_queries
-       on: pihole.dns_queries_percentage
-    class: Ad Filtering
-component: Pi-hole
-     type: Errors
-    every: 10s
-    units: %
-     calc: $blocked
-     warn: $this > ( ($status >= $WARNING ) ? ( 45 ) : ( 55 ) )
-     crit: $this > ( ($status == $CRITICAL) ? ( 55 ) : ( 75 ) )
-    delay: up 2m down 5m
-     info: percentage of blocked dns queries over the last 24 hour
-       to: sysadmin
-
-
 # Blocklist last update time.
 # Default update interval is a week.
 
  template: pihole_blocklist_last_update
        on: pihole.blocklist_last_update
-    class: Ad Filtering
+    class: Errors
+     type: Ad Filtering
 component: Pi-hole
-     type: Errors
     every: 10s
     units: seconds
      calc: $ago
@@ -52,15 +19,15 @@ component: Pi-hole
 
  template: pihole_blocklist_gravity_file
        on: pihole.blocklist_last_update
-    class: Ad Filtering
+    class: Errors
+     type: Ad Filtering
 component: Pi-hole
-     type: Errors
     every: 10s
     units: boolean
      calc: $file_exists
      crit: $this != 1
     delay: up 2m down 5m
-     info: gravity.list (blocklist) file existence state (0: exists, 1: not-exists)
+     info: gravity.list (blocklist) file existence state (0: not-exists, 1: exists)
        to: sysadmin
 
 # Pi-hole's ability to block unwanted domains.
@@ -68,13 +35,13 @@ component: Pi-hole
 
  template: pihole_status
        on: pihole.unwanted_domains_blocking_status
-    class: Ad Filtering
+    class: Errors
+     type: Ad Filtering
 component: Pi-hole
-     type: Errors
     every: 10s
     units: boolean
      calc: $enabled
      warn: $this != 1
     delay: up 2m down 5m
-     info: unwanted domains blocking status (0: enabled, 1: disabled)
+     info: unwanted domains blocking status (0: disabled, 1: enabled)
        to: sysadmin
diff --git a/health/health.d/portcheck.conf b/health/health.d/portcheck.conf
index b977dbb31..8cbd7729c 100644
--- a/health/health.d/portcheck.conf
+++ b/health/health.d/portcheck.conf
@@ -1,25 +1,11 @@
- template: portcheck_last_collected_secs
- families: *
-       on: portcheck.status
-    class: Other
-component: TCP endpoint
-     type: Latency
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
  template: portcheck_service_reachable
  families: *
        on: portcheck.status
-    class: Other
+    class: Workload
+     type: Other
 component: TCP endpoint
-     type: Workload
    lookup: average -1m unaligned percentage of success
      calc: ($this < 75) ? (0) : ($this)
     every: 5s
@@ -30,9 +16,9 @@ component: TCP endpoint
  template: portcheck_connection_timeouts
  families: *
        on: portcheck.status
-    class: Other
+    class: Errors
+     type: Other
 component: TCP endpoint
-     type: Errors
    lookup: average -5m unaligned percentage of timeout
     every: 10s
     units: %
@@ -45,9 +31,9 @@ component: TCP endpoint
  template: portcheck_connection_fails
  families: *
        on: portcheck.status
-    class: Other
+    class: Errors
+     type: Other
 component: TCP endpoint
-     type: Errors
    lookup: average -5m unaligned percentage of no_connection,failed
     every: 10s
     units: %
diff --git a/health/health.d/postgres.conf b/health/health.d/postgres.conf
deleted file mode 100644
index f908a802a..000000000
--- a/health/health.d/postgres.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure postgres is running
-
- template: postgres_last_collected_secs
-       on: postgres.db_stat_transactions
-    class: Database
-component: PostgreSQL
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/processes.conf b/health/health.d/processes.conf
index b44a24c0b..2929ee3d4 100644
--- a/health/health.d/processes.conf
+++ b/health/health.d/processes.conf
@@ -2,9 +2,9 @@
 
     alarm: active_processes
        on: system.active_processes
-    class: System
+    class: Workload
+     type: System
 component: Processes
-     type: Workload
     hosts: *
      calc: $active * 100 / $pidmax
     units: %
diff --git a/health/health.d/pulsar.conf b/health/health.d/pulsar.conf
deleted file mode 100644
index 9903d4e38..000000000
--- a/health/health.d/pulsar.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# Availability
-
- template: pulsar_last_collected_secs
-       on: pulsar.broker_components
-    class: Messaging
-component: Pulsar
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/apache.conf b/health/health.d/python.d.plugin.conf
index c623fb880..f3abc588f 100644
--- a/health/health.d/apache.conf
+++ b/health/health.d/python.d.plugin.conf
@@ -1,11 +1,12 @@
 
-# make sure apache is running
+# make sure python.d.plugin data collection job is running
 
- template: apache_last_collected_secs
-       on: apache.requests
-    class: Web Server
-component: Apache
-     type: Latency
+ template: python.d_job_last_collected_secs
+       on: netdata.pythond_runtime
+    class: Errors
+     type: Netdata
+component: python.d.plugin
+   module: *
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -14,4 +15,3 @@ component: Apache
     delay: down 5m multiplier 1.5 max 1h
      info: number of seconds since the last successful data collection
        to: webmaster
-
diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf
index 0e3cc29fa..6e6e3b400 100644
--- a/health/health.d/ram.conf
+++ b/health/health.d/ram.conf
@@ -3,9 +3,9 @@
 
     alarm: used_ram_to_ignore
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux freebsd
     hosts: *
      calc: ($zfs.arc_size.arcsz = nan)?(0):($zfs.arc_size.arcsz - $zfs.arc_size.min)
@@ -15,13 +15,12 @@ component: Memory
 
     alarm: ram_in_use
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
-#    calc: $used * 100 / ($used + $cached + $free)
-     calc: ($used - $used_ram_to_ignore) * 100 / ($used  + $cached + $free)
+     calc: ($used - $used_ram_to_ignore) * 100 / ($used + $cached + $free + $buffers)
     units: %
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (80) : (90))
@@ -32,12 +31,12 @@ component: Memory
 
     alarm: ram_available
        on: mem.available
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
-     calc: ($avail + $system.ram.used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
+     calc: $avail * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
     units: %
     every: 10s
      warn: $this < (($status >= $WARNING)  ? (15) : (10))
@@ -46,24 +45,25 @@ component: Memory
      info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
        to: sysadmin
 
-   alarm: oom_kill
-      on: mem.oom_kill
-      os: linux
-   hosts: *
-  lookup: sum -1m unaligned
-   units: kills
-   every: 10s
-    warn: $this > 0
-   delay: down 5m
-    info: number of out of memory kills in the last minute
-      to: sysadmin
+      alarm: oom_kill
+         on: mem.oom_kill
+         os: linux
+      hosts: *
+     lookup: sum -30m unaligned
+      units: kills
+      every: 5m
+       warn: $this > 0
+      delay: down 10m
+host labels: _is_k8s_node = false
+       info: number of out of memory kills in the last 30 minutes
+         to: sysadmin
 
 ## FreeBSD
     alarm: ram_in_use
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: freebsd
     hosts: *
      calc: ($active + $wired + $laundry + $buffers - $used_ram_to_ignore) * 100 / ($active + $wired + $laundry + $buffers - $used_ram_to_ignore + $cache + $free + $inactive)
@@ -77,9 +77,9 @@ component: Memory
 
     alarm: ram_available
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: freebsd
     hosts: *
      calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $laundry + $buffers)
diff --git a/health/health.d/redis.conf b/health/health.d/redis.conf
index e8b289942..dfb771e8c 100644
--- a/health/health.d/redis.conf
+++ b/health/health.d/redis.conf
@@ -1,26 +1,10 @@
 
-# make sure redis is running
-
- template: redis_last_collected_secs
-       on: redis.operations
-    class: KV Storage
-component: Redis
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
  template: redis_bgsave_broken
  families: *
        on: redis.bgsave_health
-    class: KV Storage
+    class: Errors
+     type: KV Storage
 component: Redis
-     type: Errors
     every: 10s
      crit: $rdb_last_bgsave_status != 0
     units: ok/failed
@@ -31,9 +15,9 @@ component: Redis
  template: redis_bgsave_slow
  families: *
        on: redis.bgsave_now
-    class: KV Storage
+    class: Latency
+     type: KV Storage
 component: Redis
-     type: Latency
     every: 10s
      warn: $rdb_bgsave_in_progress > 600
      crit: $rdb_bgsave_in_progress > 1200
diff --git a/health/health.d/retroshare.conf b/health/health.d/retroshare.conf
index ca22e60de..14aa76b4c 100644
--- a/health/health.d/retroshare.conf
+++ b/health/health.d/retroshare.conf
@@ -1,26 +1,11 @@
-# make sure RetroShare is running
-
- template: retroshare_last_collected_secs
-       on: retroshare.peers
-    class: Data Sharing
-component: Retroshare
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # make sure the DHT is fine when active
 
  template: retroshare_dht_working
        on: retroshare.dht
-    class: Data Sharing
+    class: Utilization
+     type: Data Sharing
 component: Retroshare
-     type: Utilization
      calc: $dht_size_all
     units: peers
     every: 1m
diff --git a/health/health.d/riakkv.conf b/health/health.d/riakkv.conf
index b2c0e8d9c..261fd48c6 100644
--- a/health/health.d/riakkv.conf
+++ b/health/health.d/riakkv.conf
@@ -1,24 +1,10 @@
-# Ensure that Riak is running.  template: riak_last_collected_secs
- template: riakkv_last_collected_secs
-       on: riak.kv.throughput
-    class: Database
-component: Riak KV
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
 
 # Warn if a list keys operation is running.
  template: riakkv_list_keys_active
        on: riak.core.fsm_active
-    class: Database
+    class: Utilization
+     type: Database
 component: Riak KV
-     type: Utilization
      calc: $list_fsm_active
     units: state machines
     every: 10s
@@ -31,9 +17,9 @@ component: Riak KV
 # KV GET
  template: riakkv_1h_kv_get_mean_latency
        on: riak.kv.latency.get
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $node_get_fsm_time_mean
    lookup: average -1h unaligned of time
     every: 30s
@@ -43,9 +29,9 @@ component: Riak KV
 
  template: riakkv_kv_get_slow
        on: riak.kv.latency.get
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $mean
    lookup: average -3m unaligned of time
     units: ms
@@ -61,9 +47,9 @@ component: Riak KV
 # KV PUT
  template: riakkv_1h_kv_put_mean_latency
        on: riak.kv.latency.put
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $node_put_fsm_time_mean
    lookup: average -1h unaligned of time
     every: 30s
@@ -73,9 +59,9 @@ component: Riak KV
 
  template: riakkv_kv_put_slow
        on: riak.kv.latency.put
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $mean
    lookup: average -3m unaligned of time
     units: ms
@@ -95,9 +81,9 @@ component: Riak KV
 # On systems observed, this is < 2000, but may grow depending on load.
  template: riakkv_vm_high_process_count
        on: riak.vm
-    class: Database
+    class: Utilization
+     type: Database
 component: Riak KV
-     type: Utilization
      calc: $sys_process_count
     units: processes
     every: 10s
diff --git a/health/health.d/scaleio.conf b/health/health.d/scaleio.conf
index 3c0dc1168..ab110bf07 100644
--- a/health/health.d/scaleio.conf
+++ b/health/health.d/scaleio.conf
@@ -1,27 +1,11 @@
 
-# make sure scaleio is running
-
- template: scaleio_last_collected_secs
-       on: scaleio.system_capacity_total
-    class: Storage
-component: ScaleIO
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # make sure Storage Pool capacity utilization is under limit
 
  template: scaleio_storage_pool_capacity_utilization
        on: scaleio.storage_pool_capacity_utilization
-    class: Storage
+    class: Utilization
+     type: Storage
 component: ScaleIO
-     type: Utilization
      calc: $used
     units: %
     every: 10s
@@ -36,9 +20,9 @@ component: ScaleIO
 
  template: scaleio_sdc_mdm_connection_state
        on: scaleio.sdc_mdm_connection_state
-    class: Storage
+    class: Utilization
+     type: Storage
 component: ScaleIO
-     type: Utilization
      calc: $connected
     every: 10s
      warn: $this != 1
diff --git a/health/health.d/softnet.conf b/health/health.d/softnet.conf
index d8b01caff..345f87505 100644
--- a/health/health.d/softnet.conf
+++ b/health/health.d/softnet.conf
@@ -5,9 +5,9 @@
 
     alarm: 1min_netdev_backlog_exceeded
        on: system.softnet_stat
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m unaligned absolute of dropped
@@ -21,9 +21,9 @@ component: Network
 
     alarm: 1min_netdev_budget_ran_outs
        on: system.softnet_stat
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m unaligned absolute of squeezed
@@ -38,9 +38,9 @@ component: Network
 
     alarm: 10min_netisr_backlog_exceeded
        on: system.softnet_stat
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: freebsd
     hosts: *
    lookup: average -1m unaligned absolute of qdrops
diff --git a/health/health.d/squid.conf b/health/health.d/squid.conf
deleted file mode 100644
index 5c3d17629..000000000
--- a/health/health.d/squid.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure squid is running
-
- template: squid_last_collected_secs
-       on: squid.clients_requests
-    class: Web Proxy
-component: Squid
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: proxyadmin
-
diff --git a/health/health.d/stiebeleltron.conf b/health/health.d/stiebeleltron.conf
index f793b5ed1..493c8b73a 100644
--- a/health/health.d/stiebeleltron.conf
+++ b/health/health.d/stiebeleltron.conf
@@ -1,9 +1,9 @@
  template: stiebeleltron_last_collected_secs
  families: *
        on: stiebeleltron.heating.hc1
-    class: Other
+    class: Latency
+     type: Other
 component: Sensors
-     type: Latency
      calc: $now - $last_collected_t
     every: 10s
     units: seconds ago
diff --git a/health/health.d/swap.conf b/health/health.d/swap.conf
index 5b3f89a97..03c319320 100644
--- a/health/health.d/swap.conf
+++ b/health/health.d/swap.conf
@@ -3,9 +3,9 @@
 
     alarm: 30min_ram_swapped_out
        on: system.swapio
-    class: System
+    class: Workload
+     type: System
 component: Memory
-     type: Workload
        os: linux freebsd
     hosts: *
    lookup: sum -30m unaligned absolute of out
@@ -20,12 +20,12 @@ component: Memory
 
     alarm: used_swap
        on: system.swap
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux freebsd
     hosts: *
-     calc: $used * 100 / ( $used + $free )
+     calc: ($used + $free) > 0 ? ($used * 100 / ($used + $free)) : 0
     units: %
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (80) : (90))
diff --git a/health/health.d/systemdunits.conf b/health/health.d/systemdunits.conf
index cc1a8698d..38213a8db 100644
--- a/health/health.d/systemdunits.conf
+++ b/health/health.d/systemdunits.conf
@@ -4,9 +4,9 @@
 ## Service units
  template: systemd_service_units_state
        on: systemd.service_units_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -18,9 +18,9 @@ component: Systemd units
 ## Socket units
  template: systemd_socket_units_state
        on: systemd.socket_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -32,9 +32,9 @@ component: Systemd units
 ## Target units
  template: systemd_target_units_state
        on: systemd.target_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -46,9 +46,9 @@ component: Systemd units
 ## Path units
  template: systemd_path_units_state
        on: systemd.path_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -60,9 +60,9 @@ component: Systemd units
 ## Device units
  template: systemd_device_units_state
        on: systemd.device_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -74,9 +74,9 @@ component: Systemd units
 ## Mount units
  template: systemd_mount_units_state
        on: systemd.mount_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -88,9 +88,9 @@ component: Systemd units
 ## Automount units
  template: systemd_automount_units_state
        on: systemd.automount_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -102,9 +102,9 @@ component: Systemd units
 ## Swap units
  template: systemd_swap_units_state
        on: systemd.swap_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -116,9 +116,9 @@ component: Systemd units
 ## Scope units
  template: systemd_scope_units_state
        on: systemd.scope_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -130,9 +130,9 @@ component: Systemd units
 ## Slice units
  template: systemd_slice_units_state
        on: systemd.slice_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
diff --git a/health/health.d/tcp_conn.conf b/health/health.d/tcp_conn.conf
index f2c5e4e5d..67b3bee53 100644
--- a/health/health.d/tcp_conn.conf
+++ b/health/health.d/tcp_conn.conf
@@ -7,9 +7,9 @@
 
     alarm: tcp_connections
        on: ipv4.tcpsock
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
      calc: (${tcp_max_connections} > 0) ? ( ${connections} * 100 / ${tcp_max_connections} ) : 0
diff --git a/health/health.d/tcp_listen.conf b/health/health.d/tcp_listen.conf
index 51a0e461c..d4bcfa248 100644
--- a/health/health.d/tcp_listen.conf
+++ b/health/health.d/tcp_listen.conf
@@ -20,9 +20,9 @@
 
     alarm: 1m_tcp_accept_queue_overflows
        on: ip.tcp_accept_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of ListenOverflows
@@ -38,9 +38,9 @@ component: Network
 # CHECK: https://github.com/netdata/netdata/issues/3234#issuecomment-423935842
     alarm: 1m_tcp_accept_queue_drops
        on: ip.tcp_accept_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of ListenDrops
@@ -63,9 +63,9 @@ component: Network
 
     alarm: 1m_tcp_syn_queue_drops
        on: ip.tcp_syn_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of TCPReqQFullDrop
@@ -80,9 +80,9 @@ component: Network
 
     alarm: 1m_tcp_syn_queue_cookies
        on: ip.tcp_syn_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of TCPReqQFullDoCookies
diff --git a/health/health.d/tcp_mem.conf b/health/health.d/tcp_mem.conf
index 646e5c6da..318be20ac 100644
--- a/health/health.d/tcp_mem.conf
+++ b/health/health.d/tcp_mem.conf
@@ -8,9 +8,9 @@
 
     alarm: tcp_memory
        on: ipv4.sockstat_tcp_mem
-    class: System
+    class: Utilization
+     type: System
 component: Network
-     type: Utilization
        os: linux
     hosts: *
      calc: ${mem} * 100 / ${tcp_mem_high}
diff --git a/health/health.d/tcp_orphans.conf b/health/health.d/tcp_orphans.conf
index 6e94d67d1..cbd628da5 100644
--- a/health/health.d/tcp_orphans.conf
+++ b/health/health.d/tcp_orphans.conf
@@ -9,9 +9,9 @@
 
     alarm: tcp_orphans
        on: ipv4.sockstat_tcp_sockets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
      calc: ${orphan} * 100 / ${tcp_max_orphans}
diff --git a/health/health.d/tcp_resets.conf b/health/health.d/tcp_resets.conf
index 41355dad6..190271e47 100644
--- a/health/health.d/tcp_resets.conf
+++ b/health/health.d/tcp_resets.conf
@@ -6,9 +6,9 @@
 
     alarm: 1m_ipv4_tcp_resets_sent
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m at -10s unaligned absolute of OutRsts
@@ -18,9 +18,9 @@ component: Network
 
     alarm: 10s_ipv4_tcp_resets_sent
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -10s unaligned absolute of OutRsts
@@ -40,9 +40,9 @@ component: Network
 
     alarm: 1m_ipv4_tcp_resets_received
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux freebsd
     hosts: *
    lookup: average -1m at -10s unaligned absolute of AttemptFails
@@ -52,9 +52,9 @@ component: Network
 
     alarm: 10s_ipv4_tcp_resets_received
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux freebsd
     hosts: *
    lookup: average -10s unaligned absolute of AttemptFails
diff --git a/health/health.d/timex.conf b/health/health.d/timex.conf
new file mode 100644
index 000000000..ea90c4000
--- /dev/null
+++ b/health/health.d/timex.conf
@@ -0,0 +1,17 @@
+
+# It can take several minutes before ntpd selects a server to synchronize with;
+# only warn once uptime exceeds 17 minutes (17 * 60 s, roughly 1024 seconds).
+
+    alarm: system_clock_sync_state
+       on: system.clock_sync_state
+       os: linux
+    class: Errors
+     type: System
+component: Clock
+     calc: $state
+    units: synchronization state
+    every: 10s
+     warn: $system.uptime.uptime > 17 * 60 AND $this == 0
+    delay: down 5m
+     info: the system time is not synchronized to a reliable server
+       to: silent
diff --git a/health/health.d/udp_errors.conf b/health/health.d/udp_errors.conf
index 342a1aedd..64f47dfa7 100644
--- a/health/health.d/udp_errors.conf
+++ b/health/health.d/udp_errors.conf
@@ -6,9 +6,9 @@
 
     alarm: 1m_ipv4_udp_receive_buffer_errors
        on: ipv4.udperrors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux freebsd
     hosts: *
    lookup: average -1m unaligned absolute of RcvbufErrors
@@ -24,9 +24,9 @@ component: Network
 
     alarm: 1m_ipv4_udp_send_buffer_errors
        on: ipv4.udperrors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m unaligned absolute of SndbufErrors
diff --git a/health/health.d/unbound.conf b/health/health.d/unbound.conf
index 1df15474f..4e8d164d2 100644
--- a/health/health.d/unbound.conf
+++ b/health/health.d/unbound.conf
@@ -1,27 +1,11 @@
 
-# make sure unbound is running
-
- template: unbound_last_collected_secs
-       on: unbound.queries
-    class: DNS
-component: Unbound
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # make sure there is no overwritten/dropped queries in the request-list
 
  template: unbound_request_list_overwritten
        on: unbound.request_list_jostle_list
-    class: DNS
+    class: Errors
+     type: DNS
 component: Unbound
-     type: Errors
    lookup: average -60s unaligned absolute match-names of overwritten
     units: queries
     every: 10s
@@ -32,9 +16,9 @@ component: Unbound
 
  template: unbound_request_list_dropped
        on: unbound.request_list_jostle_list
-    class: DNS
+    class: Errors
+     type: DNS
 component: Unbound
-     type: Errors
    lookup: average -60s unaligned absolute match-names of dropped
     units: queries
     every: 10s
diff --git a/health/health.d/varnish.conf b/health/health.d/varnish.conf
deleted file mode 100644
index 7f3bd6c82..000000000
--- a/health/health.d/varnish.conf
+++ /dev/null
@@ -1,12 +0,0 @@
-    alarm: varnish_last_collected
-       on: varnish.uptime
-    class: Web Proxy
-component: Varnish
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/vcsa.conf b/health/health.d/vcsa.conf
index 8538e488c..a9cc7ceef 100644
--- a/health/health.d/vcsa.conf
+++ b/health/health.d/vcsa.conf
@@ -1,20 +1,4 @@
 
-# make sure vcsa is running and responding
-
- template: vcsa_last_collected_secs
-       on: vcsa.system_health
-    class: Virtual Machine
-component: VMware vCenter
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # Overall system health:
 #  - 0: all components are healthy.
 #  - 1: one or more components might become overloaded soon.
@@ -24,9 +8,9 @@ component: VMware vCenter
 
  template: vcsa_system_health
        on: vcsa.system_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of system
     units: status
     every: 10s
@@ -46,9 +30,9 @@ component: VMware vCenter
 
  template: vcsa_swap_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of swap
     units: status
     every: 10s
@@ -61,9 +45,9 @@ component: VMware vCenter
 
  template: vcsa_storage_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of storage
     units: status
     every: 10s
@@ -76,9 +60,9 @@ component: VMware vCenter
 
  template: vcsa_mem_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of mem
     units: status
     every: 10s
@@ -91,9 +75,9 @@ component: VMware vCenter
 
  template: vcsa_load_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: VMware vCenter
-     type: Utilization
    lookup: max -10s unaligned of load
     units: status
     every: 10s
@@ -106,9 +90,9 @@ component: VMware vCenter
 
  template: vcsa_database_storage_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of database_storage
     units: status
     every: 10s
@@ -121,9 +105,9 @@ component: VMware vCenter
 
  template: vcsa_applmgmt_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of applmgmt
     units: status
     every: 10s
@@ -143,9 +127,9 @@ component: VMware vCenter
 
  template: vcsa_software_updates_health
        on: vcsa.software_updates_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of software_packages
     units: status
     every: 10s
diff --git a/health/health.d/vernemq.conf b/health/health.d/vernemq.conf
index 737147f38..cfbe2a524 100644
--- a/health/health.d/vernemq.conf
+++ b/health/health.d/vernemq.conf
@@ -1,27 +1,11 @@
 
-# Availability
-
- template: vernemq_last_collected_secs
-       on: vernemq.node_uptime
-    class: Messaging
-component: VerneMQ
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # Socket errors
 
  template: vernemq_socket_errors
        on: vernemq.socket_errors
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: sum -1m unaligned absolute of socket_error
     units: errors
     every: 1m
@@ -34,9 +18,9 @@ component: VerneMQ
 
  template: vernemq_queue_message_drop
        on: vernemq.queue_undelivered_messages
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute of queue_message_drop
     units: dropped messages
     every: 1m
@@ -47,9 +31,9 @@ component: VerneMQ
 
  template: vernemq_queue_message_expired
        on: vernemq.queue_undelivered_messages
-    class: Messaging
+    class: Latency
+     type: Messaging
 component: VerneMQ
-     type: Latency
    lookup: average -1m unaligned absolute of queue_message_expired
     units: expired messages
     every: 1m
@@ -60,9 +44,9 @@ component: VerneMQ
 
  template: vernemq_queue_message_unhandled
        on: vernemq.queue_undelivered_messages
-    class: Messaging
+    class: Latency
+     type: Messaging
 component: VerneMQ
-     type: Latency
    lookup: average -1m unaligned absolute of queue_message_unhandled
     units: unhandled messages
     every: 1m
@@ -75,9 +59,9 @@ component: VerneMQ
 
  template: vernemq_average_scheduler_utilization
        on: vernemq.average_scheduler_utilization
-    class: Messaging
+    class: Utilization
+     type: Messaging
 component: VerneMQ
-     type: Utilization
    lookup: average -10m unaligned
     units: %
     every: 1m
@@ -91,9 +75,9 @@ component: VerneMQ
 
  template: vernemq_cluster_dropped
        on: vernemq.cluster_dropped
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: sum -1m unaligned
     units: KiB
     every: 1m
@@ -104,9 +88,9 @@ component: VerneMQ
 
  template: vernemq_netsplits
        on: vernemq.netsplits
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: sum -1m unaligned absolute of netsplit_detected
     units: netsplits
     every: 10s
@@ -119,9 +103,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_connack_sent_reason_unsuccessful
        on: vernemq.mqtt_connack_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -134,9 +118,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_disconnect_received_reason_not_normal
        on: vernemq.mqtt_disconnect_received_reason
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
     units: packets
     every: 1m
@@ -147,9 +131,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_disconnect_sent_reason_not_normal
        on: vernemq.mqtt_disconnect_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
     units: packets
     every: 1m
@@ -162,9 +146,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_subscribe_error
        on: vernemq.mqtt_subscribe_error
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute
     units: failed ops
     every: 1m
@@ -175,9 +159,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_subscribe_auth_error
        on: vernemq.mqtt_subscribe_auth_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: attempts
     every: 1m
@@ -190,9 +174,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_unsubscribe_error
        on: vernemq.mqtt_unsubscribe_error
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute
     units: failed ops
     every: 1m
@@ -205,9 +189,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_publish_errors
        on: vernemq.mqtt_publish_errors
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute
     units: failed ops
     every: 1m
@@ -218,9 +202,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_publish_auth_errors
        on: vernemq.mqtt_publish_auth_errors
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: attempts
     every: 1m
@@ -233,9 +217,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_puback_received_reason_unsuccessful
        on: vernemq.mqtt_puback_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -246,9 +230,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_puback_sent_reason_unsuccessful
        on: vernemq.mqtt_puback_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -259,9 +243,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_puback_unexpected
        on: vernemq.mqtt_puback_invalid_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: messages
     every: 1m
@@ -274,9 +258,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrec_received_reason_unsuccessful
        on: vernemq.mqtt_pubrec_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -287,9 +271,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
        on: vernemq.mqtt_pubrec_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -300,9 +284,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrec_invalid_error
        on: vernemq.mqtt_pubrec_invalid_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: messages
     every: 1m
@@ -315,9 +299,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrel_received_reason_unsuccessful
        on: vernemq.mqtt_pubrel_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -328,9 +312,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
        on: vernemq.mqtt_pubrel_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -343,9 +327,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
        on: vernemq.mqtt_pubcomp_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -356,9 +340,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
        on: vernemq.mqtt_pubcomp_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -369,9 +353,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubcomp_unexpected
        on: vernemq.mqtt_pubcomp_invalid_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: messages
     every: 1m
diff --git a/health/health.d/vsphere.conf b/health/health.d/vsphere.conf
index aee7c5cd4..d8fc899b9 100644
--- a/health/health.d/vsphere.conf
+++ b/health/health.d/vsphere.conf
@@ -6,9 +6,9 @@
 
  template: vsphere_vm_mem_usage
        on: vsphere.vm_mem_usage_percentage
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: Memory
-     type: Utilization
     hosts: *
      calc: $used
     units: %
@@ -23,9 +23,9 @@ component: Memory
 
  template: vsphere_host_mem_usage
        on: vsphere.host_mem_usage_percentage
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: Memory
-     type: Utilization
     hosts: *
      calc: $used
     units: %
@@ -39,9 +39,9 @@ component: Memory
 
  template: vsphere_inbound_packets_errors
        on: vsphere.net_errors_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -51,9 +51,9 @@ component: Network
 
  template: vsphere_outbound_packets_errors
        on: vsphere.net_errors_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
@@ -65,9 +65,9 @@ component: Network
 
  template: vsphere_inbound_packets_errors_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -81,9 +81,9 @@ component: Network
 
  template: vsphere_outbound_packets_errors_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
@@ -100,9 +100,9 @@ component: Network
 
  template: vsphere_cpu_usage
        on: vsphere.cpu_usage_total
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: CPU
-     type: Utilization
     hosts: *
    lookup: average -10m unaligned match-names of used
     units: %
@@ -117,9 +117,9 @@ component: CPU
 
  template: vsphere_inbound_packets_dropped
        on: vsphere.net_drops_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -129,9 +129,9 @@ component: Network
 
  template: vsphere_outbound_packets_dropped
        on: vsphere.net_drops_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
@@ -143,9 +143,9 @@ component: Network
 
  template: vsphere_inbound_packets_dropped_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -159,9 +159,9 @@ component: Network
 
  template: vsphere_outbound_packets_dropped_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf
index 127c9a9c6..454e0abef 100644
--- a/health/health.d/web_log.conf
+++ b/health/health.d/web_log.conf
@@ -1,22 +1,4 @@
 
-# make sure we can collect web log data
-
- template: last_collected_secs
-       on: web_log.response_codes
-    class: Web Server
-component: Web log
-     type: Latency
- families: *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
 # -----------------------------------------------------------------------------
 # high level response code alarms
 
@@ -29,9 +11,9 @@ component: Web log
 
  template: 1m_requests
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -41,9 +23,9 @@ component: Web log
 
  template: 1m_successful
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of successful_requests
      calc: $this * 100 / $1m_requests
@@ -57,41 +39,39 @@ component: Web log
 
  template: 1m_redirects
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of redirects
      calc: $this * 100 / $1m_requests
     units: %
     every: 10s
      warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? (  1 ) : ( 20 )) ) : ( 0 )
-     crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of redirection HTTP requests over the last minute (3xx except 304)
        to: webmaster
 
  template: 1m_bad_requests
        on: web_log.response_statuses
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of bad_requests
      calc: $this * 100 / $1m_requests
     units: %
     every: 10s
      warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING)  ? ( 10 ) : ( 30 )) ) : ( 0 )
-     crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of client error HTTP requests over the last minute (4xx except 401)
        to: webmaster
 
  template: 1m_internal_errors
        on: web_log.response_statuses
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of server_errors
      calc: $this * 100 / $1m_requests
@@ -114,9 +94,9 @@ component: Web log
 
  template: 1m_total_requests
        on: web_log.response_codes
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -126,9 +106,9 @@ component: Web log
 
  template: 1m_unmatched
        on: web_log.response_codes
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of unmatched
      calc: $this * 100 / $1m_total_requests
@@ -151,9 +131,9 @@ component: Web log
 
  template: 10m_response_time
        on: web_log.response_time
-    class: System
+    class: Latency
+     type: System
 component: Web log
-     type: Latency
  families: *
    lookup: average -10m unaligned of avg
     units: ms
@@ -162,9 +142,9 @@ component: Web log
 
  template: web_slow
        on: web_log.response_time
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: Web log
-     type: Latency
  families: *
    lookup: average -1m unaligned of avg
     units: ms
@@ -191,9 +171,9 @@ component: Web log
 
  template: 5m_successful_old
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m at -5m unaligned of successful_requests
     units: requests/s
@@ -202,9 +182,9 @@ component: Web log
 
  template: 5m_successful
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m unaligned of successful_requests
     units: requests/s
@@ -213,9 +193,9 @@ component: Web log
 
  template: 5m_requests_ratio
        on: web_log.response_codes
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
      calc: ($5m_successful_old > 0)?($5m_successful * 100 / $5m_successful_old):(100)
     units: %
@@ -233,23 +213,6 @@ component: Web log
 
 # ---------------------------------------------------GO-VERSION---------------------------------------------------------
 
-# make sure we can collect web log data
-
- template: web_log_last_collected_secs
-       on: web_log.requests
-    class: Web Server
-component: Web log
-     type: Latency
- families: *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
 # unmatched lines
 
 # the following alarms trigger only when there are enough data.
@@ -261,9 +224,9 @@ component: Web log
 
  template: web_log_1m_total_requests
        on: web_log.requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -273,9 +236,9 @@ component: Web log
 
  template: web_log_1m_unmatched
        on: web_log.excluded_requests
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of unmatched
      calc: $this * 100 / $web_log_1m_total_requests
@@ -298,9 +261,9 @@ component: Web log
 
  template: web_log_1m_requests
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -310,9 +273,9 @@ component: Web log
 
  template: web_log_1m_successful
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of success
      calc: $this * 100 / $web_log_1m_requests
@@ -326,41 +289,39 @@ component: Web log
 
  template: web_log_1m_redirects
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of redirect
      calc: $this * 100 / $web_log_1m_requests
     units: %
     every: 10s
      warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING ) ? (  1 ) : ( 20 )) ) : ( 0 )
-     crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of redirection HTTP requests over the last minute (3xx except 304)
        to: webmaster
 
  template: web_log_1m_bad_requests
        on: web_log.type_requests
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of bad
      calc: $this * 100 / $web_log_1m_requests
     units: %
     every: 10s
      warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING)  ? ( 10 ) : ( 30 )) ) : ( 0 )
-     crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of client error HTTP requests over the last minute (4xx except 401)
        to: webmaster
 
  template: web_log_1m_internal_errors
        on: web_log.type_requests
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of error
      calc: $this * 100 / $web_log_1m_requests
@@ -384,9 +345,9 @@ component: Web log
 
  template: web_log_10m_response_time
        on: web_log.request_processing_time
-    class: System
+    class: Latency
+     type: System
 component: Web log
-     type: Latency
  families: *
    lookup: average -10m unaligned of avg
     units: ms
@@ -395,9 +356,9 @@ component: Web log
 
  template: web_log_web_slow
        on: web_log.request_processing_time
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: Web log
-     type: Latency
  families: *
    lookup: average -1m unaligned of avg
     units: ms
@@ -424,9 +385,9 @@ component: Web log
 
  template: web_log_5m_successful_old
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m at -5m unaligned of success
     units: requests/s
@@ -435,9 +396,9 @@ component: Web log
 
  template: web_log_5m_successful
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m unaligned of success
     units: requests/s
@@ -446,9 +407,9 @@ component: Web log
 
  template: web_log_5m_requests_ratio
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
      calc: ($web_log_5m_successful_old > 0)?($web_log_5m_successful * 100 / $web_log_5m_successful_old):(100)
     units: %
diff --git a/health/health.d/whoisquery.conf b/health/health.d/whoisquery.conf
index c6d3a9de0..be5eb58f9 100644
--- a/health/health.d/whoisquery.conf
+++ b/health/health.d/whoisquery.conf
@@ -1,26 +1,9 @@
 
-# make sure whoisquery is running
-
- template: whoisquery_last_collected_secs
-       on: whoisquery.time_until_expiration
-    class: Other
-component: WHOIS
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 60s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
  template: whoisquery_days_until_expiration
        on: whoisquery.time_until_expiration
-    class: Other
+    class: Utilization
+     type: Other
 component: WHOIS
-     type: Utilization
      calc: $expiry
     units: seconds
     every: 60s
diff --git a/health/health.d/wmi.conf b/health/health.d/wmi.conf
index 6bd4e077f..90d39ce9d 100644
--- a/health/health.d/wmi.conf
+++ b/health/health.d/wmi.conf
@@ -1,29 +1,11 @@
 
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-## Availability
-
- template: wmi_last_collected_secs
-       on: cpu.collector_duration
-    class: Windows
-component: Availability
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 ## CPU
 
  template: wmi_10min_cpu_usage
        on: wmi.cpu_utilization_total
-    class: Windows
+    class: Utilization
+     type: Windows
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned match-names of dpc,user,privileged,interrupt
@@ -40,9 +22,9 @@ component: CPU
 
  template: wmi_ram_in_use
        on: wmi.memory_utilization
-    class: Windows
+    class: Utilization
+     type: Windows
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
      calc: ($used) * 100 / ($used + $available)
@@ -56,9 +38,9 @@ component: Memory
 
  template: wmi_swap_in_use
        on: wmi.memory_swap_utilization
-    class: Windows
+    class: Utilization
+     type: Windows
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
      calc: ($used) * 100 / ($used + $available)
@@ -75,9 +57,9 @@ component: Memory
 
  template: wmi_inbound_packets_discarded
        on: wmi.net_discarded
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -91,9 +73,9 @@ component: Network
 
  template: wmi_outbound_packets_discarded
        on: wmi.net_discarded
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -107,9 +89,9 @@ component: Network
 
  template: wmi_inbound_packets_errors
        on: wmi.net_errors
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -123,9 +105,9 @@ component: Network
 
  template: wmi_outbound_packets_errors
        on: wmi.net_errors
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -142,9 +124,9 @@ component: Network
 
  template: wmi_disk_in_use
        on: wmi.logical_disk_utilization
-    class: Windows
+    class: Utilization
+     type: Windows
 component: Disk
-     type: Utilization
        os: linux
     hosts: *
      calc: ($used) * 100 / ($used + $free)
diff --git a/health/health.d/x509check.conf b/health/health.d/x509check.conf
index 93c406b7a..fc69d0288 100644
--- a/health/health.d/x509check.conf
+++ b/health/health.d/x509check.conf
@@ -1,26 +1,9 @@
 
-# make sure x509check is running
-
- template: x509check_last_collected_secs
-       on: x509check.time_until_expiration
-    class: Certificates
-component: x509 certificates
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 60s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
  template: x509check_days_until_expiration
        on: x509check.time_until_expiration
-    class: Certificates
+    class: Latency
+     type: Certificates
 component: x509 certificates
-     type: Latency
      calc: $expiry
     units: seconds
     every: 60s
@@ -31,9 +14,9 @@ component: x509 certificates
       
  template: x509check_revocation_status
        on: x509check.revocation_status
-    class: Certificates
+    class: Errors
+     type: Certificates
 component: x509 certificates
-     type: Errors
      calc: $revoked
     every: 60s
      crit: $this != nan AND $this != 0
diff --git a/health/health.d/zfs.conf b/health/health.d/zfs.conf
index d6f5fa2fe..785838d47 100644
--- a/health/health.d/zfs.conf
+++ b/health/health.d/zfs.conf
@@ -1,9 +1,9 @@
 
     alarm: zfs_memory_throttle
        on: zfs.memory_ops
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
    lookup: sum -10m unaligned absolute of throttled
     units: events
     every: 1m
@@ -16,9 +16,9 @@ component: File system
 
  template: zfs_pool_state_warn
        on: zfspool.state
-    class: System
+    class: Errors
+     type: System
 component: File system
-     type: Errors
      calc: $degraded
     units: boolean
     every: 10s
@@ -29,9 +29,9 @@ component: File system
 
  template: zfs_pool_state_crit
        on: zfspool.state
-    class: System
+    class: Errors
+     type: System
 component: File system
-     type: Errors
      calc: $faulted + $unavail
     units: boolean
     every: 10s
diff --git a/health/health.d/zookeeper.conf b/health/health.d/zookeeper.conf
deleted file mode 100644
index 8c7d5a73d..000000000
--- a/health/health.d/zookeeper.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure zookeeper is running
-
- template: zookeeper_last_collected_secs
-       on: zookeeper.requests
-    class: KV Storage
-component: ZooKeeper
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2021-12-01 06:15:11 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2021-12-01 06:15:11 +0000
commit	483926a283e118590da3f9ecfa75a8a4d62143ce (patch)
tree	cb77052778df9a128a8cd3ff5bf7645322a13bc5 /health/health.d
parent	Releasing debian version 1.31.0-4. (diff)
download	netdata-483926a283e118590da3f9ecfa75a8a4d62143ce.tar.xz netdata-483926a283e118590da3f9ecfa75a8a4d62143ce.zip