From e970e0b37b8bd7f246feb3f70c4136418225e434 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Wed, 1 Dec 2021 07:15:04 +0100
Subject: Adding upstream version 1.32.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 health/Makefile.am                            |  19 +-
 health/REFERENCE.md                           |  60 +-
 health/health.c                               |  73 ++-
 health/health.d/adaptec_raid.conf             |   8 +-
 health/health.d/am2320.conf                   |  15 -
 health/health.d/anomalies.conf                |   8 +-
 health/health.d/apache.conf                   |  17 -
 health/health.d/apcupsd.conf                  |  12 +-
 health/health.d/backend.conf                  |  12 +-
 health/health.d/bcache.conf                   |   8 +-
 health/health.d/beanstalkd.conf               |   4 +-
 health/health.d/bind_rndc.conf                |   4 +-
 health/health.d/boinc.conf                    |  16 +-
 health/health.d/btrfs.conf                    |  16 +-
 health/health.d/ceph.conf                     |   4 +-
 health/health.d/cgroups.conf                  |   8 +-
 health/health.d/cockroachdb.conf              |  72 +--
 health/health.d/couchdb.conf                  |  16 -
 health/health.d/cpu.conf                      |  16 +-
 health/health.d/dbengine.conf                 |  16 +-
 health/health.d/disks.conf                    |  26 +-
 health/health.d/dns_query.conf                |   4 +-
 health/health.d/dnsmasq_dhcp.conf             |   4 +-
 health/health.d/dockerd.conf                  |   4 +-
 health/health.d/elasticsearch.conf            |  15 -
 health/health.d/entropy.conf                  |   4 +-
 health/health.d/exporting.conf                |  29 +-
 health/health.d/fping.conf                    |  16 +-
 health/health.d/fronius.conf                  |   4 +-
 health/health.d/gearman.conf                  |  20 +-
 health/health.d/geth.conf                     |  12 +
 health/health.d/go.d.plugin.conf              |  17 +
 health/health.d/haproxy.conf                  |  21 +-
 health/health.d/hdfs.conf                     |  37 +-
 health/health.d/httpcheck.conf                |  46 +-
 health/health.d/ioping.conf                   |   4 +-
 health/health.d/ipc.conf                      |   8 +-
 health/health.d/ipfs.conf                     |   4 +-
 health/health.d/ipmi.conf                     |   8 +-
 health/health.d/kubelet.conf                  |  36 +-
 health/health.d/lighttpd.conf                 |  17 -
 health/health.d/linux_power_supply.conf       |   4 +-
 health/health.d/load.conf                     |  16 +-
 health/health.d/mdstat.conf                   |  16 +-
 health/health.d/megacli.conf                  |  20 +-
 health/health.d/memcached.conf                |  29 +-
 health/health.d/memory.conf                   |  12 +-
 health/health.d/mongodb.conf                  |  16 -
 health/health.d/mysql.conf                    |  62 +-
 health/health.d/named.conf                    |  17 -
 health/health.d/net.conf                      |  60 +-
 health/health.d/netfilter.conf                |   4 +-
 health/health.d/nginx.conf                    |  17 -
 health/health.d/nginx_plus.conf               |  17 -
 health/health.d/phpfpm.conf                   |  17 -
 health/health.d/pihole.conf                   |  49 +-
 health/health.d/portcheck.conf                |  26 +-
 health/health.d/postgres.conf                 |  16 -
 health/health.d/processes.conf                |   4 +-
 health/health.d/pulsar.conf                   |  16 -
 health/health.d/python.d.plugin.conf          |  17 +
 health/health.d/ram.conf                      |  48 +-
 health/health.d/redis.conf                    |  24 +-
 health/health.d/retroshare.conf               |  19 +-
 health/health.d/riakkv.conf                   |  38 +-
 health/health.d/scaleio.conf                  |  24 +-
 health/health.d/softnet.conf                  |  12 +-
 health/health.d/squid.conf                    |  17 -
 health/health.d/stiebeleltron.conf            |   4 +-
 health/health.d/swap.conf                     |  10 +-
 health/health.d/systemdunits.conf             |  40 +-
 health/health.d/tcp_conn.conf                 |   4 +-
 health/health.d/tcp_listen.conf               |  16 +-
 health/health.d/tcp_mem.conf                  |   4 +-
 health/health.d/tcp_orphans.conf              |   4 +-
 health/health.d/tcp_resets.conf               |  16 +-
 health/health.d/timex.conf                    |  17 +
 health/health.d/udp_errors.conf               |   8 +-
 health/health.d/unbound.conf                  |  24 +-
 health/health.d/varnish.conf                  |  12 -
 health/health.d/vcsa.conf                     |  48 +-
 health/health.d/vernemq.conf                  | 120 ++--
 health/health.d/vsphere.conf                  |  44 +-
 health/health.d/web_log.conf                  | 135 ++--
 health/health.d/whoisquery.conf               |  21 +-
 health/health.d/wmi.conf                      |  50 +-
 health/health.d/x509check.conf                |  25 +-
 health/health.d/zfs.conf                      |  12 +-
 health/health.d/zookeeper.conf                |  17 -
 health/health.h                               |   6 +-
 health/health_config.c                        | 153 ++++-
 health/health_json.c                          |  18 +
 health/health_log.c                           |  64 +-
 health/notifications/alarm-notify.sh.in       | 888 +++++++++++++++++++++++---
 health/notifications/custom/README.md         |   6 +-
 health/notifications/email/README.md          |  18 +-
 health/notifications/health_alarm_notify.conf |   4 +-
 health/notifications/syslog/README.md         |   2 +-
 98 files changed, 1752 insertions(+), 1395 deletions(-)
 delete mode 100644 health/health.d/am2320.conf
 delete mode 100644 health/health.d/apache.conf
 delete mode 100644 health/health.d/couchdb.conf
 delete mode 100644 health/health.d/elasticsearch.conf
 create mode 100644 health/health.d/geth.conf
 create mode 100644 health/health.d/go.d.plugin.conf
 delete mode 100644 health/health.d/lighttpd.conf
 delete mode 100644 health/health.d/mongodb.conf
 delete mode 100644 health/health.d/named.conf
 delete mode 100644 health/health.d/nginx.conf
 delete mode 100644 health/health.d/nginx_plus.conf
 delete mode 100644 health/health.d/phpfpm.conf
 delete mode 100644 health/health.d/postgres.conf
 delete mode 100644 health/health.d/pulsar.conf
 create mode 100644 health/health.d/python.d.plugin.conf
 delete mode 100644 health/health.d/squid.conf
 create mode 100644 health/health.d/timex.conf
 delete mode 100644 health/health.d/varnish.conf
 delete mode 100644 health/health.d/zookeeper.conf

(limited to 'health')

diff --git a/health/Makefile.am b/health/Makefile.am
index b963ea0cd..349b86d61 100644
--- a/health/Makefile.am
+++ b/health/Makefile.am
@@ -25,9 +25,7 @@ install-exec-local:
 healthconfigdir=$(libconfigdir)/health.d
 dist_healthconfig_DATA = \
     health.d/adaptec_raid.conf \
-    health.d/am2320.conf \
     health.d/anomalies.conf \
-    health.d/apache.conf \
     health.d/apcupsd.conf \
     health.d/backend.conf \
     health.d/bcache.conf \
@@ -39,18 +37,18 @@ dist_healthconfig_DATA = \
     health.d/cgroups.conf \
     health.d/cpu.conf \
     health.d/cockroachdb.conf \
-    health.d/couchdb.conf \
     health.d/disks.conf \
     health.d/dnsmasq_dhcp.conf \
     health.d/dns_query.conf \
     health.d/dockerd.conf \
-    health.d/elasticsearch.conf \
     health.d/entropy.conf \
     health.d/exporting.conf \
     health.d/fping.conf \
+    health.d/geth.conf \
     health.d/ioping.conf \
     health.d/fronius.conf \
     health.d/gearman.conf \
+    health.d/go.d.plugin.conf \
     health.d/haproxy.conf \
     health.d/hdfs.conf \
     health.d/httpcheck.conf \
@@ -59,26 +57,19 @@ dist_healthconfig_DATA = \
     health.d/ipmi.conf \
     health.d/isc_dhcpd.conf \
     health.d/kubelet.conf \
-    health.d/lighttpd.conf \
     health.d/linux_power_supply.conf \
     health.d/load.conf \
     health.d/mdstat.conf \
     health.d/megacli.conf \
     health.d/memcached.conf \
     health.d/memory.conf \
-    health.d/mongodb.conf \
     health.d/mysql.conf \
-    health.d/named.conf \
     health.d/net.conf \
     health.d/netfilter.conf \
-    health.d/nginx.conf \
-    health.d/nginx_plus.conf \
     health.d/pihole.conf \
-    health.d/phpfpm.conf \
     health.d/portcheck.conf \
-    health.d/postgres.conf \
     health.d/processes.conf \
-    health.d/pulsar.conf \
+    health.d/python.d.plugin.conf \
     health.d/qos.conf \
     health.d/ram.conf \
     health.d/redis.conf \
@@ -86,11 +77,11 @@ dist_healthconfig_DATA = \
     health.d/riakkv.conf \
     health.d/scaleio.conf \
     health.d/softnet.conf \
-    health.d/squid.conf \
     health.d/stiebeleltron.conf \
     health.d/synchronization.conf \
     health.d/swap.conf \
     health.d/systemdunits.conf \
+    health.d/timex.conf \
     health.d/tcp_conn.conf \
     health.d/tcp_listen.conf \
     health.d/tcp_mem.conf \
@@ -98,7 +89,6 @@ dist_healthconfig_DATA = \
     health.d/tcp_resets.conf \
     health.d/udp_errors.conf \
     health.d/unbound.conf \
-    health.d/varnish.conf \
     health.d/vcsa.conf \
     health.d/vernemq.conf \
     health.d/vsphere.conf \
@@ -107,6 +97,5 @@ dist_healthconfig_DATA = \
     health.d/wmi.conf \
     health.d/x509check.conf \
     health.d/zfs.conf \
-    health.d/zookeeper.conf \
     health.d/dbengine.conf \
     $(NULL)
diff --git a/health/REFERENCE.md b/health/REFERENCE.md
index 5ea6b7c5d..f1bb5557d 100644
--- a/health/REFERENCE.md
+++ b/health/REFERENCE.md
@@ -54,14 +54,17 @@ Netdata parses the following lines. Beneath the table is an in-depth explanation
 -   A few lines use space-separated lists to define how the entity behaves. You can use `*` as a wildcard or prefix with
     `!` for a negative match. Order is important, too! See our [simple patterns docs](../libnetdata/simple_pattern/) for
     more examples.
+-   Lines terminated by a `\` are spliced together with the next line. The backslash is removed and the following line is
+    joined with the current one. No space is inserted, so you may split a line anywhere, even in the middle of a word.
+    This comes in handy if your `info` line consists of several sentences.
 
 | line                                                | required        | functionality                                                                         |
 | --------------------------------------------------- | --------------- | ------------------------------------------------------------------------------------- |
 | [`alarm`/`template`](#alarm-line-alarm-or-template) | yes             | Name of the alarm/template.                                                           |
 | [`on`](#alarm-line-on)                              | yes             | The chart this alarm should attach to.                                                |
-| [`class`](#alarm-line-class)                        | no              | The general classification of the alarm.                                              |
-| [`component`](#alarm-line-component)                | no              | Specify the component of the class of the alarm.                                      |
-| [`type`](#alarm-line-type)                          | no              | The type of error the alarm monitors.                                                 |
+| [`class`](#alarm-line-class)                        | no              | The general alarm classification.                                                     |
+| [`type`](#alarm-line-type)                          | no              | What area of the system the alarm monitors.                                           |
+| [`component`](#alarm-line-component)                | no              | Specific component of the type of the alarm.                                          |
 | [`os`](#alarm-line-os)                              | no              | Which operating systems to run this chart.                                            |
 | [`hosts`](#alarm-line-hosts)                        | no              | Which hostnames will run this alarm.                                                  |
 | [`plugin`](#alarm-line-plugin)                      | no              | Restrict an alarm or template to only a certain plugin.                                             |
@@ -136,24 +139,45 @@ If you create a template using the `disk.io` context, it will apply an alarm to
 
 #### Alarm line `class`
 
-Specify the classification of the alarm or template. 
+This indicates the type of error (or general problem area) that the alarm or template applies to. For example, `Latency` can be used for alarms that trigger on latency issues on network interfaces, web servers, or database systems. Example:
 
-Class can be used to indicate the broader area of the system that the alarm applies to. For example, under the general `Database` class, you can group together alarms that operate on various database systems, like `MySQL`, `CockroachDB`, `CouchDB` etc. Example:
+```yaml
+class: Latency
+```
+
+<details>
+<summary>Netdata's stock alarms use the following `class` attributes by default:</summary>
+
+| Class           |
+| ----------------|
+| Errors          |
+| Latency         |
+| Utilization     |
+| Workload        |
+
+
+</details>
+
+`class` will default to `Unknown` if the line is missing from the alarm configuration.
+
+#### Alarm line `type`
+
+Type can be used to indicate the broader area of the system that the alarm applies to. For example, under the general `Database` type, you can group together alarms that operate on various database systems, like `MySQL`, `CockroachDB`, `CouchDB` etc. Example:
 
 ```yaml
-class: Database
+type: Database
 ```
 <details>
-<summary>Netdata's stock alarms use the following `class` attributes by default, but feel free to adjust for your own requirements.</summary>
+<summary>Netdata's stock alarms use the following `type` attributes by default, but feel free to adjust for your own requirements.</summary>
 
-| Class                    | Description                                                                                      |
+| Type                     | Description                                                                                      |
 | ------------------------ | ------------------------------------------------------------------------------------------------ |
 | Ad Filtering             | Services related to Ad Filtering (like pi-hole)                                                  |
 | Certificates             | Certificates monitoring related                                                                  |
 | Cgroups                  | Alerts for cpu and memory usage of control groups                                                |
 | Computing                | Alerts for shared computing applications (e.g. boinc)                                            |
 | Containers               | Container related alerts (e.g. docker instances)                                                 |
-| Database                 | Database systems (e.g. MySQL, Postgress, etc)                                                    |
+| Database                 | Database systems (e.g. MySQL, PostgreSQL, etc)                                                   |
 | Data Sharing             | Used to group together alerts for data sharing applications                                      |
 | DHCP                     | Alerts for dhcp related services                                                                 |
 | DNS                      | Alerts for dns related services                                                                  |
@@ -162,7 +186,7 @@ class: Database
 | Linux                    | Services specific to Linux (e.g. systemd)                                                        |
 | Messaging                | Alerts for message passing services (e.g. vernemq)                                               |
 | Netdata                  | Internal Netdata components monitoring                                                           |
-| Other                    | Use as a general class of alerts                                                                 |
+| Other                    | When an alert doesn't fit in other types.                                                        |
 | Power Supply             | Alerts from power supply related services (e.g. apcupsd)                                         |
 | Search engine            | Alerts for search services (e.g. elasticsearch)                                                  |
 | Storage                  | Class for alerts dealing with storage services (storage devices typically live under `System`)   |
@@ -174,26 +198,16 @@ class: Database
 
 </details>
 
-If an alarm configuration is missing the `class` line, its value will default to `Unknown`.
+If an alarm configuration is missing the `type` line, its value will default to `Unknown`.
 
 #### Alarm line `component`
 
-Component can be used to narrow down what the previous `class` value specifies for each alarm or template. Continuing from the previous example, `component` might include `MySQL`, `CockroachDB`, `MongoDB`, all under the same `Database` classification. Example:
+Component can be used to narrow down what the previous `type` value specifies for each alarm or template. Continuing from the previous example, `component` might include `MySQL`, `CockroachDB`, `MongoDB`, all under the same `Database` type. Example:
 
 ```yaml
 component: MySQL
 ```
-As with the `class` line, if `component` is missing from the configuration, its value will default to `Unknown`.
-
-#### Alarm line `type`
-
-This indicates the type of error (or general problem area) that the alarm or template applies to. For example, `Latency` can be used for alarms that trigger on latency issues in network interfaces, web servers, or database systems. Example:
-
-```yaml
-type: Latency
-```
-
-`type` will also (as with `class` and `component`) default to `Unknown` if the line is missing from the alarm configuration.
+As with the `class` and `type` lines, if `component` is missing from the configuration, its value will default to `Unknown`.
 
 #### Alarm line `os`
 
diff --git a/health/health.c b/health/health.c
index 85d2a2458..d8e1d4b77 100644
--- a/health/health.c
+++ b/health/health.c
@@ -230,6 +230,9 @@ void health_reload(void) {
     if (netdata_cloud_setting) {
         aclk_single_update_enable();
         aclk_alarm_reload();
+#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+        aclk_alert_reloaded = 1;
+#endif
     }
 #endif
 }
@@ -308,26 +311,44 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
     int n_warn=0, n_crit=0;
     RRDCALC *rc;
     EVAL_EXPRESSION *expr=NULL;
+    BUFFER *warn_alarms, *crit_alarms;
+
+    warn_alarms = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
+    crit_alarms = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
 
     for(rc = host->alarms; rc ; rc = rc->next) {
         if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
             continue;
 
-        if(unlikely(rc->status == RRDCALC_STATUS_WARNING)) {
-            n_warn++;
-            if (ae->alarm_id == rc->id)
-                expr=rc->warning;
+        if (unlikely(rc->status == RRDCALC_STATUS_WARNING)) {
+            if (likely(ae->alarm_id != rc->id) || likely(ae->alarm_event_id != rc->next_event_id - 1)) {
+                if (n_warn)
+                    buffer_strcat(warn_alarms, ",");
+                buffer_strcat(warn_alarms, rc->name);
+                buffer_strcat(warn_alarms, "=");
+                buffer_snprintf(warn_alarms, 11, "%ld", rc->last_status_change);
+                n_warn++;
+            } else if (ae->alarm_id == rc->id)
+                expr = rc->warning;
         } else if (unlikely(rc->status == RRDCALC_STATUS_CRITICAL)) {
-            n_crit++;
-            if (ae->alarm_id == rc->id)
-                expr=rc->critical;
+            if (likely(ae->alarm_id != rc->id) || likely(ae->alarm_event_id != rc->next_event_id - 1)) {
+                if (n_crit)
+                    buffer_strcat(crit_alarms, ",");
+                buffer_strcat(crit_alarms, rc->name);
+                buffer_strcat(crit_alarms, "=");
+                buffer_snprintf(crit_alarms, 11, "%ld", rc->last_status_change);
+                n_crit++;
+            } else if (ae->alarm_id == rc->id)
+                expr = rc->critical;
         } else if (unlikely(rc->status == RRDCALC_STATUS_CLEAR)) {
             if (ae->alarm_id == rc->id)
-                expr=rc->warning;
+                expr = rc->warning;
         }
     }
 
-    snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '" CALCULATED_NUMBER_FORMAT_ZERO "' '" CALCULATED_NUMBER_FORMAT_ZERO "' '%s' '%u' '%u' '%s' '%s' '%s' '%s' '%s' '%s' '%d' '%d'",
+    char *edit_command = ae->source ? health_edit_command_from_source(ae->source) : strdupz("UNKNOWN=0");
+
+    snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '" CALCULATED_NUMBER_FORMAT_ZERO "' '" CALCULATED_NUMBER_FORMAT_ZERO "' '%s' '%u' '%u' '%s' '%s' '%s' '%s' '%s' '%s' '%d' '%d' '%s' '%s' '%s' '%s' '%s'",
               exec,
               recipient,
               host->registry_hostname,
@@ -352,7 +373,12 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
               (expr && expr->source)?expr->source:"NOSOURCE",
               (expr && expr->error_msg)?buffer_tostring(expr->error_msg):"NOERRMSG",
               n_warn,
-              n_crit
+              n_crit,
+              buffer_tostring(warn_alarms),
+              buffer_tostring(crit_alarms),
+              ae->classification?ae->classification:"Unknown",
+              edit_command,
+              localhost->registry_hostname
     );
 
     ae->flags |= HEALTH_ENTRY_FLAG_EXEC_RUN;
@@ -363,6 +389,10 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
     ae->exec_spawn_serial = spawn_enq_cmd(command_to_run);
     enqueue_alarm_notify_in_progress(ae);
 
+    freez(edit_command);
+    buffer_free(warn_alarms);
+    buffer_free(crit_alarms);
+
     return; //health_alarm_wait_for_execution
 done:
     health_alarm_log_save(host, ae);
@@ -635,6 +665,8 @@ void *health_main(void *ptr) {
     int min_run_every = (int)config_get_number(CONFIG_SECTION_HEALTH, "run at least every seconds", 10);
     if(min_run_every < 1) min_run_every = 1;
 
+    int cleanup_sql_every_loop = (min_run_every < 7200) ? 7200 / min_run_every : 1; // SQL log cleanup roughly every 2h; clamped to >= 1 because it is used as a modulo divisor
+
     time_t now                = now_realtime_sec();
     time_t hibernation_delay  = config_get_number(CONFIG_SECTION_HEALTH, "postpone alarms during hibernation for seconds", 60);
 
@@ -689,6 +721,9 @@ void *health_main(void *ptr) {
                 host->health_delay_up_to = 0;
             }
 
+            if(likely(!host->health_log_fp) && (loop == 1 || loop % cleanup_sql_every_loop == 0))
+                sql_health_alarm_log_cleanup(host);
+
             rrdhost_rdlock(host);
 
             // the first loop is to lookup values from the db
@@ -929,7 +964,7 @@ void *health_main(void *ptr) {
 
                         if(likely(!rrdcalc_isrepeating(rc))) {
                             ALARM_ENTRY *ae = health_create_alarm_entry(
-                                    host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id,
+                                    host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
                                     rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
                                     rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info,
                                     rc->delay_last,
@@ -979,7 +1014,7 @@ void *health_main(void *ptr) {
                     if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
                         rc->last_repeat = now;
                         ALARM_ENTRY *ae = health_create_alarm_entry(
-                                host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id,
+                                host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
                                 rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
                                 rc->old_value, rc->value, rc->old_status, rc->status, rc->source, rc->units, rc->info,
                                 rc->delay_last,
@@ -1003,6 +1038,14 @@ void *health_main(void *ptr) {
                 rrdhost_unlock(host);
             }
 
+#ifdef ENABLE_ACLK
+#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+            if (netdata_cloud_setting && unlikely(aclk_alert_reloaded) && loop > 2) {
+                sql_queue_removed_alerts_to_aclk(host);
+            }
+#endif
+#endif
+
             if (unlikely(netdata_exit))
                 break;
 
@@ -1027,8 +1070,12 @@ void *health_main(void *ptr) {
             health_alarm_wait_for_execution(ae);
         }
 
-        rrd_unlock();
+#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+        if (netdata_cloud_setting && unlikely(aclk_alert_reloaded))
+            aclk_alert_reloaded = 0;
+#endif
 
+        rrd_unlock();
 
         if(unlikely(netdata_exit))
             break;
diff --git a/health/health.d/adaptec_raid.conf b/health/health.d/adaptec_raid.conf
index b067e1840..1d823addd 100644
--- a/health/health.d/adaptec_raid.conf
+++ b/health/health.d/adaptec_raid.conf
@@ -3,9 +3,9 @@
 
  template: adaptec_raid_ld_status
        on: adaptec_raid.ld_status
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: max -10s foreach *
     units: bool
     every: 10s
@@ -18,9 +18,9 @@ component: RAID
 
  template: adaptec_raid_pd_state
        on: adaptec_raid.pd_state
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: max -10s foreach *
     units: bool
     every: 10s
diff --git a/health/health.d/am2320.conf b/health/health.d/am2320.conf
deleted file mode 100644
index 4bac98fbb..000000000
--- a/health/health.d/am2320.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-# make sure am2320 is sending stats
-
- template: am2320_last_collected_secs
-       on: am2320.temperature
-    class: Other
-component: Sensors
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
diff --git a/health/health.d/anomalies.conf b/health/health.d/anomalies.conf
index f27e39fc1..269ae544b 100644
--- a/health/health.d/anomalies.conf
+++ b/health/health.d/anomalies.conf
@@ -2,9 +2,9 @@
 
  template: anomalies_anomaly_probabilities
        on: anomalies.probability
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: ML
-     type: Errors
    lookup: average -2m foreach *
     every: 1m
      warn: $this > 50
@@ -14,9 +14,9 @@ component: ML
 
  template: anomalies_anomaly_flags
        on: anomalies.anomaly
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: ML
-     type: Errors
    lookup: sum -2m foreach *
     every: 1m
      warn: $this > 10
diff --git a/health/health.d/apache.conf b/health/health.d/apache.conf
deleted file mode 100644
index c623fb880..000000000
--- a/health/health.d/apache.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure apache is running
-
- template: apache_last_collected_secs
-       on: apache.requests
-    class: Web Server
-component: Apache
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/apcupsd.conf b/health/health.d/apcupsd.conf
index 07b5c28c9..65f1a69ab 100644
--- a/health/health.d/apcupsd.conf
+++ b/health/health.d/apcupsd.conf
@@ -2,9 +2,9 @@
 
  template: apcupsd_10min_ups_load
        on: apcupsd.load
-    class: Power Supply
+    class: Utilization
+     type: Power Supply
 component: UPS
-     type: Utilization
        os: *
     hosts: *
    lookup: average -10m unaligned of percentage
@@ -20,9 +20,9 @@ component: UPS
 # Fire the alarm as soon as it's going on battery (99% charge) and clear only when full.
  template: apcupsd_ups_charge
        on: apcupsd.charge
-    class: Power Supply
+    class: Errors
+     type: Power Supply
 component: UPS
-     type: Errors
        os: *
     hosts: *
    lookup: average -60s unaligned of charge
@@ -36,9 +36,9 @@ component: UPS
 
  template: apcupsd_last_collected_secs
        on: apcupsd.load
-    class: Power Supply
+    class: Latency
+     type: Power Supply
 component: UPS device
-     type: Latency
      calc: $now - $last_collected_t
     every: 10s
     units: seconds ago
diff --git a/health/health.d/backend.conf b/health/health.d/backend.conf
index 948ea551a..91d469395 100644
--- a/health/health.d/backend.conf
+++ b/health/health.d/backend.conf
@@ -1,9 +1,9 @@
 # Alert that backends subsystem will be disabled soon
     alarm: backend_metrics_eol
        on: netdata.backend_metrics
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: Exporting engine
-     type: Errors
     units: boolean
      calc: $now - $last_collected_t
     every: 1m
@@ -16,9 +16,9 @@ component: Exporting engine
 
     alarm: backend_last_buffering
        on: netdata.backend_metrics
-    class: Netdata
+    class: Latency
+     type: Netdata
 component: Exporting engine
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -30,9 +30,9 @@ component: Exporting engine
 
     alarm: backend_metrics_sent
        on: netdata.backend_metrics
-    class: Netdata
+    class: Workload
+     type: Netdata
 component: Exporting engine
-     type: Workload
     units: %
      calc: abs($sent) * 100 / abs($buffered)
     every: 10s
diff --git a/health/health.d/bcache.conf b/health/health.d/bcache.conf
index d75d8e19b..49cb5ad0f 100644
--- a/health/health.d/bcache.conf
+++ b/health/health.d/bcache.conf
@@ -1,9 +1,9 @@
 
  template: bcache_cache_errors
        on: disk.bcache_cache_read_races
-    class: System
+    class: Errors
+     type: System
 component: Disk
-     type: Errors
    lookup: sum -1m unaligned absolute
     units: errors
     every: 1m
@@ -16,9 +16,9 @@ component: Disk
 
  template: bcache_cache_dirty
        on: disk.bcache_cache_alloc
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
      calc: $dirty + $metadata + $undefined
     units: %
     every: 1m
diff --git a/health/health.d/beanstalkd.conf b/health/health.d/beanstalkd.conf
index 99c754571..13ac8c182 100644
--- a/health/health.d/beanstalkd.conf
+++ b/health/health.d/beanstalkd.conf
@@ -2,9 +2,9 @@
 
  template: beanstalk_server_buried_jobs
        on: beanstalk.current_jobs
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: Beanstalk
-     type: Workload
      calc: $buried
     units: jobs
     every: 10s
diff --git a/health/health.d/bind_rndc.conf b/health/health.d/bind_rndc.conf
index e88f87a4f..7c09225ff 100644
--- a/health/health.d/bind_rndc.conf
+++ b/health/health.d/bind_rndc.conf
@@ -1,8 +1,8 @@
  template: bind_rndc_stats_file_size
        on: bind_rndc.stats_size
-    class: DNS
+    class: Utilization
+     type: DNS
 component: BIND
-     type: Utilization
     units: megabytes
     every: 60
      calc: $stats_size
diff --git a/health/health.d/boinc.conf b/health/health.d/boinc.conf
index 8604abee9..7d7a4fdae 100644
--- a/health/health.d/boinc.conf
+++ b/health/health.d/boinc.conf
@@ -3,9 +3,9 @@
 # Warn on any compute errors encountered.
  template: boinc_compute_errors
        on: boinc.states
-    class: Computing
+    class: Errors
+     type: Computing
 component: BOINC
-     type: Errors
        os: *
     hosts: *
  families: *
@@ -21,9 +21,9 @@ component: BOINC
 # Warn on lots of upload errors
  template: boinc_upload_errors
        on: boinc.states
-    class: Computing
+    class: Errors
+     type: Computing
 component: BOINC
-     type: Errors
        os: *
     hosts: *
  families: *
@@ -39,9 +39,9 @@ component: BOINC
 # Warn on the task queue being empty
  template: boinc_total_tasks
        on: boinc.tasks
-    class: Computing
+    class: Utilization
+     type: Computing
 component: BOINC
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -57,9 +57,9 @@ component: BOINC
 # Warn on no active tasks with a non-empty queue
  template: boinc_active_tasks
        on: boinc.tasks
-    class: Computing
+    class: Utilization
+     type: Computing
 component: BOINC
-     type: Utilization
        os: *
     hosts: *
  families: *
diff --git a/health/health.d/btrfs.conf b/health/health.d/btrfs.conf
index d3200a7ee..8d197aa8d 100644
--- a/health/health.d/btrfs.conf
+++ b/health/health.d/btrfs.conf
@@ -1,9 +1,9 @@
 
  template: btrfs_allocated
        on: btrfs.disk
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -18,9 +18,9 @@ component: File system
 
  template: btrfs_data
        on: btrfs.data
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -35,9 +35,9 @@ component: File system
 
  template: btrfs_metadata
        on: btrfs.metadata
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
@@ -52,9 +52,9 @@ component: File system
 
  template: btrfs_system
        on: btrfs.system
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
        os: *
     hosts: *
  families: *
diff --git a/health/health.d/ceph.conf b/health/health.d/ceph.conf
index ed8f9b4b9..1f9da25c7 100644
--- a/health/health.d/ceph.conf
+++ b/health/health.d/ceph.conf
@@ -2,9 +2,9 @@
 
  template: ceph_cluster_space_usage
        on: ceph.general_usage
-    class: Storage
+    class: Utilization
+     type: Storage
 component: Ceph
-     type: Utilization
      calc: $used * 100 / ($used + $avail)
     units: %
     every: 1m
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
index 068533f10..45b34806c 100644
--- a/health/health.d/cgroups.conf
+++ b/health/health.d/cgroups.conf
@@ -3,9 +3,9 @@
 
  template: cgroup_10min_cpu_usage
        on: cgroup.cpu_limit
-    class: Cgroups
+    class: Utilization
+     type: Cgroups
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned
@@ -19,9 +19,9 @@ component: CPU
 
  template: cgroup_ram_in_use
        on: cgroup.mem_usage
-    class: Cgroups
+    class: Utilization
+     type: Cgroups
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
      calc: ($ram) * 100 / $memory_limit
diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf
index dccd2b064..1f227841e 100644
--- a/health/health.d/cockroachdb.conf
+++ b/health/health.d/cockroachdb.conf
@@ -1,27 +1,11 @@
 
-# Availability
-
- template: cockroachdb_last_collected_secs
-       on: cockroachdb.live_nodes
-    class: Database
-component: CockroachDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
 # Capacity
 
  template: cockroachdb_used_storage_capacity
        on: cockroachdb.storage_used_capacity_percentage
-    class: Database
+    class: Utilization
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $capacity_used_percent
     units: %
     every: 10s
@@ -33,9 +17,9 @@ component: CockroachDB
 
  template: cockroachdb_used_usable_storage_capacity
        on: cockroachdb.storage_used_capacity_percentage
-    class: Database
+    class: Utilization
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $capacity_usable_used_percent
     units: %
     every: 10s
@@ -49,37 +33,37 @@ component: CockroachDB
 
  template: cockroachdb_unavailable_ranges
        on: cockroachdb.ranges_replication_problem
-    class: Database
+    class: Errors
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $ranges_unavailable
     units: num
     every: 10s
      warn: $this > 0
     delay: down 15m multiplier 1.5 max 1h
-     info: number of ranges with fewer live replicas than the replication target
+     info: number of ranges with fewer live replicas than needed for quorum
        to: dba
 
- template: cockroachdb_replicas_leaders_not_leaseholders
-       on: cockroachdb.replicas_leaders
-    class: Database
+ template: cockroachdb_underreplicated_ranges
+       on: cockroachdb.ranges_replication_problem
+    class: Errors
+     type: Database
 component: CockroachDB
-     type: Utilization
-     calc: $replicas_leaders_not_leaseholders
+     calc: $ranges_underreplicated
     units: num
     every: 10s
      warn: $this > 0
     delay: down 15m multiplier 1.5 max 1h
-     info: number of replicas that are Raft leaders whose range lease is held by another store
+     info: number of ranges with fewer live replicas than the replication target
        to: dba
 
 # FD
 
  template: cockroachdb_open_file_descriptors_limit
        on: cockroachdb.process_file_descriptors
-    class: Database
+    class: Utilization
+     type: Database
 component: CockroachDB
-     type: Utilization
      calc: $sys_fd_open/$sys_fd_softlimit * 100
     units: %
     every: 10s
@@ -87,29 +71,3 @@ component: CockroachDB
     delay: down 15m multiplier 1.5 max 1h
      info: open file descriptors utilization (against softlimit)
        to: dba
-
-# SQL
-
- template: cockroachdb_sql_active_connections
-       on: cockroachdb.sql_connections
-    class: Database
-component: CockroachDB
-     type: Utilization
-     calc: $sql_conns
-    units: active connections
-    every: 10s
-     info: number of active SQL connections
-       to: dba
-
- template: cockroachdb_sql_executed_statements_total_last_5m
-       on: cockroachdb.sql_statements_total
-    class: Database
-component: CockroachDB
-     type: Workload
-   lookup: sum -5m absolute of sql_query_count
-    units: statements
-    every: 10s
-     warn: $this == 0 AND $cockroachdb_sql_active_connections != 0
-    delay: down 15m up 30s multiplier 1.5 max 1h
-     info: number of executed SQL statements in the last 5 minutes
-       to: dba
diff --git a/health/health.d/couchdb.conf b/health/health.d/couchdb.conf
deleted file mode 100644
index c86c6b988..000000000
--- a/health/health.d/couchdb.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure couchdb is running
-
- template: couchdb_last_collected_secs
-       on: couchdb.request_methods
-    class: Database
-component: CouchDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/cpu.conf b/health/health.d/cpu.conf
index d11215768..ad6952825 100644
--- a/health/health.d/cpu.conf
+++ b/health/health.d/cpu.conf
@@ -3,9 +3,9 @@
 
  template: 10min_cpu_usage
        on: system.cpu
-    class: System
+    class: Utilization
+     type: System
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned of user,system,softirq,irq,guest
@@ -19,9 +19,9 @@ component: CPU
 
  template: 10min_cpu_iowait
        on: system.cpu
-    class: System
+    class: Utilization
+     type: System
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned of iowait
@@ -35,9 +35,9 @@ component: CPU
 
  template: 20min_steal_cpu
        on: system.cpu
-    class: System
+    class: Latency
+     type: System
 component: CPU
-     type: Latency
        os: linux
     hosts: *
    lookup: average -20m unaligned of steal
@@ -52,9 +52,9 @@ component: CPU
 ## FreeBSD
  template: 10min_cpu_usage
        on: system.cpu
-    class: System
+    class: Utilization
+     type: System
 component: CPU
-     type: Utilization
        os: freebsd
     hosts: *
    lookup: average -10m unaligned of user,system,interrupt
diff --git a/health/health.d/dbengine.conf b/health/health.d/dbengine.conf
index 79c156ab8..65c41b846 100644
--- a/health/health.d/dbengine.conf
+++ b/health/health.d/dbengine.conf
@@ -3,9 +3,9 @@
 
     alarm: 10min_dbengine_global_fs_errors
        on: netdata.dbengine_global_errors
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of fs_errors
@@ -18,9 +18,9 @@ component: DB engine
 
     alarm: 10min_dbengine_global_io_errors
        on: netdata.dbengine_global_errors
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of io_errors
@@ -33,9 +33,9 @@ component: DB engine
 
     alarm: 10min_dbengine_global_flushing_warnings
        on: netdata.dbengine_global_errors
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
@@ -49,9 +49,9 @@ component: DB engine
 
     alarm: 10min_dbengine_global_flushing_errors
        on: netdata.dbengine_long_term_page_stats
-    class: Netdata
+    class: Errors
+     type: Netdata
 component: DB engine
-     type: Errors
        os: linux freebsd macos
     hosts: *
    lookup: sum -10m unaligned of flushing_pressure_deletions
diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf
index 60f8faed9..5daff61a1 100644
--- a/health/health.d/disks.conf
+++ b/health/health.d/disks.conf
@@ -11,9 +11,9 @@
 
  template: disk_space_usage
        on: disk.space
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
        os: linux freebsd
     hosts: *
  families: !/dev !/dev/* !/run !/run/* *
@@ -28,9 +28,9 @@ component: Disk
 
  template: disk_inode_usage
        on: disk.inodes
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
        os: linux freebsd
     hosts: *
  families: !/dev !/dev/* !/run !/run/* *
@@ -136,19 +136,16 @@ component: Disk
 
  template: 10min_disk_utilization
        on: disk.util
-    class: System
+    class: Utilization
+     type: System
 component: Disk
-     type: Utilization
        os: linux freebsd
     hosts: *
  families: *
    lookup: average -10m unaligned
     units: %
     every: 1m
-    green: 90
-      red: 98
-     warn: $this > $green * (($status >= $WARNING)  ? (0.7) : (1))
-     crit: $this > $red   * (($status == $CRITICAL) ? (0.7) : (1))
+     warn: $this > 98 * (($status >= $WARNING)  ? (0.7) : (1))
     delay: down 15m multiplier 1.2 max 1h
      info: average percentage of time $family disk was busy over the last 10 minutes
        to: silent
@@ -161,19 +158,16 @@ component: Disk
 
  template: 10min_disk_backlog
        on: disk.backlog
-    class: System
+    class: Latency
+     type: System
 component: Disk
-     type: Latency
        os: linux
     hosts: *
  families: *
    lookup: average -10m unaligned
     units: ms
     every: 1m
-    green: 2000
-      red: 5000
-     warn: $this > $green * (($status >= $WARNING)  ? (0.7) : (1))
-     crit: $this > $red   * (($status == $CRITICAL) ? (0.7) : (1))
+     warn: $this > 5000 * (($status >= $WARNING)  ? (0.7) : (1))
     delay: down 15m multiplier 1.2 max 1h
      info: average backlog size of the $family disk over the last 10 minutes
        to: silent
diff --git a/health/health.d/dns_query.conf b/health/health.d/dns_query.conf
index 1fbb2c598..ec4937c0a 100644
--- a/health/health.d/dns_query.conf
+++ b/health/health.d/dns_query.conf
@@ -3,9 +3,9 @@
 
  template: dns_query_time_query_time
        on: dns_query_time.query_time
-    class: DNS
+    class: Latency
+     type: DNS
 component: DNS
-     type: Latency
    lookup: average -10s unaligned foreach *
     units: ms
     every: 10s
diff --git a/health/health.d/dnsmasq_dhcp.conf b/health/health.d/dnsmasq_dhcp.conf
index 10d139f77..010b94599 100644
--- a/health/health.d/dnsmasq_dhcp.conf
+++ b/health/health.d/dnsmasq_dhcp.conf
@@ -2,9 +2,9 @@
 
  template: dnsmasq_dhcp_dhcp_range_utilization
        on: dnsmasq_dhcp.dhcp_range_utilization
-    class: DHCP
+    class: Utilization
+     type: DHCP
 component: Dnsmasq
-     type: Utilization
     every: 10s
     units: %
      calc: $used
diff --git a/health/health.d/dockerd.conf b/health/health.d/dockerd.conf
index ba866f81b..220ddd664 100644
--- a/health/health.d/dockerd.conf
+++ b/health/health.d/dockerd.conf
@@ -1,8 +1,8 @@
  template: docker_unhealthy_containers
        on: docker.unhealthy_containers
-    class: Containers
+    class: Errors
+     type: Containers
 component: Docker
-     type: Errors
     units: unhealthy containers
     every: 10s
    lookup: average -10s
diff --git a/health/health.d/elasticsearch.conf b/health/health.d/elasticsearch.conf
deleted file mode 100644
index 05d576c39..000000000
--- a/health/health.d/elasticsearch.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-
-# make sure elasticsearch is running
-
- template: elasticsearch_last_collected
-       on: elasticsearch.cluster_health_status
-    class: Search engine
-component: Elasticsearch
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/entropy.conf b/health/health.d/entropy.conf
index 0478fa0be..13b0fcde4 100644
--- a/health/health.d/entropy.conf
+++ b/health/health.d/entropy.conf
@@ -5,9 +5,9 @@
 
     alarm: lowest_entropy
        on: system.entropy
-    class: System
+    class: Utilization
+     type: System
 component: Cryptography
-     type: Utilization
        os: linux
     hosts: *
    lookup: min -5m unaligned
diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf
index 4430f3fd8..06f398c6e 100644
--- a/health/health.d/exporting.conf
+++ b/health/health.d/exporting.conf
@@ -1,22 +1,25 @@
 
-template: exporting_last_buffering
-families: *
-      on: exporting_data_size
-    calc: $now - $last_collected_t
-   units: seconds ago
-   every: 10s
-    warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-   delay: down 5m multiplier 1.5 max 1h
-    info: number of seconds since the last successful buffering of exporting data
-      to: dba
+ template: exporting_last_buffering
+ families: *
+       on: exporting_data_size
+    class: Latency
+     type: Netdata
+component: Exporting engine
+     calc: $now - $last_collected_t
+    units: seconds ago
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+     info: number of seconds since the last successful buffering of exporting data
+       to: dba
 
  template: exporting_metrics_sent
  families: *
        on: exporting_data_size
-    class: Netdata
+    class: Workload
+     type: Netdata
 component: Exporting engine
-     type: Workload
     units: %
      calc: abs($sent) * 100 / abs($buffered)
     every: 10s
diff --git a/health/health.d/fping.conf b/health/health.d/fping.conf
index 120fe8f28..bb22419fa 100644
--- a/health/health.d/fping.conf
+++ b/health/health.d/fping.conf
@@ -2,9 +2,9 @@
  template: fping_last_collected_secs
  families: *
        on: fping.latency
-    class: Other
+    class: Latency
+     type: Other
 component: Network
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -17,9 +17,9 @@ component: Network
  template: fping_host_reachable
  families: *
        on: fping.latency
-    class: Other
+    class: Errors
+     type: Other
 component: Network
-     type: Errors
      calc: $average != nan
     units: up/down
     every: 10s
@@ -31,9 +31,9 @@ component: Network
  template: fping_host_latency
  families: *
        on: fping.latency
-    class: Other
+    class: Latency
+     type: Other
 component: Network
-     type: Latency
    lookup: average -10s unaligned of average
     units: ms
     every: 10s
@@ -48,9 +48,9 @@ component: Network
  template: fping_packet_loss
  families: *
        on: fping.quality
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
    lookup: average -10m unaligned of returned
      calc: 100 - $this
     green: 1
diff --git a/health/health.d/fronius.conf b/health/health.d/fronius.conf
index 81aafaa60..853bd7fbc 100644
--- a/health/health.d/fronius.conf
+++ b/health/health.d/fronius.conf
@@ -1,9 +1,9 @@
  template: fronius_last_collected_secs
  families: *
        on: fronius.power
-    class: Power Supply
+    class: Latency
+     type: Power Supply
 component: Solar
-     type: Latency
      calc: $now - $last_collected_t
     every: 10s
     units: seconds ago
diff --git a/health/health.d/gearman.conf b/health/health.d/gearman.conf
index e2031bf2b..14010d445 100644
--- a/health/health.d/gearman.conf
+++ b/health/health.d/gearman.conf
@@ -1,24 +1,10 @@
-# make sure Gearman is running
- template: gearman_last_collected_secs
-       on: gearman.total_jobs
-    class: Computing
-component: Gearman
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
  template: gearman_workers_queued
        on: gearman.single_job
-    class: Computing
+    class: Latency
+     type: Computing
 component: Gearman
-     type: Latency
-   lookup: average -10m unaligned match-names of Queued
+   lookup: average -10m unaligned match-names of Pending
     units: workers
     every: 10s
      warn: $this > 30000
diff --git a/health/health.d/geth.conf b/health/health.d/geth.conf
new file mode 100644
index 000000000..dd1eb4701
--- /dev/null
+++ b/health/health.d/geth.conf
@@ -0,0 +1,12 @@
+# chainhead_header is expected to be momentarily ahead. If it is considerably ahead (e.g. more than 5 blocks), then the node is definitely out of sync.
+ template: geth_chainhead_diff_between_header_block
+       on: geth.chainhead
+    class: Workload
+     type: ethereum_node
+component: geth
+    every: 10s
+     calc: $chain_head_block -  $chain_head_header
+    units: blocks
+     warn: $this != 0
+     crit: $this > 5
+    delay: down 1m multiplier 1.5 max 1h
diff --git a/health/health.d/go.d.plugin.conf b/health/health.d/go.d.plugin.conf
new file mode 100644
index 000000000..8bf84a976
--- /dev/null
+++ b/health/health.d/go.d.plugin.conf
@@ -0,0 +1,17 @@
+
+# make sure go.d.plugin data collection job is running
+
+ template: go.d_job_last_collected_secs
+       on: netdata.go_plugin_execution_time
+    class: Errors
+     type: Netdata
+component: go.d.plugin
+   module: *
+     calc: $now - $last_collected_t
+    units: seconds ago
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+     info: number of seconds since the last successful data collection
+       to: webmaster
diff --git a/health/health.d/haproxy.conf b/health/health.d/haproxy.conf
index 9f6b1c577..a0ab52bca 100644
--- a/health/health.d/haproxy.conf
+++ b/health/health.d/haproxy.conf
@@ -1,8 +1,8 @@
  template: haproxy_backend_server_status
        on: haproxy_hs.down
-    class: Web Proxy
+    class: Errors
+     type: Web Proxy
 component: HAProxy
-     type: Errors
     units: failed servers
     every: 10s
    lookup: average -10s
@@ -12,25 +12,12 @@ component: HAProxy
 
  template: haproxy_backend_status
        on: haproxy_hb.down
-    class: Web Proxy
+    class: Errors
+     type: Web Proxy
 component: HAProxy
-     type: Errors
     units: failed backend
     every: 10s
    lookup: average -10s
      crit: $this > 0
      info: average number of failed haproxy backends over the last 10 seconds
        to: sysadmin
-
- template: haproxy_last_collected
-       on: haproxy_hb.down
-    class: Web Proxy
-component: HAProxy
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/hdfs.conf b/health/health.d/hdfs.conf
index bd8308bed..ca8df31b9 100644
--- a/health/health.d/hdfs.conf
+++ b/health/health.d/hdfs.conf
@@ -1,28 +1,11 @@
 
-# make sure hdfs is running
-
- template: hdfs_last_collected_secs
-       on: hdfs.heap_memory
-    class: Storage
-component: HDFS
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
 # Common
 
  template: hdfs_capacity_usage
        on: hdfs.capacity
-    class: Storage
+    class: Utilization
+     type: Storage
 component: HDFS
-     type: Utilization
      calc: ($used) * 100 / ($used + $remaining)
     units: %
     every: 10s
@@ -37,9 +20,9 @@ component: HDFS
 
  template: hdfs_missing_blocks
        on: hdfs.blocks
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $missing
     units: missing blocks
     every: 10s
@@ -51,9 +34,9 @@ component: HDFS
 
  template: hdfs_stale_nodes
        on: hdfs.data_nodes
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $stale
     units: dead nodes
     every: 10s
@@ -65,9 +48,9 @@ component: HDFS
 
  template: hdfs_dead_nodes
        on: hdfs.data_nodes
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $dead
     units: dead nodes
     every: 10s
@@ -81,9 +64,9 @@ component: HDFS
 
  template: hdfs_num_failed_volumes
        on: hdfs.num_failed_volumes
-    class: Storage
+    class: Errors
+     type: Storage
 component: HDFS
-     type: Errors
      calc: $fsds_num_failed_volumes
     units: failed volumes
     every: 10s
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf
index d4d6376a3..599c47acc 100644
--- a/health/health.d/httpcheck.conf
+++ b/health/health.d/httpcheck.conf
@@ -1,25 +1,11 @@
- template: httpcheck_last_collected_secs
- families: *
-       on: httpcheck.status
-    class: Other
-component: HTTP endpoint
-     type: Latency
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
  template: httpcheck_web_service_up
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Utilization
+     type: Web Server
 component: HTTP endpoint
-     type: Utilization
    lookup: average -1m unaligned percentage of success
      calc: ($this < 75) ? (0) : ($this)
     every: 5s
@@ -30,9 +16,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_bad_content
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: HTTP endpoint
-     type: Workload
    lookup: average -5m unaligned percentage of bad_content
     every: 10s
     units: %
@@ -46,9 +32,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_bad_status
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: HTTP endpoint
-     type: Workload
    lookup: average -5m unaligned percentage of bad_status
     every: 10s
     units: %
@@ -62,9 +48,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_timeouts
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: HTTP endpoint
-     type: Latency
    lookup: average -5m unaligned percentage of timeout
     every: 10s
     units: %
@@ -73,9 +59,9 @@ component: HTTP endpoint
  template: httpcheck_no_web_service_connections
  families: *
        on: httpcheck.status
-    class: Other
+    class: Errors
+     type: Other
 component: HTTP endpoint
-     type: Errors
    lookup: average -5m unaligned percentage of no_connection
     every: 10s
     units: %
@@ -85,9 +71,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_unreachable
  families: *
        on: httpcheck.status
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: HTTP endpoint
-     type: Errors
      calc: ($httpcheck_no_web_service_connections >= $httpcheck_web_service_timeouts) ? ($httpcheck_no_web_service_connections) : ($httpcheck_web_service_timeouts)
     units: %
     every: 10s
@@ -101,9 +87,9 @@ component: HTTP endpoint
  template: httpcheck_1h_web_service_response_time
  families: *
        on: httpcheck.responsetime
-    class: Other
+    class: Latency
+     type: Other
 component: HTTP endpoint
-     type: Latency
    lookup: average -1h unaligned of time
     every: 30s
     units: ms
@@ -112,9 +98,9 @@ component: HTTP endpoint
  template: httpcheck_web_service_slow
  families: *
        on: httpcheck.responsetime
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: HTTP endpoint
-     type: Latency
    lookup: average -3m unaligned of time
     units: ms
     every: 10s
diff --git a/health/health.d/ioping.conf b/health/health.d/ioping.conf
index 57ce4e866..ee4befbea 100644
--- a/health/health.d/ioping.conf
+++ b/health/health.d/ioping.conf
@@ -1,9 +1,9 @@
  template: ioping_disk_latency
  families: *
        on: ioping.latency
-    class: System
+    class: Latency
+     type: System
 component: Disk
-     type: Latency
    lookup: average -10s unaligned of average
     units: ms
     every: 10s
diff --git a/health/health.d/ipc.conf b/health/health.d/ipc.conf
index 6eaf7abe9..c178a410a 100644
--- a/health/health.d/ipc.conf
+++ b/health/health.d/ipc.conf
@@ -3,9 +3,9 @@
 
     alarm: semaphores_used
        on: system.ipc_semaphores
-    class: System
+    class: Utilization
+     type: System
 component: IPC
-     type: Utilization
        os: linux
     hosts: *
      calc: $semaphores * 100 / $ipc_semaphores_max
@@ -19,9 +19,9 @@ component: IPC
 
     alarm: semaphore_arrays_used
        on: system.ipc_semaphore_arrays
-    class: System
+    class: Utilization
+     type: System
 component: IPC
-     type: Utilization
        os: linux
     hosts: *
      calc: $arrays * 100 / $ipc_semaphores_arrays_max
diff --git a/health/health.d/ipfs.conf b/health/health.d/ipfs.conf
index 6268f4092..a514ddfd0 100644
--- a/health/health.d/ipfs.conf
+++ b/health/health.d/ipfs.conf
@@ -1,9 +1,9 @@
 
  template: ipfs_datastore_usage
        on: ipfs.repo_size
-    class: Data Sharing
+    class: Utilization
+     type: Data Sharing
 component: IPFS
-     type: Utilization
      calc: $size * 100 / $avail
     units: %
     every: 10s
diff --git a/health/health.d/ipmi.conf b/health/health.d/ipmi.conf
index d4fdc6c79..feadba1b7 100644
--- a/health/health.d/ipmi.conf
+++ b/health/health.d/ipmi.conf
@@ -1,8 +1,8 @@
     alarm: ipmi_sensors_states
        on: ipmi.sensors_states
-    class: System
+    class: Errors
+     type: System
 component: IPMI
-     type: Errors
      calc: $warning + $critical
     units: sensors
     every: 10s
@@ -14,9 +14,9 @@ component: IPMI
 
     alarm: ipmi_events
        on: ipmi.events
-    class: System
+    class: Utilization
+     type: System
 component: IPMI
-     type: Utilization
      calc: $events
     units: events
     every: 10s
diff --git a/health/health.d/kubelet.conf b/health/health.d/kubelet.conf
index 4d3c45f97..c2778cc5e 100644
--- a/health/health.d/kubelet.conf
+++ b/health/health.d/kubelet.conf
@@ -6,9 +6,9 @@
 
  template: kubelet_node_config_error
        on: k8s_kubelet.kubelet_node_config_error
-    class: Kubernetes
+    class: Errors
+     type: Kubernetes
 component: Kubelet
-     type: Errors
      calc: $kubelet_node_config_error
     units: bool
     every: 10s
@@ -22,9 +22,9 @@ component: Kubelet
  template: kubelet_token_requests
    lookup: sum -10s of token_fail_count
        on: k8s_kubelet.kubelet_token_requests
-    class: Kubernetes
+    class: Errors
+     type: Kubernetes
 component: Kubelet
-     type: Errors
     units: failed requests
     every: 10s
      warn: $this > 0
@@ -37,9 +37,9 @@ component: Kubelet
  template: kubelet_operations_error
    lookup: sum -1m
        on: k8s_kubelet.kubelet_operations_errors
-    class: Kubernetes
+    class: Errors
+     type: Kubernetes
 component: Kubelet
-     type: Errors
     units: errors
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (0) : (20))
@@ -64,9 +64,9 @@ component: Kubelet
 
  template: kubelet_1m_pleg_relist_latency_quantile_05
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -1m unaligned of kubelet_pleg_relist_latency_05
     units: microseconds
     every: 10s
@@ -74,9 +74,9 @@ component: Kubelet
 
  template: kubelet_10s_pleg_relist_latency_quantile_05
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -10s unaligned of kubelet_pleg_relist_latency_05
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_05 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_05))
     every: 10s
@@ -92,9 +92,9 @@ component: Kubelet
 
  template: kubelet_1m_pleg_relist_latency_quantile_09
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -1m unaligned of kubelet_pleg_relist_latency_09
     units: microseconds
     every: 10s
@@ -102,9 +102,9 @@ component: Kubelet
 
  template: kubelet_10s_pleg_relist_latency_quantile_09
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -10s unaligned of kubelet_pleg_relist_latency_09
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_09 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_09))
     every: 10s
@@ -120,9 +120,9 @@ component: Kubelet
 
  template: kubelet_1m_pleg_relist_latency_quantile_099
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -1m unaligned of kubelet_pleg_relist_latency_099
     units: microseconds
     every: 10s
@@ -130,9 +130,9 @@ component: Kubelet
 
  template: kubelet_10s_pleg_relist_latency_quantile_099
        on: k8s_kubelet.kubelet_pleg_relist_latency_microseconds
-    class: Kubernetes
+    class: Latency
+     type: Kubernetes
 component: Kubelet
-     type: Latency
    lookup: average -10s unaligned of kubelet_pleg_relist_latency_099
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_099 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_099))
     every: 10s
diff --git a/health/health.d/lighttpd.conf b/health/health.d/lighttpd.conf
deleted file mode 100644
index 0f067549e..000000000
--- a/health/health.d/lighttpd.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure lighttpd is running
-
- template: lighttpd_last_collected_secs
-       on: lighttpd.requests
-    class: Web Server
-component: Lighttpd
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/linux_power_supply.conf b/health/health.d/linux_power_supply.conf
index e28c246a3..c0bc6de8a 100644
--- a/health/health.d/linux_power_supply.conf
+++ b/health/health.d/linux_power_supply.conf
@@ -2,9 +2,9 @@
 
  template: linux_power_supply_capacity
        on: powersupply.capacity
-    class: Power Supply
+    class: Utilization
+     type: Power Supply
 component: Battery
-     type: Utilization
      calc: $capacity
     units: %
     every: 10s
diff --git a/health/health.d/load.conf b/health/health.d/load.conf
index e811f6ee2..0bd872f85 100644
--- a/health/health.d/load.conf
+++ b/health/health.d/load.conf
@@ -6,9 +6,9 @@
 # minute, with a special case for a single CPU of setting the trigger at 2.
     alarm: load_cpu_number
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
      calc: ($active_processors == nan or $active_processors == inf or $active_processors < 2) ? ( 2 ) : ( $active_processors )
@@ -22,9 +22,9 @@ component: Load
 
     alarm: load_average_15
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
    lookup: max -1m unaligned of load15
@@ -37,9 +37,9 @@ component: Load
 
     alarm: load_average_5
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
    lookup: max -1m unaligned of load5
@@ -52,9 +52,9 @@ component: Load
 
     alarm: load_average_1
        on: system.load
-    class: System
+    class: Utilization
+     type: System
 component: Load
-     type: Utilization
        os: linux
     hosts: *
    lookup: max -1m unaligned of load1
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
index 67483b201..cedaa000e 100644
--- a/health/health.d/mdstat.conf
+++ b/health/health.d/mdstat.conf
@@ -1,8 +1,8 @@
  template: mdstat_last_collected
        on: md.disks
-    class: System
+    class: Latency
+     type: System
 component: RAID
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
@@ -13,9 +13,9 @@ component: RAID
 
  template: mdstat_disks
        on: md.disks
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
     units: failed devices
     every: 10s
      calc: $down
@@ -26,9 +26,9 @@ component: RAID
 
  template: mdstat_mismatch_cnt
        on: md.mismatch_cnt
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
  families: !*(raid1) !*(raid10) *
     units: unsynchronized blocks
      calc: $count
@@ -40,9 +40,9 @@ component: RAID
 
  template: mdstat_nonredundant_last_collected
        on: md.nonredundant
-    class: System
+    class: Latency
+     type: System
 component: RAID
-     type: Latency
      calc: $now - $last_collected_t
     units: seconds ago
     every: 10s
diff --git a/health/health.d/megacli.conf b/health/health.d/megacli.conf
index 1b6502f62..9fbcfdb92 100644
--- a/health/health.d/megacli.conf
+++ b/health/health.d/megacli.conf
@@ -3,9 +3,9 @@
 
  template: megacli_adapter_state
        on: megacli.adapter_degraded
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: max -10s foreach *
     units: boolean
     every: 10s
@@ -18,9 +18,9 @@ component: RAID
 
  template: megacli_pd_predictive_failures
        on: megacli.pd_predictive_failure
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: sum -10s foreach *
     units: predictive failures
     every: 10s
@@ -31,9 +31,9 @@ component: RAID
 
  template: megacli_pd_media_errors
        on: megacli.pd_media_error
-    class: System
+    class: Errors
+     type: System
 component: RAID
-     type: Errors
    lookup: sum -10s foreach *
     units: media errors
     every: 10s
@@ -46,9 +46,9 @@ component: RAID
 
  template: megacli_bbu_relative_charge
        on: megacli.bbu_relative_charge
-    class: System
+    class: Workload
+     type: System
 component: RAID
-     type: Workload
    lookup: average -10s
     units: percent
     every: 10s
@@ -59,9 +59,9 @@ component: RAID
 
  template: megacli_bbu_cycle_count
        on: megacli.bbu_cycle_count
-    class: System
+    class: Workload
+     type: System
 component: RAID
-     type: Workload
    lookup: average -10s
     units: cycles
     every: 10s
diff --git a/health/health.d/memcached.conf b/health/health.d/memcached.conf
index f4b734c38..2a2fe4b82 100644
--- a/health/health.d/memcached.conf
+++ b/health/health.d/memcached.conf
@@ -1,28 +1,11 @@
 
-# make sure memcached is running
-
- template: memcached_last_collected_secs
-       on: memcached.cache
-    class: KV Storage
-component: Memcached
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
-
 # detect if memcached cache is full
 
  template: memcached_cache_memory_usage
        on: memcached.cache
-    class: KV Storage
+    class: Utilization
+     type: KV Storage
 component: Memcached
-     type: Utilization
      calc: $used * 100 / ($used + $available)
     units: %
     every: 10s
@@ -37,9 +20,9 @@ component: Memcached
 
  template: memcached_cache_fill_rate
        on: memcached.cache
-    class: KV Storage
+    class: Utilization
+     type: KV Storage
 component: Memcached
-     type: Utilization
    lookup: min -10m at -50m unaligned of available
      calc: ($this - $available) / (($now - $after) / 3600)
     units: KB/hour
@@ -51,9 +34,9 @@ component: Memcached
 
  template: memcached_out_of_cache_space_time
        on: memcached.cache
-    class: KV Storage
+    class: Utilization
+     type: KV Storage
 component: Memcached
-     type: Utilization
      calc: ($memcached_cache_fill_rate > 0) ? ($available / $memcached_cache_fill_rate) : (inf)
     units: hours
     every: 10s
diff --git a/health/health.d/memory.conf b/health/health.d/memory.conf
index ab651315f..010cbbd7b 100644
--- a/health/health.d/memory.conf
+++ b/health/health.d/memory.conf
@@ -3,9 +3,9 @@
 
     alarm: 1hour_ecc_memory_correctable
        on: mem.ecc_ce
-    class: System
+    class: Errors
+     type: System
 component: Memory
-     type: Errors
        os: linux
     hosts: *
    lookup: sum -10m unaligned
@@ -18,9 +18,9 @@ component: Memory
 
     alarm: 1hour_ecc_memory_uncorrectable
        on: mem.ecc_ue
-    class: System
+    class: Errors
+     type: System
 component: Memory
-     type: Errors
        os: linux
     hosts: *
    lookup: sum -10m unaligned
@@ -33,9 +33,9 @@ component: Memory
 
     alarm: 1hour_memory_hw_corrupted
        on: mem.hwcorrupt
-    class: System
+    class: Errors
+     type: System
 component: Memory
-     type: Errors
        os: linux
     hosts: *
      calc: $HardwareCorrupted
diff --git a/health/health.d/mongodb.conf b/health/health.d/mongodb.conf
deleted file mode 100644
index 8c9bdeb6f..000000000
--- a/health/health.d/mongodb.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure mongodb is running
-
- template: mongodb_last_collected_secs
-       on: mongodb.read_operations
-    class: Database
-component: MongoDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/mysql.conf b/health/health.d/mysql.conf
index 91860c4a7..34452d983 100644
--- a/health/health.d/mysql.conf
+++ b/health/health.d/mysql.conf
@@ -1,29 +1,11 @@
 
-# make sure mysql is running
-
- template: mysql_last_collected_secs
-       on: mysql.queries
-    class: Database
-component: MySQL
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
-
-# -----------------------------------------------------------------------------
 # slow queries
 
  template: mysql_10s_slow_queries
        on: mysql.queries
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Latency
    lookup: sum -10s of slow_queries
     units: slow queries
     every: 10s
@@ -39,9 +21,9 @@ component: MySQL
 
  template: mysql_10s_table_locks_immediate
        on: mysql.table_locks
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
    lookup: sum -10s absolute of immediate
     units: immediate locks
     every: 10s
@@ -50,9 +32,9 @@ component: MySQL
 
  template: mysql_10s_table_locks_waited
        on: mysql.table_locks
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Latency
    lookup: sum -10s absolute of waited
     units: waited locks
     every: 10s
@@ -61,9 +43,9 @@ component: MySQL
 
  template: mysql_10s_waited_locks_ratio
        on: mysql.table_locks
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Latency
      calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
     units: %
     every: 10s
@@ -79,9 +61,9 @@ component: MySQL
 
  template: mysql_connections
        on: mysql.connections_active
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
      calc: $active * 100 / $limit
     units: %
     every: 10s
@@ -97,9 +79,9 @@ component: MySQL
 
  template: mysql_replication
        on: mysql.slave_status
-    class: Database
+    class: Errors
+     type: Database
 component: MySQL
-     type: Errors
      calc: ($sql_running <= 0 OR $io_running <= 0)?0:1
     units: ok/failed
     every: 10s
@@ -110,9 +92,9 @@ component: MySQL
 
  template: mysql_replication_lag
        on: mysql.slave_behind
-    class: Database
+    class: Latency
+     type: Database
 component: MySQL
-     type: Errors
      calc: $seconds
     units: seconds
     every: 10s
@@ -129,9 +111,9 @@ component: MySQL
 
  template: mysql_galera_cluster_size_max_2m
        on: mysql.galera_cluster_size
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
    lookup: max -2m absolute
     units: nodes
     every: 10s
@@ -140,9 +122,9 @@ component: MySQL
 
  template: mysql_galera_cluster_size
        on: mysql.galera_cluster_size
-    class: Database
+    class: Utilization
+     type: Database
 component: MySQL
-     type: Utilization
      calc: $nodes
     units: nodes
     every: 10s
@@ -156,9 +138,9 @@ component: MySQL
 
  template: mysql_galera_cluster_state
        on: mysql.galera_cluster_state
-    class: Database
+    class: Errors
+     type: Database
 component: MySQL
-     type: Errors
      calc: $state
     every: 10s
      warn: $this == 2 OR $this == 3
@@ -173,9 +155,9 @@ component: MySQL
 
  template: mysql_galera_cluster_status
        on: mysql.galera_cluster_status
-    class: Database
+    class: Errors
+     type: Database
 component: MySQL
-     type: Errors
      calc: $wsrep_cluster_status
     every: 10s
      crit: $mysql_galera_cluster_state != nan AND $this != 0
diff --git a/health/health.d/named.conf b/health/health.d/named.conf
deleted file mode 100644
index 90266df16..000000000
--- a/health/health.d/named.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure named is running
-
- template: named_last_collected_secs
-       on: named.global_queries
-    class: DNS
-component: BIND
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: domainadmin
-
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
index 04219e163..028ca7b81 100644
--- a/health/health.d/net.conf
+++ b/health/health.d/net.conf
@@ -6,9 +6,9 @@
 
  template: interface_speed
        on: net.net
-    class: System
+    class: Latency
+     type: System
 component: Network
-     type: Latency
        os: *
     hosts: *
  families: *
@@ -19,9 +19,9 @@ component: Network
 
  template: 1m_received_traffic_overflow
        on: net.net
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
  families: *
@@ -36,9 +36,9 @@ component: Network
 
  template: 1m_sent_traffic_overflow
        on: net.net
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
  families: *
@@ -63,9 +63,9 @@ component: Network
 
  template: inbound_packets_dropped
        on: net.drops
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* *
@@ -76,9 +76,9 @@ component: Network
 
  template: outbound_packets_dropped
        on: net.drops
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* *
@@ -89,14 +89,14 @@ component: Network
 
  template: inbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* !wl* *
    lookup: sum -10m unaligned absolute of received
-     calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0))
+     calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0))
     units: %
     every: 1m
      warn: $this >= 2
@@ -106,9 +106,9 @@ component: Network
 
  template: outbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: !net* !wl* *
@@ -123,14 +123,14 @@ component: Network
 
  template: wifi_inbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: wl*
    lookup: sum -10m unaligned absolute of received
-     calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0))
+     calc: (($inbound_packets_dropped != nan AND $this > 10000) ? ($inbound_packets_dropped * 100 / $this) : (0))
     units: %
     every: 1m
      warn: $this >= 10
@@ -140,9 +140,9 @@ component: Network
 
  template: wifi_outbound_packets_dropped_ratio
        on: net.packets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: wl*
@@ -160,9 +160,9 @@ component: Network
 
  template: interface_inbound_errors
        on: net.errors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: freebsd
     hosts: *
  families: *
@@ -176,9 +176,9 @@ component: Network
 
  template: interface_outbound_errors
        on: net.errors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: freebsd
     hosts: *
  families: *
@@ -200,9 +200,9 @@ component: Network
 
  template: 10min_fifo_errors
        on: net.fifo
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -225,9 +225,9 @@ component: Network
 
  template: 1m_received_packets_rate
        on: net.packets
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux freebsd
     hosts: *
  families: *
@@ -238,9 +238,9 @@ component: Network
 
  template: 10s_received_packets_storm
        on: net.packets
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux freebsd
     hosts: *
  families: *
diff --git a/health/health.d/netfilter.conf b/health/health.d/netfilter.conf
index 35c89caf7..7de383fa2 100644
--- a/health/health.d/netfilter.conf
+++ b/health/health.d/netfilter.conf
@@ -3,9 +3,9 @@
 
     alarm: netfilter_conntrack_full
        on: netfilter.conntrack_sockets
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: max -10s unaligned of connections
diff --git a/health/health.d/nginx.conf b/health/health.d/nginx.conf
deleted file mode 100644
index 30c738f47..000000000
--- a/health/health.d/nginx.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure nginx is running
-
- template: nginx_last_collected_secs
-       on: nginx.requests
-    class: Web Server
-component: NGINX
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/nginx_plus.conf b/health/health.d/nginx_plus.conf
deleted file mode 100644
index 5849a9e7e..000000000
--- a/health/health.d/nginx_plus.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure nginx_plus is running
-
- template: nginx_plus_last_collected_secs
-       on: nginx_plus.requests_total
-    class: Web Server
-component: NGINX Plus
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/phpfpm.conf b/health/health.d/phpfpm.conf
deleted file mode 100644
index fc073a944..000000000
--- a/health/health.d/phpfpm.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure phpfpm is running
-
- template: phpfpm_last_collected_secs
-       on: phpfpm.requests
-    class: Web Server
-component: PHP-FPM
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/pihole.conf b/health/health.d/pihole.conf
index 72622caed..2e5c1cbfd 100644
--- a/health/health.d/pihole.conf
+++ b/health/health.d/pihole.conf
@@ -1,45 +1,12 @@
 
-# Make sure Pi-hole is responding.
-
- template: pihole_last_collected_secs
-       on: pihole.dns_queries_total
-    class: Ad Filtering
-component: Pi-hole
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-# Blocked DNS queries.
-
- template: pihole_blocked_queries
-       on: pihole.dns_queries_percentage
-    class: Ad Filtering
-component: Pi-hole
-     type: Errors
-    every: 10s
-    units: %
-     calc: $blocked
-     warn: $this > ( ($status >= $WARNING ) ? ( 45 ) : ( 55 ) )
-     crit: $this > ( ($status == $CRITICAL) ? ( 55 ) : ( 75 ) )
-    delay: up 2m down 5m
-     info: percentage of blocked dns queries over the last 24 hour
-       to: sysadmin
-
-
 # Blocklist last update time.
 # Default update interval is a week.
 
  template: pihole_blocklist_last_update
        on: pihole.blocklist_last_update
-    class: Ad Filtering
+    class: Errors
+     type: Ad Filtering
 component: Pi-hole
-     type: Errors
     every: 10s
     units: seconds
      calc: $ago
@@ -52,15 +19,15 @@ component: Pi-hole
 
  template: pihole_blocklist_gravity_file
        on: pihole.blocklist_last_update
-    class: Ad Filtering
+    class: Errors
+     type: Ad Filtering
 component: Pi-hole
-     type: Errors
     every: 10s
     units: boolean
      calc: $file_exists
      crit: $this != 1
     delay: up 2m down 5m
-     info: gravity.list (blocklist) file existence state (0: exists, 1: not-exists)
+     info: gravity.list (blocklist) file existence state (0: not-exists, 1: exists)
        to: sysadmin
 
 # Pi-hole's ability to block unwanted domains.
@@ -68,13 +35,13 @@ component: Pi-hole
 
  template: pihole_status
        on: pihole.unwanted_domains_blocking_status
-    class: Ad Filtering
+    class: Errors
+     type: Ad Filtering
 component: Pi-hole
-     type: Errors
     every: 10s
     units: boolean
      calc: $enabled
      warn: $this != 1
     delay: up 2m down 5m
-     info: unwanted domains blocking status (0: enabled, 1: disabled)
+     info: unwanted domains blocking status (0: disabled, 1: enabled)
        to: sysadmin
diff --git a/health/health.d/portcheck.conf b/health/health.d/portcheck.conf
index b977dbb31..8cbd7729c 100644
--- a/health/health.d/portcheck.conf
+++ b/health/health.d/portcheck.conf
@@ -1,25 +1,11 @@
- template: portcheck_last_collected_secs
- families: *
-       on: portcheck.status
-    class: Other
-component: TCP endpoint
-     type: Latency
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
  template: portcheck_service_reachable
  families: *
        on: portcheck.status
-    class: Other
+    class: Workload
+     type: Other
 component: TCP endpoint
-     type: Workload
    lookup: average -1m unaligned percentage of success
      calc: ($this < 75) ? (0) : ($this)
     every: 5s
@@ -30,9 +16,9 @@ component: TCP endpoint
  template: portcheck_connection_timeouts
  families: *
        on: portcheck.status
-    class: Other
+    class: Errors
+     type: Other
 component: TCP endpoint
-     type: Errors
    lookup: average -5m unaligned percentage of timeout
     every: 10s
     units: %
@@ -45,9 +31,9 @@ component: TCP endpoint
  template: portcheck_connection_fails
  families: *
        on: portcheck.status
-    class: Other
+    class: Errors
+     type: Other
 component: TCP endpoint
-     type: Errors
    lookup: average -5m unaligned percentage of no_connection,failed
     every: 10s
     units: %
diff --git a/health/health.d/postgres.conf b/health/health.d/postgres.conf
deleted file mode 100644
index f908a802a..000000000
--- a/health/health.d/postgres.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure postgres is running
-
- template: postgres_last_collected_secs
-       on: postgres.db_stat_transactions
-    class: Database
-component: PostgreSQL
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/processes.conf b/health/health.d/processes.conf
index b44a24c0b..2929ee3d4 100644
--- a/health/health.d/processes.conf
+++ b/health/health.d/processes.conf
@@ -2,9 +2,9 @@
 
     alarm: active_processes
        on: system.active_processes
-    class: System
+    class: Workload
+     type: System
 component: Processes
-     type: Workload
     hosts: *
      calc: $active * 100 / $pidmax
     units: %
diff --git a/health/health.d/pulsar.conf b/health/health.d/pulsar.conf
deleted file mode 100644
index 9903d4e38..000000000
--- a/health/health.d/pulsar.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# Availability
-
- template: pulsar_last_collected_secs
-       on: pulsar.broker_components
-    class: Messaging
-component: Pulsar
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/python.d.plugin.conf b/health/health.d/python.d.plugin.conf
new file mode 100644
index 000000000..f3abc588f
--- /dev/null
+++ b/health/health.d/python.d.plugin.conf
@@ -0,0 +1,17 @@
+
+# make sure python.d.plugin data collection job is running
+
+ template: python.d_job_last_collected_secs
+       on: netdata.pythond_runtime
+    class: Errors
+     type: Netdata
+component: python.d.plugin
+   module: *
+     calc: $now - $last_collected_t
+    units: seconds ago
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+     info: number of seconds since the last successful data collection
+       to: webmaster
diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf
index 0e3cc29fa..6e6e3b400 100644
--- a/health/health.d/ram.conf
+++ b/health/health.d/ram.conf
@@ -3,9 +3,9 @@
 
     alarm: used_ram_to_ignore
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux freebsd
     hosts: *
      calc: ($zfs.arc_size.arcsz = nan)?(0):($zfs.arc_size.arcsz - $zfs.arc_size.min)
@@ -15,13 +15,12 @@ component: Memory
 
     alarm: ram_in_use
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
-#    calc: $used * 100 / ($used + $cached + $free)
-     calc: ($used - $used_ram_to_ignore) * 100 / ($used  + $cached + $free)
+     calc: ($used - $used_ram_to_ignore) * 100 / ($used + $cached + $free + $buffers)
     units: %
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (80) : (90))
@@ -32,12 +31,12 @@ component: Memory
 
     alarm: ram_available
        on: mem.available
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
-     calc: ($avail + $system.ram.used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
+     calc: $avail * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
     units: %
     every: 10s
      warn: $this < (($status >= $WARNING)  ? (15) : (10))
@@ -46,24 +45,25 @@ component: Memory
      info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
        to: sysadmin
 
-   alarm: oom_kill
-      on: mem.oom_kill
-      os: linux
-   hosts: *
-  lookup: sum -1m unaligned
-   units: kills
-   every: 10s
-    warn: $this > 0
-   delay: down 5m
-    info: number of out of memory kills in the last minute
-      to: sysadmin
+      alarm: oom_kill
+         on: mem.oom_kill
+         os: linux
+      hosts: *
+     lookup: sum -30m unaligned
+      units: kills
+      every: 5m
+       warn: $this > 0
+      delay: down 10m
+host labels: _is_k8s_node = false
+       info: number of out of memory kills in the last 30 minutes
+         to: sysadmin
 
 ## FreeBSD
     alarm: ram_in_use
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: freebsd
     hosts: *
      calc: ($active + $wired + $laundry + $buffers - $used_ram_to_ignore) * 100 / ($active + $wired + $laundry + $buffers - $used_ram_to_ignore + $cache + $free + $inactive)
@@ -77,9 +77,9 @@ component: Memory
 
     alarm: ram_available
        on: system.ram
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: freebsd
     hosts: *
      calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $laundry + $buffers)
diff --git a/health/health.d/redis.conf b/health/health.d/redis.conf
index e8b289942..dfb771e8c 100644
--- a/health/health.d/redis.conf
+++ b/health/health.d/redis.conf
@@ -1,26 +1,10 @@
 
-# make sure redis is running
-
- template: redis_last_collected_secs
-       on: redis.operations
-    class: KV Storage
-component: Redis
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
  template: redis_bgsave_broken
  families: *
        on: redis.bgsave_health
-    class: KV Storage
+    class: Errors
+     type: KV Storage
 component: Redis
-     type: Errors
     every: 10s
      crit: $rdb_last_bgsave_status != 0
     units: ok/failed
@@ -31,9 +15,9 @@ component: Redis
  template: redis_bgsave_slow
  families: *
        on: redis.bgsave_now
-    class: KV Storage
+    class: Latency
+     type: KV Storage
 component: Redis
-     type: Latency
     every: 10s
      warn: $rdb_bgsave_in_progress > 600
      crit: $rdb_bgsave_in_progress > 1200
diff --git a/health/health.d/retroshare.conf b/health/health.d/retroshare.conf
index ca22e60de..14aa76b4c 100644
--- a/health/health.d/retroshare.conf
+++ b/health/health.d/retroshare.conf
@@ -1,26 +1,11 @@
-# make sure RetroShare is running
-
- template: retroshare_last_collected_secs
-       on: retroshare.peers
-    class: Data Sharing
-component: Retroshare
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # make sure the DHT is fine when active
 
  template: retroshare_dht_working
        on: retroshare.dht
-    class: Data Sharing
+    class: Utilization
+     type: Data Sharing
 component: Retroshare
-     type: Utilization
      calc: $dht_size_all
     units: peers
     every: 1m
diff --git a/health/health.d/riakkv.conf b/health/health.d/riakkv.conf
index b2c0e8d9c..261fd48c6 100644
--- a/health/health.d/riakkv.conf
+++ b/health/health.d/riakkv.conf
@@ -1,24 +1,10 @@
-# Ensure that Riak is running.  template: riak_last_collected_secs
- template: riakkv_last_collected_secs
-       on: riak.kv.throughput
-    class: Database
-component: Riak KV
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
 
 # Warn if a list keys operation is running.
  template: riakkv_list_keys_active
        on: riak.core.fsm_active
-    class: Database
+    class: Utilization
+     type: Database
 component: Riak KV
-     type: Utilization
      calc: $list_fsm_active
     units: state machines
     every: 10s
@@ -31,9 +17,9 @@ component: Riak KV
 # KV GET
  template: riakkv_1h_kv_get_mean_latency
        on: riak.kv.latency.get
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $node_get_fsm_time_mean
    lookup: average -1h unaligned of time
     every: 30s
@@ -43,9 +29,9 @@ component: Riak KV
 
  template: riakkv_kv_get_slow
        on: riak.kv.latency.get
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $mean
    lookup: average -3m unaligned of time
     units: ms
@@ -61,9 +47,9 @@ component: Riak KV
 # KV PUT
  template: riakkv_1h_kv_put_mean_latency
        on: riak.kv.latency.put
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $node_put_fsm_time_mean
    lookup: average -1h unaligned of time
     every: 30s
@@ -73,9 +59,9 @@ component: Riak KV
 
  template: riakkv_kv_put_slow
        on: riak.kv.latency.put
-    class: Database
+    class: Latency
+     type: Database
 component: Riak KV
-     type: Latency
      calc: $mean
    lookup: average -3m unaligned of time
     units: ms
@@ -95,9 +81,9 @@ component: Riak KV
 # On systems observed, this is < 2000, but may grow depending on load.
  template: riakkv_vm_high_process_count
        on: riak.vm
-    class: Database
+    class: Utilization
+     type: Database
 component: Riak KV
-     type: Utilization
      calc: $sys_process_count
     units: processes
     every: 10s
diff --git a/health/health.d/scaleio.conf b/health/health.d/scaleio.conf
index 3c0dc1168..ab110bf07 100644
--- a/health/health.d/scaleio.conf
+++ b/health/health.d/scaleio.conf
@@ -1,27 +1,11 @@
 
-# make sure scaleio is running
-
- template: scaleio_last_collected_secs
-       on: scaleio.system_capacity_total
-    class: Storage
-component: ScaleIO
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # make sure Storage Pool capacity utilization is under limit
 
  template: scaleio_storage_pool_capacity_utilization
        on: scaleio.storage_pool_capacity_utilization
-    class: Storage
+    class: Utilization
+     type: Storage
 component: ScaleIO
-     type: Utilization
      calc: $used
     units: %
     every: 10s
@@ -36,9 +20,9 @@ component: ScaleIO
 
  template: scaleio_sdc_mdm_connection_state
        on: scaleio.sdc_mdm_connection_state
-    class: Storage
+    class: Utilization
+     type: Storage
 component: ScaleIO
-     type: Utilization
      calc: $connected
     every: 10s
      warn: $this != 1
diff --git a/health/health.d/softnet.conf b/health/health.d/softnet.conf
index d8b01caff..345f87505 100644
--- a/health/health.d/softnet.conf
+++ b/health/health.d/softnet.conf
@@ -5,9 +5,9 @@
 
     alarm: 1min_netdev_backlog_exceeded
        on: system.softnet_stat
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m unaligned absolute of dropped
@@ -21,9 +21,9 @@ component: Network
 
     alarm: 1min_netdev_budget_ran_outs
        on: system.softnet_stat
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m unaligned absolute of squeezed
@@ -38,9 +38,9 @@ component: Network
 
     alarm: 10min_netisr_backlog_exceeded
        on: system.softnet_stat
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: freebsd
     hosts: *
    lookup: average -1m unaligned absolute of qdrops
diff --git a/health/health.d/squid.conf b/health/health.d/squid.conf
deleted file mode 100644
index 5c3d17629..000000000
--- a/health/health.d/squid.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure squid is running
-
- template: squid_last_collected_secs
-       on: squid.clients_requests
-    class: Web Proxy
-component: Squid
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: proxyadmin
-
diff --git a/health/health.d/stiebeleltron.conf b/health/health.d/stiebeleltron.conf
index f793b5ed1..493c8b73a 100644
--- a/health/health.d/stiebeleltron.conf
+++ b/health/health.d/stiebeleltron.conf
@@ -1,9 +1,9 @@
  template: stiebeleltron_last_collected_secs
  families: *
        on: stiebeleltron.heating.hc1
-    class: Other
+    class: Latency
+     type: Other
 component: Sensors
-     type: Latency
      calc: $now - $last_collected_t
     every: 10s
     units: seconds ago
diff --git a/health/health.d/swap.conf b/health/health.d/swap.conf
index 5b3f89a97..03c319320 100644
--- a/health/health.d/swap.conf
+++ b/health/health.d/swap.conf
@@ -3,9 +3,9 @@
 
     alarm: 30min_ram_swapped_out
        on: system.swapio
-    class: System
+    class: Workload
+     type: System
 component: Memory
-     type: Workload
        os: linux freebsd
     hosts: *
    lookup: sum -30m unaligned absolute of out
@@ -20,12 +20,12 @@ component: Memory
 
     alarm: used_swap
        on: system.swap
-    class: System
+    class: Utilization
+     type: System
 component: Memory
-     type: Utilization
        os: linux freebsd
     hosts: *
-     calc: $used * 100 / ( $used + $free )
+     calc: ($used + $free) > 0 ? ($used * 100 / ($used + $free)) : 0
     units: %
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (80) : (90))
diff --git a/health/health.d/systemdunits.conf b/health/health.d/systemdunits.conf
index cc1a8698d..38213a8db 100644
--- a/health/health.d/systemdunits.conf
+++ b/health/health.d/systemdunits.conf
@@ -4,9 +4,9 @@
 ## Service units
  template: systemd_service_units_state
        on: systemd.service_units_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -18,9 +18,9 @@ component: Systemd units
 ## Socket units
  template: systemd_socket_units_state
        on: systemd.socket_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -32,9 +32,9 @@ component: Systemd units
 ## Target units
  template: systemd_target_units_state
        on: systemd.target_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -46,9 +46,9 @@ component: Systemd units
 ## Path units
  template: systemd_path_units_state
        on: systemd.path_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -60,9 +60,9 @@ component: Systemd units
 ## Device units
  template: systemd_device_units_state
        on: systemd.device_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -74,9 +74,9 @@ component: Systemd units
 ## Mount units
  template: systemd_mount_units_state
        on: systemd.mount_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -88,9 +88,9 @@ component: Systemd units
 ## Automount units
  template: systemd_automount_units_state
        on: systemd.automount_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -102,9 +102,9 @@ component: Systemd units
 ## Swap units
  template: systemd_swap_units_state
        on: systemd.swap_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -116,9 +116,9 @@ component: Systemd units
 ## Scope units
  template: systemd_scope_units_state
        on: systemd.scope_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
@@ -130,9 +130,9 @@ component: Systemd units
 ## Slice units
  template: systemd_slice_units_state
        on: systemd.slice_unit_state
-    class: Linux
+    class: Errors
+     type: Linux
 component: Systemd units
-     type: Errors
    lookup: max -1s min2max
     units: ok/failed
     every: 10s
diff --git a/health/health.d/tcp_conn.conf b/health/health.d/tcp_conn.conf
index f2c5e4e5d..67b3bee53 100644
--- a/health/health.d/tcp_conn.conf
+++ b/health/health.d/tcp_conn.conf
@@ -7,9 +7,9 @@
 
     alarm: tcp_connections
        on: ipv4.tcpsock
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
      calc: (${tcp_max_connections} > 0) ? ( ${connections} * 100 / ${tcp_max_connections} ) : 0
diff --git a/health/health.d/tcp_listen.conf b/health/health.d/tcp_listen.conf
index 51a0e461c..d4bcfa248 100644
--- a/health/health.d/tcp_listen.conf
+++ b/health/health.d/tcp_listen.conf
@@ -20,9 +20,9 @@
 
     alarm: 1m_tcp_accept_queue_overflows
        on: ip.tcp_accept_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of ListenOverflows
@@ -38,9 +38,9 @@ component: Network
 # CHECK: https://github.com/netdata/netdata/issues/3234#issuecomment-423935842
     alarm: 1m_tcp_accept_queue_drops
        on: ip.tcp_accept_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of ListenDrops
@@ -63,9 +63,9 @@ component: Network
 
     alarm: 1m_tcp_syn_queue_drops
        on: ip.tcp_syn_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of TCPReqQFullDrop
@@ -80,9 +80,9 @@ component: Network
 
     alarm: 1m_tcp_syn_queue_cookies
        on: ip.tcp_syn_queue
-    class: System
+    class: Workload
+     type: System
 component: Network
-     type: Workload
        os: linux
     hosts: *
    lookup: average -60s unaligned absolute of TCPReqQFullDoCookies
diff --git a/health/health.d/tcp_mem.conf b/health/health.d/tcp_mem.conf
index 646e5c6da..318be20ac 100644
--- a/health/health.d/tcp_mem.conf
+++ b/health/health.d/tcp_mem.conf
@@ -8,9 +8,9 @@
 
     alarm: tcp_memory
        on: ipv4.sockstat_tcp_mem
-    class: System
+    class: Utilization
+     type: System
 component: Network
-     type: Utilization
        os: linux
     hosts: *
      calc: ${mem} * 100 / ${tcp_mem_high}
diff --git a/health/health.d/tcp_orphans.conf b/health/health.d/tcp_orphans.conf
index 6e94d67d1..cbd628da5 100644
--- a/health/health.d/tcp_orphans.conf
+++ b/health/health.d/tcp_orphans.conf
@@ -9,9 +9,9 @@
 
     alarm: tcp_orphans
        on: ipv4.sockstat_tcp_sockets
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
      calc: ${orphan} * 100 / ${tcp_max_orphans}
diff --git a/health/health.d/tcp_resets.conf b/health/health.d/tcp_resets.conf
index 41355dad6..190271e47 100644
--- a/health/health.d/tcp_resets.conf
+++ b/health/health.d/tcp_resets.conf
@@ -6,9 +6,9 @@
 
     alarm: 1m_ipv4_tcp_resets_sent
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m at -10s unaligned absolute of OutRsts
@@ -18,9 +18,9 @@ component: Network
 
     alarm: 10s_ipv4_tcp_resets_sent
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -10s unaligned absolute of OutRsts
@@ -40,9 +40,9 @@ component: Network
 
     alarm: 1m_ipv4_tcp_resets_received
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux freebsd
     hosts: *
    lookup: average -1m at -10s unaligned absolute of AttemptFails
@@ -52,9 +52,9 @@ component: Network
 
     alarm: 10s_ipv4_tcp_resets_received
        on: ipv4.tcphandshake
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux freebsd
     hosts: *
    lookup: average -10s unaligned absolute of AttemptFails
diff --git a/health/health.d/timex.conf b/health/health.d/timex.conf
new file mode 100644
index 000000000..ea90c4000
--- /dev/null
+++ b/health/health.d/timex.conf
@@ -0,0 +1,17 @@
+
+# It can take several minutes before ntpd selects a server to synchronize with;
+# only warn once the system uptime exceeds about 17 minutes (1024 seconds).
+
+    alarm: system_clock_sync_state
+       on: system.clock_sync_state
+       os: linux
+    class: Errors
+     type: System
+component: Clock
+     calc: $state
+    units: synchronization state
+    every: 10s
+     warn: $system.uptime.uptime > 17 * 60 AND $this == 0
+    delay: down 5m
+     info: the system time is not synchronized to a reliable server
+       to: silent
diff --git a/health/health.d/udp_errors.conf b/health/health.d/udp_errors.conf
index 342a1aedd..64f47dfa7 100644
--- a/health/health.d/udp_errors.conf
+++ b/health/health.d/udp_errors.conf
@@ -6,9 +6,9 @@
 
     alarm: 1m_ipv4_udp_receive_buffer_errors
        on: ipv4.udperrors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux freebsd
     hosts: *
    lookup: average -1m unaligned absolute of RcvbufErrors
@@ -24,9 +24,9 @@ component: Network
 
     alarm: 1m_ipv4_udp_send_buffer_errors
        on: ipv4.udperrors
-    class: System
+    class: Errors
+     type: System
 component: Network
-     type: Errors
        os: linux
     hosts: *
    lookup: average -1m unaligned absolute of SndbufErrors
diff --git a/health/health.d/unbound.conf b/health/health.d/unbound.conf
index 1df15474f..4e8d164d2 100644
--- a/health/health.d/unbound.conf
+++ b/health/health.d/unbound.conf
@@ -1,27 +1,11 @@
 
-# make sure unbound is running
-
- template: unbound_last_collected_secs
-       on: unbound.queries
-    class: DNS
-component: Unbound
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # make sure there is no overwritten/dropped queries in the request-list
 
  template: unbound_request_list_overwritten
        on: unbound.request_list_jostle_list
-    class: DNS
+    class: Errors
+     type: DNS
 component: Unbound
-     type: Errors
    lookup: average -60s unaligned absolute match-names of overwritten
     units: queries
     every: 10s
@@ -32,9 +16,9 @@ component: Unbound
 
  template: unbound_request_list_dropped
        on: unbound.request_list_jostle_list
-    class: DNS
+    class: Errors
+     type: DNS
 component: Unbound
-     type: Errors
    lookup: average -60s unaligned absolute match-names of dropped
     units: queries
     every: 10s
diff --git a/health/health.d/varnish.conf b/health/health.d/varnish.conf
deleted file mode 100644
index 7f3bd6c82..000000000
--- a/health/health.d/varnish.conf
+++ /dev/null
@@ -1,12 +0,0 @@
-    alarm: varnish_last_collected
-       on: varnish.uptime
-    class: Web Proxy
-component: Varnish
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/vcsa.conf b/health/health.d/vcsa.conf
index 8538e488c..a9cc7ceef 100644
--- a/health/health.d/vcsa.conf
+++ b/health/health.d/vcsa.conf
@@ -1,20 +1,4 @@
 
-# make sure vcsa is running and responding
-
- template: vcsa_last_collected_secs
-       on: vcsa.system_health
-    class: Virtual Machine
-component: VMware vCenter
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # Overall system health:
 #  - 0: all components are healthy.
 #  - 1: one or more components might become overloaded soon.
@@ -24,9 +8,9 @@ component: VMware vCenter
 
  template: vcsa_system_health
        on: vcsa.system_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of system
     units: status
     every: 10s
@@ -46,9 +30,9 @@ component: VMware vCenter
 
  template: vcsa_swap_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of swap
     units: status
     every: 10s
@@ -61,9 +45,9 @@ component: VMware vCenter
 
  template: vcsa_storage_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of storage
     units: status
     every: 10s
@@ -76,9 +60,9 @@ component: VMware vCenter
 
  template: vcsa_mem_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of mem
     units: status
     every: 10s
@@ -91,9 +75,9 @@ component: VMware vCenter
 
  template: vcsa_load_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: VMware vCenter
-     type: Utilization
    lookup: max -10s unaligned of load
     units: status
     every: 10s
@@ -106,9 +90,9 @@ component: VMware vCenter
 
  template: vcsa_database_storage_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of database_storage
     units: status
     every: 10s
@@ -121,9 +105,9 @@ component: VMware vCenter
 
  template: vcsa_applmgmt_health
        on: vcsa.components_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of applmgmt
     units: status
     every: 10s
@@ -143,9 +127,9 @@ component: VMware vCenter
 
  template: vcsa_software_updates_health
        on: vcsa.software_updates_health
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: VMware vCenter
-     type: Errors
    lookup: max -10s unaligned of software_packages
     units: status
     every: 10s
diff --git a/health/health.d/vernemq.conf b/health/health.d/vernemq.conf
index 737147f38..cfbe2a524 100644
--- a/health/health.d/vernemq.conf
+++ b/health/health.d/vernemq.conf
@@ -1,27 +1,11 @@
 
-# Availability
-
- template: vernemq_last_collected_secs
-       on: vernemq.node_uptime
-    class: Messaging
-component: VerneMQ
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # Socket errors
 
  template: vernemq_socket_errors
        on: vernemq.socket_errors
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: sum -1m unaligned absolute of socket_error
     units: errors
     every: 1m
@@ -34,9 +18,9 @@ component: VerneMQ
 
  template: vernemq_queue_message_drop
        on: vernemq.queue_undelivered_messages
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute of queue_message_drop
     units: dropped messages
     every: 1m
@@ -47,9 +31,9 @@ component: VerneMQ
 
  template: vernemq_queue_message_expired
        on: vernemq.queue_undelivered_messages
-    class: Messaging
+    class: Latency
+     type: Messaging
 component: VerneMQ
-     type: Latency
    lookup: average -1m unaligned absolute of queue_message_expired
     units: expired messages
     every: 1m
@@ -60,9 +44,9 @@ component: VerneMQ
 
  template: vernemq_queue_message_unhandled
        on: vernemq.queue_undelivered_messages
-    class: Messaging
+    class: Latency
+     type: Messaging
 component: VerneMQ
-     type: Latency
    lookup: average -1m unaligned absolute of queue_message_unhandled
     units: unhandled messages
     every: 1m
@@ -75,9 +59,9 @@ component: VerneMQ
 
  template: vernemq_average_scheduler_utilization
        on: vernemq.average_scheduler_utilization
-    class: Messaging
+    class: Utilization
+     type: Messaging
 component: VerneMQ
-     type: Utilization
    lookup: average -10m unaligned
     units: %
     every: 1m
@@ -91,9 +75,9 @@ component: VerneMQ
 
  template: vernemq_cluster_dropped
        on: vernemq.cluster_dropped
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: sum -1m unaligned
     units: KiB
     every: 1m
@@ -104,9 +88,9 @@ component: VerneMQ
 
  template: vernemq_netsplits
        on: vernemq.netsplits
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: sum -1m unaligned absolute of netsplit_detected
     units: netsplits
     every: 10s
@@ -119,9 +103,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_connack_sent_reason_unsuccessful
        on: vernemq.mqtt_connack_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -134,9 +118,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_disconnect_received_reason_not_normal
        on: vernemq.mqtt_disconnect_received_reason
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
     units: packets
     every: 1m
@@ -147,9 +131,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_disconnect_sent_reason_not_normal
        on: vernemq.mqtt_disconnect_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
     units: packets
     every: 1m
@@ -162,9 +146,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_subscribe_error
        on: vernemq.mqtt_subscribe_error
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute
     units: failed ops
     every: 1m
@@ -175,9 +159,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_subscribe_auth_error
        on: vernemq.mqtt_subscribe_auth_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: attempts
     every: 1m
@@ -190,9 +174,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_unsubscribe_error
        on: vernemq.mqtt_unsubscribe_error
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute
     units: failed ops
     every: 1m
@@ -205,9 +189,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_publish_errors
        on: vernemq.mqtt_publish_errors
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute
     units: failed ops
     every: 1m
@@ -218,9 +202,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_publish_auth_errors
        on: vernemq.mqtt_publish_auth_errors
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: attempts
     every: 1m
@@ -233,9 +217,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_puback_received_reason_unsuccessful
        on: vernemq.mqtt_puback_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -246,9 +230,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_puback_sent_reason_unsuccessful
        on: vernemq.mqtt_puback_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -259,9 +243,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_puback_unexpected
        on: vernemq.mqtt_puback_invalid_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: messages
     every: 1m
@@ -274,9 +258,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrec_received_reason_unsuccessful
        on: vernemq.mqtt_pubrec_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -287,9 +271,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
        on: vernemq.mqtt_pubrec_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -300,9 +284,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrec_invalid_error
        on: vernemq.mqtt_pubrec_invalid_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: messages
     every: 1m
@@ -315,9 +299,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrel_received_reason_unsuccessful
        on: vernemq.mqtt_pubrel_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -328,9 +312,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
        on: vernemq.mqtt_pubrel_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -343,9 +327,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
        on: vernemq.mqtt_pubcomp_received_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -356,9 +340,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
        on: vernemq.mqtt_pubcomp_sent_reason
-    class: Messaging
+    class: Errors
+     type: Messaging
 component: VerneMQ
-     type: Errors
    lookup: average -1m unaligned absolute match-names of !success,*
     units: packets
     every: 1m
@@ -369,9 +353,9 @@ component: VerneMQ
 
  template: vernemq_mqtt_pubcomp_unexpected
        on: vernemq.mqtt_pubcomp_invalid_error
-    class: Messaging
+    class: Workload
+     type: Messaging
 component: VerneMQ
-     type: Workload
    lookup: average -1m unaligned absolute
     units: messages
     every: 1m
diff --git a/health/health.d/vsphere.conf b/health/health.d/vsphere.conf
index aee7c5cd4..d8fc899b9 100644
--- a/health/health.d/vsphere.conf
+++ b/health/health.d/vsphere.conf
@@ -6,9 +6,9 @@
 
  template: vsphere_vm_mem_usage
        on: vsphere.vm_mem_usage_percentage
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: Memory
-     type: Utilization
     hosts: *
      calc: $used
     units: %
@@ -23,9 +23,9 @@ component: Memory
 
  template: vsphere_host_mem_usage
        on: vsphere.host_mem_usage_percentage
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: Memory
-     type: Utilization
     hosts: *
      calc: $used
     units: %
@@ -39,9 +39,9 @@ component: Memory
 
  template: vsphere_inbound_packets_errors
        on: vsphere.net_errors_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -51,9 +51,9 @@ component: Network
 
  template: vsphere_outbound_packets_errors
        on: vsphere.net_errors_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
@@ -65,9 +65,9 @@ component: Network
 
  template: vsphere_inbound_packets_errors_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -81,9 +81,9 @@ component: Network
 
  template: vsphere_outbound_packets_errors_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
@@ -100,9 +100,9 @@ component: Network
 
  template: vsphere_cpu_usage
        on: vsphere.cpu_usage_total
-    class: Virtual Machine
+    class: Utilization
+     type: Virtual Machine
 component: CPU
-     type: Utilization
     hosts: *
    lookup: average -10m unaligned match-names of used
     units: %
@@ -117,9 +117,9 @@ component: CPU
 
  template: vsphere_inbound_packets_dropped
        on: vsphere.net_drops_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -129,9 +129,9 @@ component: Network
 
  template: vsphere_outbound_packets_dropped
        on: vsphere.net_drops_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
@@ -143,9 +143,9 @@ component: Network
 
  template: vsphere_inbound_packets_dropped_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of rx
@@ -159,9 +159,9 @@ component: Network
 
  template: vsphere_outbound_packets_dropped_ratio
        on: vsphere.net_packets_total
-    class: Virtual Machine
+    class: Errors
+     type: Virtual Machine
 component: Network
-     type: Errors
     hosts: *
  families: *
    lookup: sum -10m unaligned absolute match-names of tx
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf
index 127c9a9c6..454e0abef 100644
--- a/health/health.d/web_log.conf
+++ b/health/health.d/web_log.conf
@@ -1,22 +1,4 @@
 
-# make sure we can collect web log data
-
- template: last_collected_secs
-       on: web_log.response_codes
-    class: Web Server
-component: Web log
-     type: Latency
- families: *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
 # -----------------------------------------------------------------------------
 # high level response code alarms
 
@@ -29,9 +11,9 @@ component: Web log
 
  template: 1m_requests
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -41,9 +23,9 @@ component: Web log
 
  template: 1m_successful
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of successful_requests
      calc: $this * 100 / $1m_requests
@@ -57,41 +39,39 @@ component: Web log
 
  template: 1m_redirects
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of redirects
      calc: $this * 100 / $1m_requests
     units: %
     every: 10s
      warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? (  1 ) : ( 20 )) ) : ( 0 )
-     crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of redirection HTTP requests over the last minute (3xx except 304)
        to: webmaster
 
  template: 1m_bad_requests
        on: web_log.response_statuses
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of bad_requests
      calc: $this * 100 / $1m_requests
     units: %
     every: 10s
      warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING)  ? ( 10 ) : ( 30 )) ) : ( 0 )
-     crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of client error HTTP requests over the last minute (4xx except 401)
        to: webmaster
 
  template: 1m_internal_errors
        on: web_log.response_statuses
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of server_errors
      calc: $this * 100 / $1m_requests
@@ -114,9 +94,9 @@ component: Web log
 
  template: 1m_total_requests
        on: web_log.response_codes
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -126,9 +106,9 @@ component: Web log
 
  template: 1m_unmatched
        on: web_log.response_codes
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of unmatched
      calc: $this * 100 / $1m_total_requests
@@ -151,9 +131,9 @@ component: Web log
 
  template: 10m_response_time
        on: web_log.response_time
-    class: System
+    class: Latency
+     type: System
 component: Web log
-     type: Latency
  families: *
    lookup: average -10m unaligned of avg
     units: ms
@@ -162,9 +142,9 @@ component: Web log
 
  template: web_slow
        on: web_log.response_time
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: Web log
-     type: Latency
  families: *
    lookup: average -1m unaligned of avg
     units: ms
@@ -191,9 +171,9 @@ component: Web log
 
  template: 5m_successful_old
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m at -5m unaligned of successful_requests
     units: requests/s
@@ -202,9 +182,9 @@ component: Web log
 
  template: 5m_successful
        on: web_log.response_statuses
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m unaligned of successful_requests
     units: requests/s
@@ -213,9 +193,9 @@ component: Web log
 
  template: 5m_requests_ratio
        on: web_log.response_codes
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
      calc: ($5m_successful_old > 0)?($5m_successful * 100 / $5m_successful_old):(100)
     units: %
@@ -233,23 +213,6 @@ component: Web log
 
 # ---------------------------------------------------GO-VERSION---------------------------------------------------------
 
-# make sure we can collect web log data
-
- template: web_log_last_collected_secs
-       on: web_log.requests
-    class: Web Server
-component: Web log
-     type: Latency
- families: *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
 # unmatched lines
 
 # the following alarms trigger only when there are enough data.
@@ -261,9 +224,9 @@ component: Web log
 
  template: web_log_1m_total_requests
        on: web_log.requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -273,9 +236,9 @@ component: Web log
 
  template: web_log_1m_unmatched
        on: web_log.excluded_requests
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of unmatched
      calc: $this * 100 / $web_log_1m_total_requests
@@ -298,9 +261,9 @@ component: Web log
 
  template: web_log_1m_requests
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned
      calc: ($this == 0)?(1):($this)
@@ -310,9 +273,9 @@ component: Web log
 
  template: web_log_1m_successful
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of success
      calc: $this * 100 / $web_log_1m_requests
@@ -326,41 +289,39 @@ component: Web log
 
  template: web_log_1m_redirects
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: sum -1m unaligned of redirect
      calc: $this * 100 / $web_log_1m_requests
     units: %
     every: 10s
      warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING ) ? (  1 ) : ( 20 )) ) : ( 0 )
-     crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of redirection HTTP requests over the last minute (3xx except 304)
        to: webmaster
 
  template: web_log_1m_bad_requests
        on: web_log.type_requests
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of bad
      calc: $this * 100 / $web_log_1m_requests
     units: %
     every: 10s
      warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING)  ? ( 10 ) : ( 30 )) ) : ( 0 )
-     crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 )
     delay: up 2m down 15m multiplier 1.5 max 1h
      info: ratio of client error HTTP requests over the last minute (4xx except 401)
        to: webmaster
 
  template: web_log_1m_internal_errors
        on: web_log.type_requests
-    class: Web Server
+    class: Errors
+     type: Web Server
 component: Web log
-     type: Errors
  families: *
    lookup: sum -1m unaligned of error
      calc: $this * 100 / $web_log_1m_requests
@@ -384,9 +345,9 @@ component: Web log
 
  template: web_log_10m_response_time
        on: web_log.request_processing_time
-    class: System
+    class: Latency
+     type: System
 component: Web log
-     type: Latency
  families: *
    lookup: average -10m unaligned of avg
     units: ms
@@ -395,9 +356,9 @@ component: Web log
 
  template: web_log_web_slow
        on: web_log.request_processing_time
-    class: Web Server
+    class: Latency
+     type: Web Server
 component: Web log
-     type: Latency
  families: *
    lookup: average -1m unaligned of avg
     units: ms
@@ -424,9 +385,9 @@ component: Web log
 
  template: web_log_5m_successful_old
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m at -5m unaligned of success
     units: requests/s
@@ -435,9 +396,9 @@ component: Web log
 
  template: web_log_5m_successful
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
    lookup: average -5m unaligned of success
     units: requests/s
@@ -446,9 +407,9 @@ component: Web log
 
  template: web_log_5m_requests_ratio
        on: web_log.type_requests
-    class: Web Server
+    class: Workload
+     type: Web Server
 component: Web log
-     type: Workload
  families: *
      calc: ($web_log_5m_successful_old > 0)?($web_log_5m_successful * 100 / $web_log_5m_successful_old):(100)
     units: %
diff --git a/health/health.d/whoisquery.conf b/health/health.d/whoisquery.conf
index c6d3a9de0..be5eb58f9 100644
--- a/health/health.d/whoisquery.conf
+++ b/health/health.d/whoisquery.conf
@@ -1,26 +1,9 @@
 
-# make sure whoisquery is running
-
- template: whoisquery_last_collected_secs
-       on: whoisquery.time_until_expiration
-    class: Other
-component: WHOIS
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 60s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
  template: whoisquery_days_until_expiration
        on: whoisquery.time_until_expiration
-    class: Other
+    class: Utilization
+     type: Other
 component: WHOIS
-     type: Utilization
      calc: $expiry
     units: seconds
     every: 60s
diff --git a/health/health.d/wmi.conf b/health/health.d/wmi.conf
index 6bd4e077f..90d39ce9d 100644
--- a/health/health.d/wmi.conf
+++ b/health/health.d/wmi.conf
@@ -1,29 +1,11 @@
 
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-## Availability
-
- template: wmi_last_collected_secs
-       on: cpu.collector_duration
-    class: Windows
-component: Availability
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 ## CPU
 
  template: wmi_10min_cpu_usage
        on: wmi.cpu_utilization_total
-    class: Windows
+    class: Utilization
+     type: Windows
 component: CPU
-     type: Utilization
        os: linux
     hosts: *
    lookup: average -10m unaligned match-names of dpc,user,privileged,interrupt
@@ -40,9 +22,9 @@ component: CPU
 
  template: wmi_ram_in_use
        on: wmi.memory_utilization
-    class: Windows
+    class: Utilization
+     type: Windows
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
      calc: ($used) * 100 / ($used + $available)
@@ -56,9 +38,9 @@ component: Memory
 
  template: wmi_swap_in_use
        on: wmi.memory_swap_utilization
-    class: Windows
+    class: Utilization
+     type: Windows
 component: Memory
-     type: Utilization
        os: linux
     hosts: *
      calc: ($used) * 100 / ($used + $available)
@@ -75,9 +57,9 @@ component: Memory
 
  template: wmi_inbound_packets_discarded
        on: wmi.net_discarded
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -91,9 +73,9 @@ component: Network
 
  template: wmi_outbound_packets_discarded
        on: wmi.net_discarded
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -107,9 +89,9 @@ component: Network
 
  template: wmi_inbound_packets_errors
        on: wmi.net_errors
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -123,9 +105,9 @@ component: Network
 
  template: wmi_outbound_packets_errors
        on: wmi.net_errors
-    class: Windows
+    class: Errors
+     type: Windows
 component: Network
-     type: Errors
        os: linux
     hosts: *
  families: *
@@ -142,9 +124,9 @@ component: Network
 
  template: wmi_disk_in_use
        on: wmi.logical_disk_utilization
-    class: Windows
+    class: Utilization
+     type: Windows
 component: Disk
-     type: Utilization
        os: linux
     hosts: *
      calc: ($used) * 100 / ($used + $free)
diff --git a/health/health.d/x509check.conf b/health/health.d/x509check.conf
index 93c406b7a..fc69d0288 100644
--- a/health/health.d/x509check.conf
+++ b/health/health.d/x509check.conf
@@ -1,26 +1,9 @@
 
-# make sure x509check is running
-
- template: x509check_last_collected_secs
-       on: x509check.time_until_expiration
-    class: Certificates
-component: x509 certificates
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 60s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
  template: x509check_days_until_expiration
        on: x509check.time_until_expiration
-    class: Certificates
+    class: Latency
+     type: Certificates
 component: x509 certificates
-     type: Latency
      calc: $expiry
     units: seconds
     every: 60s
@@ -31,9 +14,9 @@ component: x509 certificates
       
  template: x509check_revocation_status
        on: x509check.revocation_status
-    class: Certificates
+    class: Errors
+     type: Certificates
 component: x509 certificates
-     type: Errors
      calc: $revoked
     every: 60s
      crit: $this != nan AND $this != 0
diff --git a/health/health.d/zfs.conf b/health/health.d/zfs.conf
index d6f5fa2fe..785838d47 100644
--- a/health/health.d/zfs.conf
+++ b/health/health.d/zfs.conf
@@ -1,9 +1,9 @@
 
     alarm: zfs_memory_throttle
        on: zfs.memory_ops
-    class: System
+    class: Utilization
+     type: System
 component: File system
-     type: Utilization
    lookup: sum -10m unaligned absolute of throttled
     units: events
     every: 1m
@@ -16,9 +16,9 @@ component: File system
 
  template: zfs_pool_state_warn
        on: zfspool.state
-    class: System
+    class: Errors
+     type: System
 component: File system
-     type: Errors
      calc: $degraded
     units: boolean
     every: 10s
@@ -29,9 +29,9 @@ component: File system
 
  template: zfs_pool_state_crit
        on: zfspool.state
-    class: System
+    class: Errors
+     type: System
 component: File system
-     type: Errors
      calc: $faulted + $unavail
     units: boolean
     every: 10s
diff --git a/health/health.d/zookeeper.conf b/health/health.d/zookeeper.conf
deleted file mode 100644
index 8c7d5a73d..000000000
--- a/health/health.d/zookeeper.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure zookeeper is running
-
- template: zookeeper_last_collected_secs
-       on: zookeeper.requests
-    class: KV Storage
-component: ZooKeeper
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.h b/health/health.h
index 56331b227..09040b3a8 100644
--- a/health/health.h
+++ b/health/health.h
@@ -3,7 +3,7 @@
 #ifndef NETDATA_HEALTH_H
 #define NETDATA_HEALTH_H 1
 
-#include "../daemon/common.h"
+#include "daemon/common.h"
 
 #define NETDATA_PLUGIN_HOOK_HEALTH \
     { \
@@ -27,6 +27,7 @@ extern unsigned int default_health_enabled;
 #define HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS      0x00000040
 
 #define HEALTH_ENTRY_FLAG_SAVED                 0x10000000
+#define HEALTH_ENTRY_FLAG_ACLK_QUEUED           0x20000000
 #define HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION 0x80000000
 
 #ifndef HEALTH_LISTEN_PORT
@@ -63,6 +64,7 @@ extern ALARM_ENTRY* health_create_alarm_entry(
         RRDHOST *host,
         uint32_t alarm_id,
         uint32_t alarm_event_id,
+        uuid_t config_hash_id,
         time_t when,
         const char *name,
         const char *chart,
@@ -96,6 +98,8 @@ extern void *health_cmdapi_thread(void *ptr);
 
 extern void health_label_log_save(RRDHOST *host);
 
+extern char *health_edit_command_from_source(const char *source);
+
 extern SIMPLE_PATTERN *health_pattern_from_foreach(char *s);
 
 #endif //NETDATA_HEALTH_H
diff --git a/health/health_config.c b/health/health_config.c
index 756023715..35234df15 100644
--- a/health/health_config.c
+++ b/health/health_config.c
@@ -473,6 +473,29 @@ static inline char *health_source_file(size_t line, const char *file) {
     return strdupz(buffer);
 }
 
+char *health_edit_command_from_source(const char *source)
+{
+    char buffer[FILENAME_MAX + 1];
+    char *temp = strdupz(source);
+    char *line_num = strchr(temp, '@');
+    char *file_no_path = strrchr(temp, '/');
+
+    if (likely(file_no_path && line_num)) {
+        *line_num = '\0';
+        snprintfz(
+            buffer,
+            FILENAME_MAX,
+            "sudo %s/edit-config health.d/%s=%s",
+            netdata_configured_user_config_dir,
+            file_no_path + 1,
+            temp);
+    } else
+        buffer[0] = '\0';
+
+    freez(temp);
+    return strdupz(buffer);
+}
+
 static inline void strip_quotes(char *s) {
     while(*s) {
         if(*s == '\'' || *s == '"') *s = ' ';
@@ -480,6 +503,40 @@ static inline void strip_quotes(char *s) {
     }
 }
 
+static inline void alert_config_free(struct alert_config *cfg)
+{
+    freez(cfg->alarm);
+    freez(cfg->template_key);
+    freez(cfg->os);
+    freez(cfg->host);
+    freez(cfg->on);
+    freez(cfg->families);
+    freez(cfg->plugin);
+    freez(cfg->module);
+    freez(cfg->charts);
+    freez(cfg->lookup);
+    freez(cfg->calc);
+    freez(cfg->warn);
+    freez(cfg->crit);
+    freez(cfg->every);
+    freez(cfg->green);
+    freez(cfg->red);
+    freez(cfg->exec);
+    freez(cfg->to);
+    freez(cfg->units);
+    freez(cfg->info);
+    freez(cfg->classification);
+    freez(cfg->component);
+    freez(cfg->type);
+    freez(cfg->delay);
+    freez(cfg->options);
+    freez(cfg->repeat);
+    freez(cfg->host_labels);
+    freez(cfg->p_db_lookup_dimensions);
+    freez(cfg->p_db_lookup_method);
+    freez(cfg);
+}
+
 static int health_readfile(const char *filename, void *data) {
     RRDHOST *host = (RRDHOST *)data;
 
@@ -554,6 +611,7 @@ static int health_readfile(const char *filename, void *data) {
 
     RRDCALC *rc = NULL;
     RRDCALCTEMPLATE *rt = NULL;
+    struct alert_config *alert_cfg = NULL;
 
     int ignore_this = 0;
     size_t line = 0, append = 0;
@@ -603,16 +661,18 @@ static int health_readfile(const char *filename, void *data) {
 
         if(hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) {
             if(rc) {
-                if(ignore_this || !rrdcalc_add_alarm_from_config(host, rc)) {
+                if(ignore_this || !alert_hash_and_store_config(rc->config_hash_id, alert_cfg) || !rrdcalc_add_alarm_from_config(host, rc)) {
                     rrdcalc_free(rc);
+                    alert_config_free(alert_cfg);
                 }
                // health_add_alarms_loop(host, rc, ignore_this) ;
             }
 
             if(rt) {
-                if (ignore_this || !rrdcalctemplate_add_template_from_config(host, rt))
+                if (ignore_this || !alert_hash_and_store_config(rt->config_hash_id, alert_cfg) || !rrdcalctemplate_add_template_from_config(host, rt)) {
                     rrdcalctemplate_free(rt);
-
+                    alert_config_free(alert_cfg);
+                }
                 rt = NULL;
             }
 
@@ -629,25 +689,30 @@ static int health_readfile(const char *filename, void *data) {
             rc->old_status = RRDCALC_STATUS_UNINITIALIZED;
             rc->warn_repeat_every = host->health_default_warn_repeat_every;
             rc->crit_repeat_every = host->health_default_crit_repeat_every;
+            alert_cfg = callocz(1, sizeof(struct alert_config));
 
             if(rrdvar_fix_name(rc->name))
                 error("Health configuration renamed alarm '%s' to '%s'", value, rc->name);
 
+            alert_cfg->alarm = strdupz(rc->name);
             ignore_this = 0;
         }
         else if(hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) {
             if(rc) {
 //                health_add_alarms_loop(host, rc, ignore_this) ;
-                if(ignore_this || !rrdcalc_add_alarm_from_config(host, rc)) {
+                if(ignore_this || !alert_hash_and_store_config(rc->config_hash_id, alert_cfg) || !rrdcalc_add_alarm_from_config(host, rc)) {
                     rrdcalc_free(rc);
+                    alert_config_free(alert_cfg);
                 }
 
                 rc = NULL;
             }
 
             if(rt) {
-                if(ignore_this || !rrdcalctemplate_add_template_from_config(host, rt))
+                if(ignore_this || !alert_hash_and_store_config(rt->config_hash_id, alert_cfg) || !rrdcalctemplate_add_template_from_config(host, rt)) {
                     rrdcalctemplate_free(rt);
+                    alert_config_free(alert_cfg);
+                }
             }
 
             rt = callocz(1, sizeof(RRDCALCTEMPLATE));
@@ -659,14 +724,17 @@ static int health_readfile(const char *filename, void *data) {
             rt->delay_multiplier = 1.0;
             rt->warn_repeat_every = host->health_default_warn_repeat_every;
             rt->crit_repeat_every = host->health_default_crit_repeat_every;
+            alert_cfg = callocz(1, sizeof(struct alert_config));
 
             if(rrdvar_fix_name(rt->name))
                 error("Health configuration renamed template '%s' to '%s'", value, rt->name);
 
+            alert_cfg->template_key = strdupz(rt->name);
             ignore_this = 0;
         }
         else if(hash == hash_os && !strcasecmp(key, HEALTH_OS_KEY)) {
             char *os_match = value;
+            if (alert_cfg) alert_cfg->os = strdupz(value);
             SIMPLE_PATTERN *os_pattern = simple_pattern_create(os_match, NULL, SIMPLE_PATTERN_EXACT);
 
             if(!simple_pattern_matches(os_pattern, host->os)) {
@@ -683,6 +751,7 @@ static int health_readfile(const char *filename, void *data) {
         }
         else if(hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) {
             char *host_match = value;
+            if (alert_cfg) alert_cfg->host = strdupz(value);
             SIMPLE_PATTERN *host_pattern = simple_pattern_create(host_match, NULL, SIMPLE_PATTERN_EXACT);
 
             if(!simple_pattern_matches(host_pattern, host->hostname)) {
@@ -699,6 +768,7 @@ static int health_readfile(const char *filename, void *data) {
         }
         else if(rc) {
             if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) {
+                alert_cfg->on = strdupz(value);
                 if(rc->chart) {
                     if(strcmp(rc->chart, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -710,6 +780,7 @@ static int health_readfile(const char *filename, void *data) {
                 rc->hash_chart = simple_hash(rc->chart);
             }
             else if(hash == hash_class && !strcasecmp(key, HEALTH_CLASS_KEY)) {
+                alert_cfg->classification = strdupz(value);
                 if(rc->classification) {
                     if(strcmp(rc->classification, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -721,6 +792,7 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rc->classification);
             }
             else if(hash == hash_component && !strcasecmp(key, HEALTH_COMPONENT_KEY)) {
+                alert_cfg->component = strdupz(value);
                 if(rc->component) {
                     if(strcmp(rc->component, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -732,6 +804,7 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rc->component);
             }
             else if(hash == hash_type && !strcasecmp(key, HEALTH_TYPE_KEY)) {
+                alert_cfg->type = strdupz(value);
                 if(rc->type) {
                     if(strcmp(rc->type, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -743,18 +816,32 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rc->type);
             }
             else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
+                alert_cfg->lookup = strdupz(value);
                 health_parse_db_lookup(line, filename, value, &rc->group, &rc->after, &rc->before,
                         &rc->update_every, &rc->options, &rc->dimensions, &rc->foreachdim);
                 if(rc->foreachdim) {
                     rc->spdim = health_pattern_from_foreach(rc->foreachdim);
                 }
+                if (rc->after) {
+                    if (rc->dimensions)
+                        alert_cfg->p_db_lookup_dimensions = strdupz(rc->dimensions);
+                    if (rc->group)
+                        alert_cfg->p_db_lookup_method = strdupz(group_method2string(rc->group));
+                    alert_cfg->p_db_lookup_options = rc->options;
+                    alert_cfg->p_db_lookup_after = rc->after;
+                    alert_cfg->p_db_lookup_before = rc->before;
+                    alert_cfg->p_update_every = rc->update_every;
+                }
             }
             else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
+                alert_cfg->every = strdupz(value);
                 if(!config_parse_duration(value, &rc->update_every))
                     error("Health configuration at line %zu of file '%s' for alarm '%s' at key '%s' cannot parse duration: '%s'.",
                             line, filename, rc->name, key, value);
+                alert_cfg->p_update_every = rc->update_every;
             }
             else if(hash == hash_green && !strcasecmp(key, HEALTH_GREEN_KEY)) {
+                alert_cfg->green = strdupz(value);
                 char *e;
                 rc->green = str2ld(value, &e);
                 if(e && *e) {
@@ -763,6 +850,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_red && !strcasecmp(key, HEALTH_RED_KEY)) {
+                alert_cfg->red = strdupz(value);
                 char *e;
                 rc->red = str2ld(value, &e);
                 if(e && *e) {
@@ -771,6 +859,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_calc && !strcasecmp(key, HEALTH_CALC_KEY)) {
+                alert_cfg->calc = strdupz(value);
                 const char *failed_at = NULL;
                 int error = 0;
                 rc->calculation = expression_parse(value, &failed_at, &error);
@@ -780,6 +869,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_warn && !strcasecmp(key, HEALTH_WARN_KEY)) {
+                alert_cfg->warn = strdupz(value);
                 const char *failed_at = NULL;
                 int error = 0;
                 rc->warning = expression_parse(value, &failed_at, &error);
@@ -789,6 +879,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_crit && !strcasecmp(key, HEALTH_CRIT_KEY)) {
+                alert_cfg->crit = strdupz(value);
                 const char *failed_at = NULL;
                 int error = 0;
                 rc->critical = expression_parse(value, &failed_at, &error);
@@ -798,6 +889,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) {
+                alert_cfg->exec = strdupz(value);
                 if(rc->exec) {
                     if(strcmp(rc->exec, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -808,6 +900,7 @@ static int health_readfile(const char *filename, void *data) {
                 rc->exec = strdupz(value);
             }
             else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) {
+                alert_cfg->to = strdupz(value);
                 if(rc->recipient) {
                     if(strcmp(rc->recipient, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -818,6 +911,7 @@ static int health_readfile(const char *filename, void *data) {
                 rc->recipient = strdupz(value);
             }
             else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) {
+                alert_cfg->units = strdupz(value);
                 if(rc->units) {
                     if(strcmp(rc->units, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -829,6 +923,7 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rc->units);
             }
             else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) {
+                alert_cfg->info = strdupz(value);
                 if(rc->info) {
                     if(strcmp(rc->info, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -840,17 +935,21 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rc->info);
             }
             else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
+                alert_cfg->delay = strdupz(value);
                 health_parse_delay(line, filename, value, &rc->delay_up_duration, &rc->delay_down_duration, &rc->delay_max_duration, &rc->delay_multiplier);
             }
             else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+                alert_cfg->options = strdupz(value);
                 rc->options |= health_parse_options(value);
             }
             else if(hash == hash_repeat && !strcasecmp(key, HEALTH_REPEAT_KEY)){
+                alert_cfg->repeat = strdupz(value);
                 health_parse_repeat(line, filename, value,
                                     &rc->warn_repeat_every,
                                     &rc->crit_repeat_every);
             }
             else if(hash == hash_host_label && !strcasecmp(key, HEALTH_HOST_LABEL_KEY)) {
+                alert_cfg->host_labels = strdupz(value);
                 if(rc->labels) {
                     if(strcmp(rc->labels, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'.",
@@ -864,6 +963,7 @@ static int health_readfile(const char *filename, void *data) {
                 rc->splabels = simple_pattern_create(rc->labels, NULL, SIMPLE_PATTERN_EXACT);
             }
             else if(hash == hash_plugin && !strcasecmp(key, HEALTH_PLUGIN_KEY)) {
+                alert_cfg->plugin = strdupz(value);
                 freez(rc->plugin_match);
                 simple_pattern_free(rc->plugin_pattern);
 
@@ -871,6 +971,7 @@ static int health_readfile(const char *filename, void *data) {
                 rc->plugin_pattern = simple_pattern_create(rc->plugin_match, NULL, SIMPLE_PATTERN_EXACT);
             }
             else if(hash == hash_module && !strcasecmp(key, HEALTH_MODULE_KEY)) {
+                alert_cfg->module = strdupz(value);
                 freez(rc->module_match);
                 simple_pattern_free(rc->module_pattern);
 
@@ -884,6 +985,7 @@ static int health_readfile(const char *filename, void *data) {
         }
         else if(rt) {
             if(hash == hash_on && !strcasecmp(key, HEALTH_ON_KEY)) {
+                alert_cfg->on = strdupz(value);
                 if(rt->context) {
                     if(strcmp(rt->context, value) != 0)
                         error("Health configuration at line %zu of file '%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -895,6 +997,7 @@ static int health_readfile(const char *filename, void *data) {
                 rt->hash_context = simple_hash(rt->context);
             }
             else if(hash == hash_class && !strcasecmp(key, HEALTH_CLASS_KEY)) {
+                alert_cfg->classification = strdupz(value);
                 if(rt->classification) {
                     if(strcmp(rt->classification, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -906,6 +1009,7 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rt->classification);
             }
             else if(hash == hash_component && !strcasecmp(key, HEALTH_COMPONENT_KEY)) {
+                alert_cfg->component = strdupz(value);
                 if(rt->component) {
                     if(strcmp(rt->component, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -917,6 +1021,7 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rt->component);
             }
             else if(hash == hash_type && !strcasecmp(key, HEALTH_TYPE_KEY)) {
+                alert_cfg->type = strdupz(value);
                 if(rt->type) {
                     if(strcmp(rt->type, value) != 0)
                         error("Health configuration at line %zu of file '%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -928,6 +1033,7 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rt->type);
             }
             else if(hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) {
+                alert_cfg->families = strdupz(value);
                 freez(rt->family_match);
                 simple_pattern_free(rt->family_pattern);
 
@@ -935,6 +1041,7 @@ static int health_readfile(const char *filename, void *data) {
                 rt->family_pattern = simple_pattern_create(rt->family_match, NULL, SIMPLE_PATTERN_EXACT);
             }
             else if(hash == hash_plugin && !strcasecmp(key, HEALTH_PLUGIN_KEY)) {
+                alert_cfg->plugin = strdupz(value);
                 freez(rt->plugin_match);
                 simple_pattern_free(rt->plugin_pattern);
 
@@ -942,6 +1049,7 @@ static int health_readfile(const char *filename, void *data) {
                 rt->plugin_pattern = simple_pattern_create(rt->plugin_match, NULL, SIMPLE_PATTERN_EXACT);
             }
             else if(hash == hash_module && !strcasecmp(key, HEALTH_MODULE_KEY)) {
+                alert_cfg->module = strdupz(value);
                 freez(rt->module_match);
                 simple_pattern_free(rt->module_pattern);
 
@@ -949,6 +1057,7 @@ static int health_readfile(const char *filename, void *data) {
                 rt->module_pattern = simple_pattern_create(rt->module_match, NULL, SIMPLE_PATTERN_EXACT);
             }
             else if(hash == hash_charts && !strcasecmp(key, HEALTH_CHARTS_KEY)) {
+                alert_cfg->charts = strdupz(value);
                 freez(rt->charts_match);
                 simple_pattern_free(rt->charts_pattern);
 
@@ -956,18 +1065,32 @@ static int health_readfile(const char *filename, void *data) {
                 rt->charts_pattern = simple_pattern_create(rt->charts_match, NULL, SIMPLE_PATTERN_EXACT);
             }
             else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
+                alert_cfg->lookup = strdupz(value);
                 health_parse_db_lookup(line, filename, value, &rt->group, &rt->after, &rt->before,
                         &rt->update_every, &rt->options, &rt->dimensions, &rt->foreachdim);
                 if(rt->foreachdim) {
                     rt->spdim = health_pattern_from_foreach(rt->foreachdim);
                 }
+                if (rt->after) {
+                    if (rt->dimensions)
+                        alert_cfg->p_db_lookup_dimensions = strdupz(rt->dimensions);
+                    if (rt->group)
+                        alert_cfg->p_db_lookup_method = strdupz(group_method2string(rt->group));
+                    alert_cfg->p_db_lookup_options = rt->options;
+                    alert_cfg->p_db_lookup_after = rt->after;
+                    alert_cfg->p_db_lookup_before = rt->before;
+                    alert_cfg->p_update_every = rt->update_every;
+                }
             }
             else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
+                alert_cfg->every = strdupz(value);
                 if(!config_parse_duration(value, &rt->update_every))
                     error("Health configuration at line %zu of file '%s' for template '%s' at key '%s' cannot parse duration: '%s'.",
                             line, filename, rt->name, key, value);
+                alert_cfg->p_update_every = rt->update_every;
             }
             else if(hash == hash_green && !strcasecmp(key, HEALTH_GREEN_KEY)) {
+                alert_cfg->green = strdupz(value);
                 char *e;
                 rt->green = str2ld(value, &e);
                 if(e && *e) {
@@ -976,6 +1099,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_red && !strcasecmp(key, HEALTH_RED_KEY)) {
+                alert_cfg->red = strdupz(value);
                 char *e;
                 rt->red = str2ld(value, &e);
                 if(e && *e) {
@@ -984,6 +1108,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_calc && !strcasecmp(key, HEALTH_CALC_KEY)) {
+                alert_cfg->calc = strdupz(value);
                 const char *failed_at = NULL;
                 int error = 0;
                 rt->calculation = expression_parse(value, &failed_at, &error);
@@ -993,6 +1118,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_warn && !strcasecmp(key, HEALTH_WARN_KEY)) {
+                alert_cfg->warn = strdupz(value);
                 const char *failed_at = NULL;
                 int error = 0;
                 rt->warning = expression_parse(value, &failed_at, &error);
@@ -1002,6 +1128,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_crit && !strcasecmp(key, HEALTH_CRIT_KEY)) {
+                alert_cfg->crit = strdupz(value);
                 const char *failed_at = NULL;
                 int error = 0;
                 rt->critical = expression_parse(value, &failed_at, &error);
@@ -1011,6 +1138,7 @@ static int health_readfile(const char *filename, void *data) {
                 }
             }
             else if(hash == hash_exec && !strcasecmp(key, HEALTH_EXEC_KEY)) {
+                alert_cfg->exec = strdupz(value);
                 if(rt->exec) {
                     if(strcmp(rt->exec, value) != 0)
                         error("Health configuration at line %zu of file '%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -1021,6 +1149,7 @@ static int health_readfile(const char *filename, void *data) {
                 rt->exec = strdupz(value);
             }
             else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) {
+                alert_cfg->to = strdupz(value);
                 if(rt->recipient) {
                     if(strcmp(rt->recipient, value) != 0)
                         error("Health configuration at line %zu of file '%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -1031,6 +1160,7 @@ static int health_readfile(const char *filename, void *data) {
                 rt->recipient = strdupz(value);
             }
             else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) {
+                alert_cfg->units = strdupz(value);
                 if(rt->units) {
                     if(strcmp(rt->units, value) != 0)
                         error("Health configuration at line %zu of file '%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -1042,6 +1172,7 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rt->units);
             }
             else if(hash == hash_info && !strcasecmp(key, HEALTH_INFO_KEY)) {
+                alert_cfg->info = strdupz(value);
                 if(rt->info) {
                     if(strcmp(rt->info, value) != 0)
                         error("Health configuration at line %zu of file '%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -1053,17 +1184,21 @@ static int health_readfile(const char *filename, void *data) {
                 strip_quotes(rt->info);
             }
             else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
+                alert_cfg->delay = strdupz(value);
                 health_parse_delay(line, filename, value, &rt->delay_up_duration, &rt->delay_down_duration, &rt->delay_max_duration, &rt->delay_multiplier);
             }
             else if(hash == hash_options && !strcasecmp(key, HEALTH_OPTIONS_KEY)) {
+                alert_cfg->options = strdupz(value);
                 rt->options |= health_parse_options(value);
             }
             else if(hash == hash_repeat && !strcasecmp(key, HEALTH_REPEAT_KEY)){
+                alert_cfg->repeat = strdupz(value);
                 health_parse_repeat(line, filename, value,
                                     &rt->warn_repeat_every,
                                     &rt->crit_repeat_every);
             }
             else if(hash == hash_host_label && !strcasecmp(key, HEALTH_HOST_LABEL_KEY)) {
+                alert_cfg->host_labels = strdupz(value);
                 if(rt->labels) {
                     if(strcmp(rt->labels, value) != 0)
                         error("Health configuration at line %zu of file '%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. Using ('%s').",
@@ -1089,16 +1224,20 @@ static int health_readfile(const char *filename, void *data) {
 
     if(rc) {
         //health_add_alarms_loop(host, rc, ignore_this) ;
-        if(ignore_this || !rrdcalc_add_alarm_from_config(host, rc)) {
+        if(ignore_this || !alert_hash_and_store_config(rc->config_hash_id, alert_cfg) || !rrdcalc_add_alarm_from_config(host, rc)) {
             rrdcalc_free(rc);
         }
     }
 
     if(rt) {
-        if(ignore_this || !rrdcalctemplate_add_template_from_config(host, rt))
+        if(ignore_this || !alert_hash_and_store_config(rt->config_hash_id, alert_cfg) || !rrdcalctemplate_add_template_from_config(host, rt)) {
             rrdcalctemplate_free(rt);
+        }
     }
 
+    if (alert_cfg)
+        alert_config_free(alert_cfg);
+
     fclose(fp);
     return 1;
 }
diff --git a/health/health_json.c b/health/health_json.c
index 4df44611c..a21d5a4fd 100644
--- a/health/health_json.c
+++ b/health/health_json.c
@@ -14,12 +14,19 @@ void health_string2json(BUFFER *wb, const char *prefix, const char *label, const
 }
 
 void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
+    char *edit_command = ae->source ? health_edit_command_from_source(ae->source) : strdupz("UNKNOWN=0");
+    char config_hash_id[GUID_LEN + 1];
+    uuid_unparse_lower(ae->config_hash_id, config_hash_id);
+
     buffer_sprintf(wb,
             "\n\t{\n"
                     "\t\t\"hostname\": \"%s\",\n"
+                    "\t\t\"utc_offset\": %d,\n"
+                    "\t\t\"timezone\": \"%s\",\n"
                     "\t\t\"unique_id\": %u,\n"
                     "\t\t\"alarm_id\": %u,\n"
                     "\t\t\"alarm_event_id\": %u,\n"
+                    "\t\t\"config_hash_id\": \"%s\",\n"
                     "\t\t\"name\": \"%s\",\n"
                     "\t\t\"chart\": \"%s\",\n"
                     "\t\t\"family\": \"%s\",\n"
@@ -34,6 +41,7 @@ void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host)
                     "\t\t\"recipient\": \"%s\",\n"
                     "\t\t\"exec_code\": %d,\n"
                     "\t\t\"source\": \"%s\",\n"
+                    "\t\t\"command\": \"%s\",\n"
                     "\t\t\"units\": \"%s\",\n"
                     "\t\t\"when\": %lu,\n"
                     "\t\t\"duration\": %lu,\n"
@@ -49,9 +57,12 @@ void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host)
                     "\t\t\"last_repeat\": \"%lu\",\n"
                     "\t\t\"silenced\": \"%s\",\n"
                    , host->hostname
+                   , host->utc_offset
+                   , host->abbrev_timezone
                    , ae->unique_id
                    , ae->alarm_id
                    , ae->alarm_event_id
+                   , config_hash_id
                    , ae->name
                    , ae->chart
                    , ae->family
@@ -66,6 +77,7 @@ void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host)
                    , ae->recipient?ae->recipient:host->health_default_recipient
                    , ae->exec_code
                    , ae->source
+                   , edit_command
                    , ae->units?ae->units:""
                    , (unsigned long)ae->when
                    , (unsigned long)ae->duration
@@ -114,6 +126,7 @@ void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host)
     buffer_strcat(wb, "\t}");
 
     freez(replaced_info);
+    freez(edit_command);
 }
 
 void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *chart) {
@@ -178,9 +191,13 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
         }
     }
 
+    char hash_id[GUID_LEN + 1];
+    uuid_unparse_lower(rc->config_hash_id, hash_id);
+
     buffer_sprintf(wb,
             "\t\t\"%s.%s\": {\n"
                     "\t\t\t\"id\": %lu,\n"
+                    "\t\t\t\"config_hash_id\": \"%s\",\n"
                     "\t\t\t\"name\": \"%s\",\n"
                     "\t\t\t\"chart\": \"%s\",\n"
                     "\t\t\t\"family\": \"%s\",\n"
@@ -212,6 +229,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
                     "\t\t\t\"last_repeat\": \"%lu\",\n"
                    , rc->chart, rc->name
                    , (unsigned long)rc->id
+                   , hash_id
                    , rc->name
                    , rc->chart
                    , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
diff --git a/health/health_log.c b/health/health_log.c
index de0a0883b..d20085d9e 100644
--- a/health/health_log.c
+++ b/health/health_log.c
@@ -38,39 +38,41 @@ static inline void health_log_rotate(RRDHOST *host) {
     }
 
     if(unlikely(host->health_log_entries_written > rotate_every)) {
-        health_alarm_log_close(host);
+        if(unlikely(host->health_log_fp)) {
+            health_alarm_log_close(host);
 
-        char old_filename[FILENAME_MAX + 1];
-        snprintfz(old_filename, FILENAME_MAX, "%s.old", host->health_log_filename);
+            char old_filename[FILENAME_MAX + 1];
+            snprintfz(old_filename, FILENAME_MAX, "%s.old", host->health_log_filename);
 
-        if(unlink(old_filename) == -1 && errno != ENOENT)
-            error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, old_filename);
+            if(unlink(old_filename) == -1 && errno != ENOENT)
+                error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, old_filename);
 
-        if(link(host->health_log_filename, old_filename) == -1 && errno != ENOENT)
-            error("HEALTH [%s]: cannot move file '%s' to '%s'.", host->hostname, host->health_log_filename, old_filename);
+            if(link(host->health_log_filename, old_filename) == -1 && errno != ENOENT)
+                error("HEALTH [%s]: cannot move file '%s' to '%s'.", host->hostname, host->health_log_filename, old_filename);
 
-        if(unlink(host->health_log_filename) == -1 && errno != ENOENT)
-            error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, host->health_log_filename);
+            if(unlink(host->health_log_filename) == -1 && errno != ENOENT)
+                error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, host->health_log_filename);
 
-        // open it with truncate
-        host->health_log_fp = fopen(host->health_log_filename, "w");
+            // open it with truncate
+            host->health_log_fp = fopen(host->health_log_filename, "w");
 
-        if(host->health_log_fp)
-            fclose(host->health_log_fp);
-        else
-            error("HEALTH [%s]: cannot truncate health log '%s'", host->hostname, host->health_log_filename);
+            if(host->health_log_fp)
+                fclose(host->health_log_fp);
+            else
+                error("HEALTH [%s]: cannot truncate health log '%s'", host->hostname, host->health_log_filename);
 
-        host->health_log_fp = NULL;
+            host->health_log_fp = NULL;
 
-        host->health_log_entries_written = 0;
-        health_alarm_log_open(host);
+            host->health_log_entries_written = 0;
+            health_alarm_log_open(host);
+        }
     }
 }
 
 inline void health_label_log_save(RRDHOST *host) {
     health_log_rotate(host);
 
-    if(likely(host->health_log_fp)) {
+    if(unlikely(host->health_log_fp)) {
         BUFFER *wb = buffer_create(1024);
         rrdhost_check_rdlock(host);
         netdata_rwlock_rdlock(&host->labels.labels_rwlock);
@@ -101,7 +103,7 @@ inline void health_label_log_save(RRDHOST *host) {
 
 inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
     health_log_rotate(host);
-    if(likely(host->health_log_fp)) {
+    if(unlikely(host->health_log_fp)) {
         if(unlikely(fprintf(host->health_log_fp
                             , "%c\t%s"
                         "\t%08x\t%08x\t%08x\t%08x\t%08x"
@@ -155,13 +157,12 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
             ae->flags |= HEALTH_ENTRY_FLAG_SAVED;
             host->health_log_entries_written++;
         }
-    }
+    }else
+        sql_health_alarm_log_save(host, ae);
+
 #ifdef ENABLE_ACLK
     if (netdata_cloud_setting) {
-        if ((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) ||
-            ((ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL))) {
-            aclk_update_alarm(host, ae);
-        }
+        sql_queue_alarm_to_aclk(host, ae);
     }
 #endif
 }
@@ -368,7 +369,7 @@ static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char
 
             ae->last_repeat = last_repeat;
 
-            if (likely(entries > 28)) {
+            if (likely(entries > 30)) {
                 freez(ae->classification);
                 ae->classification = strdupz(pointers[28]);
                 if(!*ae->classification) { freez(ae->classification); ae->classification = NULL; }
@@ -392,9 +393,13 @@ static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char
             if(unlikely(*pointers[0] == 'A')) {
                 ae->next = host->health_log.alarms;
                 host->health_log.alarms = ae;
+                sql_health_alarm_log_insert(host, ae);
                 loaded++;
             }
-            else updated++;
+            else {
+                sql_health_alarm_log_update(host, ae);
+                updated++;
+            }
 
             if(unlikely(ae->unique_id > host->health_max_unique_id))
                 host->health_max_unique_id = ae->unique_id;
@@ -444,8 +449,6 @@ inline void health_alarm_log_load(RRDHOST *host) {
         health_alarm_log_read(host, fp, host->health_log_filename);
         fclose(fp);
     }
-
-    health_alarm_log_open(host);
 }
 
 
@@ -456,6 +459,7 @@ inline ALARM_ENTRY* health_create_alarm_entry(
         RRDHOST *host,
         uint32_t alarm_id,
         uint32_t alarm_event_id,
+        uuid_t config_hash_id,
         time_t when,
         const char *name,
         const char *chart,
@@ -487,6 +491,8 @@ inline ALARM_ENTRY* health_create_alarm_entry(
         ae->hash_chart = simple_hash(ae->chart);
     }
 
+    uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
+
     if(family)
         ae->family = strdupz(family);
 
diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in
index 9a3a80ad6..08a32ff10 100755
--- a/health/notifications/alarm-notify.sh.in
+++ b/health/notifications/alarm-notify.sh.in
@@ -239,6 +239,11 @@ else
   calc_param_values="${22}"  # the values of the parameters in the expression, at the time of the evaluation
   total_warnings="${23}"     # Total number of alarms in WARNING state
   total_critical="${24}"     # Total number of alarms in CRITICAL state
+  total_warn_alarms="${25}"  # List of alarms in warning state
+  total_crit_alarms="${26}"  # List of alarms in critical state
+  classification="${27}"     # The class field from .conf files
+  edit_command_line="${28}"  # The command to edit the alarm, with the line number
+  sender_host="${29}"        # The host sending this notification
 fi
 
 # -----------------------------------------------------------------------------
@@ -252,6 +257,17 @@ else
   host="${args_host}"
 fi
 
+# -----------------------------------------------------------------------------
+# Do the same for sender_host (find a suitable hostname to use, if netdata did not supply a hostname)
+
+if [ -z "${sender_host}" ]; then  # quoted so empty, multi-word or glob values are tested as one string
+  this_host=$(hostname -s 2>/dev/null)
+  s_host="${this_host}"
+  sender_host="${this_host}"
+else
+  s_host="${sender_host}"
+fi
+
 # -----------------------------------------------------------------------------
 # screen statuses we don't need to send a notification
 
@@ -303,7 +319,7 @@ SLACK_WEBHOOK_URL=
 # Microsoft Teams configs
 MSTEAMS_WEBHOOK_URL=
 
-# Legacy Microsoft Teams configs for backwards compatability:
+# Legacy Microsoft Teams configs for backwards compatibility:
 declare -A role_recipients_msteam
 
 # rocketchat configs
@@ -810,6 +826,14 @@ date=$(date --date=@${when} "${date_format}" 2>/dev/null)
 [ -z "${date}" ] && date=$(date --date=@${when} 2>/dev/null)
 [ -z "${date}" ] && date=$(date 2>/dev/null)
 
+# -----------------------------------------------------------------------------
+# get the date in utc the alarm happened
+
+date_utc=$(date --date=@${when} "${date_format}" -u 2>/dev/null)
+[ -z "${date_utc}" ] && date_utc=$(date -u "${date_format}" 2>/dev/null)
+[ -z "${date_utc}" ] && date_utc=$(date -u --date=@${when} 2>/dev/null)
+[ -z "${date_utc}" ] && date_utc=$(date -u 2>/dev/null)
+
 # ----------------------------------------------------------------------------
 # prepare some extra headers if we've been asked to thread e-mails
 if [ "${SEND_EMAIL}" == "YES" ] && [ "${EMAIL_THREADING}" != "NO" ]; then
@@ -915,7 +939,7 @@ send_email() {
     fi
 
     [ -n "${sender_email}" ] && opts+=(-f "${sender_email}")
-    [ -n "${sender_name}" ] && sendmail --help 2>&1 | grep -q "\-F " && opts+=(-F "${sender_name}")
+    [ -n "${sender_name}" ] && ${sendmail} -F 2>&1 | head -1 | grep -qv "sendmail: unrecognized option: F" && opts+=(-F "${sender_name}")
 
     if [ "${debug}" = "1" ]; then
       echo >&2 "--- BEGIN sendmail command ---"
@@ -1364,15 +1388,15 @@ EOF
     )"
 
     # Replacing in the webhook CHANNEL string by the MS Teams channel name from conf file.
-    webhook="${webhook//CHANNEL/${channel}}"
+    cur_webhook="${webhook//CHANNEL/${channel}}"
 
-    httpcode=$(docurl -H "Content-Type: application/json" -d "${payload}" "${webhook}")
+    httpcode=$(docurl -H "Content-Type: application/json" -d "${payload}" "${cur_webhook}")
 
     if [ "${httpcode}" = "200" ]; then
-      info "sent Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${webhook}'"
+      info "sent Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${cur_webhook}'"
       sent=$((sent + 1))
     else
-      error "failed to send Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${webhook}', with HTTP response status code ${httpcode}."
+      error "failed to send Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${cur_webhook}', with HTTP response status code ${httpcode}."
     fi
   done
 
@@ -2113,12 +2137,12 @@ send_dynatrace() {
   [ "${SEND_DYNATRACE}" != "YES" ] && return 1
 
   local dynatrace_url="${DYNATRACE_SERVER}/e/${DYNATRACE_SPACE}/api/v1/events"
-  local description="NetData Notification for: ${host} ${chart}.${name} is ${status}"
+  local description="Netdata Notification for: ${host} ${chart}.${name} is ${status}"
   local payload=""
 
   payload=$(cat <<EOF
 {
-  "title": "NetData Alarm from ${host}",
+  "title": "Netdata Alarm from ${host}",
   "source" : "${DYNATRACE_ANNOTATION_TYPE}",
   "description" : "${description}",
   "eventType": "${DYNATRACE_EVENT}",
@@ -2266,8 +2290,10 @@ urlencode "${family}" >/dev/null
 url_family="${REPLY}"
 urlencode "${name}" >/dev/null
 url_name="${REPLY}"
+urlencode "${value_string}" >/dev/null
+url_value_string="${REPLY}"
 
-redirect_params="host=${url_host}&chart=${url_chart}&family=${url_family}&alarm=${url_name}&alarm_unique_id=${unique_id}&alarm_id=${alarm_id}&alarm_event_id=${event_id}&alarm_when=${when}"
+redirect_params="host=${url_host}&chart=${url_chart}&family=${url_family}&alarm=${url_name}&alarm_unique_id=${unique_id}&alarm_id=${alarm_id}&alarm_event_id=${event_id}&alarm_when=${when}&alarm_status=${status}&alarm_chart=${url_chart}&alarm_value=${url_value_string}"  # alarm_chart reuses the URL-encoded chart id, like chart=
 GOTOCLOUD=0
 
 if [ "${NETDATA_REGISTRY_URL}" == "https://registry.my-netdata.io" ]; then
@@ -2284,9 +2310,9 @@ fi
 if [ ${GOTOCLOUD} -eq 0 ]; then
   goto_url="${NETDATA_REGISTRY_URL}/goto-host-from-alarm.html?${redirect_params}"
 else
-  # Temporarily disable alarm redirection, as the cloud endpoint no longer exists. This functionality will be restored after discussion on #9487. For now, just lead to netdata.cloud
-  #goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentID=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}"
-  goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}"
+  # Alarm redirection was temporarily disabled while the cloud endpoint did not exist (see discussion on #9487).
+  # It is re-allowed here for alarms 2.0 and the new e-mail template.
+  goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentId=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}"
 fi
 
 # the severity of the alarm
@@ -2311,48 +2337,79 @@ alarm="${name//_/ } = ${value_string}"
 # the image of the alarm
 image="${images_base_url}/images/banner-icon-144x144.png"
 
+# have a default email status, in case the following case does not catch it
+status_email_subject="${status}"
+
 # prepare the title based on status
 case "${status}" in
 CRITICAL)
   image="${images_base_url}/images/alert-128-red.png"
+  alarm_badge="${NETDATA_REGISTRY_CLOUD_BASE_URL}/static/email/img/label_critical.png"
   status_message="is critical"
+  status_email_subject="Critical"
   color="#ca414b"
+  rich_status_raised_for="Raised to critical, for ${non_clear_duration_txt}"
+  background_color="#FFEBEF"
+  border_color="#FF4136"
+  text_color="#FF4136"
+  action_text_color="#FFFFFF"
   ;;
 
 WARNING)
   image="${images_base_url}/images/alert-128-orange.png"
+  alarm_badge="${NETDATA_REGISTRY_CLOUD_BASE_URL}/static/email/img/label_warning.png"
   status_message="needs attention"
+  status_email_subject="Warning"
   color="#ffc107"
+  rich_status_raised_for="Raised to warning, for ${non_clear_duration_txt}"
+  background_color="#FFF8E1"
+  border_color="#FFC300"
+  text_color="#536775"
+  action_text_color="#35414A"
   ;;
 
 CLEAR)
   image="${images_base_url}/images/check-mark-2-128-green.png"
+  alarm_badge="${NETDATA_REGISTRY_CLOUD_BASE_URL}/static/email/img/label_recovered.png"
   status_message="recovered"
+  status_email_subject="Clear"
   color="#77ca6d"
+  rich_status_raised_for=
+  background_color="#E5F5E8"
+  border_color="#68C47D"
+  text_color="#00AB44"
+  action_text_color="#FFFFFF"
   ;;
 esac
 
+# the html email subject
+html_email_subject="${status_email_subject}, ${name} = ${value_string}, on ${host}"
+
 if [ "${status}" = "CLEAR" ]; then
   severity="Recovered from ${old_status}"
   if [ ${non_clear_duration} -gt ${duration} ]; then
     raised_for="(alarm was raised for ${non_clear_duration_txt})"
   fi
+  rich_status_raised_for="Recovered from ${old_status,,}, ${raised_for}"
 
   # don't show the value when the status is CLEAR
   # for certain alarms, this value might not have any meaning
   alarm="${name//_/ } ${raised_for}"
+  html_email_subject="${status_email_subject}, ${name} ${raised_for}, on ${host}"
 
 elif { [ "${old_status}" = "WARNING" ] && [ "${status}" = "CRITICAL" ]; }; then
   severity="Escalated to ${status}"
   if [ ${non_clear_duration} -gt ${duration} ]; then
     raised_for="(alarm is raised for ${non_clear_duration_txt})"
   fi
+  rich_status_raised_for="Escalated to critical, ${raised_for}"
 
 elif { [ "${old_status}" = "CRITICAL" ] && [ "${status}" = "WARNING" ]; }; then
   severity="Demoted to ${status}"
   if [ ${non_clear_duration} -gt ${duration} ]; then
     raised_for="(alarm is raised for ${non_clear_duration_txt})"
   fi
+  rich_status_raised_for="Demoted to warning, ${raised_for}"
 
 else
   raised_for=
@@ -2628,6 +2685,13 @@ Subject: ${host} ${status_message} - ${name//_/ } - ${chart}
 MIME-Version: 1.0
 Content-Type: multipart/alternative; boundary="multipart-boundary"
 ${email_thread_headers}
+X-Netdata-Severity: ${status,,}
+X-Netdata-Alert-Name: $name
+X-Netdata-Chart: $chart
+X-Netdata-Family: $family
+X-Netdata-Classification: $classification
+X-Netdata-Host: $host
+X-Netdata-Role: $roles
 
 This is a MIME-encoded multipart message
 
@@ -2638,120 +2702,742 @@ EOF
 
 else
 
+now=$(date "+%s")
+
+if [ -n "$total_warn_alarms" ]; then
+   while read -d, -r pair; do
+       IFS='=' read -r key val <<<"$pair"
+
+       date_w=$(date --date=@${val} "${date_format}" 2>/dev/null)
+       [ -z "${date_w}" ] && date_w=$(date "${date_format}" 2>/dev/null)
+       [ -z "${date_w}" ] && date_w=$(date --date=@${val} 2>/dev/null)
+       [ -z "${date_w}" ] && date_w=$(date 2>/dev/null)
+
+       elapsed=$((now - val))
+
+       duration4human ${elapsed} >/dev/null
+       elapsed_txt="${REPLY}"
+
+       WARN_ALARMS+="
+       <div class=\"set-font\" style=\"font-family: 'IBM Plex Sans', sans-serif; background: #FFFFFF; background-color: #FFFFFF; margin: 0px auto; max-width: 600px;\">
+            <table align=\"center\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" style=\"background:#FFFFFF;background-color:#FFFFFF;width:100%;\">
+              <tbody>
+              <tr>
+                <td style=\"border-top:8px solid #F7F8F8;direction:ltr;font-size:0px;padding:20px 0;text-align:center;\">
+                  <!--[if mso | IE]><table role=\"presentation\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\"><tr><td class=\"\" style=\"vertical-align:top;width:300px;\" ><![endif]-->
+                  <div class=\"mj-column-per-50 mj-outlook-group-fix\" style=\"font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:50%;\">
+                    <table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" style=\"vertical-align:top;\" width=\"100%\">
+                      <tbody>
+                      <tr>
+                        <td align=\"left\" style=\"font-size:0px;padding:10px 25px;word-break:break-word;\">
+                          <div style=\"font-family:Open Sans, sans-serif;font-size:14px;font-weight:600;line-height:1;text-align:left;color:#35414A;\">${key}</div>
+                        </td>
+                      </tr>
+                      <tr>
+                        <td align=\"left\" style=\"font-size:0px;padding:10px 25px;padding-top:2px;word-break:break-word;\">
+                          <div style=\"font-family:Open Sans, sans-serif;font-size:12px;line-height:1;text-align:left;color:#35414A;\">${date_w}</div>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td><td class=\"\" style=\"vertical-align:top;width:300px;\" ><![endif]-->
+                  <div class=\"mj-column-per-50 mj-outlook-group-fix\" style=\"font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:50%;\">
+                    <table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" width=\"100%\">
+                      <tbody>
+                      <tr>
+                        <td style=\"vertical-align:top;padding-top:13px;\">
+                          <table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" style width=\"100%\">
+                            <tbody>
+                            <tr>
+                              <td align=\"right\" style=\"font-size:0px;padding:10px 25px;word-break:break-word;\">
+                                <div style=\"font-family:Open Sans, sans-serif;font-size:13px;line-height:1;text-align:right;color:#555555;\"><span style=\"background-color:#FFF8E1; border: 1px solid #FFC300; border-radius:36px; padding: 2px 12px; margin-top: 20px; white-space: nowrap\">
+              Warning for ${elapsed_txt}
+           </span></div>
+                              </td>
+                            </tr>
+                            </tbody>
+                          </table>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+       "
+
+   done <<<"$total_warn_alarms,"
+fi
+
+if [ -n "$total_crit_alarms" ]; then
+   while read -d, -r pair; do
+       IFS='=' read -r key val <<<"$pair"
+
+       date_c=$(date --date=@${val} "${date_format}" 2>/dev/null)
+       [ -z "${date_c}" ] && date_c=$(date "${date_format}" 2>/dev/null)
+       [ -z "${date_c}" ] && date_c=$(date --date=@${val} 2>/dev/null)
+       [ -z "${date_c}" ] && date_c=$(date 2>/dev/null)
+
+       elapsed=$((now - val))
+
+       duration4human ${elapsed} >/dev/null
+       elapsed_txt="${REPLY}"
+
+       CRIT_ALARMS+="
+       <div class=\"set-font\" style=\"font-family: 'IBM Plex Sans', sans-serif; background: #FFFFFF; background-color: #FFFFFF; margin: 0px auto; max-width: 600px;\">
+            <table align=\"center\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" style=\"background:#FFFFFF;background-color:#FFFFFF;width:100%;\">
+              <tbody>
+              <tr>
+                <td style=\"border-top:8px solid #F7F8F8;direction:ltr;font-size:0px;padding:20px 0;text-align:center;\">
+                  <!--[if mso | IE]><table role=\"presentation\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\"><tr><td class=\"\" style=\"vertical-align:top;width:300px;\" ><![endif]-->
+                  <div class=\"mj-column-per-50 mj-outlook-group-fix\" style=\"font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:50%;\">
+                    <table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" style=\"vertical-align:top;\" width=\"100%\">
+                      <tbody>
+                      <tr>
+                        <td align=\"left\" style=\"font-size:0px;padding:10px 25px;word-break:break-word;\">
+                          <div style=\"font-family:Open Sans, sans-serif;font-size:14px;font-weight:600;line-height:1;text-align:left;color:#35414A;\">${key}</div>
+                        </td>
+                      </tr>
+                      <tr>
+                        <td align=\"left\" style=\"font-size:0px;padding:10px 25px;padding-top:2px;word-break:break-word;\">
+                          <div style=\"font-family:Open Sans, sans-serif;font-size:12px;line-height:1;text-align:left;color:#35414A;\">${date_c}</div>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td><td class=\"\" style=\"vertical-align:top;width:300px;\" ><![endif]-->
+                  <div class=\"mj-column-per-50 mj-outlook-group-fix\" style=\"font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:50%;\">
+                    <table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" width=\"100%\">
+                      <tbody>
+                      <tr>
+                        <td style=\"vertical-align:top;padding-top:13px;\">
+                          <table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" role=\"presentation\" style width=\"100%\">
+                            <tbody>
+                            <tr>
+                              <td align=\"right\" style=\"font-size:0px;padding:10px 25px;word-break:break-word;\">
+                                <div style=\"font-family:Open Sans, sans-serif;font-size:13px;line-height:1;text-align:right;color:#35414A;\"><span style=\"background-color:#FFEBEF; border: 1px solid #FF4136; border-radius:36px; padding: 2px 12px; margin-top: 20px; white-space: nowrap\">
+              Critical for ${elapsed_txt}
+           </span></div>
+                              </td>
+                            </tr>
+                            </tbody>
+                          </table>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+       "
+
+   done <<<"$total_crit_alarms,"
+fi
+
+if [ -n "$edit_command_line" ]; then
+    IFS='=' read -r edit_command line <<<"$edit_command_line"
+fi
+
 IFS='' read -r -d '' email_html_part <<EOF
 Content-Type: text/html; encoding=${EMAIL_CHARSET}
 Content-Disposition: inline
 Content-Transfer-Encoding: 8bit
 
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0; padding: 0;">
-<body style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 14px; width: 100% !important; min-height: 100%; line-height: 1.6; background: #f6f6f6; margin:0; padding: 0;">
-<table>
-    <tbody>
-    <tr>
-        <td style="vertical-align: top;" valign="top"></td>
-        <td width="700" style="vertical-align: top; display: block !important; max-width: 700px !important; clear: both !important; margin: 0 auto; padding: 0;" valign="top">
-            <div style="max-width: 700px; display: block; margin: 0 auto; padding: 20px;">
-                <table width="100%" cellpadding="0" cellspacing="0" style="background: #fff; border: 1px solid #e9e9e9;">
+<!doctype html>
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office">
+<head>
+  <title>
+  </title>
+  <!--[if !mso]><!-->
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <!--<![endif]-->
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <style type="text/css">
+      #outlook a { padding:0; }
+      body { margin:0;padding:0;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%; }
+      table, td { border-collapse:collapse;mso-table-lspace:0pt;mso-table-rspace:0pt; }
+      img { border:0;height:auto;line-height:100%; outline:none;text-decoration:none;-ms-interpolation-mode:bicubic; }
+      p { display:block;margin:13px 0; }
+  </style>
+  <!--[if mso]>
+  <xml>
+    <o:OfficeDocumentSettings>
+      <o:AllowPNG/>
+      <o:PixelsPerInch>96</o:PixelsPerInch>
+    </o:OfficeDocumentSettings>
+  </xml>
+  <![endif]-->
+  <!--[if lte mso 11]>
+  <style type="text/css">
+    .mj-outlook-group-fix { width:100% !important; }
+  </style>
+  <![endif]-->
+  <!--[if !mso]><!-->
+  <link href="https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;500;600;700&display=swap" rel="stylesheet" type="text/css">
+  <link href="https://fonts.googleapis.com/css?family=Ubuntu:300,400,500,700" rel="stylesheet" type="text/css">
+  <style type="text/css">
+      @import url(https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;500;600;700&display=swap);
+      @import url(https://fonts.googleapis.com/css?family=Ubuntu:300,400,500,700);
+  </style>
+  <!--<![endif]-->
+  <style type="text/css">
+      @media only screen and (min-width:100px) {
+          .mj-column-px-130 { width:130px !important; max-width: 130px; }
+          .mj-column-per-50 { width:50% !important; max-width: 50%; }
+          .mj-column-per-70 { width:70% !important; max-width: 70%; }
+          .mj-column-per-30 { width:30% !important; max-width: 30%; }
+          .mj-column-per-100 { width:100% !important; max-width: 100%; }
+          .mj-column-px-66 { width:66px !important; max-width: 66px; }
+          .mj-column-px-400 { width:400px !important; max-width: 400px; }
+      }
+  </style>
+  <style type="text/css">
+      @media only screen and (max-width:100px) {
+          table.mj-full-width-mobile { width: 100% !important; }
+          td.mj-full-width-mobile { width: auto !important; }
+      }
+  </style>
+</head>
+<body style="word-spacing:normal;">
+<div class="svgbg" style="background-image: url('https://staging.netdata.cloud/static/email/img/isotype_600.png'); background-repeat: no-repeat; background-position: top center; background-size: 600px 192px;">
+  <!--[if mso | IE]><table align="center" border="0" cellpadding="0" cellspacing="0" class="" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+  <div style="margin:0px auto;max-width:600px;">
+    <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+      <tbody>
+      <tr>
+        <td style="direction:ltr;font-size:0px;padding:20px 0;padding-bottom:50px;padding-left:0;text-align:left;">
+          <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:130px;" ><![endif]-->
+          <div class="mj-column-px-130 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:130px;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+              <tbody>
+              <tr>
+                <td align="center" style="font-size:0px;padding:10px 25px;padding-right:0;padding-left:0;word-break:break-word;">
+                  <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="border-collapse:collapse;border-spacing:0px;">
                     <tbody>
                     <tr>
-                        <td bgcolor="#eee" style="padding: 5px 20px 5px 20px; background-color: #eee;">
-                            <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 20px; color: #777; font-weight: bold;">netdata notification</div>
-                        </td>
+                      <td style="width:130px;">
+                        <img alt="Netdata Logo" height="auto" src="https://app.netdata.cloud/static/email/img/full_logo.png" style="border:0;display:block;outline:none;text-decoration:none;height:auto;width:100%;font-size:13px;" width="130">
+                      </td>
                     </tr>
+                    </tbody>
+                  </table>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td><td class="" style="vertical-align:top;width:300px;" ><![endif]-->
+          <div class="mj-column-per-50 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:50%;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
+              <tbody>
+              <tr>
+                <td style="vertical-align:top;padding-top:4px;">
+                  <table border="0" cellpadding="0" cellspacing="0" role="presentation" style width="100%">
+                    <tbody>
                     <tr>
-                        <td bgcolor="${color}" style="font-size: 16px; vertical-align: top; font-weight: 400; text-align: center; margin: 0; padding: 10px; color: #ffffff; background: ${color} !important; border: 1px solid ${color}; border-top-color: ${color};" align="center" valign="top">
-                            <h1 style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-weight: 400; margin: 0;">${host} ${status_message}</h1>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-left:10px;word-break:break-word;">
+                        <div style="font-family:Open Sans, sans-serif;font-size:16px;line-height:1;text-align:left;color:#35414A;">Notification</div>
+                      </td>
+                    </tr>
+                    </tbody>
+                  </table>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table><![endif]-->
+        </td>
+      </tr>
+      </tbody>
+    </table>
+  </div>
+  <!--[if mso | IE]></td></tr></table><table align="center" border="0" cellpadding="0" cellspacing="0" class="no-collapse-outlook" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+  <div class="no-collapse" style="border-collapse: initial; margin: 0px auto; border-radius: 4px; max-width: 600px;">
+    <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;border-radius:4px;">
+      <tbody>
+      <tr>
+        <td style="border:1px solid ${border_color};direction:ltr;font-size:0px;padding:20px 0;text-align:center;">
+          <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="set-font-outlook" width="600px" ><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:598px;" width="598" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+          <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 598px;">
+            <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+              <tbody>
+              <tr>
+                <td style="direction:ltr;font-size:0px;padding:20px 0;padding-bottom:0;padding-top:0;text-align:center;">
+                  <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:418.6px;" ><![endif]-->
+                  <div class="mj-column-per-70 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:70%;">
+                    <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+                      <tbody>
+                      <tr>
+                        <td align="left" style="font-size:0px;padding:10px 25px;padding-top:15px;word-break:break-word;">
+                          <div style="font-family:Open Sans, sans-serif;font-size:20px;font-weight:700;line-height:1;text-align:left;color:#35414A;">${name}</div>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td><td class="" style="vertical-align:top;width:179.4px;" ><![endif]-->
+                  <div class="mj-column-per-30 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:30%;">
+                    <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+                      <tbody>
+                      <tr>
+                        <td align="right" style="font-size:0px;padding:10px 25px;padding-right:25px;word-break:break-word;">
+                          <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="border-collapse:collapse;border-spacing:0px;">
+                            <tbody>
+                            <tr>
+                              <td style="width:100px;">
+                                <img height="auto" src="${alarm_badge}" style="border:0;display:block;outline:none;text-decoration:none;height:auto;width:100%;font-size:13px;" width="100"/>
+                              </td>
+                            </tr>
+                            </tbody>
+                          </table>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table></td></tr><tr><td class="set-font-outlook" width="600px" ><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:598px;" width="598" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+          <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 598px;">
+            <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+              <tbody>
+              <tr>
+                <td style="direction:ltr;font-size:0px;padding:0;text-align:center;">
+                  <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:598px;" ><![endif]-->
+                  <div class="mj-column-per-100 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:100%;">
+                    <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+                      <tbody>
+                      <tr>
+                        <td align="left" style="font-size:0px;padding:10px 25px;padding-top:0;word-break:break-word;">
+                          <div style="font-family:IBM Plex Sans, sans-serif;font-size:16px;line-height:1;text-align:left;color:#35414A;">on ${host}</div>
                         </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table></td></tr><tr><td class="set-font-outlook" width="600px" ><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:598px;" width="598" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+          <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 598px;">
+            <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+              <tbody>
+              <tr>
+                <td style="direction:ltr;font-size:0px;padding:20px 0;text-align:center;">
+                  <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:598px;" ><![endif]-->
+                  <div class="mj-column-per-100 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:100%;">
+                    <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+                      <tbody>
+                      <tr>
+                        <td align="left" style="font-size:0px;padding:10px 25px;padding-top:0;word-break:break-word;">
+                          <div style="font-family:Open Sans, sans-serif;font-size:26px;font-weight:700;line-height:1;text-align:left;color:#35414A;"><span style="color: ${text_color}; font-size:26px; background: ${background_color}; padding:4px 24px; border-radius: 36px">${value_string}
+            </span></div>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table></td></tr><tr><td class="set-font-outlook" width="600px" ><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:598px;" width="598" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+          <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 598px;">
+            <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+              <tbody>
+              <tr>
+                <td style="direction:ltr;font-size:0px;padding:20px 0;padding-bottom:0;padding-top:0;text-align:center;">
+                  <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:598px;" ><![endif]-->
+                  <div class="mj-column-per-100 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:100%;">
+                    <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+                      <tbody>
+                      <tr>
+                        <td align="left" style="font-size:0px;padding:10px 25px;padding-top:0;word-break:break-word;">
+                          <div style="font-family:Open Sans, sans-serif;font-size:16px;line-height:21px;text-align:left;color:#35414A;">Details: ${info}</div>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table></td></tr><tr><td class="set-font-outlook" width="600px" ><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:598px;" width="598" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+          <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 598px;">
+            <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+              <tbody>
+              <tr>
+                <td style="direction:ltr;font-size:0px;padding:20px 0;padding-bottom:0;padding-top:0;text-align:center;">
+                  <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:598px;" ><![endif]-->
+                  <div class="mj-column-per-100 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:100%;">
+                    <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+                      <tbody>
+                      <tr>
+                        <td align="center" vertical-align="middle" style="font-size:0px;padding:10px 25px;word-break:break-word;">
+                          <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="border-collapse:separate;width:100%;line-height:100%;">
+                            <tr>
+                              <td
+                                align="center" bgcolor="${border_color}" role="presentation" style="border:none;border-radius:3px;cursor:auto;height:44px;background:${border_color};" valign="middle">
+                                <p style="display:block;background:${border_color};color:#ffffff;font-size:13px;font-weight:600;line-height:44px;margin:0;text-decoration:none;text-transform:none;mso-padding-alt:0px;border-radius:3px;">
+                                  <a href="${goto_url}" style="color: ${action_text_color}; text-decoration: none; width: 100%; display: inline-block">GO TO CHART</a>
+                                </p>
+                              </td>
+                            </tr>
+                          </table>
+                        </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table></td></tr></table><![endif]-->
+        </td>
+      </tr>
+      </tbody>
+    </table>
+  </div>
+  <!--[if mso | IE]></td></tr></table><![endif]-->
+  <div style="height:32px;line-height:32px;">&#8202;</div>
+  <!--[if mso | IE]><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+  <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; background: ${background_color}; background-color: ${background_color}; margin: 0px auto; border-radius: 4px; max-width: 600px;">
+    <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="background:${background_color};background-color:${background_color};width:100%;border-radius:4px;">
+      <tbody>
+      <tr>
+        <td style="direction:ltr;font-size:0px;padding:20px 0;text-align:center;">
+          <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:600px;" ><![endif]-->
+          <div class="mj-column-per-100 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:100%;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+              <tbody>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-bottom:6px;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:18px;line-height:1;text-align:left;color:#35414A;">Chart:
+                    <span style="font-weight:700; font-size:20px">${chart}</span></div>
+                </td>
+              </tr>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-top:0;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:18px;line-height:1;text-align:left;color:#35414A;">Family:
+                    <span style="font-weight:700; font-size:20px">${family}</span></div>
+                </td>
+              </tr>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-top:4px;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:14px;line-height:1;text-align:left;color:#35414A;">${rich_status_raised_for}</div>
+                </td>
+              </tr>
+              <tr>
+                <td align="center" style="font-size:0px;padding:10px 25px;word-break:break-word;">
+                  <p style="border-top:solid 1px lightgrey;font-size:1px;margin:0px auto;width:100%;">
+                  </p>
+                  <!--[if mso | IE]><table align="center" border="0" cellpadding="0" cellspacing="0" style="border-top:solid 1px lightgrey;font-size:1px;margin:0px auto;width:550px;" role="presentation" width="550px" ><tr><td style="height:0;line-height:0;"> &nbsp;
+            </td></tr></table><![endif]-->
+                </td>
+              </tr>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-bottom:6px;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:16px;line-height:1;text-align:left;color:#35414A;">On
+                    <span style="font-weight:600">${date}</span></div>
+                </td>
+              </tr>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-top:0;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:16px;line-height:1;text-align:left;color:#35414A;">By:
+                    <span style="font-weight:600">${host}</span></div>
+                </td>
+              </tr>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-top:4px;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:14px;line-height:1;text-align:left;color:#35414A;">Global time:
+                    <span style="font-weight:600">${date_utc}</span></div>
+                </td>
+              </tr>
+              <tr>
+                <td align="center" style="font-size:0px;padding:10px 25px;word-break:break-word;">
+                  <p style="border-top:solid 1px lightgrey;font-size:1px;margin:0px auto;width:100%;">
+                  </p>
+                  <!--[if mso | IE]><table align="center" border="0" cellpadding="0" cellspacing="0" style="border-top:solid 1px lightgrey;font-size:1px;margin:0px auto;width:550px;" role="presentation" width="550px" ><tr><td style="height:0;line-height:0;"> &nbsp;
+            </td></tr></table><![endif]-->
+                </td>
+              </tr>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-bottom:6px;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:16px;line-height:1;text-align:left;color:#35414A;">Classification:
+                    <span style="font-weight:600">${classification}</span></div>
+                </td>
+              </tr>
+              <tr>
+                <td align="left" style="font-size:0px;padding:10px 25px;padding-top:0;word-break:break-word;">
+                  <div style="font-family:Open Sans, sans-serif;font-size:16px;line-height:1;text-align:left;color:#35414A;">Role:
+                    <span style="font-weight:600">${roles}</span></div>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table><![endif]-->
+        </td>
+      </tr>
+      </tbody>
+    </table>
+  </div>
+  <!--[if mso | IE]></td></tr></table><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+  <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 600px;">
+    <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+      <tbody>
+      <tr>
+        <td style="direction:ltr;font-size:0px;padding:20px 0;padding-left:25px;text-align:left;">
+          <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:66px;" ><![endif]-->
+          <div class="mj-column-px-66 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:66px;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
+              <tbody>
+              <tr>
+                <td style="vertical-align:top;padding:0;">
+                  <table border="0" cellpadding="0" cellspacing="0" role="presentation" style width="100%">
+                    <tbody>
+                    <tr>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-right:0;padding-left:0;word-break:break-word;">
+                        <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="border-collapse:collapse;border-spacing:0px;">
+                          <tbody>
+                          <tr>
+                            <td style="width:48px;">
+                              <img height="auto" src="https://app.netdata.cloud/static/email/img/community_icon.png" style="border:0;display:block;outline:none;text-decoration:none;height:auto;width:100%;font-size:13px;" width="48">
+                            </td>
+                          </tr>
+                          </tbody>
+                        </table>
+                      </td>
+                    </tr>
+                    </tbody>
+                  </table>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td><td align="left" class="" style="vertical-align:top;width:400px;" ><![endif]-->
+          <div class="mj-column-px-400 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:400px;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
+              <tbody>
+              <tr>
+                <td style="vertical-align:top;padding-left:0;">
+                  <table border="0" cellpadding="0" cellspacing="0" role="presentation" style width="100%">
+                    <tbody>
+                    <tr>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-left:0;word-break:break-word;">
+                        <div style="font-family:Open Sans, sans-serif;font-size:16px;font-weight:700;line-height:1;text-align:left;color:#35414A;">Want to know more about this alert?</div>
+                      </td>
+                    </tr>
+                    <tr>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-left:0;word-break:break-word;">
+                        <div style="font-family:Open Sans, sans-serif;font-size:14px;line-height:1.3;text-align:left;color:#35414A;">Discuss and troubleshoot with others on the Netdata <a href="https://community.netdata.cloud/" class="link" style="color: #00AB44; text-decoration: none;">community forums</a></div>
+                      </td>
                     </tr>
+                    </tbody>
+                  </table>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table><![endif]-->
+        </td>
+      </tr>
+      </tbody>
+    </table>
+  </div>
+  <!--[if mso | IE]></td></tr></table><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+  <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 600px;">
+    <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+      <tbody>
+      <tr>
+        <td style="direction:ltr;font-size:0px;padding:20px 0;padding-left:25px;text-align:left;">
+          <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:66px;" ><![endif]-->
+          <div class="mj-column-px-66 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:66px;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
+              <tbody>
+              <tr>
+                <td style="vertical-align:top;padding:0;">
+                  <table border="0" cellpadding="0" cellspacing="0" role="presentation" style width="100%">
+                    <tbody>
                     <tr>
-                        <td style="vertical-align: top;" valign="top">
-                            <div style="margin: 0; padding: 20px; max-width: 700px;">
-                                <table width="100%" cellpadding="0" cellspacing="0" style="max-width:700px">
-                                    <tbody>
-                                    <tr>
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding:0 0 20px;" align="left" valign="top">
-                                            <span>${chart}</span>
-                                            <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Chart</span>
-                                        </td>
-                                    </tr>
-                                    <tr style="margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
-                                            <span><b>${alarm}</b>${info_html}</span>
-                                            <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Alarm</span>
-                                        </td>
-                                    </tr>
-                                    <tr>
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
-                                            <span>${family}</span>
-                                            <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Family</span>
-                                        </td>
-                                    </tr>
-                                    <tr style="margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
-                                            <span>${severity}</span>
-                                            <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Severity</span>
-                                        </td>
-                                    </tr>
-                                    <tr style="margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top"><span>${date}</span>
-                                            <span>${raised_for_html}</span> <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Time</span>
-                                        </td>
-                                    </tr>
-                                    <tr style="margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
-                                            <span>${calc_expression}</span>
-                                            <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Evaluated Expression</span>
-                                        </td>
-                                    </tr>
-                                     <tr style="margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
-                                            <span>${calc_param_values}</span>
-                                            <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Expression Variables</span>
-                                        </td>
-                                    </tr>
-                                     <tr style="margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
-                                            The host has ${total_warnings} WARNING and ${total_critical} CRITICAL alarm(s) raised.
-                                         </td>
-                                    </tr>
-
-                                    <tr style="margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;">
-                                            <a href="${goto_url}" style="font-size: 14px; color: #ffffff; text-decoration: none; line-height: 1.5; font-weight: bold; text-align: center; display: inline-block; text-transform: capitalize; background: #35568d; border-width: 1px; border-style: solid; border-color: #2b4c86; margin: 0; padding: 10px 15px;" target="_blank">View Netdata</a>
-                                        </td>
-                                    </tr>
-                                    <tr style="text-align: center; margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 11px; vertical-align: top; margin: 0; padding: 10px 0 0 0; color: #666666;" align="center" valign="bottom">The source of this alarm is line <code>${src}</code><br/>(alarms are configurable, edit this file to adapt the alarm to your needs)
-                                        </td>
-                                    </tr>
-                                    <tr style="text-align: center; margin: 0; padding: 0;">
-                                        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 12px; vertical-align: top; margin:0; padding: 20px 0 0 0; color: #666666; border-top: 1px solid #f0f0f0;" align="center" valign="bottom">Sent by
-                                            <a href="https://mynetdata.io/" target="_blank">netdata</a>, the real-time performance and health monitoring, on <code>${host}</code>.
-                                        </td>
-                                    </tr>
-                                    </tbody>
-                                </table>
-                            </div>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-right:0;padding-left:0;word-break:break-word;">
+                        <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="border-collapse:collapse;border-spacing:0px;">
+                          <tbody>
+                          <tr>
+                            <td style="width:48px;">
+                              <img height="auto" src="https://app.netdata.cloud/static/email/img/configure_icon.png" style="border:0;display:block;outline:none;text-decoration:none;height:auto;width:100%;font-size:13px;" width="48">
+                            </td>
+                          </tr>
+                          </tbody>
+                        </table>
+                      </td>
+                    </tr>
+                    </tbody>
+                  </table>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td><td align="left" class="" style="vertical-align:top;width:400px;" ><![endif]-->
+          <div class="mj-column-px-400 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:400px;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
+              <tbody>
+              <tr>
+                <td style="vertical-align:top;padding-left:0;">
+                  <table border="0" cellpadding="0" cellspacing="0" role="presentation" style width="100%">
+                    <tbody>
+                    <tr>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-left:0;word-break:break-word;">
+                        <div style="font-family:Open Sans, sans-serif;font-size:16px;font-weight:700;line-height:1;text-align:left;color:#35414A;">Need to configure this alert?</div>
+                      </td>
+                    </tr>
+                    <tr>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-left:0;word-break:break-word;">
+                        <div style="font-family:Open Sans, sans-serif;font-size:14px;line-height:1.3;text-align:left;color:#35414A;"><span style="color: #00AB44"><a href="https://learn.netdata.cloud/docs/agent/health/notifications#:~:text=To%20edit%20it%20on%20your,have%20one%20or%20more%20destinations" class="link" style="color: #00AB44; text-decoration: none;">Edit</a></span> this alert's configuration file by logging into $s_host and running the following command:</div>
+                      </td>
+                    </tr>
+                    <tr>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-top:8px;padding-left:0;word-break:break-word;">
+                        <div style="font-family:Open Sans, sans-serif;font-size:12px;line-height:1.3;text-align:left;color:#35414A;">${edit_command} <br>
+                          The alarm to edit is at line {${line}}</div>
+                      </td>
+                    </tr>
+                    </tbody>
+                  </table>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table><![endif]-->
+        </td>
+      </tr>
+      </tbody>
+    </table>
+  </div>
+  <!--[if mso | IE]></td></tr></table><table align="center" border="0" cellpadding="0" cellspacing="0" class="history-wrapper-outlook" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+  <div class="history-wrapper" style="background: #F7F8F8; background-color: #F7F8F8; margin: 0px auto; max-width: 100%;">
+    <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="background:#F7F8F8;background-color:#F7F8F8;width:100%;">
+      <tbody>
+      <tr>
+        <td style="direction:ltr;font-size:0px;padding:0;padding-bottom:24px;text-align:center;">
+          <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="set-font-outlook" width="600px" ><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+          <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 600px;">
+            <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+              <tbody>
+              <tr>
+                <td style="direction:ltr;font-size:0px;padding:20px 0;padding-bottom:12px;text-align:center;">
+                  <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:600px;" ><![endif]-->
+                  <div class="mj-column-per-100 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:100%;">
+                    <table border="0" cellpadding="0" cellspacing="0" role="presentation" style="vertical-align:top;" width="100%">
+                      <tbody>
+                      <tr>
+                        <td align="left" style="font-size:0px;padding:10px 25px;word-break:break-word;">
+                          <div style="font-family:Open Sans, sans-serif;font-size:16px;line-height:1;text-align:center;color:#35414A;">The node has
+                            <span style="font-weight:600">${total_warnings} warning</span>
+                            and
+                            <span style="font-weight:600">${total_critical} critical</span>
+                            additional active alert(s)</div>
                         </td>
+                      </tr>
+                      </tbody>
+                    </table>
+                  </div>
+                  <!--[if mso | IE]></td></tr></table><![endif]-->
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          ${CRIT_ALARMS}
+          ${WARN_ALARMS}
+          <!--[if mso | IE]></td></tr></table></td></tr></table><![endif]-->
+        </td>
+      </tr>
+      </tbody>
+    </table>
+  </div>
+  <!--[if mso | IE]></td></tr></table><table align="center" border="0" cellpadding="0" cellspacing="0" class="set-font-outlook" style="width:600px;" width="600" ><tr><td style="line-height:0px;font-size:0px;mso-line-height-rule:exactly;"><![endif]-->
+  <div class="set-font" style="font-family: 'IBM Plex Sans', sans-serif; margin: 0px auto; max-width: 600px;">
+    <table align="center" border="0" cellpadding="0" cellspacing="0" role="presentation" style="width:100%;">
+      <tbody>
+      <tr>
+        <td style="direction:ltr;font-size:0px;padding:20px 0;text-align:center;">
+          <!--[if mso | IE]><table role="presentation" border="0" cellpadding="0" cellspacing="0"><tr><td class="" style="vertical-align:top;width:600px;" ><![endif]-->
+          <div class="mj-column-per-100 mj-outlook-group-fix" style="font-size:0px;text-align:left;direction:ltr;display:inline-block;vertical-align:top;width:100%;">
+            <table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
+              <tbody>
+              <tr>
+                <td style="vertical-align:top;padding-top:44px;padding-bottom:12px;">
+                  <table border="0" cellpadding="0" cellspacing="0" role="presentation" style width="100%">
+                    <tbody>
+                    <tr>
+                      <td align="left" style="font-size:0px;padding:10px 25px;padding-top:0;padding-bottom:0;word-break:break-word;">
+                        <div style="font-family:Open Sans, sans-serif;font-size:13px;line-height:1;text-align:center;color:#35414A;">© Netdata 2021 - The real-time performance and health monitoring</div>
+                      </td>
                     </tr>
                     </tbody>
-                </table>
-            </div>
+                  </table>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--[if mso | IE]></td></tr></table><![endif]-->
         </td>
-    </tr>
-    </tbody>
-</table>
+      </tr>
+      </tbody>
+    </table>
+  </div>
+  <!--[if mso | IE]></td></tr></table><![endif]-->
+</div>
 </body>
 </html>
 EOF
 
 send_email <<EOF
 To: ${to_email}
-Subject: ${host} ${status_message} - ${name//_/ } - ${chart}
+Subject: ${html_email_subject}
 MIME-Version: 1.0
 Content-Type: multipart/alternative; boundary="multipart-boundary"
 ${email_thread_headers}
+X-Netdata-Severity: ${status,,}
+X-Netdata-Alert-Name: $name
+X-Netdata-Chart: $chart
+X-Netdata-Family: $family
+X-Netdata-Classification: $classification
+X-Netdata-Host: $host
+X-Netdata-Role: $roles
 
 This is a MIME-encoded multipart message
 
diff --git a/health/notifications/custom/README.md b/health/notifications/custom/README.md
index 04376d555..bcb09ef53 100644
--- a/health/notifications/custom/README.md
+++ b/health/notifications/custom/README.md
@@ -7,7 +7,11 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/health/notificat
 
 Netdata allows you to send custom notifications to any endpoint you choose.
 
-To configure custom notifications, you will need to customize `health_alarm_notify.conf`. You can look at the other senders in `/usr/libexec/netdata/plugins.d/alarm-notify.sh` for examples of how to modify the `custom_sender()` function in `health_alarm_notify.conf`. Ensure you follow the instructions of changing any configuration file to [persist your configuration](/docs/configuration-guide.md#persist-my-configuration).
+To configure custom notifications, you will need to customize `health_alarm_notify.conf`. Open the file for editing
+using [`edit-config`](/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) from the [Netdata config
+directory](/docs/configure/nodes.md#the-netdata-config-directory), which is typically at `/etc/netdata`.
+
+You can look at the other senders in `/usr/libexec/netdata/plugins.d/alarm-notify.sh` for examples of how to modify the `custom_sender()` function in `health_alarm_notify.conf`.
 
 As with other notifications, you will also need to define the recipient list in `DEFAULT_RECIPIENT_CUSTOM` and/or the `role_recipients_custom` array.
 
diff --git a/health/notifications/email/README.md b/health/notifications/email/README.md
index ebd7f4b8c..82786fca1 100644
--- a/health/notifications/email/README.md
+++ b/health/notifications/email/README.md
@@ -11,9 +11,9 @@ Netdata sends all emails as user `netdata`, so make sure your `sendmail` works f
 
 email notifications look like this:
 
-![image](https://cloud.githubusercontent.com/assets/2662304/18407294/e9218c68-7714-11e6-8739-e4dd8a498252.png)
+![image](https://user-images.githubusercontent.com/1905463/133216974-a2ca0e4f-787b-4dce-b1b2-9996a8c5f718.png)
 
-## configuration
+## Configuration
 
 To edit `health_alarm_notify.conf` on your system run `/etc/netdata/edit-config health_alarm_notify.conf`.
 
@@ -38,6 +38,20 @@ Where `[ROLE]` is the role you want to test. The default (if you don't give a `[
 Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). 
 You can always find the location of the alarm-notify.sh script in `netdata.conf`.
 
+## Filtering
+
+Every notification email from the Netdata agent (both the plain text and the rich HTML versions) contains a set of custom email headers that can be used for filtering in an email client. Example:
+
+```
+X-Netdata-Severity: warning
+X-Netdata-Alert-Name: inbound_packets_dropped_ratio
+X-Netdata-Chart: net_packets.enp2s0
+X-Netdata-Family: enp2s0
+X-Netdata-Classification: System
+X-Netdata-Host: winterland
+X-Netdata-Role: sysadmin
+```
+
 ## Simple SMTP transport configuration
 
 If you want an alternative to `sendmail` in order to have a simple MTA configuration for sending emails and auth to an existing SMTP server, you can do the following:
diff --git a/health/notifications/health_alarm_notify.conf b/health/notifications/health_alarm_notify.conf
index e851a530c..873c7c353 100755
--- a/health/notifications/health_alarm_notify.conf
+++ b/health/notifications/health_alarm_notify.conf
@@ -253,13 +253,13 @@ DYNATRACE_TOKEN=""
 DYNATRACE_SPACE=""
 
 # Generate a Server Tag. On the Dynatrace Server go to Settings --> Tags --> Manually applied tags create the Tag
-# The NetData alarm will be sent as a Dynatrace Event to be correlated with all those hosts tagged with this Tag 
+# The Netdata alarm will be sent as a Dynatrace Event to be correlated with all those hosts tagged with this Tag
 # you created.
 # Required
 DYNATRACE_TAG_VALUE=""
 
 # Change this to what you want
-DYNATRACE_ANNOTATION_TYPE="NetData Alarm"
+DYNATRACE_ANNOTATION_TYPE="Netdata Alarm"
 
 # This can be CUSTOM_INFO, CUSTOM_ANNOTATION, CUSTOM_CONFIGURATION, CUSTOM_DEPLOYMENT
 # Applying default value
diff --git a/health/notifications/syslog/README.md b/health/notifications/syslog/README.md
index 456394d2f..360f6844d 100644
--- a/health/notifications/syslog/README.md
+++ b/health/notifications/syslog/README.md
@@ -17,7 +17,7 @@ netdata WARNING on hostname at Tue Apr 3 09:00:00 EDT 2018: disk_space._ out of
 
 System log targets are configured as recipients in [`/etc/netdata/health_alarm_notify.conf`](https://github.com/netdata/netdata/blob/36bedc044584dea791fd29455bdcd287c3306cb2/conf.d/health_alarm_notify.conf#L534) (to edit it on your system run `/etc/netdata/edit-config health_alarm_notify.conf`).
 
-You can als configure per-role targets in the same file a bit further down.
+You can also configure per-role targets in the same file a bit further down.
 
 Targets are defined as follows:
 
-- 
cgit v1.2.3