81 files changed, 2584 insertions, 49 deletions
diff --git a/src/health/health.d/adaptec_raid.conf b/src/health/health.d/adaptec_raid.conf
new file mode 100644
index 000000000..b01113b69
--- /dev/null
+++ b/src/health/health.d/adaptec_raid.conf
@@ -0,0 +1,29 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: adaptec_raid_ld_health_status
+       on: adaptecraid.logical_device_status
+    class: Errors
+     type: System
+component: RAID
+   lookup: average -1m unaligned percentage of ok
+    units: %
+    every: 10s
+     crit: $this < 100
+    delay: down 5m multiplier 1.5 max 1h
+  summary: Adaptec RAID LD (number ${label:ld_number}) health status
+     info: Adaptec RAID logical device (number ${label:ld_number} name ${label:ld_name}) health status is critical
+       to: sysadmin
+
+ template: adaptec_raid_pd_health_state
+       on: adaptecraid.physical_device_state
+    class: Errors
+     type: System
+component: RAID
+   lookup: average -1m unaligned percentage of ok
+    units: %
+    every: 10s
+     crit: $this < 100
+    delay: down 5m multiplier 1.5 max 1h
+  summary: Adaptec RAID PD (number ${label:pd_number}) health state
+     info: Adaptec RAID physical device (number ${label:pd_number} location ${label:location}) health state is critical
+       to: sysadmin
diff --git a/src/health/health.d/anomalies.conf b/src/health/health.d/anomalies.conf
new file mode 100644
index 000000000..80d63bb8d
--- /dev/null
+++ b/src/health/health.d/anomalies.conf
@@ -0,0 +1,25 @@
+## raise a warning alarm if an anomaly probability is consistently above 50%
+
+## "foreach" was removed, these alarms don't work anymore
+
+# template: anomalies_anomaly_probabilities
+#       on: anomalies.probability
+#    class: Errors
+#     type: Netdata
+#component: ML
+#   lookup: average -2m foreach *
+#    every: 1m
+#     warn: $this > 50
+#     info: average anomaly probability over the last 2 minutes
+
+# raise a warning alarm if an anomaly flag is consistently firing
+
+# template: anomalies_anomaly_flags
+#       on: anomalies.anomaly
+#    class: Errors
+#     type: Netdata
+#component: ML
+#   lookup: sum -2m foreach *
+#    every: 1m
+#     warn: $this > 10
+#     info: number of anomalies in the last 2 minutes
diff --git a/health/health.d/apcupsd.conf b/src/health/health.d/apcupsd.conf
index 90a72af19..5fd7aa112 100644
--- a/health/health.d/apcupsd.conf
+++ b/src/health/health.d/apcupsd.conf
@@ -5,8 +5,6 @@
     class: Utilization
      type: Power Supply
 component: UPS
-       os: *
-    hosts: *
    lookup: average -10m unaligned of percentage
     units: %
     every: 1m
@@ -23,8 +21,6 @@ component: UPS
     class: Errors
      type: Power Supply
 component: UPS
-       os: *
-    hosts: *
    lookup: average -60s unaligned of charge
     units: %
     every: 60s
diff --git a/health/health.d/bcache.conf b/src/health/health.d/bcache.conf
index 446173428..446173428 100644
--- a/health/health.d/bcache.conf
+++ b/src/health/health.d/bcache.conf
diff --git a/health/health.d/beanstalkd.conf b/src/health/health.d/beanstalkd.conf
index 0d37f28e0..0d37f28e0 100644
--- a/health/health.d/beanstalkd.conf
+++ b/src/health/health.d/beanstalkd.conf
diff --git a/health/health.d/boinc.conf b/src/health/health.d/boinc.conf
index 092a56845..6fd987de1 100644
--- a/health/health.d/boinc.conf
+++ b/src/health/health.d/boinc.conf
@@ -1,4 +1,4 @@
-# Alarms for various BOINC issues.
+# you can disable an alarm notification by setting the 'to' line to: silent
 
 # Warn on any compute errors encountered.
  template: boinc_compute_errors
@@ -6,8 +6,6 @@
     class: Errors
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of comperror
     units: tasks
     every: 1m
@@ -23,8 +21,6 @@ component: BOINC
     class: Errors
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of upload_failed
     units: tasks
     every: 1m
@@ -40,8 +36,6 @@ component: BOINC
     class: Utilization
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of total
     units: tasks
     every: 1m
@@ -57,8 +51,6 @@ component: BOINC
     class: Utilization
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of active
      calc: ($boinc_total_tasks >= 1) ? ($this) : (inf)
     units: tasks
diff --git a/health/health.d/btrfs.conf b/src/health/health.d/btrfs.conf
index 1557a5941..f43f600c0 100644
--- a/health/health.d/btrfs.conf
+++ b/src/health/health.d/btrfs.conf
@@ -1,11 +1,10 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
 
  template: btrfs_allocated
        on: btrfs.disk
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free))
     units: %
     every: 10s
@@ -20,8 +19,6 @@ component: File system
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: $used * 100 / ($used + $free)
     units: %
     every: 10s
@@ -37,8 +34,6 @@ component: File system
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: ($used + $reserved) * 100 / ($used + $free + $reserved)
     units: %
     every: 10s
@@ -54,8 +49,6 @@ component: File system
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: $used * 100 / ($used + $free)
     units: %
     every: 10s
@@ -71,8 +64,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of read_errs
      warn: $this > 0
@@ -86,8 +77,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of write_errs
      crit: $this > 0
@@ -101,8 +90,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of flush_errs
      crit: $this > 0
@@ -116,8 +103,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of corruption_errs
      warn: $this > 0
@@ -131,8 +116,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of generation_errs
      warn: $this > 0
diff --git a/health/health.d/ceph.conf b/src/health/health.d/ceph.conf
index 44d351338..44d351338 100644
--- a/health/health.d/ceph.conf
+++ b/src/health/health.d/ceph.conf
diff --git a/src/health/health.d/cgroups.conf b/src/health/health.d/cgroups.conf
new file mode 100644
index 000000000..52ca02624
--- /dev/null
+++ b/src/health/health.d/cgroups.conf
@@ -0,0 +1,67 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+   template: cgroup_10min_cpu_usage
+         on: cgroup.cpu_limit
+      class: Utilization
+       type: Cgroups
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (85) : (95))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Cgroup ${label:cgroup_name} CPU utilization
+       info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes
+         to: silent
+
+   template: cgroup_ram_in_use
+         on: cgroup.mem_usage
+      class: Utilization
+       type: Cgroups
+  component: Memory
+host labels: _os=linux
+       calc: ($ram) * 100 / $memory_limit
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (80) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (98))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Cgroup ${label:cgroup_name} memory utilization
+       info: Cgroup ${label:cgroup_name} memory utilization
+         to: silent
+
+# ---------------------------------K8s containers--------------------------------------------
+
+   template: k8s_cgroup_10min_cpu_usage
+         on: k8s.cgroup.cpu_limit
+      class: Utilization
+       type: Cgroups
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (75) : (85))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization
+       info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
+             average CPU utilization over the last 10 minutes
+         to: silent
+
+   template: k8s_cgroup_ram_in_use
+         on: k8s.cgroup.mem_usage
+      class: Utilization
+       type: Cgroups
+  component: Memory
+host labels: _os=linux
+       calc: ($ram) * 100 / $memory_limit
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (80) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (98))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization
+       info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
+             memory utilization
+         to: silent
diff --git a/src/health/health.d/clickhouse.conf b/src/health/health.d/clickhouse.conf
new file mode 100644
index 000000000..e24f71830
--- /dev/null
+++ b/src/health/health.d/clickhouse.conf
@@ -0,0 +1,140 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: clickhouse_restarted
+       on: clickhouse.uptime
+    class: Errors
+     type: Database
+component: ClickHouse
+     calc: $uptime
+    units: seconds
+    every: 10s
+     warn: $this > 1 AND $this < 180
+  summary: ClickHouse restart detected
+     info: ClickHouse has recently been restarted
+       to: silent
+
+ template: clickhouse_queries_preempted
+       on: clickhouse.queries_preempted
+    class: Workload
+     type: Database
+component: ClickHouse
+   lookup: max -1m unaligned
+    units: preempted_queries
+    every: 10s
+     warn: $this > 0
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse preempted queries detected
+     info: ClickHouse has queries that are stopped and waiting due to priority setting
+       to: dba
+
+ template: clickhouse_long_running_query
+       on: clickhouse.longest_running_query_time
+    class: Latency
+     type: Database
+component: ClickHouse
+   lookup: max -1m unaligned
+    units: seconds
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? (300) : (600))
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse long-running query detected
+     info: ClickHouse has a long-running query exceeding the threshold
+       to: dba
+
+ template: clickhouse_rejected_inserts
+       on: clickhouse.rejected_inserts
+    class: Workload
+     type: Database
+component: ClickHouse
+   lookup: sum -1m unaligned
+    units: rejected_inserts
+    every: 10s
+     warn: $this > 0
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse rejected INSERT queries detected
+     info: ClickHouse has INSERT queries that are rejected due to high number of active data parts for partition in a MergeTree
+       to: dba
+
+ template: clickhouse_delayed_inserts
+       on: clickhouse.delayed_inserts
+    class: Workload
+     type: Database
+component: ClickHouse
+   lookup: sum -1m unaligned
+    units: delayed_inserts
+    every: 10s
+     warn: $this > 0
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse delayed INSERT queries detected
+     info: ClickHouse has INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree
+       to: silent
+
+ template: clickhouse_replication_lag
+       on: clickhouse.replicas_max_absolute_delay
+    class: Workload
+     type: Database
+component: ClickHouse
+   lookup: avg -1m unaligned
+    units: seconds
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? (250) : (300))
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse high replication lag detected
+     info: ClickHouse is experiencing replication lag greater than 5 minutes
+       to: dba
+
+ template: clickhouse_replicated_readonly_tables
+       on: clickhouse.replicated_readonly_tables
+    class: Errors
+     type: Database
+component: ClickHouse
+   lookup: max -1m unaligned
+    units: readonly_tables
+    every: 10s
+     warn: $this > 0
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse replicated tables in readonly state detected
+     info: ClickHouse has replicated tables in readonly state due to ZooKeeper session loss/startup without ZooKeeper configured
+       to: dba
+
+ template: clickhouse_max_part_count_for_partition
+       on: clickhouse.max_part_count_for_partition
+    class: Workload
+     type: Database
+component: ClickHouse
+   lookup: avg -1m unaligned
+    units: parts
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? (200) : (300))
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse high parts/partition detected
+     info: ClickHouse high number of parts per partition
+       to: dba
+
+ template: clickhouse_distributed_connections_failures
+       on: clickhouse.distributed_connections_fail_exhausted_retries
+    class: Errors
+     type: Database
+component: ClickHouse
+   lookup: sum -1m unaligned
+    units: failures
+    every: 10s
+     warn: $this > 0
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse distributed connections failures detected
+     info: ClickHouse has failed distributed connections after exhausting all retry attempts
+       to: dba
+
+ template: clickhouse_distributed_files_to_insert
+       on: clickhouse.distributed_files_to_insert
+    class: Workload
+     type: Database
+component: ClickHouse
+   lookup: max -1m unaligned
+    units: files
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? (40) : (80))
+    delay: down 5m multiplier 1.5 max 1h
+  summary: ClickHouse high files to insert detected
+     info: ClickHouse high number of pending files to process for asynchronous insertion into Distributed tables
+       to: silent
diff --git a/health/health.d/cockroachdb.conf b/src/health/health.d/cockroachdb.conf
index 60f178354..60f178354 100644
--- a/health/health.d/cockroachdb.conf
+++ b/src/health/health.d/cockroachdb.conf
diff --git a/health/health.d/consul.conf b/src/health/health.d/consul.conf
index 8b414a26d..8b414a26d 100644
--- a/health/health.d/consul.conf
+++ b/src/health/health.d/consul.conf
diff --git a/src/health/health.d/cpu.conf b/src/health/health.d/cpu.conf
new file mode 100644
index 000000000..29f541e56
--- /dev/null
+++ b/src/health/health.d/cpu.conf
@@ -0,0 +1,65 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+   template: 10min_cpu_usage
+         on: system.cpu
+      class: Utilization
+       type: System
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned of user,system,softirq,irq,guest
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (75) : (85))
+       crit: $this > (($status == $CRITICAL) ? (85) : (95))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System CPU utilization
+       info: Average CPU utilization over the last 10 minutes (excluding iowait, nice and steal)
+         to: sysadmin
+
+   template: 10min_cpu_iowait
+         on: system.cpu
+      class: Utilization
+       type: System
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned of iowait
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (20) : (40))
+      delay: up 30m down 30m multiplier 1.5 max 2h
+    summary: System CPU iowait time
+       info: Average CPU iowait time over the last 10 minutes
+         to: silent
+
+   template: 20min_steal_cpu
+         on: system.cpu
+      class: Latency
+       type: System
+  component: CPU
+host labels: _os=linux
+     lookup: average -20m unaligned of steal
+      units: %
+      every: 5m
+       warn: $this > (($status >= $WARNING)  ? (5)  : (10))
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System CPU steal time
+       info: Average CPU steal time over the last 20 minutes
+         to: silent
+
+## FreeBSD
+   template: 10min_cpu_usage
+         on: system.cpu
+      class: Utilization
+       type: System
+  component: CPU
+host labels: _os=freebsd
+     lookup: average -10m unaligned of user,system,interrupt
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (75) : (85))
+       crit: $this > (($status == $CRITICAL) ? (85) : (95))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System CPU utilization
+       info: Average CPU utilization over the last 10 minutes (excluding nice)
+         to: sysadmin
diff --git a/health/health.d/dbengine.conf b/src/health/health.d/dbengine.conf
index 0a70d2e8f..5585a9533 100644
--- a/health/health.d/dbengine.conf
+++ b/src/health/health.d/dbengine.conf
@@ -1,4 +1,3 @@
-
 # you can disable an alarm notification by setting the 'to' line to: silent
 
     alarm: 10min_dbengine_global_fs_errors
@@ -6,8 +5,6 @@
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of fs_errors
     units: errors
     every: 10s
@@ -22,8 +19,6 @@ component: DB engine
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of io_errors
     units: errors
     every: 10s
@@ -38,8 +33,6 @@ component: DB engine
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
     units: errors
     every: 10s
@@ -55,8 +48,6 @@ component: DB engine
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of flushing_pressure_deletions
     units: pages
     every: 10s
diff --git a/src/health/health.d/disks.conf b/src/health/health.d/disks.conf
new file mode 100644
index 000000000..fe96837fb
--- /dev/null
+++ b/src/health/health.d/disks.conf
@@ -0,0 +1,161 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# -----------------------------------------------------------------------------
+# low disk space
+
+# checking the latest collected values
+# raise an alarm if the disk is low on
+# available disk space
+
+    template: disk_space_usage
+          on: disk.space
+       class: Utilization
+        type: System
+   component: Disk
+ host labels: _os=linux freebsd
+chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
+        calc: $used * 100 / ($avail + $used)
+       units: %
+       every: 1m
+        warn: $this > (($status >= $WARNING)  ? (80) : (90))
+        crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5
+       delay: up 1m down 15m multiplier 1.5 max 1h
+     summary: Disk ${label:mount_point} space usage
+        info: Total space utilization of disk ${label:mount_point}
+          to: sysadmin
+
+    template: disk_inode_usage
+          on: disk.inodes
+       class: Utilization
+        type: System
+   component: Disk
+ host labels: _os=linux freebsd
+chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
+        calc: $used * 100 / ($avail + $used)
+       units: %
+       every: 1m
+        warn: $this > (($status >= $WARNING)  ? (80) : (90))
+        crit: $this > (($status == $CRITICAL) ? (90) : (98))
+       delay: up 1m down 15m multiplier 1.5 max 1h
+     summary: Disk ${label:mount_point} inode usage
+        info: Total inode utilization of disk ${label:mount_point}
+          to: sysadmin
+
+
+# -----------------------------------------------------------------------------
+# disk fill rate
+
+# calculate the rate the disk fills
+# use as base, the available space change
+# during the last hour
+
+# this is just a calculation - it has no alarm
+# we will use it in the next template to find
+# the hours remaining
+
+   template: disk_fill_rate
+         on: disk.space
+host labels: _os=linux freebsd
+     lookup: min -10m at -50m unaligned of avail
+       calc: ($this - $avail) / (($now - $after) / 3600)
+      every: 1m
+      units: GB/hour
+       info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
+
+# calculate the hours remaining
+# if the disk continues to fill in this rate
+
+   template: out_of_disk_space_time
+         on: disk.space
+host labels: _os=linux freebsd
+       calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
+      units: hours
+      every: 10s
+       warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
+       crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:mount_point} estimation of lack of space
+       info: Estimated time the disk ${label:mount_point} will run out of space, if the system continues to add data with the rate of the last hour
+         to: silent
+
+
+# -----------------------------------------------------------------------------
+# disk inode fill rate
+
+# calculate the rate the disk inodes are allocated
+# use as base, the available inodes change
+# during the last hour
+
+# this is just a calculation - it has no alarm
+# we will use it in the next template to find
+# the hours remaining
+
+   template: disk_inode_rate
+         on: disk.inodes
+host labels: _os=linux freebsd
+     lookup: min -10m at -50m unaligned of avail
+       calc: ($this - $avail) / (($now - $after) / 3600)
+      every: 1m
+      units: inodes/hour
+       info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
+
+# calculate the hours remaining
+# if the disk inodes are allocated
+# in this rate
+
+   template: out_of_disk_inodes_time
+         on: disk.inodes
+host labels: _os=linux freebsd
+       calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
+      units: hours
+      every: 10s
+       warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
+       crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:mount_point} estimation of lack of inodes
+       info: Estimated time the disk ${label:mount_point} will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
+         to: silent
+
+
+# -----------------------------------------------------------------------------
+# disk congestion
+
+# raise an alarm if the disk is congested
+# by calculating the average disk utilization
+# for the last 10 minutes
+
+   template: 10min_disk_utilization
+         on: disk.util
+      class: Utilization
+       type: System
+  component: Disk
+host labels: _os=linux freebsd
+     lookup: average -10m unaligned
+      units: %
+      every: 1m
+       warn: $this > 98 * (($status >= $WARNING)  ? (0.7) : (1))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:device} utilization
+       info: Average percentage of time ${label:device} disk was busy over the last 10 minutes
+         to: silent
+
+
+# raise an alarm if the disk backlog
+# is above 5000ms (5s) per second
+# for 10 minutes
+# (i.e. the disk cannot catch up)
+
+   template: 10min_disk_backlog
+         on: disk.backlog
+      class: Latency
+       type: System
+  component: Disk
+host labels: _os=linux freebsd
+     lookup: average -10m unaligned
+      units: ms
+      every: 1m
+       warn: $this > 5000 * (($status >= $WARNING)  ? (0.7) : (1))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:device} backlog
+       info: Average backlog size of the ${label:device} disk over the last 10 minutes
+         to: silent
diff --git a/health/health.d/dns_query.conf b/src/health/health.d/dns_query.conf
index 756c6a1b6..756c6a1b6 100644
--- a/health/health.d/dns_query.conf
+++ b/src/health/health.d/dns_query.conf
diff --git a/health/health.d/dnsmasq_dhcp.conf b/src/health/health.d/dnsmasq_dhcp.conf
index f6ef01940..f6ef01940 100644
--- a/health/health.d/dnsmasq_dhcp.conf
+++ b/src/health/health.d/dnsmasq_dhcp.conf
diff --git a/health/health.d/docker.conf b/src/health/health.d/docker.conf
index 668614d4d..668614d4d 100644
--- a/health/health.d/docker.conf
+++ b/src/health/health.d/docker.conf
diff --git a/health/health.d/elasticsearch.conf b/src/health/health.d/elasticsearch.conf
index 600840c58..600840c58 100644
--- a/health/health.d/elasticsearch.conf
+++ b/src/health/health.d/elasticsearch.conf
diff --git a/src/health/health.d/entropy.conf b/src/health/health.d/entropy.conf
new file mode 100644
index 000000000..f7671353c
--- /dev/null
+++ b/src/health/health.d/entropy.conf
@@ -0,0 +1,19 @@
+
+# check if entropy is too low
+# the alarm is checked every 5 minutes
+# and examines the last 5 minutes of data
+
+      alarm: lowest_entropy
+         on: system.entropy
+      class: Utilization
+       type: System
+  component: Cryptography
+host labels: _os=linux
+     lookup: min -5m unaligned
+      units: entries
+      every: 5m
+       warn: $this < (($status >= $WARNING) ? (200) : (100))
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System entropy pool number of entries
+       info: Minimum number of entries in the random numbers pool in the last 5 minutes
+         to: silent
diff --git a/health/health.d/exporting.conf b/src/health/health.d/exporting.conf
index c0320193c..c0320193c 100644
--- a/health/health.d/exporting.conf
+++ b/src/health/health.d/exporting.conf
diff --git a/src/health/health.d/file_descriptors.conf b/src/health/health.d/file_descriptors.conf
new file mode 100644
index 000000000..b4b4500e3
--- /dev/null
+++ b/src/health/health.d/file_descriptors.conf
@@ -0,0 +1,30 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+   template: system_file_descriptors_utilization
+         on: system.file_nr_utilization
+      class: Utilization
+       type: System
+  component: Processes
+     lookup: max -1m unaligned
+      units: %
+      every: 1m
+       crit: $this > 90
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System open file descriptors utilization
+       info: System-wide utilization of open files
+         to: sysadmin
+
+   template: apps_group_file_descriptors_utilization
+         on: app.fds_open_limit
+      class: Utilization
+       type: System
+  component: Process
+host labels: _os=linux
+     lookup: max -10s unaligned
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (85) : (95))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: App group ${label:app_group} file descriptors utilization
+       info: Open files percentage against the processes limits, among all PIDs in application group
+         to: sysadmin
diff --git a/health/health.d/gearman.conf b/src/health/health.d/gearman.conf
index 78e1165d1..78e1165d1 100644
--- a/health/health.d/gearman.conf
+++ b/src/health/health.d/gearman.conf
diff --git a/health/health.d/geth.conf b/src/health/health.d/geth.conf
index 361b6b41f..361b6b41f 100644
--- a/health/health.d/geth.conf
+++ b/src/health/health.d/geth.conf
diff --git a/src/health/health.d/go.d.plugin.conf b/src/health/health.d/go.d.plugin.conf
new file mode 100644
index 000000000..eb951448b
--- /dev/null
+++ b/src/health/health.d/go.d.plugin.conf
@@ -0,0 +1,17 @@
+# make sure go.d.plugin data collection job is running
+
+   template: go.d_job_last_collected_secs
+         on: netdata.go_plugin_execution_time
+      class: Errors
+       type: Netdata
+  component: go.d.plugin
+host labels: _hostname=!*
+       calc: $now - $last_collected_t
+      units: seconds ago
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+       crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+      delay: down 5m multiplier 1.5 max 1h
+    summary: Go.d plugin last collection
+       info: Number of seconds since the last successful data collection
+         to: webmaster
diff --git a/health/health.d/haproxy.conf b/src/health/health.d/haproxy.conf
index 66a488fa4..66a488fa4 100644
--- a/health/health.d/haproxy.conf
+++ b/src/health/health.d/haproxy.conf
diff --git a/health/health.d/hdfs.conf b/src/health/health.d/hdfs.conf
index 566e815aa..566e815aa 100644
--- a/health/health.d/hdfs.conf
+++ b/src/health/health.d/hdfs.conf
diff --git a/src/health/health.d/httpcheck.conf b/src/health/health.d/httpcheck.conf
new file mode 100644
index 000000000..3d32dedbf
--- /dev/null
+++ b/src/health/health.d/httpcheck.conf
@@ -0,0 +1,88 @@
+
+# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
+ template: httpcheck_web_service_up
+       on: httpcheck.status
+    class: Utilization
+     type: Web Server
+component: HTTP endpoint
+   lookup: average -1m unaligned percentage of success
+     calc: ($this < 75) ? (0) : ($this)
+    every: 5s
+    units: up/down
+     info: HTTP check endpoint ${label:url} liveness status
+       to: silent
+
+ template: httpcheck_web_service_bad_content
+       on: httpcheck.status
+    class: Workload
+     type: Web Server
+component: HTTP endpoint
+   lookup: average -5m unaligned percentage of bad_content
+    every: 10s
+    units: %
+     warn: $this >= 10 AND $this < 40
+     crit: $this >= 40
+    delay: down 5m multiplier 1.5 max 1h
+  summary: HTTP check for ${label:url} unexpected content
+     info: Percentage of HTTP responses from ${label:url} with unexpected content in the last 5 minutes
+       to: webmaster
+
+ template: httpcheck_web_service_bad_status
+       on: httpcheck.status
+    class: Workload
+     type: Web Server
+component: HTTP endpoint
+   lookup: average -5m unaligned percentage of bad_status
+    every: 10s
+    units: %
+     warn: $this >= 10 AND $this < 40
+     crit: $this >= 40
+    delay: down 5m multiplier 1.5 max 1h
+  summary: HTTP check for ${label:url} unexpected status
+     info: Percentage of HTTP responses from ${label:url} with unexpected status in the last 5 minutes
+       to: webmaster
+
+ template: httpcheck_web_service_bad_header
+       on: httpcheck.status
+    class: Errors
+     type: Web Server
+component: HTTP endpoint
+   lookup: average -5m unaligned percentage of bad_header
+    every: 10s
+    units: %
+     warn: $this >= 10 AND $this < 40
+     crit: $this >= 40
+    delay: down 5m multiplier 1.5 max 1h
+  summary: HTTP check for ${label:url} unexpected header
+     info: Percentage of HTTP responses from ${label:url} with unexpected header in the last 5 minutes
+       to: webmaster
+
+ template: httpcheck_web_service_timeouts
+       on: httpcheck.status
+    class: Latency
+     type: Web Server
+component: HTTP endpoint
+   lookup: average -5m unaligned percentage of timeout
+    every: 10s
+    units: %
+     warn: $this >= 10 AND $this < 40
+     crit: $this >= 40
+    delay: down 5m multiplier 1.5 max 1h
+  summary: HTTP check for ${label:url} timeouts
+     info: Percentage of timed-out HTTP requests to ${label:url} in the last 5 minutes
+       to: webmaster
+
+ template: httpcheck_web_service_no_connection
+       on: httpcheck.status
+    class: Errors
+     type: Web Server
+component: HTTP endpoint
+   lookup: average -5m unaligned percentage of no_connection
+    every: 10s
+    units: %
+     warn: $this >= 10 AND $this < 40
+     crit: $this >= 40
+    delay: down 5m multiplier 1.5 max 1h
+  summary: HTTP check for ${label:url} failed requests
+     info: Percentage of failed HTTP requests to ${label:url} in the last 5 minutes
+       to: webmaster
diff --git a/health/health.d/ioping.conf b/src/health/health.d/ioping.conf
index 6d832bf00..6d832bf00 100644
--- a/health/health.d/ioping.conf
+++ b/src/health/health.d/ioping.conf
diff --git a/src/health/health.d/ipc.conf b/src/health/health.d/ipc.conf
new file mode 100644
index 000000000..f46cf4285
--- /dev/null
+++ b/src/health/health.d/ipc.conf
@@ -0,0 +1,32 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+      alarm: semaphores_used
+         on: system.ipc_semaphores
+      class: Utilization
+       type: System
+  component: IPC
+host labels: _os=linux
+       calc: $semaphores * 100 / $ipc_semaphores_max
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (70) : (80))
+      delay: down 5m multiplier 1.5 max 1h
+    summary: IPC semaphores used
+       info: IPC semaphore utilization
+         to: sysadmin
+
+      alarm: semaphore_arrays_used
+         on: system.ipc_semaphore_arrays
+      class: Utilization
+       type: System
+  component: IPC
+host labels: _os=linux
+       calc: $arrays * 100 / $ipc_semaphores_arrays_max
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (70) : (80))
+      delay: down 5m multiplier 1.5 max 1h
+    summary: IPC semaphore arrays used
+       info: IPC semaphore arrays utilization
+         to: sysadmin
diff --git a/health/health.d/ipfs.conf b/src/health/health.d/ipfs.conf
index 4dfee3c7f..4dfee3c7f 100644
--- a/health/health.d/ipfs.conf
+++ b/src/health/health.d/ipfs.conf
diff --git a/health/health.d/ipmi.conf b/src/health/health.d/ipmi.conf
index cec2320a9..cec2320a9 100644
--- a/health/health.d/ipmi.conf
+++ b/src/health/health.d/ipmi.conf
diff --git a/src/health/health.d/isc_dhcpd.conf b/src/health/health.d/isc_dhcpd.conf
new file mode 100644
index 000000000..3f6e9d5d4
--- /dev/null
+++ b/src/health/health.d/isc_dhcpd.conf
@@ -0,0 +1,15 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: isc_dhcpd_dhcp_pool_utilization
+       on: isc_dhcpd.dhcp_pool_utilization
+    class: Utilization
+     type: DHCP
+component: DHCPd
+    every: 10s
+    units: %
+     calc: $used
+     warn: $this > ( ($status >= $WARNING ) ? ( 80 ) : ( 90 ) )
+    delay: down 5m
+  summary: ISC DHCP pool ${label:dhcp_pool_name} utilization
+     info: ISC DHCP pool ${label:dhcp_pool_name} utilization
+       to: sysadmin
diff --git a/health/health.d/kubelet.conf b/src/health/health.d/kubelet.conf
index 8adf5f7d4..8adf5f7d4 100644
--- a/health/health.d/kubelet.conf
+++ b/src/health/health.d/kubelet.conf
diff --git a/health/health.d/linux_power_supply.conf b/src/health/health.d/linux_power_supply.conf
index b0d35e752..b0d35e752 100644
--- a/health/health.d/linux_power_supply.conf
+++ b/src/health/health.d/linux_power_supply.conf
diff --git a/src/health/health.d/load.conf b/src/health/health.d/load.conf
new file mode 100644
index 000000000..e639c9ad5
--- /dev/null
+++ b/src/health/health.d/load.conf
@@ -0,0 +1,67 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# Calculate the base trigger point for the load average alarms.
+# This is the maximum number of CPUs in the system over the past 1
+# minute, with a special case for a single CPU of setting the trigger at 2.
+      alarm: load_cpu_number
+         on: system.load
+      class: Utilization
+       type: System
+  component: Load
+host labels: _os=linux
+       calc: ($active_processors == nan or $active_processors == 0) ? (nan) : ( ($active_processors < 2) ? ( 2 ) : ( $active_processors ) )
+      units: cpus
+      every: 1m
+       info: Number of active CPU cores in the system
+
+# Send alarms if the load average is unusually high.
+# These intentionally _do not_ calculate the average over the sampled
+# time period because the values being checked already are averages.
+
+      alarm: load_average_15
+         on: system.load
+      class: Utilization
+       type: System
+  component: Load
+host labels: _os=linux
+     lookup: max -1m unaligned of load15
+       calc: ($load_cpu_number == nan) ? (nan) : ($this)
+      units: load
+      every: 1m
+       warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 175 : 200)
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System load average (15 minutes)
+       info: System load average for the past 15 minutes
+         to: silent
+
+      alarm: load_average_5
+         on: system.load
+      class: Utilization
+       type: System
+  component: Load
+host labels: _os=linux
+     lookup: max -1m unaligned of load5
+       calc: ($load_cpu_number == nan) ? (nan) : ($this)
+      units: load
+      every: 1m
+       warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 350 : 400)
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System load average (5 minutes)
+       info: System load average for the past 5 minutes
+         to: silent
+
+      alarm: load_average_1
+         on: system.load
+      class: Utilization
+       type: System
+  component: Load
+host labels: _os=linux
+     lookup: max -1m unaligned of load1
+       calc: ($load_cpu_number == nan) ? (nan) : ($this)
+      units: load
+      every: 1m
+       warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 700 : 800)
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System load average (1 minute)
+       info: System load average for the past 1 minute
+         to: silent
diff --git a/src/health/health.d/lvm.conf b/src/health/health.d/lvm.conf
new file mode 100644
index 000000000..570aa14d3
--- /dev/null
+++ b/src/health/health.d/lvm.conf
@@ -0,0 +1,31 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+    template: lvm_lv_data_space_utilization
+          on: lvm.lv_data_space_utilization
+       class: Utilization
+        type: System
+   component: LVM
+        calc: $utilization
+       units: %
+       every: 1m
+        warn: $this > (($status >= $WARNING ) ? (85) : (90))
+        crit: ($this > (($status == $CRITICAL) ? (90) : (98)))
+       delay: down 5m multiplier 1.5 max 1h
+     summary: LVM LV ${label:lv_name} on VG ${label:vg_name} high data space usage
+        info: LVM logical volume high data space usage (LV ${label:lv_name} VG ${label:vg_name} Type ${label:volume_type})
+          to: sysadmin
+
+    template: lvm_lv_metadata_space_utilization
+          on: lvm.lv_metadata_space_utilization
+       class: Utilization
+        type: System
+   component: LVM
+        calc: $utilization
+       units: %
+       every: 1m
+        warn: $this > (($status >= $WARNING ) ? (85) : (90))
+        crit: ($this > (($status == $CRITICAL) ? (90) : (98)))
+       delay: down 5m multiplier 1.5 max 1h
+     summary: LVM LV ${label:lv_name} on VG ${label:vg_name} high metadata space usage
+        info: LVM logical volume high metadata space usage (LV ${label:lv_name} VG ${label:vg_name} Type ${label:volume_type})
+          to: sysadmin
diff --git a/health/health.d/mdstat.conf b/src/health/health.d/mdstat.conf
index 90f97d851..90f97d851 100644
--- a/health/health.d/mdstat.conf
+++ b/src/health/health.d/mdstat.conf
diff --git a/src/health/health.d/megacli.conf b/src/health/health.d/megacli.conf
new file mode 100644
index 000000000..27721fa9a
--- /dev/null
+++ b/src/health/health.d/megacli.conf
@@ -0,0 +1,77 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# Adapters (controllers)
+
+ template: megacli_adapter_health_state
+       on: megacli.adapter_health_state
+    class: Errors
+     type: System
+component: RAID
+   lookup: average -1m unaligned percentage of optimal
+    units: %
+    every: 10s
+     crit: $this < 100
+    delay: down 5m multiplier 2 max 10m
+  summary: MegaCLI adapter ${label:adapter_number} health
+     info: MegaCLI adapter ${label:adapter_number} is in the degraded state
+       to: sysadmin
+
+# Physical Drives
+
+ template: megacli_phys_drive_media_errors
+       on: megacli.phys_drive_media_errors
+    class: Errors
+     type: System
+component: RAID
+   lookup: sum -10s
+    units: media errors
+    every: 10s
+     warn: $this > 0
+    delay: up 1m down 5m multiplier 2 max 10m
+  summary: MegaCLI PD adapter ${label:adapter_number} slot ${label:slot_number} media errors
+     info: MegaCLI physical drive adapter ${label:adapter_number} slot ${label:slot_number} media errors
+       to: sysadmin
+
+ template: megacli_phys_drive_predictive_failures
+       on: megacli.phys_drive_predictive_failures
+    class: Errors
+     type: System
+component: RAID
+   lookup: sum -10s
+    units: failures
+    every: 10s
+     warn: $this > 0
+    delay: up 1m down 5m multiplier 2 max 10m
+  summary: MegaCLI PD adapter ${label:adapter_number} slot ${label:slot_number} predictive failures
+     info: MegaCLI physical drive (adapter ${label:adapter_number} slot ${label:slot_number}) predictive failures
+       to: sysadmin
+
+# Backup Battery Unit
+
+ template: megacli_bbu_charge
+       on: megacli.bbu_charge
+    class: Workload
+     type: System
+component: RAID
+   lookup: average -10s
+    units: percent
+    every: 10s
+     warn: $this <= (($status >= $WARNING)  ? (85) : (80))
+     crit: $this <= (($status == $CRITICAL)  ? (50) : (40))
+  summary: MegaCLI BBU charge
+     info: MegaCLI Backup Battery Unit (adapter ${label:adapter_number}) average charge over the last 10 seconds
+       to: sysadmin
+
+ template: megacli_bbu_recharge_cycles
+       on: megacli.bbu_recharge_cycles
+    class: Workload
+     type: System
+component: RAID
+   lookup: average -10s
+    units: cycles
+    every: 10s
+     warn: $this >= 100
+     crit: $this >= 500
+  summary: MegaCLI BBU recharge cycles
+     info: MegaCLI Backup Battery Unit (adapter ${label:adapter_number}) recharge cycles
+       to: sysadmin
diff --git a/health/health.d/memcached.conf b/src/health/health.d/memcached.conf
index 77ca0afa9..77ca0afa9 100644
--- a/health/health.d/memcached.conf
+++ b/src/health/health.d/memcached.conf
diff --git a/src/health/health.d/memory.conf b/src/health/health.d/memory.conf
new file mode 100644
index 000000000..2b2b4e4da
--- /dev/null
+++ b/src/health/health.d/memory.conf
@@ -0,0 +1,76 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+      alarm: 1hour_memory_hw_corrupted
+         on: mem.hwcorrupt
+      class: Errors
+       type: System
+  component: Memory
+host labels: _os=linux
+       calc: $HardwareCorrupted
+      units: MB
+      every: 10s
+       warn: $this > 0
+      delay: down 1h multiplier 1.5 max 1h
+    summary: System corrupted memory
+       info: Amount of memory corrupted due to a hardware failure
+         to: sysadmin
+
+## ECC Controller
+
+   template: ecc_memory_mc_correctable
+         on: mem.edac_mc_errors
+      class: Errors
+       type: System
+  component: Memory
+host labels: _os=linux
+       calc: $correctable + $correctable_noinfo
+      units: errors
+      every: 1m
+       warn: $this > 0
+    summary: System ECC memory ${label:controller} correctable errors
+       info: Memory controller ${label:controller} ECC correctable errors
+         to: sysadmin
+
+   template: ecc_memory_mc_uncorrectable
+         on: mem.edac_mc_errors
+      class: Errors
+       type: System
+  component: Memory
+host labels: _os=linux
+       calc: $uncorrectable + $uncorrectable_noinfo
+      units: errors
+      every: 1m
+       crit: $this > 0
+    summary: System ECC memory ${label:controller} uncorrectable errors
+       info: Memory controller ${label:controller} ECC uncorrectable errors
+         to: sysadmin
+
+## ECC DIMM
+
+   template: ecc_memory_dimm_correctable
+         on: mem.edac_mc_dimm_errors
+      class: Errors
+       type: System
+  component: Memory
+host labels: _os=linux
+       calc: $correctable
+      units: errors
+      every: 1m
+       warn: $this > 0
+    summary: System ECC memory DIMM ${label:dimm} correctable errors
+       info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors
+         to: sysadmin
+
+   template: ecc_memory_dimm_uncorrectable
+         on: mem.edac_mc_dimm_errors
+      class: Errors
+       type: System
+  component: Memory
+host labels: _os=linux
+       calc: $uncorrectable
+      units: errors
+      every: 1m
+       crit: $this > 0
+    summary: System ECC memory DIMM ${label:dimm} uncorrectable errors
+       info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors
+         to: sysadmin
diff --git a/health/health.d/ml.conf b/src/health/health.d/ml.conf
index aef9b0368..b6a5df6dd 100644
--- a/health/health.d/ml.conf
+++ b/src/health/health.d/ml.conf
@@ -13,8 +13,6 @@
     class: Workload
      type: System
 component: ML
-       os: *
-    hosts: *
    lookup: average -1m of anomaly_rate
      calc: $this
     units: %
@@ -29,8 +27,6 @@ component: ML
 # if anomaly rate is above 20% then critical (pick your own threshold that works best via tial and error).
 # template: ml_5min_cpu_dims
 #       on: system.cpu
-#       os: linux
-#    hosts: *
 #   lookup: average -5m anomaly-bit foreach *
 #     calc: $this
 #    units: %
@@ -44,8 +40,6 @@ component: ML
 # if anomaly rate is above 20% then critical (pick your own threshold that works best via tial and error).
 # template: ml_5min_cpu_chart
 #       on: system.cpu
-#       os: linux
-#    hosts: *
 #   lookup: average -5m anomaly-bit of *
 #     calc: $this
 #    units: %
@@ -53,4 +47,3 @@ component: ML
 #     warn: $this > (($status >= $WARNING)  ? (5) : (20))
 #     crit: $this > (($status == $CRITICAL) ? (20) : (100))
 #     info: rolling 5min anomaly rate for system.cpu chart
-
diff --git a/health/health.d/mysql.conf b/src/health/health.d/mysql.conf
index 572560b4e..572560b4e 100644
--- a/health/health.d/mysql.conf
+++ b/src/health/health.d/mysql.conf
diff --git a/src/health/health.d/net.conf b/src/health/health.d/net.conf
new file mode 100644
index 000000000..448a3733d
--- /dev/null
+++ b/src/health/health.d/net.conf
@@ -0,0 +1,239 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# -----------------------------------------------------------------------------
+# net traffic overflow
+
+ template: interface_speed
+       on: net.net
+    class: Latency
+     type: System
+component: Network
+     calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max / 1000) : ( nan )
+    units: Mbit
+    every: 10s
+     info: Network interface ${label:device} current speed
+
+   template: 1m_received_traffic_overflow
+         on: net.net
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -1m unaligned absolute of received
+       calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan )
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (85) : (90))
+      delay: up 1m down 1m multiplier 1.5 max 1h
+    summary: System network interface ${label:device} inbound utilization
+       info: Average inbound utilization for the network interface ${label:device} over the last minute
+         to: silent
+
+   template: 1m_sent_traffic_overflow
+         on: net.net
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -1m unaligned absolute of sent
+       calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan )
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (85) : (90))
+      delay: up 1m down 1m multiplier 1.5 max 1h
+    summary: System network interface ${label:device} outbound utilization
+       info: Average outbound utilization for the network interface ${label:device} over the last minute
+         to: silent
+
+# -----------------------------------------------------------------------------
+# dropped packets
+
+# check if an interface is dropping packets
+# the alarm is checked every 1 minute
+# and examines the last 10 minutes of data
+#
+# it is possible to have expected packet drops on an interface for some network configurations
+# look at the Monitoring Network Interfaces section in the proc.plugin documentation for more information
+
+ template: net_interface_inbound_packets
+       on: net.packets
+    class: Workload
+     type: System
+component: Network
+   lookup: sum -10m unaligned absolute of received
+    units: packets
+    every: 1m
+  summary: Network interface ${label:device} received packets
+     info: Received packets for the network interface ${label:device} in the last 10 minutes
+
+ template: net_interface_outbound_packets
+       on: net.packets
+    class: Workload
+     type: System
+component: Network
+   lookup: sum -10m unaligned absolute of sent
+    units: packets
+    every: 1m
+  summary: Network interface ${label:device} sent packets
+     info: Sent packets for the network interface ${label:device} in the last 10 minutes
+
+    template: inbound_packets_dropped_ratio
+          on: net.drops
+       class: Errors
+        type: System
+   component: Network
+chart labels: device=!wl* *
+      lookup: sum -10m unaligned absolute of inbound
+        calc: (($net_interface_inbound_packets > 10000) ? ($this * 100 / $net_interface_inbound_packets) : (0))
+       units: %
+       every: 1m
+        warn: $this >= 2
+       delay: up 1m down 1h multiplier 1.5 max 2h
+     summary: System network interface ${label:device} inbound drops
+        info: Ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
+          to: silent
+
+    template: outbound_packets_dropped_ratio
+          on: net.drops
+       class: Errors
+        type: System
+   component: Network
+chart labels: device=!wl* *
+      lookup: sum -10m unaligned absolute of outbound
+        calc: (($net_interface_outbound_packets > 1000) ? ($this * 100 / $net_interface_outbound_packets) : (0))
+       units: %
+       every: 1m
+        warn: $this >= 2
+       delay: up 1m down 1h multiplier 1.5 max 2h
+     summary: System network interface ${label:device} outbound drops
+        info: Ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
+          to: silent
+
+    template: wifi_inbound_packets_dropped_ratio
+          on: net.drops
+       class: Errors
+        type: System
+   component: Network
+ host labels: _os=linux
+chart labels: device=wl*
+      lookup: sum -10m unaligned absolute of inbound
+        calc: (($net_interface_inbound_packets > 10000) ? ($this * 100 / $net_interface_inbound_packets) : (0))
+       units: %
+       every: 1m
+        warn: $this >= 10
+       delay: up 1m down 1h multiplier 1.5 max 2h
+     summary: System network interface ${label:device} inbound drops ratio
+        info: Ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
+          to: silent
+
+    template: wifi_outbound_packets_dropped_ratio
+          on: net.drops
+       class: Errors
+        type: System
+   component: Network
+ host labels: _os=linux
+chart labels: device=wl*
+      lookup: sum -10m unaligned absolute of outbound
+        calc: (($net_interface_outbound_packets > 1000) ? ($this * 100 / $net_interface_outbound_packets) : (0))
+       units: %
+       every: 1m
+        warn: $this >= 10
+       delay: up 1m down 1h multiplier 1.5 max 2h
+     summary: System network interface ${label:device} outbound drops ratio
+        info: Ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
+          to: silent
+
+# -----------------------------------------------------------------------------
+# interface errors
+
+   template: interface_inbound_errors
+         on: net.errors
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=freebsd
+     lookup: sum -10m unaligned absolute of inbound
+      units: errors
+      every: 1m
+       warn: $this >= 5
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System network interface ${label:device} inbound errors
+       info: Number of inbound errors for the network interface ${label:device} in the last 10 minutes
+         to: silent
+
+   template: interface_outbound_errors
+         on: net.errors
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=freebsd
+     lookup: sum -10m unaligned absolute of outbound
+      units: errors
+      every: 1m
+       warn: $this >= 5
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System network interface ${label:device} outbound errors
+       info: Number of outbound errors for the network interface ${label:device} in the last 10 minutes
+         to: silent
+
+# -----------------------------------------------------------------------------
+# FIFO errors
+
+# check if an interface is having FIFO
+# buffer errors
+# the alarm is checked every 1 minute
+# and examines the last 10 minutes of data
+
+   template: 10min_fifo_errors
+         on: net.fifo
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: sum -10m unaligned absolute
+      units: errors
+      every: 1m
+       warn: $this > 0
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System network interface ${label:device} FIFO errors
+       info: Number of FIFO errors for the network interface ${label:device} in the last 10 minutes
+         to: silent
+
+# -----------------------------------------------------------------------------
+# check for packet storms
+
+# 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
+# 2. do the same for the last 10s
+# 3. raise an alarm if the latter is more than 50x (warning) or 60x (critical) the first
+# the 1m rate is floored at 1000 packets/s, so a packet storm needs at least
+# 50000 packets/s, average of the last 10 seconds
+
+   template: 1m_received_packets_rate
+         on: net.packets
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux freebsd
+     lookup: average -1m unaligned of received
+      units: packets
+      every: 10s
+       info: Average number of packets received by the network interface ${label:device} over the last minute
+
+   template: 10s_received_packets_storm
+         on: net.packets
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux freebsd
+     lookup: average -10s unaligned of received
+       calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
+      every: 10s
+      units: %
+       warn: $this > (($status >= $WARNING)?(200):(5000))
+       crit: $this > (($status == $CRITICAL)?(5000):(6000))
+    options: no-clear-notification
+    summary: System network interface ${label:device} inbound packet storm
+       info: Ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
+             compared to the rate over the last minute
+         to: silent
diff --git a/src/health/health.d/netfilter.conf b/src/health/health.d/netfilter.conf
new file mode 100644
index 000000000..e0a05c8de
--- /dev/null
+++ b/src/health/health.d/netfilter.conf
@@ -0,0 +1,18 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+      alarm: netfilter_conntrack_full
+         on: netfilter.conntrack_sockets
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: max -10s unaligned of connections
+       calc: $this * 100 / $netfilter_conntrack_max
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (85) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (95))
+      delay: down 5m multiplier 1.5 max 1h
+    summary: System Netfilter connection tracker utilization
+       info: Netfilter connection tracker table size utilization
+         to: sysadmin
diff --git a/health/health.d/nvme.conf b/src/health/health.d/nvme.conf
index aea402e88..aea402e88 100644
--- a/health/health.d/nvme.conf
+++ b/src/health/health.d/nvme.conf
diff --git a/health/health.d/pihole.conf b/src/health/health.d/pihole.conf
index c4db835ce..c4db835ce 100644
--- a/health/health.d/pihole.conf
+++ b/src/health/health.d/pihole.conf
diff --git a/src/health/health.d/ping.conf b/src/health/health.d/ping.conf
new file mode 100644
index 000000000..a91b231c3
--- /dev/null
+++ b/src/health/health.d/ping.conf
@@ -0,0 +1,50 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: ping_host_reachable
+       on: ping.host_packet_loss
+    class: Errors
+     type: Other
+component: Network
+   lookup: average -30s unaligned of loss
+     calc: ($this == nan) ? (nan) : ($this < 100)
+    units: up/down
+    every: 10s
+     crit: $this == 0
+    delay: down 30m multiplier 1.5 max 2h
+  summary: Host ${label:host} ping status
+     info: Network host ${label:host} reachability status
+       to: sysadmin
+
+ template: ping_packet_loss
+       on: ping.host_packet_loss
+    class: Errors
+     type: Other
+component: Network
+   lookup: average -10m unaligned of loss
+    green: 5
+      red: 10
+    units: %
+    every: 10s
+     warn: $this > $green
+     crit: $this > $red
+    delay: down 30m multiplier 1.5 max 2h
+  summary: Host ${label:host} ping packet loss
+     info: Packet loss percentage to the network host ${label:host} over the last 10 minutes
+       to: sysadmin
+
+ template: ping_host_latency
+       on: ping.host_rtt
+    class: Latency
+     type: Other
+component: Network
+   lookup: average -10s unaligned of avg
+    units: ms
+    every: 10s
+    green: 500
+      red: 1000
+     warn: $this > $green OR $max > $red
+     crit: $this > $red
+    delay: down 30m multiplier 1.5 max 2h
+  summary: Host ${label:host} ping latency
+     info: Average latency to the network host ${label:host} over the last 10 seconds
+       to: sysadmin
diff --git a/health/health.d/plugin.conf b/src/health/health.d/plugin.conf
index 8615a0213..8615a0213 100644
--- a/health/health.d/plugin.conf
+++ b/src/health/health.d/plugin.conf
diff --git a/health/health.d/portcheck.conf b/src/health/health.d/portcheck.conf
index 281731c86..281731c86 100644
--- a/health/health.d/portcheck.conf
+++ b/src/health/health.d/portcheck.conf
diff --git a/src/health/health.d/postgres.conf b/src/health/health.d/postgres.conf
new file mode 100644
index 000000000..17e418758
--- /dev/null
+++ b/src/health/health.d/postgres.conf
@@ -0,0 +1,216 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: postgres_total_connection_utilization
+       on: postgres.connections_utilization
+    class: Utilization
+     type: Database
+component: PostgreSQL
+   lookup: average -1m unaligned of used
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (70) : (80))
+     crit: $this > (($status == $CRITICAL) ? (80) : (90))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL connection utilization
+     info: Average total connection utilization over the last minute
+       to: dba
+
+ template: postgres_acquired_locks_utilization
+       on: postgres.locks_utilization
+    class: Utilization
+     type: Database
+component: PostgreSQL
+   lookup: average -1m unaligned of used
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (15) : (20))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL acquired locks utilization
+     info: Average acquired locks utilization over the last minute
+       to: dba
+
+ template: postgres_txid_exhaustion_perc
+       on: postgres.txid_exhaustion_perc
+    class: Utilization
+     type: Database
+component: PostgreSQL
+     calc: $txid_exhaustion
+    units: %
+    every: 1m
+     warn: $this > 90
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL TXID exhaustion
+     info: Percent towards TXID wraparound
+       to: dba
+
+# Database alarms
+
+ template: postgres_db_cache_io_ratio
+       on: postgres.db_cache_io_ratio
+    class: Workload
+     type: Database
+component: PostgreSQL
+   lookup: average -1m unaligned of miss
+     calc: 100 - $this
+    units: %
+    every: 1m
+     warn: $this < (($status >= $WARNING)  ? (70) : (60))
+     crit: $this < (($status == $CRITICAL) ? (60) : (50))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL DB ${label:database} cache hit ratio
+     info: Average cache hit ratio in db ${label:database} over the last minute
+       to: dba
+
+ template: postgres_db_transactions_rollback_ratio
+       on: postgres.db_transactions_ratio
+    class: Workload
+     type: Database
+component: PostgreSQL
+   lookup: average -5m unaligned of rollback
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (0) : (2))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL DB ${label:database} aborted transactions
+     info: Average aborted transactions percentage in db ${label:database} over the last five minutes
+       to: dba
+
+ template: postgres_db_deadlocks_rate
+       on: postgres.db_deadlocks_rate
+    class: Errors
+     type: Database
+component: PostgreSQL
+   lookup: sum -1m unaligned of deadlocks
+    units: deadlocks
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (0) : (10))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL DB ${label:database} deadlocks rate
+     info: Number of deadlocks detected in db ${label:database} in the last minute
+       to: dba
+
+# Table alarms
+
+ template: postgres_table_cache_io_ratio
+       on: postgres.table_cache_io_ratio
+    class: Workload
+     type: Database
+component: PostgreSQL
+   lookup: average -1m unaligned of miss
+     calc: 100 - $this
+    units: %
+    every: 1m
+     warn: $this < (($status >= $WARNING)  ? (70) : (60))
+     crit: $this < (($status == $CRITICAL) ? (60) : (50))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL table ${label:table} db ${label:database} cache hit ratio
+     info: Average cache hit ratio in db ${label:database} table ${label:table} over the last minute
+       to: dba
+
+ template: postgres_table_index_cache_io_ratio
+       on: postgres.table_index_cache_io_ratio
+    class: Workload
+     type: Database
+component: PostgreSQL
+   lookup: average -1m unaligned of miss
+     calc: 100 - $this
+    units: %
+    every: 1m
+     warn: $this < (($status >= $WARNING)  ? (70) : (60))
+     crit: $this < (($status == $CRITICAL) ? (60) : (50))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL table ${label:table} db ${label:database} index cache hit ratio
+     info: Average index cache hit ratio in db ${label:database} table ${label:table} over the last minute
+       to: dba
+
+ template: postgres_table_toast_cache_io_ratio
+       on: postgres.table_toast_cache_io_ratio
+    class: Workload
+     type: Database
+component: PostgreSQL
+   lookup: average -1m unaligned of miss
+     calc: 100 - $this
+    units: %
+    every: 1m
+     warn: $this < (($status >= $WARNING)  ? (70) : (60))
+     crit: $this < (($status == $CRITICAL) ? (60) : (50))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL table ${label:table} db ${label:database} toast cache hit ratio
+     info: Average TOAST hit ratio in db ${label:database} table ${label:table} over the last minute
+       to: dba
+
+ template: postgres_table_toast_index_cache_io_ratio
+       on: postgres.table_toast_index_cache_io_ratio
+    class: Workload
+     type: Database
+component: PostgreSQL
+   lookup: average -1m unaligned of miss
+     calc: 100 - $this
+    units: %
+    every: 1m
+     warn: $this < (($status >= $WARNING)  ? (70) : (60))
+     crit: $this < (($status == $CRITICAL) ? (60) : (50))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL table ${label:table} db ${label:database} index toast hit ratio
+     info: average index TOAST hit ratio in db ${label:database} table ${label:table} over the last minute
+       to: dba
+
+ template: postgres_table_bloat_size_perc
+       on: postgres.table_bloat_size_perc
+    class: Errors
+     type: Database
+component: PostgreSQL
+     calc: ($table_size > (1024 * 1024 * 100)) ? ($bloat) : (0)
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (60) : (70))
+     crit: $this > (($status == $CRITICAL) ? (70) : (80))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL table ${label:table} db ${label:database} bloat size
+     info: Bloat size percentage in db ${label:database} table ${label:table}
+       to: dba
+
+ template: postgres_table_last_autovacuum_time
+       on: postgres.table_autovacuum_since_time
+    class: Errors
+     type: Database
+component: PostgreSQL
+host labels: _hostname=!*
+     calc: $time
+    units: seconds
+    every: 1m
+     warn: $this != nan AND $this > (60 * 60 * 24 * 7)
+  summary: PostgreSQL table ${label:table} db ${label:database} last autovacuum
+     info: Time elapsed since db ${label:database} table ${label:table} was vacuumed by the autovacuum daemon
+       to: dba
+
+ template: postgres_table_last_autoanalyze_time
+       on: postgres.table_autoanalyze_since_time
+    class: Errors
+     type: Database
+component: PostgreSQL
+host labels: _hostname=!*
+     calc: $time
+    units: seconds
+    every: 1m
+     warn: $this != nan AND $this > (60 * 60 * 24 * 7)
+  summary: PostgreSQL table ${label:table} db ${label:database} last autoanalyze
+     info: Time elapsed since db ${label:database} table ${label:table} was analyzed by the autovacuum daemon
+       to: dba
+
+# Index alarms
+
+ template: postgres_index_bloat_size_perc
+       on: postgres.index_bloat_size_perc
+    class: Errors
+     type: Database
+component: PostgreSQL
+     calc: ($index_size > (1024 * 1024 * 10)) ? ($bloat) : (0)
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (60) : (70))
+     crit: $this > (($status == $CRITICAL) ? (70) : (80))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: PostgreSQL table ${label:table} db ${label:database} index bloat size
+     info: Bloat size percentage in db ${label:database} table ${label:table} index ${label:index}
+       to: dba
diff --git a/health/health.d/processes.conf b/src/health/health.d/processes.conf
index 8f2e0fda5..2029c76e4 100644
--- a/health/health.d/processes.conf
+++ b/src/health/health.d/processes.conf
@@ -5,7 +5,6 @@
     class: Workload
      type: System
 component: Processes
-    hosts: *
      calc: $active * 100 / $pidmax
     units: %
     every: 5s
diff --git a/src/health/health.d/python.d.plugin.conf b/src/health/health.d/python.d.plugin.conf
new file mode 100644
index 000000000..f962b07f2
--- /dev/null
+++ b/src/health/health.d/python.d.plugin.conf
@@ -0,0 +1,17 @@
+# make sure python.d.plugin data collection job is running
+
+   template: python.d_job_last_collected_secs
+         on: netdata.pythond_runtime
+      class: Errors
+       type: Netdata
+  component: python.d.plugin
+host labels: _hostname=!*
+       calc: $now - $last_collected_t
+      units: seconds ago
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+       crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+      delay: down 5m multiplier 1.5 max 1h
+    summary: Python.d plugin last collection
+       info: Number of seconds since the last successful data collection
+         to: webmaster
diff --git a/src/health/health.d/qos.conf b/src/health/health.d/qos.conf
new file mode 100644
index 000000000..f524a1578
--- /dev/null
+++ b/src/health/health.d/qos.conf
@@ -0,0 +1,16 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# check if a QoS class is dropping packets
+# the alarm is checked every 30 seconds
+# and examines the last 5 minutes of data
+
+   template: 10min_qos_packet_drops
+         on: tc.qos_dropped
+host labels: _os=linux
+     lookup: sum -5m unaligned absolute
+      every: 30s
+       warn: $this > 0
+      units: packets
+    summary: QOS packet drops
+       info: Dropped packets in the last 5 minutes
+         to: silent
diff --git a/src/health/health.d/ram.conf b/src/health/health.d/ram.conf
new file mode 100644
index 000000000..573bc0aca
--- /dev/null
+++ b/src/health/health.d/ram.conf
@@ -0,0 +1,76 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+      alarm: ram_in_use
+         on: system.ram
+      class: Utilization
+       type: System
+  component: Memory
+host labels: _os=linux
+       calc: $used * 100 / ($used + $cached + $free + $buffers)
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (80) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (98))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System memory utilization
+       info: System memory utilization
+         to: sysadmin
+
+      alarm: ram_available
+         on: mem.available
+      class: Utilization
+       type: System
+  component: Memory
+host labels: _os=linux
+       calc: $avail * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
+      units: %
+      every: 10s
+       warn: $this < (($status >= $WARNING)  ? (15) : (10))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System available memory
+       info: Percentage of estimated amount of RAM available for userspace processes, without causing swapping
+         to: silent
+
+      alarm: oom_kill
+         on: mem.oom_kill
+host labels: _os=linux
+     lookup: sum -30m unaligned
+      units: kills
+      every: 5m
+       warn: $this > 0
+      delay: down 10m
+    summary: System OOM kills
+       info: Number of out of memory kills in the last 30 minutes
+         to: silent
+
+## FreeBSD
+      alarm: ram_in_use
+         on: system.ram
+      class: Utilization
+       type: System
+  component: Memory
+host labels: _os=freebsd
+       calc: ($active + $wired + $laundry + $buffers) * 100 / ($active + $wired + $laundry + $buffers + $cache + $free + $inactive)
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (80) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (98))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System memory utilization
+       info: System memory utilization
+         to: sysadmin
+
+      alarm: ram_available
+         on: mem.available
+      class: Utilization
+       type: System
+  component: Memory
+host labels: _os=freebsd
+       calc: $avail * 100 / ($system.ram.free + $system.ram.active + $system.ram.inactive + $system.ram.wired + $system.ram.cache + $system.ram.laundry + $system.ram.buffers)
+      units: %
+      every: 10s
+       warn: $this < (($status >= $WARNING)  ? (15) : (10))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System available memory
+       info: Percentage of estimated amount of RAM available for userspace processes, without causing swapping
+         to: silent
diff --git a/src/health/health.d/redis.conf b/src/health/health.d/redis.conf
new file mode 100644
index 000000000..4f82830a9
--- /dev/null
+++ b/src/health/health.d/redis.conf
@@ -0,0 +1,58 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: redis_connections_rejected
+       on: redis.connections
+    class: Errors
+     type: KV Storage
+component: Redis
+   lookup: sum -1m unaligned of rejected
+    every: 10s
+    units: connections
+     warn: $this > 0
+  summary: Redis rejected connections
+     info: Connections rejected because of maxclients limit in the last minute
+    delay: down 5m multiplier 1.5 max 1h
+       to: dba
+
+ template: redis_bgsave_broken
+       on: redis.bgsave_health
+    class: Errors
+     type: KV Storage
+component: Redis
+    every: 10s
+     calc: $last_bgsave != nan AND $last_bgsave != 0
+     crit: $this
+    units: ok/failed
+  summary: Redis background save
+     info: Status of the last RDB save operation (0: ok, 1: error)
+    delay: down 5m multiplier 1.5 max 1h
+       to: dba
+
+ template: redis_bgsave_slow
+       on: redis.bgsave_now
+    class: Latency
+     type: KV Storage
+component: Redis
+    every: 10s
+     calc: $current_bgsave_time
+     warn: $this > 600
+     crit: $this > 1200
+    units: seconds
+  summary: Redis slow background save
+     info: Duration of the on-going RDB save operation
+    delay: down 5m multiplier 1.5 max 1h
+       to: dba
+
+ template: redis_master_link_down
+       on: redis.master_link_down_since_time
+    class: Errors
+     type: KV Storage
+component: Redis
+    every: 10s
+     calc: $time
+    units: seconds
+     crit: $this != nan AND $this > 0
+  summary: Redis master link down
+     info: Time elapsed since the link between master and slave is down
+    delay: down 5m multiplier 1.5 max 1h
+       to: dba
diff --git a/health/health.d/retroshare.conf b/src/health/health.d/retroshare.conf
index c665430fa..c665430fa 100644
--- a/health/health.d/retroshare.conf
+++ b/src/health/health.d/retroshare.conf
diff --git a/health/health.d/riakkv.conf b/src/health/health.d/riakkv.conf
index 677e3cb4f..677e3cb4f 100644
--- a/health/health.d/riakkv.conf
+++ b/src/health/health.d/riakkv.conf
diff --git a/health/health.d/scaleio.conf b/src/health/health.d/scaleio.conf
index b089cb85e..b089cb85e 100644
--- a/health/health.d/scaleio.conf
+++ b/src/health/health.d/scaleio.conf
diff --git a/src/health/health.d/softnet.conf b/src/health/health.d/softnet.conf
new file mode 100644
index 000000000..03a4ceebd
--- /dev/null
+++ b/src/health/health.d/softnet.conf
@@ -0,0 +1,53 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# check for common /proc/net/softnet_stat errors
+
+      alarm: 1min_netdev_backlog_exceeded
+         on: system.softnet_stat
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -1m unaligned absolute of dropped
+      units: packets
+      every: 10s
+       warn: $this > (($status >= $WARNING) ? (0) : (10))
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System netdev dropped packets
+       info: Average number of dropped packets in the last minute \
+             due to exceeded net.core.netdev_max_backlog
+         to: silent
+
+      alarm: 1min_netdev_budget_ran_outs
+         on: system.softnet_stat
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -1m unaligned absolute of squeezed
+      units: events
+      every: 10s
+       warn: $this > (($status >= $WARNING) ? (0) : (10))
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System netdev budget run outs
+       info: Average number of times ksoftirq ran out of sysctl net.core.netdev_budget or \
+             net.core.netdev_budget_usecs with work remaining over the last minute \
+             (this can be a cause for dropped packets)
+         to: silent
+
+      alarm: 10min_netisr_backlog_exceeded
+         on: system.softnet_stat
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=freebsd
+     lookup: average -1m unaligned absolute of qdrops
+      units: packets
+      every: 10s
+       warn: $this > (($status >= $WARNING) ? (0) : (10))
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System netisr drops
+       info: Average number of drops in the last minute \
+             due to exceeded sysctl net.route.netisr_maxqlen \
+             (this can be a cause for dropped packets)
+         to: silent
diff --git a/src/health/health.d/storcli.conf b/src/health/health.d/storcli.conf
new file mode 100644
index 000000000..be71b517e
--- /dev/null
+++ b/src/health/health.d/storcli.conf
@@ -0,0 +1,61 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# Controllers
+
+ template: storcli_controller_health_status
+       on: storcli.controller_health_status
+    class: Errors
+     type: System
+component: RAID
+   lookup: average -1m unaligned percentage of healthy
+    units: %
+    every: 10s
+     crit: $this < 100
+    delay: down 5m multiplier 2 max 10m
+  summary: RAID controller ${label:controller_number} health
+     info: RAID controller ${label:controller_number} is unhealthy
+       to: sysadmin
+
+ template: storcli_controller_bbu_status
+       on: storcli.controller_bbu_status
+    class: Errors
+     type: System
+component: RAID
+   lookup: average -1m unaligned percentage of healthy,na
+    units: %
+    every: 10s
+     crit: $this < 100
+    delay: down 5m multiplier 2 max 10m
+  summary: RAID controller ${label:controller_number} BBU health
+     info: RAID controller ${label:controller_number} BBU is unhealthy
+       to: sysadmin
+
+# Physical Drives
+
+ template: storcli_phys_drive_errors
+       on: storcli.phys_drive_errors
+    class: Errors
+     type: System
+component: RAID
+   lookup: sum -10s
+    units: errors
+    every: 10s
+     warn: $this > 0
+    delay: up 1m down 5m multiplier 2 max 10m
+  summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors
+     info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors
+       to: sysadmin
+
+ template: storcli_phys_drive_predictive_failures
+       on: storcli.phys_drive_predictive_failures
+    class: Errors
+     type: System
+component: RAID
+   lookup: sum -10s
+    units: failures
+    every: 10s
+     warn: $this > 0
+    delay: up 1m down 5m multiplier 2 max 10m
+  summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures
+     info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures
+       to: sysadmin
diff --git a/src/health/health.d/swap.conf b/src/health/health.d/swap.conf
new file mode 100644
index 000000000..297aebd1e
--- /dev/null
+++ b/src/health/health.d/swap.conf
@@ -0,0 +1,34 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+      alarm: 30min_ram_swapped_out
+         on: mem.swapio
+      class: Workload
+       type: System
+  component: Memory
+host labels: _os=linux freebsd
+     lookup: sum -30m unaligned absolute of out
+             # we have to convert KB to MB by dividing $this (i.e. the result of the lookup) by 1024
+       calc: $this / 1024 * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free )
+      units: % of RAM
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (20) : (30))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System memory swapped out
+       info: Percentage of the system RAM swapped in the last 30 minutes
+         to: silent
+
+      alarm: used_swap
+         on: mem.swap
+      class: Utilization
+       type: System
+  component: Memory
+host labels: _os=linux freebsd
+       calc: (($used + $free) > 0) ? ($used * 100 / ($used + $free)) : 0
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (80) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (98))
+      delay: up 30s down 15m multiplier 1.5 max 1h
+    summary: System swap memory utilization
+       info: Swap memory utilization
+         to: sysadmin
diff --git a/health/health.d/synchronization.conf b/src/health/health.d/synchronization.conf
index 6c947d90b..28b1817ac 100644
--- a/health/health.d/synchronization.conf
+++ b/src/health/health.d/synchronization.conf
@@ -2,7 +2,6 @@
       on: mem.sync
   lookup: sum -1m of sync
    units: calls
-  plugin: ebpf.plugin
    every: 1m
     warn: $this > 6
    delay: up 1m down 10m multiplier 1.5 max 1h
diff --git a/src/health/health.d/systemdunits.conf b/src/health/health.d/systemdunits.conf
new file mode 100644
index 000000000..bb5c627e8
--- /dev/null
+++ b/src/health/health.d/systemdunits.conf
@@ -0,0 +1,177 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+## Service units
+    template: systemd_service_unit_failed_state
+          on: systemd.service_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd service unit in the failed state
+          to: sysadmin
+
+## Socket units
+    template: systemd_socket_unit_failed_state
+          on: systemd.socket_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd socket unit in the failed state
+          to: sysadmin
+
+## Target units
+    template: systemd_target_unit_failed_state
+          on: systemd.target_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd target unit in the failed state
+          to: sysadmin
+
+## Path units
+    template: systemd_path_unit_failed_state
+          on: systemd.path_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd path unit in the failed state
+          to: sysadmin
+
+## Device units
+    template: systemd_device_unit_failed_state
+          on: systemd.device_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd device unit in the failed state
+          to: sysadmin
+
+## Mount units
+    template: systemd_mount_unit_failed_state
+          on: systemd.mount_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd mount unit in the failed state
+          to: sysadmin
+
+## Automount units
+    template: systemd_automount_unit_failed_state
+          on: systemd.automount_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd automount unit in the failed state
+          to: sysadmin
+
+## Swap units
+    template: systemd_swap_unit_failed_state
+          on: systemd.swap_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd swap unit in the failed state
+          to: sysadmin
+
+## Scope units
+    template: systemd_scope_unit_failed_state
+          on: systemd.scope_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd scope unit in the failed state
+          to: sysadmin
+
+## Slice units
+    template: systemd_slice_unit_failed_state
+          on: systemd.slice_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd slice unit in the failed state
+          to: sysadmin
+
+## Timer units
+    template: systemd_timer_unit_failed_state
+          on: systemd.timer_unit_state
+       class: Errors
+        type: Linux
+   component: Systemd units
+chart labels: unit_name=!*
+        calc: $failed
+       units: state
+       every: 10s
+        warn: $this != nan AND $this == 1
+       delay: down 5m multiplier 1.5 max 1h
+     summary: systemd unit ${label:unit_name} state
+        info: systemd timer unit in the failed state
+          to: sysadmin
diff --git a/src/health/health.d/tcp_conn.conf b/src/health/health.d/tcp_conn.conf
new file mode 100644
index 000000000..fe4b98db0
--- /dev/null
+++ b/src/health/health.d/tcp_conn.conf
@@ -0,0 +1,21 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# ${tcp_max_connections} may be nan or -1 if the system
+# supports dynamic threshold for TCP connections.
+# In this case, the alarm will always be zero.
+
+      alarm: tcp_connections
+         on: ip.tcpsock
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+       calc: (${tcp_max_connections} > 0) ? ( ${connections} * 100 / ${tcp_max_connections} ) : 0
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING ) ? ( 60 ) : ( 80 ))
+       crit: $this > (($status == $CRITICAL) ? ( 80 ) : ( 90 ))
+      delay: up 0 down 5m multiplier 1.5 max 1h
+    summary: System TCP connections utilization
+       info: IPv4 TCP connections utilization
+         to: sysadmin
diff --git a/src/health/health.d/tcp_listen.conf b/src/health/health.d/tcp_listen.conf
new file mode 100644
index 000000000..bdcce79d4
--- /dev/null
+++ b/src/health/health.d/tcp_listen.conf
@@ -0,0 +1,93 @@
+# There are two queues involved when incoming TCP connections are handled
+# (both at the kernel):
+#
+# SYN queue
+# The SYN queue tracks TCP handshakes until connections are fully established.
+# It overflows when too many incoming TCP connection requests hang in the
+# half-open state and the server is not configured to fall back to SYN cookies.
+# Overflows are usually caused by SYN flood DoS attacks (i.e. someone sends
+# lots of SYN packets and never completes the handshakes).
+#
+# Accept queue
+# The accept queue holds fully established TCP connections waiting to be handled
+# by the listening application. It overflows when the server application fails
+# to accept new connections at the rate they are coming in.
+#
+#
+# -----------------------------------------------------------------------------
+# tcp accept queue (at the kernel)
+
+      alarm: 1m_tcp_accept_queue_overflows
+         on: ip.tcp_accept_queue
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -60s unaligned absolute of ListenOverflows
+      units: overflows
+      every: 10s
+       warn: $this > 1
+       crit: $this > (($status == $CRITICAL) ? (1) : (5))
+      delay: up 0 down 5m multiplier 1.5 max 1h
+    summary: System TCP accept queue overflows
+       info: Average number of overflows in the TCP accept queue over the last minute
+         to: silent
+
+# THIS IS TOO GENERIC
+# CHECK: https://github.com/netdata/netdata/issues/3234#issuecomment-423935842
+      alarm: 1m_tcp_accept_queue_drops
+         on: ip.tcp_accept_queue
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -60s unaligned absolute of ListenDrops
+      units: drops
+      every: 10s
+       warn: $this > 1
+       crit: $this > (($status == $CRITICAL) ? (1) : (5))
+      delay: up 0 down 5m multiplier 1.5 max 1h
+    summary: System TCP accept queue dropped packets
+       info: Average number of dropped packets in the TCP accept queue over the last minute
+         to: silent
+
+# -----------------------------------------------------------------------------
+# tcp SYN queue (at the kernel)
+
+# When the SYN queue is full, either TcpExtTCPReqQFullDoCookies or
+# TcpExtTCPReqQFullDrop is incremented, depending on whether SYN cookies are
+# enabled or not. In both cases this probably indicates a SYN flood attack,
+# so I guess a notification should be sent.
+
+      alarm: 1m_tcp_syn_queue_drops
+         on: ip.tcp_syn_queue
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -60s unaligned absolute of TCPReqQFullDrop
+      units: drops
+      every: 10s
+       warn: $this > 1
+       crit: $this > (($status == $CRITICAL) ? (0) : (5))
+      delay: up 10 down 5m multiplier 1.5 max 1h
+    summary: System TCP SYN queue drops
+       info: Average number of SYN requests dropped due to the full TCP SYN queue over the last minute \
+             (SYN cookies were not enabled)
+         to: silent
+
+      alarm: 1m_tcp_syn_queue_cookies
+         on: ip.tcp_syn_queue
+      class: Workload
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -60s unaligned absolute of TCPReqQFullDoCookies
+      units: cookies
+      every: 10s
+       warn: $this > 1
+       crit: $this > (($status == $CRITICAL) ? (0) : (5))
+      delay: up 10 down 5m multiplier 1.5 max 1h
+    summary: System TCP SYN queue cookies
+       info: Average number of sent SYN cookies due to the full TCP SYN queue over the last minute
+         to: silent
diff --git a/src/health/health.d/tcp_mem.conf b/src/health/health.d/tcp_mem.conf
new file mode 100644
index 000000000..b9350e3cd
--- /dev/null
+++ b/src/health/health.d/tcp_mem.conf
@@ -0,0 +1,22 @@
+# check
+# http://blog.tsunanet.net/2011/03/out-of-socket-memory.html
+#
+# We give a warning when TCP is under memory pressure
+# and a critical when TCP is 90% of its upper memory limit
+#
+
+      alarm: tcp_memory
+         on: ipv4.sockstat_tcp_mem
+      class: Utilization
+       type: System
+  component: Network
+host labels: _os=linux
+       calc: ${mem} * 100 / ${tcp_mem_high}
+      units: %
+      every: 10s
+       warn: ${mem} > (($status >= $WARNING  ) ? ( ${tcp_mem_pressure} * 0.8 ) : ( ${tcp_mem_pressure}   ))
+       crit: ${mem} > (($status == $CRITICAL ) ? ( ${tcp_mem_pressure}       ) : ( ${tcp_mem_high} * 0.9 ))
+      delay: up 0 down 5m multiplier 1.5 max 1h
+    summary: System TCP memory utilization
+       info: TCP memory utilization
+         to: silent
diff --git a/src/health/health.d/tcp_orphans.conf b/src/health/health.d/tcp_orphans.conf
new file mode 100644
index 000000000..7b2d95edb
--- /dev/null
+++ b/src/health/health.d/tcp_orphans.conf
@@ -0,0 +1,22 @@
+# check
+# http://blog.tsunanet.net/2011/03/out-of-socket-memory.html
+#
+# The kernel may penalize orphans by 2x or even 4x
+# so we alarm warning at 25% and critical at 50%
+#
+
+      alarm: tcp_orphans
+         on: ipv4.sockstat_tcp_sockets
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux
+       calc: ${orphan} * 100 / ${tcp_max_orphans}
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING ) ? ( 20 ) : ( 25 ))
+       crit: $this > (($status == $CRITICAL) ? ( 25 ) : ( 50 ))
+      delay: up 0 down 5m multiplier 1.5 max 1h
+    summary: System TCP orphan sockets utilization
+       info: Orphan IPv4 TCP sockets utilization
+         to: silent
diff --git a/src/health/health.d/tcp_resets.conf b/src/health/health.d/tcp_resets.conf
new file mode 100644
index 000000000..63f798d78
--- /dev/null
+++ b/src/health/health.d/tcp_resets.conf
@@ -0,0 +1,66 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# -----------------------------------------------------------------------------
+# tcp resets this host sends
+
+      alarm: 1m_ip_tcp_resets_sent
+         on: ip.tcphandshake
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -1m at -10s unaligned absolute of OutRsts
+      units: tcp resets/s
+      every: 10s
+       info: Average number of sent TCP RESETS over the last minute
+
+      alarm: 10s_ip_tcp_resets_sent
+         on: ip.tcphandshake
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -10s unaligned absolute of OutRsts
+      units: tcp resets/s
+      every: 10s
+       warn: $netdata.uptime.uptime > (1 * 60) AND $this > ((($1m_ip_tcp_resets_sent < 5)?(5):($1m_ip_tcp_resets_sent)) * (($status >= $WARNING)  ? (1) : (10)))
+      delay: up 20s down 60m multiplier 1.2 max 2h
+    options: no-clear-notification
+    summary: System TCP outbound resets
+       info: Average number of sent TCP RESETS over the last 10 seconds. \
+             This can indicate a port scan, \
+             or that a service running on this host has crashed. \
+             Netdata will not send a clear notification for this alarm.
+         to: silent
+
+# -----------------------------------------------------------------------------
+# tcp resets this host receives
+
+      alarm: 1m_ip_tcp_resets_received
+         on: ip.tcphandshake
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux freebsd
+     lookup: average -1m at -10s unaligned absolute of AttemptFails
+      units: tcp resets/s
+      every: 10s
+       info: Average number of received TCP RESETS over the last minute
+
+      alarm: 10s_ip_tcp_resets_received
+         on: ip.tcphandshake
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux freebsd
+     lookup: average -10s unaligned absolute of AttemptFails
+      units: tcp resets/s
+      every: 10s
+       warn: $netdata.uptime.uptime > (1 * 60) AND $this > ((($1m_ip_tcp_resets_received < 5)?(5):($1m_ip_tcp_resets_received)) * (($status >= $WARNING)  ? (1) : (10)))
+      delay: up 20s down 60m multiplier 1.2 max 2h
+    options: no-clear-notification
+    summary: System TCP inbound resets
+       info: Average number of received TCP RESETS over the last 10 seconds. \
+             This can be an indication that a service this host needs has crashed. \
+             Netdata will not send a clear notification for this alarm.
+         to: silent
diff --git a/src/health/health.d/timex.conf b/src/health/health.d/timex.conf
new file mode 100644
index 000000000..053dc9290
--- /dev/null
+++ b/src/health/health.d/timex.conf
@@ -0,0 +1,17 @@
+# It can take several minutes before ntpd selects a server to synchronize with;
+# try checking after 17 minutes (1024 seconds).
+
+      alarm: system_clock_sync_state
+         on: system.clock_sync_state
+      class: Errors
+       type: System
+  component: Clock
+host labels: _os=linux
+       calc: $state
+      units: synchronization state
+      every: 10s
+       warn: $system.uptime.uptime > 17 * 60 AND $this == 0
+      delay: down 5m
+    summary: System clock sync state
+       info: When set to 0, the system kernel believes the system clock is not properly synchronized to a reliable server
+         to: silent
diff --git a/src/health/health.d/udp_errors.conf b/src/health/health.d/udp_errors.conf
new file mode 100644
index 000000000..745c11e21
--- /dev/null
+++ b/src/health/health.d/udp_errors.conf
@@ -0,0 +1,37 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# -----------------------------------------------------------------------------
+# UDP receive buffer errors
+
+      alarm: 1m_ipv4_udp_receive_buffer_errors
+         on: ipv4.udperrors
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux freebsd
+     lookup: average -1m unaligned absolute of RcvbufErrors
+      units: errors
+      every: 10s
+       warn: $this > (($status >= $WARNING) ? (0) : (10))
+    summary: System UDP receive buffer errors
+       info: Average number of UDP receive buffer errors over the last minute
+      delay: up 1m down 60m multiplier 1.2 max 2h
+         to: silent
+
+# -----------------------------------------------------------------------------
+# UDP send buffer errors
+
+      alarm: 1m_ipv4_udp_send_buffer_errors
+         on: ipv4.udperrors
+      class: Errors
+       type: System
+  component: Network
+host labels: _os=linux
+     lookup: average -1m unaligned absolute of SndbufErrors
+      units: errors
+      every: 10s
+       warn: $this > (($status >= $WARNING) ? (0) : (10))
+    summary: System UDP send buffer errors
+       info: Average number of UDP send buffer errors over the last minute
+      delay: up 1m down 60m multiplier 1.2 max 2h
+         to: silent
diff --git a/health/health.d/unbound.conf b/src/health/health.d/unbound.conf
index 3c898f1d5..3c898f1d5 100644
--- a/health/health.d/unbound.conf
+++ b/src/health/health.d/unbound.conf
diff --git a/src/health/health.d/upsd.conf b/src/health/health.d/upsd.conf
new file mode 100644
index 000000000..17eb5263d
--- /dev/null
+++ b/src/health/health.d/upsd.conf
@@ -0,0 +1,46 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: upsd_10min_ups_load
+       on: upsd.ups_load
+    class: Utilization
+     type: Power Supply
+component: UPS
+   lookup: average -10m unaligned of load
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (70) : (80))
+     crit: $this > (($status == $CRITICAL) ? (85) : (95))
+    delay: down 10m multiplier 1.5 max 1h
+  summary: UPS ${label:ups_name} load
+     info: UPS ${label:ups_name} average load over the last 10 minutes
+       to: sitemgr
+
+ template: upsd_ups_battery_charge
+       on: upsd.ups_battery_charge
+    class: Errors
+     type: Power Supply
+component: UPS
+   lookup: average -60s unaligned of charge
+    units: %
+    every: 60s
+     warn: $this < 75
+     crit: $this < 40
+    delay: down 10m multiplier 1.5 max 1h
+  summary: UPS ${label:ups_name} battery charge
+     info: UPS ${label:ups_name} average battery charge over the last minute
+       to: sitemgr
+
+ template: upsd_ups_last_collected_secs
+       on: upsd.ups_load
+    class: Latency
+     type: Power Supply
+component: UPS device
+     calc: $now - $last_collected_t
+    every: 10s
+    units: seconds ago
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+  summary: UPS ${label:ups_name} last collected
+     info: UPS ${label:ups_name} number of seconds since the last successful data collection
+       to: sitemgr
diff --git a/health/health.d/vcsa.conf b/src/health/health.d/vcsa.conf
index 3e20bfd1e..3e20bfd1e 100644
--- a/health/health.d/vcsa.conf
+++ b/src/health/health.d/vcsa.conf
diff --git a/health/health.d/vernemq.conf b/src/health/health.d/vernemq.conf
index 6ea9f99dc..6ea9f99dc 100644
--- a/health/health.d/vernemq.conf
+++ b/src/health/health.d/vernemq.conf
diff --git a/src/health/health.d/vsphere.conf b/src/health/health.d/vsphere.conf
new file mode 100644
index 000000000..e22f0b620
--- /dev/null
+++ b/src/health/health.d/vsphere.conf
@@ -0,0 +1,66 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# -----------------------------------------------Virtual Machine--------------------------------------------------------
+
+ template: vsphere_vm_cpu_utilization
+       on: vsphere.vm_cpu_utilization
+    class: Utilization
+     type: Virtual Machine
+component: CPU
+   lookup: average -10m unaligned match-names of used
+    units: %
+    every: 20s
+     warn: $this > (($status >= $WARNING)  ? (75) : (85))
+     crit: $this > (($status == $CRITICAL) ? (85) : (95))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: vSphere CPU utilization for VM ${label:vm}
+     info: CPU utilization VM ${label:vm} host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
+       to: silent
+
+ template: vsphere_vm_mem_utilization
+       on: vsphere.vm_mem_utilization
+    class: Utilization
+     type: Virtual Machine
+component: Memory
+     calc: $used
+    units: %
+    every: 20s
+     warn: $this > (($status >= $WARNING)  ? (80) : (90))
+     crit: $this > (($status == $CRITICAL) ? (90) : (98))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: vSphere memory utilization for VM ${label:vm}
+     info: Memory utilization VM ${label:vm} host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
+       to: silent
+
+# -----------------------------------------------ESXI host--------------------------------------------------------------
+
+ template: vsphere_host_cpu_utilization
+       on: vsphere.host_cpu_utilization
+    class: Utilization
+     type: Virtual Machine
+component: CPU
+   lookup: average -10m unaligned match-names of used
+    units: %
+    every: 20s
+     warn: $this > (($status >= $WARNING)  ? (75) : (85))
+     crit: $this > (($status == $CRITICAL) ? (85) : (95))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: vSphere ESXi CPU utilization for host ${label:host}
+     info: CPU utilization ESXi host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
+       to: sysadmin
+
+ template: vsphere_host_mem_utilization
+       on: vsphere.host_mem_utilization
+    class: Utilization
+     type: Virtual Machine
+component: Memory
+     calc: $used
+    units: %
+    every: 20s
+     warn: $this > (($status >= $WARNING)  ? (80) : (90))
+     crit: $this > (($status == $CRITICAL) ? (90) : (98))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: vSphere ESXi Ram utilization for host ${label:host}
+     info: Memory utilization ESXi host ${label:host} cluster ${label:cluster} datacenter ${label:datacenter}
+       to: sysadmin
diff --git a/health/health.d/web_log.conf b/src/health/health.d/web_log.conf
index 78f1cc7f5..78f1cc7f5 100644
--- a/health/health.d/web_log.conf
+++ b/src/health/health.d/web_log.conf
diff --git a/src/health/health.d/whoisquery.conf b/src/health/health.d/whoisquery.conf
new file mode 100644
index 000000000..6d87ad280
--- /dev/null
+++ b/src/health/health.d/whoisquery.conf
@@ -0,0 +1,14 @@
+
+ template: whoisquery_days_until_expiration
+       on: whoisquery.time_until_expiration
+    class: Utilization
+     type: Other
+component: WHOIS
+     calc: $expiry / 86400
+    units: days
+    every: 60s
+     warn: $this < $days_until_expiration_warning
+     crit: $this < $days_until_expiration_critical
+  summary: Whois expiration time for domain ${label:domain}
+     info: Time until the domain name registration for ${label:domain} expires
+       to: webmaster
diff --git a/src/health/health.d/windows.conf b/src/health/health.d/windows.conf
new file mode 100644
index 000000000..9dfda50c1
--- /dev/null
+++ b/src/health/health.d/windows.conf
@@ -0,0 +1,108 @@
+## CPU
+
+ template: windows_10min_cpu_usage
+       on: windows.cpu_utilization_total
+    class: Utilization
+     type: Windows
+component: CPU
+   lookup: average -10m unaligned match-names of dpc,user,privileged,interrupt
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING)  ? (75) : (85))
+     crit: $this > (($status == $CRITICAL) ? (85) : (95))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: CPU utilization
+     info: Average CPU utilization over the last 10 minutes
+       to: silent
+
+## Memory
+
+ template: windows_ram_in_use
+       on: windows.memory_utilization
+    class: Utilization
+     type: Windows
+component: Memory
+     calc: ($used) * 100 / ($used + $available)
+    units: %
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? (80) : (90))
+     crit: $this > (($status == $CRITICAL) ? (90) : (98))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: Ram utilization
+     info: Memory utilization
+       to: sysadmin
+
+## Network
+
+ template: windows_inbound_packets_discarded
+       on: windows.net_nic_discarded
+    class: Errors
+     type: Windows
+component: Network
+   lookup: sum -10m unaligned absolute match-names of inbound
+    units: packets
+    every: 1m
+     warn: $this >= 5
+    delay: down 1h multiplier 1.5 max 2h
+  summary: Inbound network packets discarded
+     info: Number of inbound discarded packets for the network interface in the last 10 minutes
+       to: silent
+
+ template: windows_outbound_packets_discarded
+       on: windows.net_nic_discarded
+    class: Errors
+     type: Windows
+component: Network
+   lookup: sum -10m unaligned absolute match-names of outbound
+    units: packets
+    every: 1m
+     warn: $this >= 5
+    delay: down 1h multiplier 1.5 max 2h
+  summary: Outbound network packets discarded
+     info: Number of outbound discarded packets for the network interface in the last 10 minutes
+       to: silent
+
+ template: windows_inbound_packets_errors
+       on: windows.net_nic_errors
+    class: Errors
+     type: Windows
+component: Network
+   lookup: sum -10m unaligned absolute match-names of inbound
+    units: packets
+    every: 1m
+     warn: $this >= 5
+    delay: down 1h multiplier 1.5 max 2h
+  summary: Inbound network errors
+     info: Number of inbound errors for the network interface in the last 10 minutes
+       to: silent
+
+ template: windows_outbound_packets_errors
+       on: windows.net_nic_errors
+    class: Errors
+     type: Windows
+component: Network
+   lookup: sum -10m unaligned absolute match-names of outbound
+    units: packets
+    every: 1m
+     warn: $this >= 5
+    delay: down 1h multiplier 1.5 max 2h
+  summary: Outbound network errors
+     info: Number of outbound errors for the network interface in the last 10 minutes
+       to: silent
+
+## Disk
+
+ template: windows_disk_in_use
+       on: windows.logical_disk_space_usage
+    class: Utilization
+     type: Windows
+component: Disk
+     calc: ($used) * 100 / ($used + $free)
+    units: %
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? (80) : (90))
+     crit: $this > (($status == $CRITICAL) ? (90) : (98))
+    delay: down 15m multiplier 1.5 max 1h
+  summary: Disk space usage
+     info: Disk space utilization
+       to: sysadmin
diff --git a/src/health/health.d/x509check.conf b/src/health/health.d/x509check.conf
new file mode 100644
index 000000000..1d40c8602
--- /dev/null
+++ b/src/health/health.d/x509check.conf
@@ -0,0 +1,26 @@
+
+ template: x509check_days_until_expiration
+       on: x509check.time_until_expiration
+    class: Latency
+     type: Certificates
+component: x509 certificates
+     calc: $expiry / 86400
+    units: days
+    every: 60s
+     warn: $this < $days_until_expiration_warning
+     crit: $this < $days_until_expiration_critical
+  summary: x509 certificate expiration for ${label:source}
+     info: Time until x509 certificate expires for ${label:source}
+       to: webmaster
+
+ template: x509check_revocation_status
+       on: x509check.revocation_status
+    class: Errors
+     type: Certificates
+component: x509 certificates
+     calc: $revoked
+    every: 60s
+     crit: $this != nan AND $this != 0
+  summary: x509 certificate revocation status for ${label:source}
+     info: x509 certificate revocation status (0: valid, 1: revoked) for ${label:source}
+       to: webmaster
diff --git a/src/health/health.d/zfs.conf b/src/health/health.d/zfs.conf
new file mode 100644
index 000000000..9c1f0018b
--- /dev/null
+++ b/src/health/health.d/zfs.conf
@@ -0,0 +1,90 @@
+
+    alarm: zfs_memory_throttle
+       on: zfs.memory_ops
+    class: Utilization
+     type: System
+component: File system
+   lookup: sum -10m unaligned absolute of throttled
+    units: events
+    every: 1m
+     warn: $this > 0
+    delay: down 1h multiplier 1.5 max 2h
+  summary: ZFS ARC growth throttling
+     info: number of times ZFS had to limit the ARC growth in the last 10 minutes
+       to: silent
+
+# ZFS pool state
+
+ template: zfs_pool_state_warn
+       on: zfspool.state
+    class: Errors
+     type: System
+component: File system
+     calc: $degraded
+    units: boolean
+    every: 10s
+     warn: $this > 0
+    delay: down 1m multiplier 1.5 max 1h
+  summary: ZFS pool ${label:pool} state
+     info: ZFS pool ${label:pool} state is degraded
+       to: sysadmin
+
+ template: zfs_pool_state_crit
+       on: zfspool.state
+    class: Errors
+     type: System
+component: File system
+     calc: $faulted + $unavail
+    units: boolean
+    every: 10s
+     crit: $this > 0
+    delay: down 1m multiplier 1.5 max 1h
+  summary: Critical ZFS pool ${label:pool} state
+     info: ZFS pool ${label:pool} state is faulted or unavail
+       to: sysadmin
+
+
+## go.d/zfspool
+
+ template: zfs_pool_space_utilization
+       on: zfspool.pool_space_utilization
+    class: Utilization
+     type: System
+component: File system
+     calc: $utilization
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING ) ? (85) : (90))
+     crit: $this > (($status == $CRITICAL) ? (90) : (98))
+    delay: down 1m multiplier 1.5 max 1h
+  summary: ZFS pool ${label:pool} space utilization
+     info: ZFS pool ${label:pool} is nearing capacity. Current space usage is above the threshold.
+       to: sysadmin
+
+ template: zfs_pool_health_state_warn
+       on: zfspool.pool_health_state
+    class: Errors
+     type: System
+component: File system
+     calc: $degraded
+    units: boolean
+    every: 10s
+     warn: $this > 0
+    delay: down 1m multiplier 1.5 max 1h
+  summary: ZFS pool ${label:pool} state
+     info: ZFS pool ${label:pool} state is degraded
+       to: sysadmin
+
+ template: zfs_pool_health_state_crit
+       on: zfspool.pool_health_state
+    class: Errors
+     type: System
+component: File system
+     calc: $faulted + $unavail
+    units: boolean
+    every: 10s
+     crit: $this > 0
+    delay: down 1m multiplier 1.5 max 1h
+  summary: Critical ZFS pool ${label:pool} state
+     info: ZFS pool ${label:pool} state is faulted or unavail
+       to: sysadmin