From 89f3604407aff8f4cb2ed958252c61e23c767e24 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Thu, 9 Jun 2022 06:52:39 +0200
Subject: Adding upstream version 1.35.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 health/QUICKSTART.md                          |   4 +-
 health/health.c                               | 126 +++++++++++++++++---------
 health/health.d/ram.conf                      |  18 +---
 health/health_config.c                        |   7 +-
 health/health_json.c                          |  11 ++-
 health/health_log.c                           |   6 +-
 health/notifications/Makefile.am              |   1 +
 health/notifications/alarm-notify.sh.in       |  69 +++++++++++++-
 health/notifications/gotify/Makefile.inc      |  11 +++
 health/notifications/gotify/README.md         |  62 +++++++++++++
 health/notifications/health_alarm_notify.conf |  22 +++++
 11 files changed, 264 insertions(+), 73 deletions(-)
 create mode 100644 health/notifications/gotify/Makefile.inc
 create mode 100644 health/notifications/gotify/README.md

(limited to 'health')

diff --git a/health/QUICKSTART.md b/health/QUICKSTART.md
index 5cf6929dc..bc2da2df1 100644
--- a/health/QUICKSTART.md
+++ b/health/QUICKSTART.md
@@ -41,9 +41,9 @@ address or hostname for your Agent dashboard, looking for the `stock health conf
 here will show the correct path for your installation.
 
 ```conf
-[health]
+[directories]
  ...
- # stock health configuration directory = /usr/lib/netdata/conf.d/health.d
+ # stock health config = /usr/lib/netdata/conf.d/health.d
 ```
 
 Navigate to the health configuration directory to see all the available files and open them for reading.
diff --git a/health/health.c b/health/health.c
index 528238d74..3c1e5693e 100644
--- a/health/health.c
+++ b/health/health.c
@@ -58,7 +58,7 @@ static inline void unlink_alarm_notify_in_progress(ALARM_ENTRY *ae)
 inline char *health_user_config_dir(void) {
     char buffer[FILENAME_MAX + 1];
     snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_user_config_dir);
-    return config_get(CONFIG_SECTION_HEALTH, "health configuration directory", buffer);
+    return config_get(CONFIG_SECTION_DIRECTORIES, "health config", buffer);
 }
 
 /**
@@ -71,7 +71,7 @@ inline char *health_user_config_dir(void) {
 inline char *health_stock_config_dir(void) {
     char buffer[FILENAME_MAX + 1];
     snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_stock_config_dir);
-    return config_get(CONFIG_SECTION_HEALTH, "stock health configuration directory", buffer);
+    return config_get(CONFIG_SECTION_DIRECTORIES, "stock health config", buffer);
 }
 
 /**
@@ -354,7 +354,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
 
     char *edit_command = ae->source ? health_edit_command_from_source(ae->source) : strdupz("UNKNOWN=0=UNKNOWN");
 
-    snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '" CALCULATED_NUMBER_FORMAT_ZERO "' '" CALCULATED_NUMBER_FORMAT_ZERO "' '%s' '%u' '%u' '%s' '%s' '%s' '%s' '%s' '%s' '%d' '%d' '%s' '%s' '%s' '%s'",
+    snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '" CALCULATED_NUMBER_FORMAT_ZERO "' '" CALCULATED_NUMBER_FORMAT_ZERO "' '%s' '%u' '%u' '%s' '%s' '%s' '%s' '%s' '%s' '%d' '%d' '%s' '%s' '%s' '%s' '%s'",
               exec,
               recipient,
               host->registry_hostname,
@@ -383,7 +383,8 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
               buffer_tostring(warn_alarms),
               buffer_tostring(crit_alarms),
               ae->classification?ae->classification:"Unknown",
-              edit_command
+              edit_command,
+              host != localhost ? host->machine_guid:""
     );
 
     ae->flags |= HEALTH_ENTRY_FLAG_EXEC_RUN;
@@ -453,9 +454,11 @@ static inline void health_alarm_log_process(RRDHOST *host) {
     // remember this for the next iteration
     host->health_last_processed_id = first_waiting;
 
+    bool cleanup_excess_log_entries = host->health_log.count > host->health_log.max;
+
     netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
 
-    if(host->health_log.count <= host->health_log.max)
+    if (!cleanup_excess_log_entries)
         return;
 
     // cleanup excess entries in the log
@@ -514,11 +517,6 @@ static inline int rrdcalc_isrunnable(RRDCALC *rc, time_t now, time_t *next_run)
         return 0;
     }
 
-    if(unlikely(!rrdset_flag_check(rc->rrdset, RRDSET_FLAG_ENABLED))) {
-        debug(D_HEALTH, "Health not running alarm '%s.%s'. The chart is not enabled", rc->chart?rc->chart:"NOCHART", rc->name);
-        return 0;
-    }
-
     if(unlikely(rrdset_flag_check(rc->rrdset, RRDSET_FLAG_ARCHIVED))) {
         debug(D_HEALTH, "Health not running alarm '%s.%s'. The chart has been marked as archived", rc->chart?rc->chart:"NOCHART", rc->name);
         return 0;
@@ -576,6 +574,8 @@ static inline int check_if_resumed_from_suspension(void) {
 }
 
 static void health_main_cleanup(void *ptr) {
+    worker_unregister();
+
     struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
     static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
 
@@ -658,35 +658,34 @@ static int update_disabled_silenced(RRDHOST *host, RRDCALC *rc) {
 // Create alarms for dimensions that have been added to charts
 // since the previous iteration.
 static void init_pending_foreach_alarms(RRDHOST *host) {
-    rrdhost_wrlock(host);
+    RRDSET *st;
+    RRDDIM *rd;
 
-    if (host->alarms_with_foreach || host->alarms_template_with_foreach) {
-        if (rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS)) {
-            RRDSET *st;
+    if (!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS))
+        return;
 
-            rrdset_foreach_read(st, host) {
-                rrdset_wrlock(st);
+    rrdhost_wrlock(host);
 
-                if (rrdset_flag_check(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS)) {
-                    RRDDIM *rd;
+    rrdset_foreach_write(st, host) {
+        if (!rrdset_flag_check(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS))
+            continue;
 
-                    rrddim_foreach_write(rd, st) {
-                        if (rrddim_flag_check(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM)) {
-                            rrdcalc_link_to_rrddim(rd, st, host);
-                            rrddim_flag_clear(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM);
-                        }
-                    }
+        rrdset_rdlock(st);
 
-                    rrdset_flag_clear(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS);
-                }
+        rrddim_foreach_read(rd, st) {
+            if (!rrddim_flag_check(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM))
+                continue;
 
-                rrdset_unlock(st);
-            }
+            rrdcalc_link_to_rrddim(rd, st, host);
 
-            rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS);
+            rrddim_flag_clear(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM);
         }
+
+        rrdset_flag_clear(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS);
+        rrdset_unlock(st);
     }
 
+    rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS);
     rrdhost_unlock(host);
 }
 
@@ -699,7 +698,31 @@ static void init_pending_foreach_alarms(RRDHOST *host) {
  *
  * @return It always returns NULL
  */
+
+#define WORKER_HEALTH_JOB_RRD_LOCK           0
+#define WORKER_HEALTH_JOB_HOST_LOCK          1
+#define WORKER_HEALTH_JOB_DB_QUERY           2
+#define WORKER_HEALTH_JOB_CALC_EVAL          3
+#define WORKER_HEALTH_JOB_WARNING_EVAL       4
+#define WORKER_HEALTH_JOB_CRITICAL_EVAL      5
+#define WORKER_HEALTH_JOB_ALARM_LOG_ENTRY    6
+#define WORKER_HEALTH_JOB_ALARM_LOG_PROCESS  7
+
+#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8
+#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8
+#endif
+
 void *health_main(void *ptr) {
+    worker_register("HEALTH");
+    worker_register_job_name(WORKER_HEALTH_JOB_RRD_LOCK, "rrd lock");
+    worker_register_job_name(WORKER_HEALTH_JOB_HOST_LOCK, "host lock");
+    worker_register_job_name(WORKER_HEALTH_JOB_DB_QUERY, "db lookup");
+    worker_register_job_name(WORKER_HEALTH_JOB_CALC_EVAL, "calc eval");
+    worker_register_job_name(WORKER_HEALTH_JOB_WARNING_EVAL, "warning eval");
+    worker_register_job_name(WORKER_HEALTH_JOB_CRITICAL_EVAL, "critical eval");
+    worker_register_job_name(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY, "alarm log entry");
+    worker_register_job_name(WORKER_HEALTH_JOB_ALARM_LOG_PROCESS, "alarm log process");
+
     netdata_thread_cleanup_push(health_main_cleanup, ptr);
 
     int min_run_every = (int)config_get_number(CONFIG_SECTION_HEALTH, "run at least every seconds", 10);
@@ -747,6 +770,7 @@ void *health_main(void *ptr) {
             marked_aclk_reload_loop = loop;
 #endif
 
+        worker_is_busy(WORKER_HEALTH_JOB_RRD_LOCK);
         rrd_rdlock();
 
         RRDHOST *host;
@@ -776,6 +800,7 @@ void *health_main(void *ptr) {
 
             init_pending_foreach_alarms(host);
 
+            worker_is_busy(WORKER_HEALTH_JOB_HOST_LOCK);
             rrdhost_rdlock(host);
 
             // the first loop is to lookup values from the db
@@ -790,6 +815,7 @@ void *health_main(void *ptr) {
                              rrdset_flag_check(rc->rrdset, RRDSET_FLAG_OBSOLETE) &&
                              now > (rc->rrdset->last_collected_time.tv_sec + 60))) {
                     if (!rrdcalc_isrepeating(rc)) {
+                        worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
                         time_t now = now_realtime_sec();
                         ALARM_ENTRY *ae = health_create_alarm_entry(
                             host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
@@ -804,11 +830,10 @@ void *health_main(void *ptr) {
                             rc->value = NAN;
 #if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
                             if (netdata_cloud_setting && likely(!aclk_alert_reloaded))
-                                sql_queue_removed_alerts_to_aclk(host);
+                                sql_queue_alarm_to_aclk(host, ae, 1);
 #endif
                         }
                     }
-                    continue;
                 }
 
                 if (unlikely(!rrdcalc_isrunnable(rc, now, &next_run))) {
@@ -825,6 +850,8 @@ void *health_main(void *ptr) {
                 // if there is database lookup, do it
 
                 if (unlikely(RRDCALC_HAS_DB_LOOKUP(rc))) {
+                    worker_is_busy(WORKER_HEALTH_JOB_DB_QUERY);
+
                     /* time_t old_db_timestamp = rc->db_before; */
                     int value_is_null = 0;
 
@@ -881,6 +908,8 @@ void *health_main(void *ptr) {
                 // if there is calculation expression, run it
 
                 if (unlikely(rc->calculation)) {
+                    worker_is_busy(WORKER_HEALTH_JOB_CALC_EVAL);
+
                     if (unlikely(!expression_evaluate(rc->calculation))) {
                         // calculation failed
                         rc->value = NAN;
@@ -929,6 +958,8 @@ void *health_main(void *ptr) {
                     // check the warning expression
 
                     if (likely(rc->warning)) {
+                        worker_is_busy(WORKER_HEALTH_JOB_WARNING_EVAL);
+
                         if (unlikely(!expression_evaluate(rc->warning))) {
                             // calculation failed
                             rc->rrdcalc_flags |= RRDCALC_FLAG_WARN_ERROR;
@@ -953,6 +984,8 @@ void *health_main(void *ptr) {
                     // check the critical expression
 
                     if (likely(rc->critical)) {
+                        worker_is_busy(WORKER_HEALTH_JOB_CRITICAL_EVAL);
+
                         if (unlikely(!expression_evaluate(rc->critical))) {
                             // calculation failed
                             rc->rrdcalc_flags |= RRDCALC_FLAG_CRIT_ERROR;
@@ -1010,6 +1043,7 @@ void *health_main(void *ptr) {
                     // check if the new status and the old differ
 
                     if (status != rc->status) {
+                        worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
                         int delay = 0;
 
                         // apply trigger hysteresis
@@ -1041,19 +1075,19 @@ void *health_main(void *ptr) {
                         rc->delay_last = delay;
                         rc->delay_up_to_timestamp = now + delay;
 
-                        if(likely(!rrdcalc_isrepeating(rc))) {
-                            ALARM_ENTRY *ae = health_create_alarm_entry(
-                                    host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
-                                    rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
-                                    rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info,
-                                    rc->delay_last,
-                                    (
-                                            ((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
-                                            ((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0)
-                                    )
-                            );
-                            health_alarm_log(host, ae);
-                        }
+
+                        ALARM_ENTRY *ae = health_create_alarm_entry(
+                                host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
+                                rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
+                                rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info,
+                                rc->delay_last,
+                                (
+                                        ((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
+                                        ((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0)
+                                )
+                        );
+                        health_alarm_log(host, ae);
+
                         rc->last_status_change = now;
                         rc->old_status = rc->status;
                         rc->status = status;
@@ -1091,7 +1125,9 @@ void *health_main(void *ptr) {
                     }
 
                     if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
+                        worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
                         rc->last_repeat = now;
+                        if (likely(rc->times_repeat < UINT32_MAX)) rc->times_repeat++;
                         ALARM_ENTRY *ae = health_create_alarm_entry(
                                 host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
                                 rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
@@ -1122,6 +1158,7 @@ void *health_main(void *ptr) {
 
             // execute notifications
             // and cleanup
+            worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_PROCESS);
             health_alarm_log_process(host);
 
             if (unlikely(netdata_exit)) {
@@ -1160,6 +1197,7 @@ void *health_main(void *ptr) {
 
         now = now_realtime_sec();
         if(now < next_run) {
+            worker_is_idle();
             debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration in %d secs", loop, (int) (next_run - now));
             sleep_usec(USEC_PER_SEC * (usec_t) (next_run - now));
             now = now_realtime_sec();
diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf
index 6e6e3b400..ff5f3ac17 100644
--- a/health/health.d/ram.conf
+++ b/health/health.d/ram.conf
@@ -1,18 +1,6 @@
 
 # you can disable an alarm notification by setting the 'to' line to: silent
 
-    alarm: used_ram_to_ignore
-       on: system.ram
-    class: Utilization
-     type: System
-component: Memory
-       os: linux freebsd
-    hosts: *
-     calc: ($zfs.arc_size.arcsz = nan)?(0):($zfs.arc_size.arcsz - $zfs.arc_size.min)
-    every: 10s
-     info: amount of memory reported as used, \
-           but it is actually capable for resizing itself based on the system needs (eg. ZFS ARC)
-
     alarm: ram_in_use
        on: system.ram
     class: Utilization
@@ -20,7 +8,7 @@ component: Memory
 component: Memory
        os: linux
     hosts: *
-     calc: ($used - $used_ram_to_ignore) * 100 / ($used + $cached + $free + $buffers)
+     calc: $used * 100 / ($used + $cached + $free + $buffers)
     units: %
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (80) : (90))
@@ -66,7 +54,7 @@ host labels: _is_k8s_node = false
 component: Memory
        os: freebsd
     hosts: *
-     calc: ($active + $wired + $laundry + $buffers - $used_ram_to_ignore) * 100 / ($active + $wired + $laundry + $buffers - $used_ram_to_ignore + $cache + $free + $inactive)
+     calc: ($active + $wired + $laundry + $buffers) * 100 / ($active + $wired + $laundry + $buffers + $cache + $free + $inactive)
     units: %
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (80) : (90))
@@ -82,7 +70,7 @@ component: Memory
 component: Memory
        os: freebsd
     hosts: *
-     calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $laundry + $buffers)
+     calc: ($free + $inactive + $cache) * 100 / ($free + $active + $inactive + $wired + $cache + $laundry + $buffers)
     units: %
     every: 10s
      warn: $this < (($status >= $WARNING)  ? (15) : (10))
diff --git a/health/health_config.c b/health/health_config.c
index e1f5f0e31..df6d7b609 100644
--- a/health/health_config.c
+++ b/health/health_config.c
@@ -109,7 +109,7 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL
                         && !strcmp(t->name, rt->name)
                         && !strcmp(t->family_match?t->family_match:"*", rt->family_match?rt->family_match:"*")
             )) {
-                error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
+                info("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
                 return 0;
             }
         }
@@ -127,7 +127,7 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL
                         && !strcmp(t->name, rt->name)
                         && !strcmp(t->family_match?t->family_match:"*", rt->family_match?rt->family_match:"*")
             )) {
-                error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
+                info("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
                 return 0;
             }
         }
@@ -433,6 +433,9 @@ static inline int health_parse_db_lookup(
         else if(!strcasecmp(key, "unaligned")) {
             *options |= RRDR_OPTION_NOT_ALIGNED;
         }
+        else if(!strcasecmp(key, "anomaly-bit")) {
+            *options |= RRDR_OPTION_ANOMALY_BIT;
+        }
         else if(!strcasecmp(key, "match-ids") || !strcasecmp(key, "match_ids")) {
             *options |= RRDR_OPTION_MATCH_IDS;
         }
diff --git a/health/health_json.c b/health/health_json.c
index be95100bc..d5285c11e 100644
--- a/health/health_json.c
+++ b/health/health_json.c
@@ -165,6 +165,10 @@ static inline void health_rrdcalc_values2json_nolock(RRDHOST *host, BUFFER *wb,
     buffer_rrd_value(wb, rc->value);
     buffer_strcat(wb, ",\n");
 
+    buffer_strcat(wb, "\t\t\t\"last_updated\":");
+    buffer_sprintf(wb, "%lu", (unsigned long)rc->last_updated);
+    buffer_strcat(wb, ",\n");
+
     buffer_sprintf(wb,
                    "\t\t\t\"status\": \"%s\"\n"
                    , rrdcalc_status2string(rc->status));
@@ -227,6 +231,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
                     "\t\t\t\"crit_repeat_every\": \"%u\",\n"
                     "\t\t\t\"value_string\": \"%s\",\n"
                     "\t\t\t\"last_repeat\": \"%lu\",\n"
+                    "\t\t\t\"times_repeat\": %lu,\n"
                    , rc->chart, rc->name
                    , (unsigned long)rc->id
                    , hash_id
@@ -259,6 +264,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
                    , rc->crit_repeat_every
                    , value_string
                    , (unsigned long)rc->last_repeat
+                   , (unsigned long)rc->times_repeat
     );
 
     if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {
@@ -338,6 +344,8 @@ void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* contexts, RRDCAL
             for(rc = host->alarms; rc ; rc = rc->next) {
                 if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
                     continue;
+                if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
+                    continue;
                 if(unlikely(rc->rrdset && rc->rrdset->hash_context == simple_hash(tok)
                             && !strcmp(rc->rrdset->context, tok)
                             && ((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status)))
@@ -349,7 +357,8 @@ void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* contexts, RRDCAL
         for(rc = host->alarms; rc ; rc = rc->next) {
             if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
                 continue;
-
+            if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
+                continue;
             if(unlikely((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status))
                 numberOfAlarms++;
         }
diff --git a/health/health_log.c b/health/health_log.c
index 6d63966c7..54f6dc9fc 100644
--- a/health/health_log.c
+++ b/health/health_log.c
@@ -162,7 +162,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
 
 #ifdef ENABLE_ACLK
     if (netdata_cloud_setting) {
-        sql_queue_alarm_to_aclk(host, ae);
+        sql_queue_alarm_to_aclk(host, ae, 0);
     }
 #endif
 }
@@ -560,10 +560,6 @@ inline void health_alarm_log(
 ) {
     debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
 
-    if(unlikely(alarm_entry_isrepeating(host, ae))) {
-        error("Repeating alarms cannot be added to host's alarm log entries. It seems somewhere in the logic, API is being misused. Alarm id: %u", ae->alarm_id);
-        return;
-    }
     // link it
     netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
     ae->next = host->health_log.alarms;
diff --git a/health/notifications/Makefile.am b/health/notifications/Makefile.am
index 46a6e472c..f026171a7 100644
--- a/health/notifications/Makefile.am
+++ b/health/notifications/Makefile.am
@@ -31,6 +31,7 @@ include awssns/Makefile.inc
 include discord/Makefile.inc
 include email/Makefile.inc
 include flock/Makefile.inc
+include gotify/Makefile.inc
 include hangouts/Makefile.inc
 include irc/Makefile.inc
 include kavenegar/Makefile.inc
diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in
index 287cabfef..38a69a0f3 100755
--- a/health/notifications/alarm-notify.sh.in
+++ b/health/notifications/alarm-notify.sh.in
@@ -38,6 +38,7 @@
 #  - Dynatrace Event by @illumine
 #  - Stackpulse Event by @thiagoftsm
 #  - Opsgenie by @thiaoftsm #9858
+#  - Gotify by @coffeegrind123
 
 # -----------------------------------------------------------------------------
 # testing notifications
@@ -243,6 +244,7 @@ else
   total_crit_alarms="${26}"  # List of alarms in critical state
   classification="${27}"     # The class field from .conf files
   edit_command_line="${28}"  # The command to edit the alarm, with the line number
+  child_machine_guid="${29}" # If populated, the notification is sent for a child
 fi
 
 # -----------------------------------------------------------------------------
@@ -400,6 +402,10 @@ SEND_DYNATRACE=
 # stackpulse configs
 STACKPULSE_WEBHOOK=
 
+# gotify configs
+GOTIFY_APP_URL=
+GOTIFY_APP_TOKEN=
+
 # opsgenie configs
 OPSGENIE_API_KEY=
 
@@ -589,6 +595,9 @@ filter_recipient_by_criticality() {
 # check matrix
 { [ -z "${MATRIX_HOMESERVER}" ] || [ -z "${MATRIX_ACCESSTOKEN}" ]; } && SEND_MATRIX="NO"
 
+# check gotify
+{ [ -z "${GOTIFY_APP_TOKEN}" ] || [ -z "${GOTIFY_APP_URL}" ]; } && SEND_GOTIFY="NO"
+
 # check stackpulse
 [ -z "${STACKPULSE_WEBHOOK}" ] && SEND_STACKPULSE="NO"
 
@@ -626,7 +635,8 @@ if [ "${SEND_PUSHOVER}" = "YES" ] ||
   [ "${SEND_MSTEAMS}" = "YES" ] ||
   [ "${SEND_DYNATRACE}" = "YES" ] ||
   [ "${SEND_STACKPULSE}" = "YES" ] ||
-  [ "${SEND_OPSGENIE}" = "YES" ]; then
+  [ "${SEND_OPSGENIE}" = "YES" ] ||
+  [ "${SEND_GOTIFY}" = "YES" ]; then
   # if we need curl, check for the curl command
   if [ -z "${curl}" ]; then
     curl="$(command -v curl 2>/dev/null)"
@@ -656,6 +666,7 @@ if [ "${SEND_PUSHOVER}" = "YES" ] ||
     SEND_DYNATRACE="NO"
     SEND_STACKPULSE="NO"
     SEND_OPSGENIE="NO"
+    SEND_GOTIFY="NO"
   fi
 fi
 
@@ -795,7 +806,8 @@ for method in "${SEND_EMAIL}" \
   "${SEND_MSTEAMS}" \
   "${SEND_DYNATRACE}" \
   "${SEND_STACKPULSE}" \
-  "${SEND_OPSGENIE}" ; do
+  "${SEND_OPSGENIE}" \
+  "${SEND_GOTIFY}" ; do
 
   if [ "${method}" == "YES" ]; then
     proceed=1
@@ -2277,6 +2289,45 @@ EOF
   return 0
 }
 
+# -----------------------------------------------------------------------------
+# Gotify sender
+# POSTs the alarm as JSON to "<GOTIFY_APP_URL>/message"; returns 0 when docurl reports HTTP 200, 1 otherwise.
+send_gotify() {
+  local payload httpcode priority
+  [ "${SEND_GOTIFY}" != "YES" ] && return 1
+
+  if [ -z "${GOTIFY_APP_TOKEN}" ] ; then
+    info "Can't send Gotify notification, because GOTIFY_APP_TOKEN is not defined"
+    return 1
+  fi
+
+  # priority for Gotify Android app
+  case "${status}" in
+    CRITICAL) priority=10 ;; # sound + vibration
+    WARNING) priority=4 ;; # sound
+    *) priority=1 ;; # notification only
+  esac
+
+  payload=$(cat <<EOF
+  {
+    "title" : "${status}, ${name} = ${value_string}, on ${host}",
+    "message" : "${date}: ${chart} ${value_string}",
+    "priority" : ${priority}
+  }
+EOF
+)
+  # drop one trailing "/" from GOTIFY_APP_URL so a configured "https://host/" does not turn into "https://host//message"
+  httpcode=$(docurl -X POST -H "Content-Type: application/json" -d "${payload}" "${GOTIFY_APP_URL%/}/message?token=${GOTIFY_APP_TOKEN}")
+  if [ "${httpcode}" = "200" ]; then
+    info "sent gotify notification for: ${host} ${chart}.${name} is ${status}"
+  else
+    error "failed to send gotify notification for: ${host} ${chart}.${name} is ${status}, with HTTP error code ${httpcode}."
+    return 1
+  fi
+
+  return 0
+}
+
 # -----------------------------------------------------------------------------
 # prepare the content of the notification
 
@@ -2311,7 +2362,11 @@ if [ ${GOTOCLOUD} -eq 0 ]; then
 else
     # Temporarily disable alarm redirection, as the cloud endpoint no longer exists. This functionality will be restored after discussion on #9487. For now, just lead to netdata.cloud
     # Re-allow alarm redirection, for alarms 2.0, new template
-  goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentId=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}"
+  if [ -z "${child_machine_guid}" ]; then
+      goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentId=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}"
+  else
+      goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentId=${NETDATA_REGISTRY_UNIQUE_ID}&childId=${child_machine_guid}&${redirect_params}"
+  fi
 fi
 
 # the severity of the alarm
@@ -3466,6 +3521,11 @@ SENT_STACKPULSE=$?
 send_opsgenie
 SENT_OPSGENIE=$?
 
+# -----------------------------------------------------------------------------
+# send messages to Gotify
+send_gotify
+SENT_GOTIFY=$?
+
 # -----------------------------------------------------------------------------
 # let netdata know
 for state in "${SENT_EMAIL}" \
@@ -3495,7 +3555,8 @@ for state in "${SENT_EMAIL}" \
   "${SENT_MSTEAMS}" \
   "${SENT_DYNATRACE}" \
   "${SENT_STACKPULSE}" \
-  "${SENT_OPSGENIE}"; do
+  "${SENT_OPSGENIE}" \
+  "${SENT_GOTIFY}"; do
   if [ "${state}" -eq 0 ]; then
     # we sent something
     exit 0
diff --git a/health/notifications/gotify/Makefile.inc b/health/notifications/gotify/Makefile.inc
new file mode 100644
index 000000000..782559125
--- /dev/null
+++ b/health/notifications/gotify/Makefile.inc
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# THIS IS NOT A COMPLETE Makefile
+# IT IS INCLUDED BY ITS PARENT'S Makefile.am
+# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
+
+# install these files
+dist_noinst_DATA += \
+    gotify/README.md \
+    gotify/Makefile.inc \
+    $(NULL)
diff --git a/health/notifications/gotify/README.md b/health/notifications/gotify/README.md
new file mode 100644
index 000000000..c253c845c
--- /dev/null
+++ b/health/notifications/gotify/README.md
@@ -0,0 +1,62 @@
+<!--
+title: "Send notifications to Gotify"
+description: "Send alerts to your Gotify instance when an alert gets triggered in Netdata."
+sidebar_label: "Gotify"
+custom_edit_url: https://github.com/netdata/netdata/edit/master/health/notifications/gotify/README.md
+-->
+
+# Send notifications to Gotify
+
+[Gotify](https://gotify.net/) is a self-hosted push notification service created for sending and receiving messages in real time.
+
+## Configuring Gotify
+
+### Prerequisites
+
+To use Gotify as your notification service, you need an application token. 
+You can generate a new token in the Gotify Web UI. 
+
+### Configuration
+
+To set up Gotify in Netdata: 
+
+1. Switch to your [config
+directory](/docs/configure/nodes.md) and edit the file `health_alarm_notify.conf` using the edit config script.
+ 
+   ```bash
+   ./edit-config health_alarm_notify.conf
+   ```
+
+2. Change the variable `GOTIFY_APP_TOKEN` to the application token you generated in the Gotify Web UI. Change
+`GOTIFY_APP_URL` to point to your Gotify instance.
+
+   ```conf
+   SEND_GOTIFY="YES"
+
+   # Application token
+   # Gotify instance url
+   GOTIFY_APP_TOKEN=XXXXXXXXXXXXXXX
+   GOTIFY_APP_URL=https://push.example.de
+   ```
+
+   Changes to `health_alarm_notify.conf` do not require a Netdata restart. 
+   
+3. Test your Gotify notifications configuration by running the following commands, replacing `ROLE` with your preferred role:
+
+   ```sh
+   # become user netdata
+   sudo su -s /bin/bash netdata
+
+   # send a test alarm
+   /usr/libexec/netdata/plugins.d/alarm-notify.sh test ROLE
+   ```
+
+   🟢 If everything works, you'll see alarms in Gotify:
+
+   ![Example alarm notifications in Gotify](https://user-images.githubusercontent.com/103264516/162509205-1e88e5d9-96b6-4f7f-9426-182776158128.png)
+
+   🔴 If sending the test notifications fails, check `/var/log/netdata/error.log` to find the relevant error message:
+
+   ```log 
+   2020-09-03 23:07:00: alarm-notify.sh: ERROR: failed to send gotify notification for: hades test.chart.test_alarm is CRITICAL, with HTTP error code 401.
+   ```
diff --git a/health/notifications/health_alarm_notify.conf b/health/notifications/health_alarm_notify.conf
index 873c7c353..b69c6d538 100755
--- a/health/notifications/health_alarm_notify.conf
+++ b/health/notifications/health_alarm_notify.conf
@@ -278,6 +278,16 @@ STACKPULSE_WEBHOOK=""
 
 DEFAULT_RECIPIENT_STACKPULSE=""
 
+#------------------------------------------------------------------------------
+# gotify global notification options
+SEND_GOTIFY="YES"
+
+# App token and url
+GOTIFY_APP_TOKEN=""
+GOTIFY_APP_URL=""
+
+DEFAULT_RECIPIENT_GOTIFY=""
+
 #------------------------------------------------------------------------------
 # opsgenie global notification options
 SEND_OPSGENIE="YES"
@@ -971,6 +981,8 @@ role_recipients_matrix[sysadmin]="${DEFAULT_RECIPIENT_MATRIX}"
 
 role_recipients_stackpulse[sysadmin]="${DEFAULT_RECIPIENT_STACKPULSE}"
 
+role_recipients_gotify[sysadmin]="${DEFAULT_RECIPIENT_GOTIFY}"
+
 # -----------------------------------------------------------------------------
 # DNS related alarms
 
@@ -1028,6 +1040,8 @@ role_recipients_matrix[domainadmin]="${DEFAULT_RECIPIENT_MATRIX}"
 
 role_recipients_stackpulse[domainadmin]="${DEFAULT_RECIPIENT_STACKPULSE}"
 
+role_recipients_gotify[domainadmin]="${DEFAULT_RECIPIENT_GOTIFY}"
+
 # -----------------------------------------------------------------------------
 # database servers alarms
 # mysql, redis, memcached, postgres, etc
@@ -1086,6 +1100,8 @@ role_recipients_matrix[dba]="${DEFAULT_RECIPIENT_MATRIX}"
 
 role_recipients_stackpulse[dba]="${DEFAULT_RECIPIENT_STACKPULSE}"
 
+role_recipients_gotify[dba]="${DEFAULT_RECIPIENT_GOTIFY}"
+
 # -----------------------------------------------------------------------------
 # web servers alarms
 # apache, nginx, lighttpd, etc
@@ -1144,6 +1160,8 @@ role_recipients_matrix[webmaster]="${DEFAULT_RECIPIENT_MATRIX}"
 
 role_recipients_stackpulse[webmaster]="${DEFAULT_RECIPIENT_STACKPULSE}"
 
+role_recipients_gotify[webmaster]="${DEFAULT_RECIPIENT_GOTIFY}"
+
 # -----------------------------------------------------------------------------
 # proxy servers alarms
 # squid, etc
@@ -1202,6 +1220,8 @@ role_recipients_matrix[proxyadmin]="${DEFAULT_RECIPIENT_MATRIX}"
 
 role_recipients_stackpulse[proxyadmin]="${DEFAULT_RECIPIENT_STACKPULSE}"
 
+role_recipients_gotify[proxyadmin]="${DEFAULT_RECIPIENT_GOTIFY}"
+
 # -----------------------------------------------------------------------------
 # peripheral devices
 # UPS, photovoltaics, etc
@@ -1257,3 +1277,5 @@ role_recipients_opsgenie[sitemgr]="${DEFAULT_RECIPIENT_OPSGENIE}"
 role_recipients_matrix[sitemgr]="${DEFAULT_RECIPIENT_MATRIX}"
 
 role_recipients_stackpulse[sitemgr]="${DEFAULT_RECIPIENT_STACKPULSE}"
+
+role_recipients_gotify[sitemgr]="${DEFAULT_RECIPIENT_GOTIFY}"
-- 
cgit v1.2.3