Adding upstream version 1.47.0. (upstream/1.47.0)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-26 08:15:20 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-26 08:15:20 +0000
commit: 87d772a7d708fec12f48cd8adc0dedff6e1025da (patch)
tree: 1fee344c64cc3f43074a01981e21126c8482a522 /src/health
parent: Adding upstream version 1.46.3. (diff)
download: netdata-87d772a7d708fec12f48cd8adc0dedff6e1025da.tar.xz
netdata-87d772a7d708fec12f48cd8adc0dedff6e1025da.zip
18 files changed, 159 insertions, 205 deletions
diff --git a/src/health/guides/httpcheck/httpcheck_web_service_bad_content.md b/src/health/guides/httpcheck/httpcheck_web_service_bad_content.md
index 433425e09..cbf42694d 100644
--- a/src/health/guides/httpcheck/httpcheck_web_service_bad_content.md
+++ b/src/health/guides/httpcheck/httpcheck_web_service_bad_content.md
@@ -27,4 +27,4 @@ sudo ./edit-config go.d/httpcheck.conf
 
 ### Useful resources
 
-1. [HTTP endpoint monitoring with Netdata](/src/go/collectors/go.d.plugin/modules/httpcheck/integrations/http_endpoints.md)
-\ No newline at end of file
+1. [HTTP endpoint monitoring with Netdata](/src/go/plugin/go.d/modules/httpcheck/integrations/http_endpoints.md)
+\ No newline at end of file
diff --git a/src/health/guides/httpcheck/httpcheck_web_service_bad_status.md b/src/health/guides/httpcheck/httpcheck_web_service_bad_status.md
index 60fabd751..8ac06a57e 100644
--- a/src/health/guides/httpcheck/httpcheck_web_service_bad_status.md
+++ b/src/health/guides/httpcheck/httpcheck_web_service_bad_status.md
@@ -18,4 +18,4 @@ root@netdata # curl -v <your_http_endpoint>:<port>/<path>
 
 ### Useful resources
 
-1. [HTTP endpoint monitoring with Netdata](/src/go/collectors/go.d.plugin/modules/httpcheck/integrations/http_endpoints.md)
+1. [HTTP endpoint monitoring with Netdata](/src/go/plugin/go.d/modules/httpcheck/integrations/http_endpoints.md)
diff --git a/src/health/guides/httpcheck/httpcheck_web_service_slow.md b/src/health/guides/httpcheck/httpcheck_web_service_slow.md
index 4f962e155..8f46a0f14 100644
--- a/src/health/guides/httpcheck/httpcheck_web_service_slow.md
+++ b/src/health/guides/httpcheck/httpcheck_web_service_slow.md
@@ -14,5 +14,5 @@ To troubleshoot this issue, check for:
 
 ### Useful resources
 
-1. [HTTP endpoint monitoring with Netdata](/src/go/collectors/go.d.plugin/modules/httpcheck/integrations/http_endpoints.md)
+1. [HTTP endpoint monitoring with Netdata](/src/go/plugin/go.d/modules/httpcheck/integrations/http_endpoints.md)
 
diff --git a/src/health/guides/httpcheck/httpcheck_web_service_unreachable.md b/src/health/guides/httpcheck/httpcheck_web_service_unreachable.md
index c77d33c0b..306ce1fee 100644
--- a/src/health/guides/httpcheck/httpcheck_web_service_unreachable.md
+++ b/src/health/guides/httpcheck/httpcheck_web_service_unreachable.md
@@ -30,4 +30,4 @@ To troubleshoot this error, check the following:
 
 ### Useful resources
 
-1. [HTTP endpoint monitoring with Netdata](/src/go/collectors/go.d.plugin/modules/httpcheck/integrations/http_endpoints.md)
-\ No newline at end of file
+1. [HTTP endpoint monitoring with Netdata](/src/go/plugin/go.d/modules/httpcheck/integrations/http_endpoints.md)
+\ No newline at end of file
diff --git a/src/health/health.d/beanstalkd.conf b/src/health/health.d/beanstalkd.conf
index 0d37f28e0..51b280491 100644
--- a/src/health/health.d/beanstalkd.conf
+++ b/src/health/health.d/beanstalkd.conf
@@ -11,31 +11,5 @@ component: Beanstalk
      warn: $this > 3
     delay: up 0 down 5m multiplier 1.2 max 1h
   summary: Beanstalk buried jobs
-     info: Number of buried jobs across all tubes. \
-           You need to manually kick them so they can be processed. \
-           Presence of buried jobs in a tube does not affect new jobs.
-       to: sysadmin
-      
-# get the number of buried jobs per queue
-
-#template: beanstalk_tube_buried_jobs
-#      on: beanstalk.jobs
-#    calc: $buried
-#   units: jobs
-#   every: 10s
-#    warn: $this > 0
-#    crit: $this > 10
-#   delay: up 0 down 5m multiplier 1.2 max 1h
-#    info: the number of jobs buried per tube
-#      to: sysadmin
-
-# get the current number of tubes
-
-#template: beanstalk_number_of_tubes
-#      on: beanstalk.current_tubes
-#    calc: $tubes
-#   every: 10s
-#    warn: $this < 5
-#   delay: up 0 down 5m multiplier 1.2 max 1h
-#    info: the current number of tubes on the server
-#      to: sysadmin
+     info: Number of buried jobs across all tubes.
+       to: silent
diff --git a/src/health/health.d/docker.conf b/src/health/health.d/docker.conf
index 668614d4d..edb63a08c 100644
--- a/src/health/health.d/docker.conf
+++ b/src/health/health.d/docker.conf
@@ -1,4 +1,6 @@
- template: docker_container_unhealthy
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+template: docker_container_unhealthy
        on: docker.container_health_status
     class: Errors
      type: Containers
@@ -10,3 +12,22 @@ component: Docker
   summary: Docker container ${label:container_name} health
      info: ${label:container_name} docker container health status is unhealthy
        to: sysadmin
+
+# This alert monitors the status of Docker containers and triggers if any container is exited (down).
+# To enable this alert for specific containers, you need to modify the "chart labels" filter.
+# This filter uses Netdata's simple pattern matching syntax.
+
+    template: docker_container_down
+          on: docker.container_state
+       class: Errors
+        type: Containers
+   component: Docker
+chart labels: container_name=!*
+       units: status
+       every: 10s
+      lookup: average -10s of exited
+        warn: $this > 0
+       delay: down 1m multiplier 1.5 max 2h
+     summary: Docker container ${label:container_name} down
+        info: Docker container ${label:container_name} is currently not running
+          to: sysadmin
diff --git a/src/health/health.d/gearman.conf b/src/health/health.d/gearman.conf
index 78e1165d1..2b19105b5 100644
--- a/src/health/health.d/gearman.conf
+++ b/src/health/health.d/gearman.conf
@@ -1,14 +1,15 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
 
- template: gearman_workers_queued
-       on: gearman.single_job
-    class: Latency
-     type: Computing
-component: Gearman
-   lookup: average -10m unaligned match-names of Pending
-    units: workers
-    every: 10s
-     warn: $this > 30000
-    delay: down 5m multiplier 1.5 max 1h
-  summary: Gearman queued jobs
-     info: Average number of queued jobs over the last 10 minutes
-       to: sysadmin
+# template: gearman_function_waiting_jobs
+#       on: gearman.function_queued_jobs_activity
+#    class: Latency
+#     type: Computing
+#component: Gearman
+#   lookup: average -10m unaligned of waiting
+#    units: jobs
+#    every: 10s
+#     warn: $this > 30000
+#    delay: down 5m multiplier 1.5 max 1h
+#  summary: Waiting jobs for ${label:function_name} function
+#     info: Average number of waiting jobs for ${label:function_name} function over the last 10 minutes
+#       to: sysadmin
diff --git a/src/health/health.d/ipfs.conf b/src/health/health.d/ipfs.conf
index 4dfee3c7f..bc3b0b1ea 100644
--- a/src/health/health.d/ipfs.conf
+++ b/src/health/health.d/ipfs.conf
@@ -1,10 +1,10 @@
 
  template: ipfs_datastore_usage
-       on: ipfs.repo_size
+       on: ipfs.datastore_space_utilization
     class: Utilization
      type: Data Sharing
 component: IPFS
-     calc: $size * 100 / $avail
+     calc: $used
     units: %
     every: 10s
      warn: $this > (($status >= $WARNING)  ? (80) : (90))
diff --git a/src/health/health.d/x509check.conf b/src/health/health.d/x509check.conf
index 1d40c8602..38187326f 100644
--- a/src/health/health.d/x509check.conf
+++ b/src/health/health.d/x509check.conf
@@ -12,15 +12,16 @@ component: x509 certificates
   summary: x509 certificate expiration for ${label:source}
      info: Time until x509 certificate expires for ${label:source}
        to: webmaster
-      
+
  template: x509check_revocation_status
        on: x509check.revocation_status
     class: Errors
      type: Certificates
 component: x509 certificates
      calc: $revoked
+    units: status
     every: 60s
-     crit: $this != nan AND $this != 0
+     crit: $this == 1
   summary: x509 certificate revocation status for ${label:source}
-     info: x509 certificate revocation status (0: revoked, 1: valid) for ${label:source}
+     info: x509 certificate revocation status for ${label:source}
        to: webmaster
diff --git a/src/health/health.d/zfs.conf b/src/health/health.d/zfs.conf
index 9c1f0018b..5c8065aa3 100644
--- a/src/health/health.d/zfs.conf
+++ b/src/health/health.d/zfs.conf
@@ -67,7 +67,7 @@ component: File system
      type: System
 component: File system
      calc: $degraded
-    units: boolean
+    units: status
     every: 10s
      warn: $this > 0
     delay: down 1m multiplier 1.5 max 1h
@@ -81,10 +81,25 @@ component: File system
      type: System
 component: File system
      calc: $faulted + $unavail
-    units: boolean
+    units: status
     every: 10s
      crit: $this > 0
     delay: down 1m multiplier 1.5 max 1h
   summary: Critical ZFS pool ${label:pool} state
      info: ZFS pool ${label:pool} state is faulted or unavail
        to: sysadmin
+
+
+ template: zfs_vdev_health_state
+       on: zfspool.vdev_health_state
+    class: Errors
+     type: System
+component: File system
+     calc: $degraded + $faulted
+    units: status
+    every: 10s
+     warn: $this > 0
+    delay: down 1m multiplier 1.5 max 1h
+  summary: ZFS vdev ${label:vdev} pool ${label:pool} state
+     info: ZFS vdev ${label:vdev} state is faulted or degraded
+       to: sysadmin
diff --git a/src/health/health_event_loop.c b/src/health/health_event_loop.c
index 756ffa165..b50812f2a 100644
--- a/src/health/health_event_loop.c
+++ b/src/health/health_event_loop.c
@@ -101,26 +101,10 @@ static void health_sleep(time_t next_run, unsigned int loop __maybe_unused) {
     }
 }
 
-static void sql_health_postpone_queue_removed(RRDHOST *host __maybe_unused) {
-#ifdef ENABLE_ACLK
-    if (netdata_cloud_enabled) {
-        struct aclk_sync_cfg_t *wc = host->aclk_config;
-        if (unlikely(!wc)) {
-            return;
-        }
-
-        if (wc->alert_queue_removed >= 1) {
-            wc->alert_queue_removed+=6;
-        }
-    }
-#endif
-}
-
 static void health_execute_delayed_initializations(RRDHOST *host) {
     health_plugin_init();
 
     RRDSET *st;
-    bool must_postpone = false;
 
     if (!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION)) return;
     rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION);
@@ -131,11 +115,8 @@ static void health_execute_delayed_initializations(RRDHOST *host) {
 
         worker_is_busy(WORKER_HEALTH_JOB_DELAYED_INIT_RRDSET);
         health_prototype_alerts_for_rrdset_incrementally(st);
-        must_postpone = true;
     }
     rrdset_foreach_done(st);
-    if (must_postpone)
-        sql_health_postpone_queue_removed(host);
 }
 
 static void health_initialize_rrdhost(RRDHOST *host) {
@@ -179,6 +160,50 @@ static inline int check_if_resumed_from_suspension(void) {
     return ret;
 }
 
+static void do_eval_expression(
+    RRDCALC *rc,
+    EVAL_EXPRESSION *expression,
+    const char *expression_type __maybe_unused,
+    size_t job_type,
+    RRDCALC_FLAGS error_type,
+    RRDCALC_STATUS *calc_status,
+    NETDATA_DOUBLE *result)
+{
+    if (!expression || (!calc_status && !result))
+        return;
+
+    worker_is_busy(job_type);
+
+    if (unlikely(!expression_evaluate(expression))) {
+        // calculation failed
+        rc->run_flags |= error_type;
+        if (result)
+            *result = NAN;
+
+        netdata_log_debug(D_HEALTH,
+                          "Health on host '%s', alarm '%s.%s': %s expression failed with error: %s",
+                          rrdhost_hostname(rc->rrdset->rrdhost), rrdcalc_chart_name(rc), rrdcalc_name(rc), expression_type,
+                          expression_error_msg(expression)
+        );
+        return;
+    }
+    rc->run_flags &= ~error_type;
+    netdata_log_debug(D_HEALTH,
+                      "Health on host '%s', alarm '%s.%s': %s expression gave value "
+                      NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
+                      rrdhost_hostname(rc->rrdset->rrdhost),
+                      rrdcalc_chart_name(rc),
+                      rrdcalc_name(rc),
+                      expression_type,
+                      expression_result(expression),
+                      expression_error_msg(expression),
+                      rrdcalc_source(rc));
+    if (calc_status)
+        *calc_status = rrdcalc_value2status(expression_result(expression));
+    else
+        *result = expression_result(expression);
+}
+
 static void health_event_loop(void) {
     bool health_running_logged = false;
 
@@ -270,6 +295,13 @@ static void health_event_loop(void) {
             }
 
             worker_is_busy(WORKER_HEALTH_JOB_HOST_LOCK);
+#ifdef ENABLE_ACLK
+            if (netdata_cloud_enabled) {
+                struct aclk_sync_cfg_t *wc = host->aclk_config;
+                if (wc && wc->send_snapshot == 2)
+                    continue;
+            }
+#endif
 
             // the first loop is to lookup values from the db
             foreach_rrdcalc_in_rrdhost_read(host, rc) {
@@ -314,11 +346,6 @@ static void health_event_loop(void) {
                             rc->last_status_change_value = rc->value;
                             rc->last_updated = now_tmp;
                             rc->value = NAN;
-
-#ifdef ENABLE_ACLK
-                            if (netdata_cloud_enabled)
-                                sql_queue_alarm_to_aclk(host, ae, true);
-#endif
                         }
                     }
                 }
@@ -404,36 +431,7 @@ static void health_event_loop(void) {
                 // ------------------------------------------------------------
                 // if there is calculation expression, run it
 
-                if (unlikely(rc->config.calculation)) {
-                    worker_is_busy(WORKER_HEALTH_JOB_CALC_EVAL);
-
-                    if (unlikely(!expression_evaluate(rc->config.calculation))) {
-                        // calculation failed
-                        rc->value = NAN;
-                        rc->run_flags |= RRDCALC_FLAG_CALC_ERROR;
-
-                        netdata_log_debug(
-                            D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' failed: %s",
-                            rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
-                            expression_parsed_as(rc->config.calculation), expression_error_msg(rc->config.calculation)
-                        );
-                    }
-                    else {
-                        rc->run_flags &= ~RRDCALC_FLAG_CALC_ERROR;
-
-                        netdata_log_debug(
-                            D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' gave value "
-                            NETDATA_DOUBLE_FORMAT": %s (source: %s)",
-                            rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
-                            expression_parsed_as(rc->config.calculation),
-                            expression_result(rc->config.calculation),
-                            expression_error_msg(rc->config.calculation),
-                            rrdcalc_source(rc)
-                        );
-
-                        rc->value = expression_result(rc->config.calculation);
-                    }
-                }
+                do_eval_expression(rc, rc->config.calculation, "calculation", WORKER_HEALTH_JOB_CALC_EVAL, RRDCALC_FLAG_CALC_ERROR, NULL, &rc->value);
             }
             foreach_rrdcalc_in_rrdhost_done(rc);
 
@@ -453,65 +451,8 @@ static void health_event_loop(void) {
                     RRDCALC_STATUS warning_status = RRDCALC_STATUS_UNDEFINED;
                     RRDCALC_STATUS critical_status = RRDCALC_STATUS_UNDEFINED;
 
-                    // --------------------------------------------------------
-                    // check the warning expression
-
-                    if (likely(rc->config.warning)) {
-                        worker_is_busy(WORKER_HEALTH_JOB_WARNING_EVAL);
-
-                        if (unlikely(!expression_evaluate(rc->config.warning))) {
-                            // calculation failed
-                            rc->run_flags |= RRDCALC_FLAG_WARN_ERROR;
-
-                            netdata_log_debug(D_HEALTH,
-                                              "Health on host '%s', alarm '%s.%s': warning expression failed with error: %s",
-                                              rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
-                                              expression_error_msg(rc->config.warning)
-                            );
-                        } else {
-                            rc->run_flags &= ~RRDCALC_FLAG_WARN_ERROR;
-                            netdata_log_debug(D_HEALTH,
-                                              "Health on host '%s', alarm '%s.%s': warning expression gave value "
-                                              NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
-                                              rrdhost_hostname(host),
-                                              rrdcalc_chart_name(rc),
-                                              rrdcalc_name(rc),
-                                              expression_result(rc->config.warning),
-                                              expression_error_msg(rc->config.warning),
-                                              rrdcalc_source(rc)
-                            );
-                            warning_status = rrdcalc_value2status(expression_result(rc->config.warning));
-                        }
-                    }
-
-                    // --------------------------------------------------------
-                    // check the critical expression
-
-                    if (likely(rc->config.critical)) {
-                        worker_is_busy(WORKER_HEALTH_JOB_CRITICAL_EVAL);
-
-                        if (unlikely(!expression_evaluate(rc->config.critical))) {
-                            // calculation failed
-                            rc->run_flags |= RRDCALC_FLAG_CRIT_ERROR;
-
-                            netdata_log_debug(D_HEALTH,
-                                              "Health on host '%s', alarm '%s.%s': critical expression failed with error: %s",
-                                              rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
-                                              expression_error_msg(rc->config.critical)
-                            );
-                        } else {
-                            rc->run_flags &= ~RRDCALC_FLAG_CRIT_ERROR;
-                            netdata_log_debug(D_HEALTH,
-                                              "Health on host '%s', alarm '%s.%s': critical expression gave value "
-                                              NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
-                                              rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
-                                              expression_result(rc->config.critical),
-                                              expression_error_msg(rc->config.critical),
-                                              rrdcalc_source(rc)
-                            );
-                            critical_status = rrdcalc_value2status(expression_result(rc->config.critical));
-                        }
-                    }
+                    do_eval_expression(rc, rc->config.warning, "warning", WORKER_HEALTH_JOB_WARNING_EVAL, RRDCALC_FLAG_WARN_ERROR, &warning_status, NULL);
+                    do_eval_expression(rc, rc->config.critical, "critical", WORKER_HEALTH_JOB_CRITICAL_EVAL, RRDCALC_FLAG_CRIT_ERROR, &critical_status, NULL);
 
                     // --------------------------------------------------------
                     // decide the final alarm status
@@ -706,26 +647,18 @@ static void health_event_loop(void) {
                 wait_for_all_notifications_to_finish_before_allowing_health_to_be_cleaned_up();
                 break;
             }
+        }
 #ifdef ENABLE_ACLK
-            if (netdata_cloud_enabled) {
-                struct aclk_sync_cfg_t *wc = host->aclk_config;
-                if (unlikely(!wc))
-                    continue;
-
-                if (wc->alert_queue_removed == 1) {
-                    sql_queue_removed_alerts_to_aclk(host);
-                } else if (wc->alert_queue_removed > 1) {
-                    wc->alert_queue_removed--;
-                }
-
-                if (wc->alert_checkpoint_req == 1) {
-                    aclk_push_alarm_checkpoint(host);
-                } else if (wc->alert_checkpoint_req > 1) {
-                    wc->alert_checkpoint_req--;
-                }
-            }
-#endif
+        struct aclk_sync_cfg_t *wc = host->aclk_config;
+        if (wc && wc->send_snapshot == 1) {
+            wc->send_snapshot = 2;
+            rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
         }
+        else
+            if (process_alert_pending_queue(host))
+                rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
+#endif
+
         dfe_done(host);
 
         // wait for all notifications to finish before allowing health to be cleaned up
diff --git a/src/health/health_log.c b/src/health/health_log.c
index b04f8f248..143b741bf 100644
--- a/src/health/health_log.c
+++ b/src/health/health_log.c
@@ -4,7 +4,8 @@
 
 // ----------------------------------------------------------------------------
 
-inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
+inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae)
+{
     sql_health_alarm_log_save(host, ae);
 }
 
@@ -43,7 +44,7 @@ void health_log_alert_transition_with_trace(RRDHOST *host, ALARM_ENTRY *ae, int
     };
     ND_LOG_STACK_PUSH(lgs);
 
-    errno = 0;
+    errno_clear();
 
     ND_LOG_FIELD_PRIORITY priority = NDLP_INFO;
 
diff --git a/src/health/health_notifications.c b/src/health/health_notifications.c
index 79426f48c..85dd2d0d8 100644
--- a/src/health/health_notifications.c
+++ b/src/health/health_notifications.c
@@ -23,7 +23,13 @@ void health_alarm_wait_for_execution(ALARM_ENTRY *ae) {
     if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS))
         return;
 
-    spawn_wait_cmd(ae->exec_spawn_serial, &ae->exec_code, &ae->exec_run_timestamp);
+    if(!ae->popen_instance) {
+        // nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of an alert that has not spawned a notification");
+        return;
+    }
+
+    ae->exec_code = spawn_popen_wait(ae->popen_instance);
+
     netdata_log_debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code);
     ae->flags &= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
 
@@ -75,7 +81,6 @@ static inline void enqueue_alarm_notify_in_progress(ALARM_ENTRY *ae)
         alarm_notifications_in_progress.head = ae;
     }
     alarm_notifications_in_progress.tail = ae;
-
 }
 
 static bool prepare_command(BUFFER *wb,
@@ -462,7 +467,7 @@ void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_rais
 
         netdata_log_debug(D_HEALTH, "executing command '%s'", command_to_run);
         ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
-        ae->exec_spawn_serial = spawn_enq_cmd(command_to_run);
+        ae->popen_instance = spawn_popen_run(command_to_run);
         enqueue_alarm_notify_in_progress(ae);
         health_alarm_log_save(host, ae);
     } else {
diff --git a/src/health/health_prototypes.c b/src/health/health_prototypes.c
index c43096115..a8681a453 100644
--- a/src/health/health_prototypes.c
+++ b/src/health/health_prototypes.c
@@ -687,15 +687,6 @@ void health_apply_prototypes_to_host(RRDHOST *host) {
         health_prototype_reset_alerts_for_rrdset(st);
     }
     rrdset_foreach_done(st);
-
-#ifdef ENABLE_ACLK
-    if (netdata_cloud_enabled) {
-        struct aclk_sync_cfg_t *wc = host->aclk_config;
-        if (likely(wc)) {
-            wc->alert_queue_removed = SEND_REMOVED_AFTER_HEALTH_LOOPS;
-        }
-    }
-#endif
 }
 
 void health_apply_prototypes_to_all_hosts(void) {
diff --git a/src/health/notifications/alarm-notify.sh.in b/src/health/notifications/alarm-notify.sh.in
index 9a5780de1..c7c44cb11 100755
--- a/src/health/notifications/alarm-notify.sh.in
+++ b/src/health/notifications/alarm-notify.sh.in
@@ -641,8 +641,12 @@ filter_recipient_by_criticality() {
       ;;
 
     CLEAR)
-      # remove tracking file
-      [ -f "${tracking_file}" ] && rm "${tracking_file}"
+      if [ -f "${tracking_file}" ]; then
+        tracking_file_existed="yes"
+        rm "${tracking_file}"
+      else
+        tracking_file_existed=""
+      fi
 
       # "noclear" modifier set, block notification
       if [ "${mod_noclear}" == "1" ]; then
@@ -657,7 +661,7 @@ filter_recipient_by_criticality() {
       fi
 
       # "critical" modifier set, send notification if tracking file exists
-      if [ "${mod_critical}" == "1" ] && [ -f "${tracking_file}" ]; then
+      if [ "${mod_critical}" == "1" ] && [ -n "${tracking_file_existed}" ]; then
         debug "SEVERITY FILTERING for ${recipient_arg} VIA ${method}: ALLOW: recipient has been notified for this alarm in the past (no status change will be sent from now)"
         return 0
       fi
@@ -1515,13 +1519,20 @@ send_telegram() {
       notify_telegram=1
       notify_retries=${TELEGRAM_RETRIES_ON_LIMIT:-0}
 
+      IFS=":" read -r chatID threadID <<< "${chatid}"
+
+      # https://core.telegram.org/bots/api#sendmessage
+      api_url="https://api.telegram.org/bot${bottoken}/sendMessage?chat_id=${chatID}"
+      if [ -n "${threadID}" ]; then
+        api_url+="&message_thread_id=${threadID}"
+      fi
+
       while [ ${notify_telegram} -eq 1 ]; do
-        # https://core.telegram.org/bots/api#sendmessage
         httpcode=$(docurl ${disableNotification} \
           --data-urlencode "parse_mode=HTML" \
           --data-urlencode "disable_web_page_preview=true" \
           --data-urlencode "text=${emoji} ${message}" \
-          "https://api.telegram.org/bot${bottoken}/sendMessage?chat_id=${chatid}")
+          "${api_url}")
 
         notify_telegram=0
 
diff --git a/src/health/notifications/health_alarm_notify.conf b/src/health/notifications/health_alarm_notify.conf
index f3b67c9de..9dcec27ae 100755
--- a/src/health/notifications/health_alarm_notify.conf
+++ b/src/health/notifications/health_alarm_notify.conf
@@ -413,6 +413,7 @@ DEFAULT_RECIPIENT_KAVENEGAR=""
 
 # multiple recipients can be given like this:
 #                  "CHAT_ID_1 CHAT_ID_2 ..."
+# To send alerts to a specific topic within a chat, use `CHAT_ID:TOPIC_ID`.
 
 # enable/disable sending telegram messages
 SEND_TELEGRAM="YES"
diff --git a/src/health/notifications/telegram/README.md b/src/health/notifications/telegram/README.md
index e263d0bb5..90cca4214 100644
--- a/src/health/notifications/telegram/README.md
+++ b/src/health/notifications/telegram/README.md
@@ -55,7 +55,7 @@ The following options can be defined for this notification
 |:----|:-----------|:-------|:--------:|
 | SEND_TELEGRAM | Set `SEND_TELEGRAM` to YES | YES | yes |
 | TELEGRAM_BOT_TOKEN | set `TELEGRAM_BOT_TOKEN` to your bot token. |  | yes |
-| DEFAULT_RECIPIENT_TELEGRAM | Set `DEFAULT_RECIPIENT_TELEGRAM` to the chat ID you want the alert notifications to be sent to. You can define multiple chat IDs like this: -49999333322 -1009999222255. |  | yes |
+| DEFAULT_RECIPIENT_TELEGRAM | Set the `DEFAULT_RECIPIENT_TELEGRAM` variable in your config file to your Telegram chat ID (find it with @myidbot). Separate multiple chat IDs with spaces. To send alerts to a specific topic within a chat, use `chatID:topicID`. |  | yes |
 
 ##### DEFAULT_RECIPIENT_TELEGRAM
 
diff --git a/src/health/notifications/telegram/metadata.yaml b/src/health/notifications/telegram/metadata.yaml
index cc6d8c91e..daa45da72 100644
--- a/src/health/notifications/telegram/metadata.yaml
+++ b/src/health/notifications/telegram/metadata.yaml
@@ -40,7 +40,7 @@
             required: true
           - name: 'DEFAULT_RECIPIENT_TELEGRAM'
             default_value: ''
-            description: "Set `DEFAULT_RECIPIENT_TELEGRAM` to the chat ID you want the alert notifications to be sent to. You can define multiple chat IDs like this: -49999333322 -1009999222255."
+            description: "Set the `DEFAULT_RECIPIENT_TELEGRAM` variable in your config file to your Telegram chat ID (find it with @myidbot). Separate multiple chat IDs with spaces. To send alerts to a specific topic within a chat, use `chatID:topicID`."
             required: true
             detailed_description: |
               All roles will default to this variable if left unconfigured.
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-08-26 08:15:20 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-08-26 08:15:20 +0000
commit	87d772a7d708fec12f48cd8adc0dedff6e1025da (patch)
tree	1fee344c64cc3f43074a01981e21126c8482a522 /src/health
parent	Adding upstream version 1.46.3. (diff)
download	netdata-87d772a7d708fec12f48cd8adc0dedff6e1025da.tar.xz netdata-87d772a7d708fec12f48cd8adc0dedff6e1025da.zip