diff options
Diffstat (limited to 'src/health')
71 files changed, 579 insertions, 386 deletions
diff --git a/src/health/README.md b/src/health/README.md index 5c479af5f..081a8b8f8 100644 --- a/src/health/README.md +++ b/src/health/README.md @@ -2,9 +2,11 @@ Netdata offers two ways to receive alert notifications on external integrations. These methods work independently, which means you can enable both at the same time to send alert notifications to any number of endpoints. -Both methods use a node's health alerts to generate the content of a notification. +Both methods use a node's health alerts to generate the content of a notification. -Read our documentation on [configuring alerts](/src/health/REFERENCE.md) to change the preconfigured thresholds or to create tailored alerts for your infrastructure. +Read our documentation on [configuring alerts](/src/health/REFERENCE.md) to change the pre-configured thresholds or to create tailored alerts for your infrastructure. + +<!-- virtual links below, should not lead anywhere outside of the rendered Learn doc --> - Netdata Cloud provides centralized alert notifications, utilizing the health status data already sent to Netdata Cloud from connected nodes to send alerts to configured integrations. [Supported integrations](/docs/alerts-&-notifications/notifications/centralized-cloud-notifications) include Amazon SNS, Discord, Slack, Splunk, and others. diff --git a/src/health/REFERENCE.md b/src/health/REFERENCE.md index 8b0a9177e..b46012d04 100644 --- a/src/health/REFERENCE.md +++ b/src/health/REFERENCE.md @@ -640,7 +640,7 @@ See our [simple patterns docs](/src/libnetdata/simple_pattern/README.md) for mor Similar to host labels, the `chart labels` key can be used to filter if an alert will load or not for a specific chart, based on whether these chart labels match or not. 
-The list of chart labels present on each chart can be obtained from http://localhost:19999/api/v1/charts?all +The list of chart labels present on each chart can be obtained from <http://localhost:19999/api/v1/charts?all> For example, each `disk_space` chart defines a chart label called `mount_point` with each instance of this chart having a value there of which mount point it monitors. @@ -808,14 +808,14 @@ You can find all the variables that can be used for a given chart, using Agent dashboard. For example, [variables for the `system.cpu` chart of the registry](https://registry.my-netdata.io/api/v1/alarm_variables?chart=system.cpu). -> If you don't know how to find the CHART_NAME, you can read about it [here](/src/web/README.md#charts). +<!-- > If you don't know how to find the CHART_NAME, you can read about it [here](/src/web/README.md#charts). --> Netdata supports 3 internal indexes for variables that will be used in health monitoring. <details><summary>The variables below can be used in both chart alerts and context templates.</summary> Although the `alarm_variables` link shows you variables for a particular chart, the same variables can also be used in -templates for charts belonging to a given [context](/src/web/README.md#contexts). The reason is that all charts of a given +templates for charts belonging to a given context. The reason is that all charts of a given context are essentially identical, with the only difference being the family that identifies a particular hardware or software instance. </details> @@ -1064,7 +1064,7 @@ template: ml_5min_cpu_chart info: rolling 5min anomaly rate for system.cpu chart ``` -The `lookup` line will calculate the average anomaly rate across all `system.cpu` dimensions over the last 5 minues. In this case +The `lookup` line will calculate the average anomaly rate across all `system.cpu` dimensions over the last 5 minutes. In this case Netdata will create one alert for the chart. 
### Example 7 - [Anomaly rate](/src/ml/README.md#anomaly-rate) based node level alert @@ -1083,7 +1083,7 @@ template: ml_5min_node info: rolling 5min anomaly rate for all ML enabled dims ``` -The `lookup` line will use the `anomaly_rate` dimension of the `anomaly_detection.anomaly_rate` ML chart to calculate the average [node level anomaly rate](/src/ml/README.md#node-anomaly-rate) over the last 5 minutes. +The `lookup` line will use the `anomaly_rate` dimension of the `anomaly_detection.anomaly_rate` ML chart to calculate the average [node level anomaly rate](/src/ml/README.md#anomaly-rate) over the last 5 minutes. ## Troubleshooting diff --git a/src/health/health.c b/src/health/health.c index 7039a193c..78559d7f4 100644 --- a/src/health/health.c +++ b/src/health/health.c @@ -14,7 +14,7 @@ struct health_plugin_globals health_globals = { .use_summary_for_notifications = true, .health_log_entries_max = HEALTH_LOG_ENTRIES_DEFAULT, - .health_log_history = HEALTH_LOG_HISTORY_DEFAULT, + .health_log_retention_s = HEALTH_LOG_RETENTION_DEFAULT, .default_warn_repeat_every = 0, .default_crit_repeat_every = 0, @@ -55,17 +55,17 @@ static void health_load_config_defaults(void) { health_globals.config.use_summary_for_notifications); health_globals.config.default_warn_repeat_every = - config_get_duration(CONFIG_SECTION_HEALTH, "default repeat warning", "never"); + config_get_duration_seconds(CONFIG_SECTION_HEALTH, "default repeat warning", 0); health_globals.config.default_crit_repeat_every = - config_get_duration(CONFIG_SECTION_HEALTH, "default repeat critical", "never"); + config_get_duration_seconds(CONFIG_SECTION_HEALTH, "default repeat critical", 0); health_globals.config.health_log_entries_max = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", health_globals.config.health_log_entries_max); - health_globals.config.health_log_history = - config_get_number(CONFIG_SECTION_HEALTH, "health log history", HEALTH_LOG_DEFAULT_HISTORY); + 
health_globals.config.health_log_retention_s = + config_get_duration_seconds(CONFIG_SECTION_HEALTH, "health log retention", HEALTH_LOG_RETENTION_DEFAULT); snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_primary_plugins_dir); health_globals.config.default_exec = @@ -76,14 +76,13 @@ static void health_load_config_defaults(void) { NULL, SIMPLE_PATTERN_EXACT, true); health_globals.config.run_at_least_every_seconds = - (int)config_get_number(CONFIG_SECTION_HEALTH, - "run at least every seconds", - health_globals.config.run_at_least_every_seconds); + (int)config_get_duration_seconds(CONFIG_SECTION_HEALTH, "run at least every", + health_globals.config.run_at_least_every_seconds); health_globals.config.postpone_alarms_during_hibernation_for_seconds = - config_get_number(CONFIG_SECTION_HEALTH, - "postpone alarms during hibernation for seconds", - health_globals.config.postpone_alarms_during_hibernation_for_seconds); + config_get_duration_seconds(CONFIG_SECTION_HEALTH, + "postpone alarms during hibernation for", + health_globals.config.postpone_alarms_during_hibernation_for_seconds); health_globals.config.default_recipient = string_strdupz("root"); @@ -115,27 +114,27 @@ static void health_load_config_defaults(void) { (long)health_globals.config.health_log_entries_max); } - if (health_globals.config.health_log_history < HEALTH_LOG_MINIMUM_HISTORY) { + if (health_globals.config.health_log_retention_s < HEALTH_LOG_MINIMUM_HISTORY) { nd_log(NDLS_DAEMON, NDLP_WARNING, - "Health configuration has invalid health log history %u. Using minimum %d", - health_globals.config.health_log_history, HEALTH_LOG_MINIMUM_HISTORY); + "Health configuration has invalid health log retention %u. 
Using minimum %d", + health_globals.config.health_log_retention_s, HEALTH_LOG_MINIMUM_HISTORY); - health_globals.config.health_log_history = HEALTH_LOG_MINIMUM_HISTORY; - config_set_number(CONFIG_SECTION_HEALTH, "health log history", health_globals.config.health_log_history); + health_globals.config.health_log_retention_s = HEALTH_LOG_MINIMUM_HISTORY; + config_set_duration_seconds(CONFIG_SECTION_HEALTH, "health log retention", health_globals.config.health_log_retention_s); } nd_log(NDLS_DAEMON, NDLP_DEBUG, "Health log history is set to %u seconds (%u days)", - health_globals.config.health_log_history, health_globals.config.health_log_history / 86400); + health_globals.config.health_log_retention_s, health_globals.config.health_log_retention_s / 86400); } -inline char *health_user_config_dir(void) { +inline const char *health_user_config_dir(void) { char buffer[FILENAME_MAX + 1]; snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_user_config_dir); return config_get(CONFIG_SECTION_DIRECTORIES, "health config", buffer); } -inline char *health_stock_config_dir(void) { +inline const char *health_stock_config_dir(void) { char buffer[FILENAME_MAX + 1]; snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_stock_config_dir); return config_get(CONFIG_SECTION_DIRECTORIES, "stock health config", buffer); diff --git a/src/health/health.d/anomalies.conf b/src/health/health.d/anomalies.conf deleted file mode 100644 index 80d63bb8d..000000000 --- a/src/health/health.d/anomalies.conf +++ /dev/null @@ -1,25 +0,0 @@ -## raise a warning alarm if an anomaly probability is consistently above 50% - -## "foreach" was removed, these alarms don't work anymore - -# template: anomalies_anomaly_probabilities -# on: anomalies.probability -# class: Errors -# type: Netdata -#component: ML -# lookup: average -2m foreach * -# every: 1m -# warn: $this > 50 -# info: average anomaly probability over the last 2 minutes - -# raise a warning alarm if an anomaly flag is 
consistently firing - -# template: anomalies_anomaly_flags -# on: anomalies.anomaly -# class: Errors -# type: Netdata -#component: ML -# lookup: sum -2m foreach * -# every: 1m -# warn: $this > 10 -# info: number of anomalies in the last 2 minutes diff --git a/src/health/health.d/apcupsd.conf b/src/health/health.d/apcupsd.conf index 5fd7aa112..58d3b214b 100644 --- a/src/health/health.d/apcupsd.conf +++ b/src/health/health.d/apcupsd.conf @@ -1,11 +1,11 @@ # you can disable an alarm notification by setting the 'to' line to: silent - template: apcupsd_10min_ups_load - on: apcupsd.load + template: apcupsd_ups_load_capacity + on: apcupsd.ups_load_capacity_utilization class: Utilization type: Power Supply -component: UPS - lookup: average -10m unaligned of percentage +component: UPS device + lookup: average -10m unaligned of load units: % every: 1m warn: $this > (($status >= $WARNING) ? (70) : (80)) @@ -14,13 +14,11 @@ component: UPS info: APC UPS average load over the last 10 minutes to: sitemgr -# Discussion in https://github.com/netdata/netdata/pull/3928: -# Fire the alarm as soon as it's going on battery (99% charge) and clear only when full. - template: apcupsd_ups_charge - on: apcupsd.charge + template: apcupsd_ups_battery_charge + on: apcupsd.ups_battery_charge class: Errors type: Power Supply -component: UPS +component: UPS device lookup: average -60s unaligned of charge units: % every: 60s @@ -32,7 +30,7 @@ component: UPS to: sitemgr template: apcupsd_last_collected_secs - on: apcupsd.load + on: apcupsd.ups_status class: Latency type: Power Supply component: UPS device @@ -47,21 +45,21 @@ component: UPS device #Send out a warning when SELFTEST code is BT or NG. 
Code descriptions can be found at: #http://www.apcupsd.org/manual/#:~:text=or%20N/A.-,SELFTEST,-The%20results%20of - template: apcupsd_selftest_warning - on: apcupsd.selftest + template: apcupsd_ups_selftest_warning + on: apcupsd.ups_selftest lookup: max -1s unaligned match-names of BT,NG units: status every: 10s warn: $this == 1 delay: up 0 down 15m multiplier 1.5 max 1h - info: APC UPS self-test failed due to insufficient battery capacity or due to overload. + info: APC UPS self-test failed due to insufficient battery capacity or due to overload to: sitemgr #Send out a warning when STATUS code is ONBATT,OVERLOAD,LOWBATT,REPLACEBATT,NOBATT,COMMLOST #https://man.archlinux.org/man/apcaccess.8.en#:~:text=apcupsd%20was%20started-,STATUS,-%3A%20UPS%20status.%20One - template: apcupsd_status_onbatt - on: apcupsd.status + template: apcupsd_ups_status_onbatt + on: apcupsd.ups_status lookup: max -1s unaligned match-names of ONBATT units: status every: 10s @@ -70,8 +68,8 @@ component: UPS device info: APC UPS has switched to battery power because the input power has failed to: sitemgr - template: apcupsd_status_overload - on: apcupsd.status + template: apcupsd_ups_status_overload + on: apcupsd.ups_status lookup: max -1s unaligned match-names of OVERLOAD units: status every: 10s @@ -80,8 +78,8 @@ component: UPS device info: APC UPS is overloaded and cannot supply enough power to the load to: sitemgr - template: apcupsd_status_lowbatt - on: apcupsd.status + template: apcupsd_ups_status_lowbatt + on: apcupsd.ups_status lookup: max -1s unaligned match-names of LOWBATT units: status every: 10s @@ -90,8 +88,8 @@ component: UPS device info: APC UPS battery is low and needs to be recharged to: sitemgr - template: apcupsd_status_replacebatt - on: apcupsd.status + template: apcupsd_ups_status_replacebatt + on: apcupsd.ups_status lookup: max -1s unaligned match-names of REPLACEBATT units: status every: 10s @@ -100,8 +98,8 @@ component: UPS device info: APC UPS battery has reached the 
end of its lifespan and needs to be replaced to: sitemgr - template: apcupsd_status_nobatt - on: apcupsd.status + template: apcupsd_ups_status_nobatt + on: apcupsd.ups_status lookup: max -1s unaligned match-names of NOBATT units: status every: 10s @@ -110,8 +108,8 @@ component: UPS device info: APC UPS has no battery to: sitemgr - template: apcupsd_status_commlost - on: apcupsd.status + template: apcupsd_ups_status_commlost + on: apcupsd.ups_status lookup: max -1s unaligned match-names of COMMLOST units: status every: 10s diff --git a/src/health/health.d/boinc.conf b/src/health/health.d/boinc.conf index 6fd987de1..987d20212 100644 --- a/src/health/health.d/boinc.conf +++ b/src/health/health.d/boinc.conf @@ -2,11 +2,11 @@ # Warn on any compute errors encountered. template: boinc_compute_errors - on: boinc.states + on: boinc.tasks_per_state class: Errors type: Computing component: BOINC - lookup: average -10m unaligned of comperror + lookup: average -10m unaligned of compute_error units: tasks every: 1m warn: $this > 0 @@ -17,7 +17,7 @@ component: BOINC # Warn on lots of upload errors template: boinc_upload_errors - on: boinc.states + on: boinc.tasks_per_state class: Errors type: Computing component: BOINC diff --git a/src/health/health.d/ceph.conf b/src/health/health.d/ceph.conf index 44d351338..0048e2a7c 100644 --- a/src/health/health.d/ceph.conf +++ b/src/health/health.d/ceph.conf @@ -1,16 +1,16 @@ # low ceph disk available - template: ceph_cluster_space_usage - on: ceph.general_usage + template: ceph_cluster_physical_capacity_utilization + on: ceph.cluster_physical_capacity_utilization class: Utilization type: Storage component: Ceph - calc: $used * 100 / ($used + $avail) + calc: $utilization units: % every: 1m - warn: $this > (($status >= $WARNING ) ? (85) : (90)) - crit: $this > (($status == $CRITICAL) ? 
(90) : (98)) - delay: down 5m multiplier 1.2 max 1h - summary: Ceph cluster disk space utilization - info: Ceph cluster disk space utilization - to: sysadmin + warn: $this > (($status >= $WARNING ) ? (85) : (90)) + crit: $this > (($status == $CRITICAL) ? (90) : (98)) + delay: down 5m multiplier 1.2 max 1h + summary: Ceph cluster ${label:fsid} disk space utilization + info: Ceph cluster ${label:fsid} disk space utilization + to: sysadmin diff --git a/src/health/health.d/disks.conf b/src/health/health.d/disks.conf index fe96837fb..d8176a6be 100644 --- a/src/health/health.d/disks.conf +++ b/src/health/health.d/disks.conf @@ -12,24 +12,22 @@ class: Utilization type: System component: Disk - host labels: _os=linux freebsd -chart labels: mount_point=!/dev !/dev/* !/run !/run/* * - calc: $used * 100 / ($avail + $used) - units: % - every: 1m - warn: $this > (($status >= $WARNING ) ? (80) : (90)) - crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5 - delay: up 1m down 15m multiplier 1.5 max 1h - summary: Disk ${label:mount_point} space usage - info: Total space utilization of disk ${label:mount_point} - to: sysadmin +chart labels: mount_point=!/dev !/dev/* !/run !/run/* !HarddiskVolume* * + calc: $used * 100 / ($avail + $used) + units: % + every: 1m + warn: $this > (($status >= $WARNING ) ? (80) : (90)) + crit: ($this > (($status == $CRITICAL) ? 
(90) : (98))) && $avail < 5 + delay: up 1m down 15m multiplier 1.5 max 1h + summary: Disk ${label:mount_point} space usage + info: Total space utilization of disk ${label:mount_point} + to: sysadmin template: disk_inode_usage on: disk.inodes class: Utilization type: System component: Disk - host labels: _os=linux freebsd chart labels: mount_point=!/dev !/dev/* !/run !/run/* * calc: $used * 100 / ($avail + $used) units: % @@ -55,7 +53,6 @@ chart labels: mount_point=!/dev !/dev/* !/run !/run/* * template: disk_fill_rate on: disk.space -host labels: _os=linux freebsd lookup: min -10m at -50m unaligned of avail calc: ($this - $avail) / (($now - $after) / 3600) every: 1m @@ -67,7 +64,6 @@ host labels: _os=linux freebsd template: out_of_disk_space_time on: disk.space -host labels: _os=linux freebsd calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf) units: hours every: 10s @@ -92,7 +88,6 @@ host labels: _os=linux freebsd template: disk_inode_rate on: disk.inodes -host labels: _os=linux freebsd lookup: min -10m at -50m unaligned of avail calc: ($this - $avail) / (($now - $after) / 3600) every: 1m @@ -105,7 +100,6 @@ host labels: _os=linux freebsd template: out_of_disk_inodes_time on: disk.inodes -host labels: _os=linux freebsd calc: ($disk_inode_rate > 0) ? 
($avail / $disk_inode_rate) : (inf) units: hours every: 10s @@ -129,7 +123,6 @@ host labels: _os=linux freebsd class: Utilization type: System component: Disk -host labels: _os=linux freebsd lookup: average -10m unaligned units: % every: 1m @@ -150,7 +143,6 @@ host labels: _os=linux freebsd class: Latency type: System component: Disk -host labels: _os=linux freebsd lookup: average -10m unaligned units: ms every: 1m diff --git a/src/health/health.d/net.conf b/src/health/health.d/net.conf index 448a3733d..609741aca 100644 --- a/src/health/health.d/net.conf +++ b/src/health/health.d/net.conf @@ -19,7 +19,7 @@ component: Network class: Workload type: System component: Network -host labels: _os=linux +host labels: _os=linux windows lookup: average -1m unaligned absolute of received calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan ) units: % @@ -35,7 +35,7 @@ host labels: _os=linux class: Workload type: System component: Network -host labels: _os=linux +host labels: _os=linux windows lookup: average -1m unaligned absolute of sent calc: ($interface_speed > 0) ? 
($this * 100 / ($interface_speed * 1000)) : ( nan ) units: % @@ -214,7 +214,6 @@ host labels: _os=linux class: Workload type: System component: Network -host labels: _os=linux freebsd lookup: average -1m unaligned of received units: packets every: 10s @@ -225,7 +224,6 @@ host labels: _os=linux freebsd class: Workload type: System component: Network -host labels: _os=linux freebsd lookup: average -10s unaligned of received calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate)) every: 10s @@ -237,3 +235,21 @@ host labels: _os=linux freebsd info: Ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \ compared to the rate over the last minute to: silent + +# ----------------------------------------------------------------------------- +# output queue length + + template: network_interface_output_queue_length + on: net.queue_length + class: Errors + type: System + component: Network +host labels: _os=windows + units: packets + every: 10s + warn: $length > 2 + delay: up 1m down 1m multiplier 1.5 max 1h + summary: System network interface ${label:device} output queue length + info: The Output Queue Length on interface ${label:device} should be zero, otherwise there are delays and bottlenecks. + to: silent + diff --git a/src/health/health.d/vernemq.conf b/src/health/health.d/vernemq.conf index 6ea9f99dc..df7f68fc4 100644 --- a/src/health/health.d/vernemq.conf +++ b/src/health/health.d/vernemq.conf @@ -2,67 +2,67 @@ # Socket errors template: vernemq_socket_errors - on: vernemq.socket_errors + on: vernemq.node_socket_errors class: Errors type: Messaging component: VerneMQ - lookup: sum -1m unaligned absolute of socket_error + lookup: sum -1m unaligned units: errors every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ socket errors - info: Number of socket errors in the last minute + summary: Node ${label:node} socket errors + info: Node ${label:node} socket errors in the last minute to: sysadmin # Queues dropped/expired/unhandled PUBLISH messages template: vernemq_queue_message_drop - on: vernemq.queue_undelivered_messages + on: vernemq.node_queue_undelivered_messages class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute of queue_message_drop + lookup: average -1m unaligned absolute of dropped units: dropped messages every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ dropped messages - info: Number of dropped messages due to full queues in the last minute + summary: Node ${label:node} dropped messages + info: Node ${label:node} dropped messages due to full queues in the last minute to: sysadmin template: vernemq_queue_message_expired - on: vernemq.queue_undelivered_messages + on: vernemq.node_queue_undelivered_messages class: Latency type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute of queue_message_expired + lookup: average -1m unaligned absolute of expired units: expired messages every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ expired messages - info: number of messages which expired before delivery in the last minute + summary: Node ${label:node} expired messages + info: Node ${label:node} messages expired before delivery in the last minute to: sysadmin template: vernemq_queue_message_unhandled - on: vernemq.queue_undelivered_messages + on: vernemq.node_queue_undelivered_messages class: Latency type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute of queue_message_unhandled + lookup: average -1m unaligned absolute of unhandled units: unhandled messages every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unhandled messages - info: Number of unhandled messages (connections with clean session=true) in the last minute + summary: Node ${label:node} unhandled messages + info: Node ${label:node} unhandled messages in the last minute to: sysadmin # Erlang VM template: vernemq_average_scheduler_utilization - on: vernemq.average_scheduler_utilization + on: vernemq.node_average_scheduler_utilization class: Utilization type: Messaging component: VerneMQ @@ -72,14 +72,14 @@ component: VerneMQ warn: $this > (($status >= $WARNING) ? (75) : (85)) crit: $this > (($status == $CRITICAL) ? 
(85) : (95)) delay: down 15m multiplier 1.5 max 1h - summary: VerneMQ scheduler utilization - info: Average scheduler utilization over the last 10 minutes + summary: Node ${label:node} scheduler utilization + info: Node ${label:node} scheduler utilization over the last 10 minutes to: sysadmin # Cluster communication and netsplits template: vernemq_cluster_dropped - on: vernemq.cluster_dropped + on: vernemq.node_cluster_dropped class: Errors type: Messaging component: VerneMQ @@ -88,74 +88,74 @@ component: VerneMQ every: 1m warn: $this > 0 delay: up 5m down 5m multiplier 1.5 max 1h - summary: VerneMQ dropped traffic - info: Amount of traffic dropped during communication with the cluster nodes in the last minute + summary: Node ${label:node} dropped cluster traffic + info: Node ${label:node} traffic dropped during communication with the cluster nodes in the last minute to: sysadmin template: vernemq_netsplits - on: vernemq.netsplits + on: vernemq.node_netsplits class: Workload type: Messaging component: VerneMQ - lookup: sum -1m unaligned absolute of netsplit_detected + lookup: sum -1m unaligned absolute of detected units: netsplits every: 10s warn: $this > 0 delay: down 5m multiplier 1.5 max 2h - summary: VerneMQ netsplits - info: Number of detected netsplits (split brain situation) in the last minute + summary: Node ${label:node} detected netsplits + info: Node ${label:node} detected netsplits (split brain) in the last minute to: sysadmin # Unsuccessful CONNACK template: vernemq_mqtt_connack_sent_reason_unsuccessful - on: vernemq.mqtt_connack_sent_reason + on: vernemq.node_mqtt_connack_sent_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful CONNACK - info: Number of sent unsuccessful v3/v5 CONNACK packets in the last minute + summary: Node ${label:node} unsuccessful sent CONNACK + info: Node ${label:node} unsuccessful sent v5 CONNACK packets in the last minute to: sysadmin # Not normal DISCONNECT template: vernemq_mqtt_disconnect_received_reason_not_normal - on: vernemq.mqtt_disconnect_received_reason + on: vernemq.node_mqtt_disconnect_received_by_reason_code class: Workload type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !normal_disconnect,* + lookup: average -1m unaligned absolute of !normal_disconnect,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ received not normal DISCONNECT - info: Number of received not normal v5 DISCONNECT packets in the last minute + summary: Node ${label:node} received not normal DISCONNECT + info: Node ${label:node} received not normal v5 DISCONNECT packets in the last minute to: sysadmin template: vernemq_mqtt_disconnect_sent_reason_not_normal - on: vernemq.mqtt_disconnect_sent_reason + on: vernemq.node_mqtt_disconnect_sent_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !normal_disconnect,* + lookup: average -1m unaligned absolute of !normal_disconnect,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ sent not normal DISCONNECT - info: Number of sent not normal v5 DISCONNECT packets in the last minute + summary: Node ${label:node} sent not normal DISCONNECT + info: Node ${label:node} sent not normal v5 DISCONNECT packets in the last minute to: sysadmin # SUBSCRIBE errors and unauthorized attempts template: vernemq_mqtt_subscribe_error - on: vernemq.mqtt_subscribe_error + on: vernemq.node_mqtt_subscribe_error class: Errors type: Messaging component: VerneMQ @@ -164,12 +164,12 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ failed SUBSCRIBE - info: Number of failed v3/v5 SUBSCRIBE operations in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} failed SUBSCRIBE + info: Node ${label:node} mqtt v${label:mqtt_version} failed SUBSCRIBE operations in the last minute to: sysadmin template: vernemq_mqtt_subscribe_auth_error - on: vernemq.mqtt_subscribe_auth_error + on: vernemq.node_mqtt_subscribe_auth_error class: Workload type: Messaging component: VerneMQ @@ -178,14 +178,14 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unauthorized SUBSCRIBE - info: number of unauthorized v3/v5 SUBSCRIBE attempts in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} unauthorized SUBSCRIBE + info: Node ${label:node} mqtt v${label:mqtt_version} unauthorized SUBSCRIBE attempts in the last minute to: sysadmin # UNSUBSCRIBE errors template: vernemq_mqtt_unsubscribe_error - on: vernemq.mqtt_unsubscribe_error + on: vernemq.node_mqtt_unsubscribe_error class: Errors type: Messaging component: VerneMQ @@ -194,14 +194,14 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ failed UNSUBSCRIBE - info: Number of failed v3/v5 UNSUBSCRIBE operations in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} failed UNSUBSCRIBE + info: Node ${label:node} mqtt v${label:mqtt_version} failed UNSUBSCRIBE operations in the last minute to: sysadmin # PUBLISH errors and unauthorized attempts template: vernemq_mqtt_publish_errors - on: vernemq.mqtt_publish_errors + on: vernemq.node_mqtt_publish_errors class: Errors type: Messaging component: VerneMQ @@ -210,12 +210,12 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ failed PUBLISH - info: Number of failed v3/v5 PUBLISH operations in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} failed PUBLISH + info: Node ${label:node} mqtt v${label:mqtt_version} failed PUBLISH operations in the last minute to: sysadmin template: vernemq_mqtt_publish_auth_errors - on: vernemq.mqtt_publish_auth_errors + on: vernemq.node_mqtt_publish_auth_errors class: Workload type: Messaging component: VerneMQ @@ -224,42 +224,42 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unauthorized PUBLISH - info: Number of unauthorized v3/v5 PUBLISH attempts in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} unauthorized PUBLISH + info: Node ${label:node} mqtt v${label:mqtt_version} unauthorized PUBLISH attempts in the last minute to: sysadmin # Unsuccessful and unexpected PUBACK template: vernemq_mqtt_puback_received_reason_unsuccessful - on: vernemq.mqtt_puback_received_reason + on: vernemq.node_mqtt_puback_received_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful received PUBACK - info: Number of received unsuccessful v5 PUBACK packets in the last minute + summary: Node ${label:node} mqtt v5 received unsuccessful PUBACK + info: Node ${label:node} mqtt v5 received unsuccessful PUBACK packets in the last minute to: sysadmin template: vernemq_mqtt_puback_sent_reason_unsuccessful - on: vernemq.mqtt_puback_sent_reason + on: vernemq.node_mqtt_puback_sent_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful sent PUBACK - info: Number of sent unsuccessful v5 PUBACK packets in the last minute + summary: Node ${label:node} mqtt v5 unsuccessful sent PUBACK + info: Node ${label:node} mqtt v5 unsuccessful sent PUBACK packets in the last minute to: sysadmin template: vernemq_mqtt_puback_unexpected - on: vernemq.mqtt_puback_invalid_error + on: vernemq.node_mqtt_puback_invalid_error class: Workload type: Messaging component: VerneMQ @@ -268,42 +268,42 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unnexpected recieved PUBACK - info: Number of received unexpected v3/v5 PUBACK packets in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBACK + info: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBACK messages in the last minute to: sysadmin # Unsuccessful and unexpected PUBREC template: vernemq_mqtt_pubrec_received_reason_unsuccessful - on: vernemq.mqtt_pubrec_received_reason + on: vernemq.node_mqtt_pubrec_received_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful received PUBREC - info: Number of received unsuccessful v5 PUBREC packets in the last minute + summary: Node ${label:node} mqtt v5 received unsuccessful PUBREC + info: Node ${label:node} mqtt v5 received unsuccessful PUBREC packets in the last minute to: sysadmin template: vernemq_mqtt_pubrec_sent_reason_unsuccessful - on: vernemq.mqtt_pubrec_sent_reason + on: vernemq.node_mqtt_pubrec_sent_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful sent PUBREC - info: Number of sent unsuccessful v5 PUBREC packets in the last minute + summary: Node ${label:node} mqtt v5 unsuccessful sent PUBREC + info: Node ${label:node} mqtt v5 unsuccessful sent PUBREC packets in the last minute to: sysadmin template: vernemq_mqtt_pubrec_invalid_error - on: vernemq.mqtt_pubrec_invalid_error + on: vernemq.node_mqtt_pubrec_invalid_error class: Workload type: Messaging component: VerneMQ @@ -312,72 +312,72 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ invalid received PUBREC - info: Number of received invalid v3 PUBREC packets in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} received invalid PUBREC + info: Node ${label:node} mqtt v${label:mqtt_version} received invalid PUBREC packets in the last minute to: sysadmin # Unsuccessful PUBREL template: vernemq_mqtt_pubrel_received_reason_unsuccessful - on: vernemq.mqtt_pubrel_received_reason + on: vernemq.node_mqtt_pubrel_received_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful received PUBREL - info: Number of received unsuccessful v5 PUBREL packets in the last minute + summary: Node ${label:node} mqtt v5 received unsuccessful PUBREL + info: Node ${label:node} mqtt v5 received unsuccessful PUBREL packets in the last minute to: sysadmin template: vernemq_mqtt_pubrel_sent_reason_unsuccessful - on: vernemq.mqtt_pubrel_sent_reason + on: vernemq.node_mqtt_pubrel_sent_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful sent PUBREL - info: number of sent unsuccessful v5 PUBREL packets in the last minute + summary: Node ${label:node} mqtt v5 unsuccessful sent PUBREL + info: Node ${label:node} mqtt v5 unsuccessful sent PUBREL packets in the last minute to: sysadmin # Unsuccessful and unexpected PUBCOMP template: vernemq_mqtt_pubcomp_received_reason_unsuccessful - on: vernemq.mqtt_pubcomp_received_reason + on: vernemq.node_mqtt_pubcomp_received_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful received PUBCOMP - info: Number of received unsuccessful v5 PUBCOMP packets in the last minute + summary: Node ${label:node} mqtt v5 received unsuccessful PUBCOMP + info: Node ${label:node} mqtt v5 received unsuccessful PUBCOMP packets in the last minute to: sysadmin template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful - on: vernemq.mqtt_pubcomp_sent_reason + on: vernemq.node_mqtt_pubcomp_sent_by_reason_code class: Errors type: Messaging component: VerneMQ - lookup: average -1m unaligned absolute match-names of !success,* + lookup: average -1m unaligned absolute of !success,* units: packets every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unsuccessful sent PUBCOMP - info: number of sent unsuccessful v5 PUBCOMP packets in the last minute + summary: Node ${label:node} mqtt v5 unsuccessful sent PUBCOMP + info: Node ${label:node} mqtt v5 unsuccessful sent PUBCOMP packets in the last minute to: sysadmin template: vernemq_mqtt_pubcomp_unexpected - on: vernemq.mqtt_pubcomp_invalid_error + on: vernemq.node_mqtt_pubcomp_invalid_error class: Workload type: Messaging component: VerneMQ @@ -386,6 +386,6 @@ component: VerneMQ every: 1m warn: $this > (($status >= $WARNING) ? (0) : (5)) delay: up 2m down 5m multiplier 1.5 max 2h - summary: VerneMQ unexpected received PUBCOMP - info: number of received unexpected v3/v5 PUBCOMP packets in the last minute + summary: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBCOMP + info: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBCOMP packets in the last minute to: sysadmin diff --git a/src/health/health.h b/src/health/health.h index b1ac5a9e1..cdd089623 100644 --- a/src/health/health.h +++ b/src/health/health.h @@ -34,8 +34,8 @@ void health_entry_flags_to_json_array(BUFFER *wb, const char *key, HEALTH_ENTRY_ #define HEALTH_LISTEN_BACKLOG 4096 #endif -#ifndef HEALTH_LOG_DEFAULT_HISTORY -#define HEALTH_LOG_DEFAULT_HISTORY 432000 +#ifndef HEALTH_LOG_RETENTION_DEFAULT +#define HEALTH_LOG_RETENTION_DEFAULT (5 * 86400) #endif #ifndef HEALTH_LOG_MINIMUM_HISTORY @@ -51,7 +51,7 @@ void health_plugin_reload(void); void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* context, RRDCALC_STATUS status); void health_alarms2json(RRDHOST *host, BUFFER *wb, int all); -void health_alert2json_conf(RRDHOST *host, BUFFER *wb, CONTEXTS_V2_OPTIONS all); +void health_alert2json_conf(RRDHOST *host, BUFFER *wb, CONTEXTS_OPTIONS all); void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all); void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *wb); @@ -75,8 
+75,8 @@ ALARM_ENTRY* health_create_alarm_entry( void health_alarm_log_add_entry(RRDHOST *host, ALARM_ENTRY *ae); -char *health_user_config_dir(void); -char *health_stock_config_dir(void); +const char *health_user_config_dir(void); +const char *health_stock_config_dir(void); void health_alarm_log_free(RRDHOST *host); void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae); diff --git a/src/health/health_config.c b/src/health/health_config.c index c17f7e21d..d261f9022 100644 --- a/src/health/health_config.c +++ b/src/health/health_config.c @@ -29,14 +29,14 @@ static inline int health_parse_delay( while(*s && isspace((uint8_t)*s)) *s++ = '\0'; if(!strcasecmp(key, "up")) { - if (!config_parse_duration(value, delay_up_duration)) { + if (!duration_parse_seconds(value, delay_up_duration)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, filename, value, key); } else given_up = 1; } else if(!strcasecmp(key, "down")) { - if (!config_parse_duration(value, delay_down_duration)) { + if (!duration_parse_seconds(value, delay_down_duration)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, filename, value, key); } @@ -51,7 +51,7 @@ static inline int health_parse_delay( else given_multiplier = 1; } else if(!strcasecmp(key, "max")) { - if (!config_parse_duration(value, delay_max_duration)) { + if (!duration_parse_seconds(value, delay_max_duration)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, filename, value, key); } @@ -139,13 +139,13 @@ static inline int health_parse_repeat( return 1; } if(!strcasecmp(key, "warning")) { - if (!config_parse_duration(value, (int*)warn_repeat_every)) { + if (!duration_parse_seconds(value, (int *)warn_repeat_every)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, file, value, key); } } 
else if(!strcasecmp(key, "critical")) { - if (!config_parse_duration(value, (int*)crit_repeat_every)) { + if (!duration_parse_seconds(value, (int *)crit_repeat_every)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid value '%s' for '%s' keyword", line, file, value, key); } @@ -155,13 +155,6 @@ static inline int health_parse_repeat( return 1; } -static inline int isvariableterm(const char s) { - if(isalnum(s) || s == '.' || s == '_') - return 0; - - return 1; -} - static inline int health_parse_db_lookup(size_t line, const char *filename, char *string, struct rrd_alert_config *ac) { if(ac->dimensions) string_freez(ac->dimensions); ac->dimensions = NULL; @@ -273,7 +266,7 @@ static inline int health_parse_db_lookup(size_t line, const char *filename, char while(*s && !isspace((uint8_t)*s)) s++; while(*s && isspace((uint8_t)*s)) *s++ = '\0'; - if(!config_parse_duration(key, &ac->after)) { + if(!duration_parse_seconds(key, &ac->after)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid duration '%s' after group method", line, filename, key); return 0; @@ -294,7 +287,7 @@ static inline int health_parse_db_lookup(size_t line, const char *filename, char while(*s && !isspace((uint8_t)*s)) s++; while(*s && isspace((uint8_t)*s)) *s++ = '\0'; - if (!config_parse_duration(value, &ac->before)) { + if (!duration_parse_seconds(value, &ac->before)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid duration '%s' for '%s' keyword", line, filename, value, key); } @@ -304,7 +297,7 @@ static inline int health_parse_db_lookup(size_t line, const char *filename, char while(*s && !isspace((uint8_t)*s)) s++; while(*s && isspace((uint8_t)*s)) *s++ = '\0'; - if (!config_parse_duration(value, &ac->update_every)) { + if (!duration_parse_seconds(value, &ac->update_every)) { netdata_log_error("Health configuration at line %zu of file '%s': invalid duration '%s' for '%s' keyword", line, filename, value, key); } @@ -725,7 
+718,7 @@ int health_readfile(const char *filename, void *data __maybe_unused, bool stock_ health_parse_db_lookup(line, filename, value, ac); } else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) { - if(!config_parse_duration(value, &ac->update_every)) + if(!duration_parse_seconds(value, &ac->update_every)) netdata_log_error( "Health configuration at line %zu of file '%s' for alarm '%s' at key '%s' " "cannot parse duration: '%s'.", diff --git a/src/health/health_dyncfg.c b/src/health/health_dyncfg.c index f2b9bc607..48346f662 100644 --- a/src/health/health_dyncfg.c +++ b/src/health/health_dyncfg.c @@ -68,8 +68,8 @@ static bool parse_match(json_object *jobj, const char *path, struct rrd_alert_ma } static bool parse_config_value_database_lookup(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error, bool strict) { - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "after", config->after, error, strict); - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "before", config->before, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "after", config->after, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "before", config->before, error, strict); JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, "time_group", time_grouping_txt2id, config->time_group, error, strict); JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, "dims_group", alerts_dims_grouping2id, config->dims_group, error, strict); JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, "data_source", alerts_data_sources2id, config->data_source, error, strict); @@ -98,7 +98,7 @@ static bool parse_config_value(json_object *jobj, const char *path, struct rrd_a JSONC_PARSE_SUBOBJECT(jobj, path, "database_lookup", config, parse_config_value_database_lookup, error, strict); JSONC_PARSE_TXT2EXPRESSION_OR_ERROR_AND_RETURN(jobj, path, "calculation", config->calculation, error, false); JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, 
"units", config->units, error, false); - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "update_every", config->update_every, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "update_every", config->update_every, error, strict); return true; } @@ -109,17 +109,17 @@ static bool parse_config_conditions(json_object *jobj, const char *path, struct } static bool parse_config_action_delay(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error, bool strict) { - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "up", config->delay_up_duration, error, strict); - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "down", config->delay_down_duration, error, strict); - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "max", config->delay_max_duration, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "up", config->delay_up_duration, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "down", config->delay_down_duration, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "max", config->delay_max_duration, error, strict); JSONC_PARSE_DOUBLE_OR_ERROR_AND_RETURN(jobj, path, "multiplier", config->delay_multiplier, error, strict); return true; } static bool parse_config_action_repeat(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error, bool strict) { JSONC_PARSE_BOOL_OR_ERROR_AND_RETURN(jobj, path, "enabled", config->has_custom_repeat_config, error, strict); - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "warning", config->warn_repeat_every, error, strict); - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "critical", config->crit_repeat_every, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "warning", config->warn_repeat_every, error, strict); + JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "critical", config->crit_repeat_every, error, strict); return true; } @@ -153,7 +153,7 @@ static bool parse_config(json_object *jobj, const 
char *path, RRD_ALERT_PROTOTYP static bool parse_prototype(json_object *jobj, const char *path, RRD_ALERT_PROTOTYPE *base, BUFFER *error, const char *name, bool strict) { int64_t version = 0; - JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "format_version", version, error, strict); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "format_version", version, error, strict); if(version != 1) { buffer_sprintf(error, "unsupported document version"); @@ -164,6 +164,11 @@ static bool parse_prototype(json_object *jobj, const char *path, RRD_ALERT_PROTO json_object *rules; if (json_object_object_get_ex(jobj, "rules", &rules)) { + if (json_object_get_type(rules) != json_type_array) { + buffer_sprintf(error, "member 'rules' is not an array"); + return false; + } + size_t rules_len = json_object_array_length(rules); RRD_ALERT_PROTOTYPE *ap = base; // fill the first entry @@ -270,7 +275,7 @@ static inline void health_prototype_rule_to_json_array_member(BUFFER *wb, RRD_AL buffer_json_member_add_object(wb, "config"); { if(!for_hashing) { - buffer_json_member_add_uuid(wb, "hash", &ap->config.hash_id); + buffer_json_member_add_uuid(wb, "hash", ap->config.hash_id); buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(ap->config.source_type)); buffer_json_member_add_string(wb, "source", string2str(ap->config.source)); } diff --git a/src/health/health_event_loop.c b/src/health/health_event_loop.c index 04d70e11f..0bf6892dd 100644 --- a/src/health/health_event_loop.c +++ b/src/health/health_event_loop.c @@ -133,12 +133,12 @@ static void health_initialize_rrdhost(RRDHOST *host) { rrdhost_flag_set(host, RRDHOST_FLAG_INITIALIZED_HEALTH); host->health_log.max = health_globals.config.health_log_entries_max; - host->health_log.health_log_history = health_globals.config.health_log_history; + host->health_log.health_log_retention_s = health_globals.config.health_log_retention_s; host->health.health_default_exec = string_dup(health_globals.config.default_exec); 
host->health.health_default_recipient = string_dup(health_globals.config.default_recipient); host->health.use_summary_for_notifications = health_globals.config.use_summary_for_notifications; - host->health_log.next_log_id = (uint32_t)now_realtime_sec(); + host->health_log.next_log_id = get_uint32_id(); host->health_log.next_alarm_id = 0; rw_spinlock_init(&host->health_log.spinlock); @@ -229,7 +229,7 @@ static void health_event_loop(void) { "Postponing alarm checks for %"PRId32" seconds, " "because it seems that the system was just resumed from suspension.", (int32_t)health_globals.config.postpone_alarms_during_hibernation_for_seconds); - schedule_node_info_update(localhost); + schedule_node_state_update(localhost, 0); } if (unlikely(silencers->all_alarms && silencers->stype == STYPE_DISABLE_ALARMS)) { @@ -298,13 +298,11 @@ static void health_event_loop(void) { } worker_is_busy(WORKER_HEALTH_JOB_HOST_LOCK); -#ifdef ENABLE_ACLK - if (netdata_cloud_enabled) { + { struct aclk_sync_cfg_t *wc = host->aclk_config; if (wc && wc->send_snapshot == 2) continue; } -#endif // the first loop is to lookup values from the db foreach_rrdcalc_in_rrdhost_read(host, rc) { @@ -651,7 +649,6 @@ static void health_event_loop(void) { break; } } -#ifdef ENABLE_ACLK struct aclk_sync_cfg_t *wc = host->aclk_config; if (wc && wc->send_snapshot == 1) { wc->send_snapshot = 2; @@ -660,7 +657,6 @@ static void health_event_loop(void) { else if (process_alert_pending_queue(host)) rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS); -#endif dfe_done(host); diff --git a/src/health/health_internals.h b/src/health/health_internals.h index 638a96195..a86e62956 100644 --- a/src/health/health_internals.h +++ b/src/health/health_internals.h @@ -9,7 +9,7 @@ #define HEALTH_LOG_ENTRIES_MAX 100000U #define HEALTH_LOG_ENTRIES_MIN 10U -#define HEALTH_LOG_HISTORY_DEFAULT (5 * 86400) +#define HEALTH_LOG_RETENTION_DEFAULT (5 * 86400) #define HEALTH_CONF_MAX_LINE 4096 @@ -76,7 +76,7 @@ struct health_plugin_globals 
{ bool use_summary_for_notifications; unsigned int health_log_entries_max; - uint32_t health_log_history; // the health log history in seconds to be kept in db + uint32_t health_log_retention_s; // the health log retention in seconds to be kept in db STRING *silencers_filename; STRING *default_exec; diff --git a/src/health/health_notifications.c b/src/health/health_notifications.c index 85dd2d0d8..443c0246f 100644 --- a/src/health/health_notifications.c +++ b/src/health/health_notifications.c @@ -20,17 +20,27 @@ struct health_raised_summary { }; void health_alarm_wait_for_execution(ALARM_ENTRY *ae) { - if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS)) - return; + // this has to ALWAYS remove the given alarm entry from the queue - if(!ae->popen_instance) { - // nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not spawn a notification"); - return; + int code = 0; + + if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not an execution in progress"); + code = 128; + goto cleanup; } - ae->exec_code = spawn_popen_wait(ae->popen_instance); + if(!ae->popen_instance) { + nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not spawn a notification"); + code = 128; + goto cleanup; + } + code = spawn_popen_wait(ae->popen_instance); netdata_log_debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code); + +cleanup: + ae->exec_code = code; ae->flags &= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; if(ae->exec_code != 0) @@ -466,13 +476,18 @@ void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_rais ae->exec_run_timestamp = now_realtime_sec(); /* will be updated by real time after spawning */ netdata_log_debug(D_HEALTH, "executing command '%s'", command_to_run); - ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; ae->popen_instance = spawn_popen_run(command_to_run); - 
enqueue_alarm_notify_in_progress(ae); + if(ae->popen_instance) { + ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; + enqueue_alarm_notify_in_progress(ae); + } + else + netdata_log_error("Failed to execute alarm notification"); + health_alarm_log_save(host, ae); - } else { - netdata_log_error("Failed to format command arguments"); } + else + netdata_log_error("Failed to format command arguments"); buffer_free(warn_alarms); buffer_free(crit_alarms); diff --git a/src/health/notifications/README.md b/src/health/notifications/README.md index 5a2b032a3..e930e261f 100644 --- a/src/health/notifications/README.md +++ b/src/health/notifications/README.md @@ -10,10 +10,10 @@ The default script is `alarm-notify.sh`. > > This file mentions editing configuration files. > -> - To edit configuration files in a safe way, we provide the [`edit config` script](/docs/netdata-agent/configuration/README.md#edit-netdataconf)located in your [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically. +> - To edit configuration files in a safe way, we provide the [`edit config` script](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config)located in your [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically. > Note that to run the script you need to be inside your Netdata config directory. > -> - Please also note that after most configuration changes you will need to [restart the Agent](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for the changes to take effect. +> - Please also note that after most configuration changes you will need to [restart the Agent](/docs/netdata-agent/start-stop-restart.md) for the changes to take effect. 
> > It is recommended to use this way for configuring Netdata. @@ -29,7 +29,7 @@ It uses **roles**. For example `sysadmin`, `webmaster`, `dba`, etc. Each alert is assigned to one or more roles, using the `to` line of the alert configuration. For example, here is the alert configuration for `ram.conf` that defaults to the role `sysadmin`: -```conf +```text alarm: ram_in_use on: system.ram class: Utilization @@ -52,7 +52,7 @@ Then `alarm-notify.sh` uses its own configuration file `health_alarm_notify.conf Here is an example, of the `sysadmin`'s role recipients for the email notification. You can send the notification to multiple recipients by separating the emails with a space. -```conf +```text ############################################################################### # RECIPIENTS PER ROLE @@ -84,7 +84,7 @@ You can edit `health_alarm_notify.conf` using the `edit-config` script to config - **Recipients** per role per notification method - ```conf + ```text role_recipients_email[sysadmin]="${DEFAULT_RECIPIENT_EMAIL}" role_recipients_pushover[sysadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" role_recipients_pushbullet[sysadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" @@ -132,7 +132,7 @@ When you define recipients per role for notification methods, you can append `|c In the following examples, the first recipient receives all the alerts, while the second one receives only notifications for alerts that have at some point become critical. The second user may still receive warning and clear notifications, but only for the event that previously caused a critical alert. -```conf +```text email : "user1@example.com user2@example.com|critical" pushover : "2987343...9437837 8756278...2362736|critical" telegram : "111827421 112746832|critical" @@ -158,7 +158,7 @@ This works for all notification methods (including the default recipients). 
If you need to send curl based notifications (pushover, pushbullet, slack, alerta, flock, discord, telegram) via a proxy, you should set these variables to your proxy address: -```conf +```text export http_proxy="http://10.0.0.1:3128/" export https_proxy="http://10.0.0.1:3128/" ``` @@ -173,7 +173,7 @@ If you have an Internet facing netdata (or you have copied the images/ folder of netdata to your web server), set its URL here, to fetch the notification images from it. -```conf +```text images_base_url="http://my.public.netdata.server:19999" ``` diff --git a/src/health/notifications/alarm-notify.sh.in b/src/health/notifications/alarm-notify.sh.in index c7c44cb11..d7baa7345 100755 --- a/src/health/notifications/alarm-notify.sh.in +++ b/src/health/notifications/alarm-notify.sh.in @@ -769,6 +769,9 @@ fi # check custom [ -z "${DEFAULT_RECIPIENT_CUSTOM}" ] && SEND_CUSTOM="NO" +# check ilert +[ -z "${ILERT_ALERT_SOURCE_URL}" ] && SEND_ILERT="NO" + # ----------------------------------------------------------------------------- # check the availability of targets @@ -798,7 +801,8 @@ check_supported_targets() { [ "${SEND_DYNATRACE}" = "YES" ] || [ "${SEND_OPSGENIE}" = "YES" ] || [ "${SEND_GOTIFY}" = "YES" ] || - [ "${SEND_NTFY}" = "YES" ]; then + [ "${SEND_NTFY}" = "YES" ] || + [ "${SEND_ILERT}" = "YES" ]; then # if we need curl, check for the curl command if [ -z "${curl}" ]; then curl="$(command -v curl 2>/dev/null)" @@ -828,6 +832,7 @@ check_supported_targets() { SEND_OPSGENIE="NO" SEND_GOTIFY="NO" SEND_NTFY="NO" + SEND_ILERT="NO" fi fi @@ -983,7 +988,8 @@ for method in "${SEND_EMAIL}" \ "${SEND_DYNATRACE}" \ "${SEND_OPSGENIE}" \ "${SEND_GOTIFY}" \ - "${SEND_NTFY}" ; do + "${SEND_NTFY}" \ + "${SEND_ILERT}" ; do if [ "${method}" == "YES" ]; then proceed=1 @@ -2432,6 +2438,50 @@ send_ntfy() { } # ----------------------------------------------------------------------------- +# ilert sender + +send_ilert() { + local payload httpcode + [ "${SEND_ILERT}" != "YES" ] && return 1 
+ + if [ -z "${ILERT_ALERT_SOURCE_URL}" ] ; then + info "Can't send ilert notification, because ILERT_ALERT_SOURCE_URL is not defined" + return 1 + fi + + payload=$(cat <<EOF + { + "alert" : "${name}", + "alert_url" : "${goto_url}", + "alarm_id" : ${alarm_id}, + "chart" : "${chart}", + "date" : "${when}", + "duration" : "${duration_txt}", + "host" : "${host}", + "info" : "${info}", + "message" : "${status_message}", + "severity": ${status}, + "total_critical" : "${total_critical}", + "total_warnings" : "${total_warnings}", + "value" : "${value_string}", + "image_url": "${image}", + "src" : "${src}" + } +EOF +) + + httpcode=$(docurl -X POST -H "Content-Type: application/json" -d "${payload}" "${ILERT_ALERT_SOURCE_URL}") + if [ "${httpcode}" = "200" ] || [ "${httpcode}" = "202" ]; then + info "sent ilert event for ${notification_description}" + else + error "failed to send ilert event for ${notification_description}, with HTTP response status code ${httpcode}." + return 1 + fi + + return 0 +} + +# ----------------------------------------------------------------------------- # prepare the content of the notification # the url to send the user on click @@ -3604,6 +3654,11 @@ send_ntfy "${DEFAULT_RECIPIENT_NTFY}" SENT_NTFY=$? # ----------------------------------------------------------------------------- +# send messages to ilert +send_ilert +SENT_ILERT=$? 
+ +# ----------------------------------------------------------------------------- # let netdata know for state in "${SENT_EMAIL}" \ "${SENT_PUSHOVER}" \ @@ -3632,7 +3687,8 @@ for state in "${SENT_EMAIL}" \ "${SENT_DYNATRACE}" \ "${SENT_OPSGENIE}" \ "${SENT_GOTIFY}" \ - "${SENT_NTFY}"; do + "${SENT_NTFY}" \ + "${SENT_ILERT}"; do if [ "${state}" -eq 0 ]; then # we sent something exit 0 diff --git a/src/health/notifications/alerta/README.md b/src/health/notifications/alerta/README.md index 40fef3fd7..4999f1db4 100644 --- a/src/health/notifications/alerta/README.md +++ b/src/health/notifications/alerta/README.md @@ -39,8 +39,8 @@ You can send Netdata alerts to Alerta to see alerts coming from many Netdata hos The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -71,7 +71,7 @@ You will need an API key to send messages from any source, if Alerta is configur The `DEFAULT_RECIPIENT_CUSTOM` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_alerta[sysadmin]="Systems" role_recipients_alerta[domainadmin]="Domains" role_recipients_alerta[dba]="Databases Systems" diff --git a/src/health/notifications/alerta/metadata.yaml b/src/health/notifications/alerta/metadata.yaml index f815032b9..363dd6e2b 100644 --- a/src/health/notifications/alerta/metadata.yaml +++ b/src/health/notifications/alerta/metadata.yaml @@ -58,7 +58,7 @@ detailed_description: | The `DEFAULT_RECIPIENT_CUSTOM` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_alerta[sysadmin]="Systems" role_recipients_alerta[domainadmin]="Domains" role_recipients_alerta[dba]="Databases Systems" diff --git a/src/health/notifications/awssns/README.md b/src/health/notifications/awssns/README.md index b5a4cc5f4..8bcaf045c 100644 --- a/src/health/notifications/awssns/README.md +++ b/src/health/notifications/awssns/README.md @@ -56,8 +56,8 @@ You can send notifications through Amazon SNS using Netdata's Agent alert notifi The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -124,7 +124,7 @@ All roles will default to this variable if left unconfigured. You can have different recipient Topics per **role**, by editing `DEFAULT_RECIPIENT_AWSSNS` with the Topic ARN you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_awssns[sysadmin]="arn:aws:sns:us-east-2:123456789012:Systems" role_recipients_awssns[domainadmin]="arn:aws:sns:us-east-2:123456789012:Domains" role_recipients_awssns[dba]="arn:aws:sns:us-east-2:123456789012:Databases" @@ -143,7 +143,7 @@ role_recipients_awssns[sitemgr]="arn:aws:sns:us-east-2:123456789012:Sites" An example working configuration would be: ```yaml -```conf +```text #------------------------------------------------------------------------------ # Amazon SNS notifications diff --git a/src/health/notifications/awssns/metadata.yaml b/src/health/notifications/awssns/metadata.yaml index 93389bad0..0eb704d4a 100644 --- a/src/health/notifications/awssns/metadata.yaml +++ b/src/health/notifications/awssns/metadata.yaml @@ -104,7 +104,7 @@ You can have different recipient Topics per **role**, by editing `DEFAULT_RECIPIENT_AWSSNS` with the Topic ARN you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_awssns[sysadmin]="arn:aws:sns:us-east-2:123456789012:Systems" role_recipients_awssns[domainadmin]="arn:aws:sns:us-east-2:123456789012:Domains" role_recipients_awssns[dba]="arn:aws:sns:us-east-2:123456789012:Databases" @@ -122,7 +122,7 @@ enabled: false description: 'An example working configuration would be:' config: | - ```conf + ```text #------------------------------------------------------------------------------ # Amazon SNS notifications diff --git a/src/health/notifications/custom/README.md b/src/health/notifications/custom/README.md index 785aec59d..ba20f1c9c 100644 --- a/src/health/notifications/custom/README.md +++ 
b/src/health/notifications/custom/README.md @@ -36,8 +36,8 @@ Netdata Agent's alert notification feature allows you to send custom notificatio The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata diff --git a/src/health/notifications/discord/README.md b/src/health/notifications/discord/README.md index 128e04a44..5d6cda8e4 100644 --- a/src/health/notifications/discord/README.md +++ b/src/health/notifications/discord/README.md @@ -38,8 +38,8 @@ Send notifications to Discord using Netdata's Agent alert notification feature, The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -61,7 +61,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. 
You can then have different channels per role, by editing `DEFAULT_RECIPIENT_DISCORD` with the channel you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_discord[sysadmin]="systems" role_recipients_discord[domainadmin]="domains" role_recipients_discord[dba]="databases systems" diff --git a/src/health/notifications/discord/metadata.yaml b/src/health/notifications/discord/metadata.yaml index a46a8ec98..f62a1fc08 100644 --- a/src/health/notifications/discord/metadata.yaml +++ b/src/health/notifications/discord/metadata.yaml @@ -45,7 +45,7 @@ detailed_description: | All roles will default to this variable if left unconfigured. You can then have different channels per role, by editing `DEFAULT_RECIPIENT_DISCORD` with the channel you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_discord[sysadmin]="systems" role_recipients_discord[domainadmin]="domains" role_recipients_discord[dba]="databases systems" diff --git a/src/health/notifications/dynatrace/README.md b/src/health/notifications/dynatrace/README.md index 6785cdb82..7764f893c 100644 --- a/src/health/notifications/dynatrace/README.md +++ b/src/health/notifications/dynatrace/README.md @@ -41,8 +41,8 @@ You can send notifications to Dynatrace using Netdata's Agent alert notification The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata diff --git a/src/health/notifications/email/README.md b/src/health/notifications/email/README.md index 1e831d58e..781ab7d53 100644 --- a/src/health/notifications/email/README.md +++ b/src/health/notifications/email/README.md @@ -37,8 +37,8 @@ Send notifications via Email using Netdata's Agent alert notification feature, w The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -60,7 +60,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_CUSTOM` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_email[sysadmin]="systems@example.com" role_recipients_email[domainadmin]="domains@example.com" role_recipients_email[dba]="databases@example.com systems@example.com" diff --git a/src/health/notifications/email/metadata.yaml b/src/health/notifications/email/metadata.yaml index f0d4a62a9..cbef3ab27 100644 --- a/src/health/notifications/email/metadata.yaml +++ b/src/health/notifications/email/metadata.yaml @@ -44,7 +44,7 @@ detailed_description: | All roles will default to this variable if left unconfigured. 
The `DEFAULT_RECIPIENT_CUSTOM` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_email[sysadmin]="systems@example.com" role_recipients_email[domainadmin]="domains@example.com" role_recipients_email[dba]="databases@example.com systems@example.com" diff --git a/src/health/notifications/flock/README.md b/src/health/notifications/flock/README.md index 332ede832..5db467cd3 100644 --- a/src/health/notifications/flock/README.md +++ b/src/health/notifications/flock/README.md @@ -37,8 +37,8 @@ Send notifications to Flock using Netdata's Agent alert notification feature, wh The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -59,7 +59,7 @@ The following options can be defined for this notification ##### DEFAULT_RECIPIENT_FLOCK You can have different channels per role, by editing DEFAULT_RECIPIENT_FLOCK with the channel you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_flock[sysadmin]="systems" role_recipients_flock[domainadmin]="domains" role_recipients_flock[dba]="databases systems" diff --git a/src/health/notifications/flock/metadata.yaml b/src/health/notifications/flock/metadata.yaml index 62e7f4995..619c0a0a6 100644 --- a/src/health/notifications/flock/metadata.yaml +++ b/src/health/notifications/flock/metadata.yaml @@ -43,7 +43,7 @@ required: true detailed_description: | You can have different channels per role, by editing DEFAULT_RECIPIENT_FLOCK with the channel you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_flock[sysadmin]="systems" role_recipients_flock[domainadmin]="domains" role_recipients_flock[dba]="databases systems" diff --git a/src/health/notifications/gotify/README.md b/src/health/notifications/gotify/README.md index f0f8a7edb..7ddeda55b 100644 --- a/src/health/notifications/gotify/README.md +++ b/src/health/notifications/gotify/README.md @@ -38,8 +38,8 @@ You can send alerts to your Gotify instance using Netdata's Agent alert notifica The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
+You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata diff --git a/src/health/notifications/health_alarm_notify.conf b/src/health/notifications/health_alarm_notify.conf index 9dcec27ae..84d6d6225 100755 --- a/src/health/notifications/health_alarm_notify.conf +++ b/src/health/notifications/health_alarm_notify.conf @@ -845,6 +845,15 @@ NTFY_ACCESS_TOKEN="" DEFAULT_RECIPIENT_NTFY="" #------------------------------------------------------------------------------ +# ilert global notification options +SEND_ILERT="YES" + +# Api key +ILERT_ALERT_SOURCE_URL="" + +DEFAULT_RECIPIENT_ILERT="" + +#------------------------------------------------------------------------------ # custom notifications # @@ -984,6 +993,8 @@ custom_sender() { # role_recipients_ntfy[sysadmin]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ilert[sysadmin]="${DEFAULT_RECIPIENT_ILERT}" + # ----------------------------------------------------------------------------- # DNS related alarms @@ -1041,6 +1052,8 @@ custom_sender() { # role_recipients_ntfy[domainadmin]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ilert[domainadmin]="${DEFAULT_RECIPIENT_ILERT}" + # ----------------------------------------------------------------------------- # database servers alarms # mysql, redis, memcached, postgres, etc @@ -1099,6 +1112,8 @@ custom_sender() { # role_recipients_ntfy[dba]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ilert[dba]="databases ${DEFAULT_RECIPIENT_ILERT}" + # ----------------------------------------------------------------------------- # web servers alarms # apache, nginx, lighttpd, etc @@ -1157,6 +1172,8 @@ custom_sender() { # 
role_recipients_ntfy[webmaster]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ilert[webmaster]="${DEFAULT_RECIPIENT_ILERT}" + # ----------------------------------------------------------------------------- # proxy servers alarms # squid, etc @@ -1215,6 +1232,8 @@ custom_sender() { # role_recipients_ntfy[proxyadmin]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ilert[proxyadmin]="${DEFAULT_RECIPIENT_ILERT}" + # ----------------------------------------------------------------------------- # peripheral devices # UPS, photovoltaics, etc @@ -1270,3 +1289,5 @@ custom_sender() { # role_recipients_gotify[sitemgr]="${DEFAULT_RECIPIENT_GOTIFY}" # role_recipients_ntfy[sitemgr]="${DEFAULT_RECIPIENT_NTFY}" + +# role_recipients_ilert[sitemgr]="${DEFAULT_RECIPIENT_ILERT}" diff --git a/src/health/notifications/ilert/README.md b/src/health/notifications/ilert/README.md new file mode 100644 index 000000000..6d6541ddd --- /dev/null +++ b/src/health/notifications/ilert/README.md @@ -0,0 +1,96 @@ +<!--startmeta +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/health/notifications/ilert/README.md" +meta_yaml: "https://github.com/netdata/netdata/edit/master/src/health/notifications/ilert/metadata.yaml" +sidebar_label: "ilert" +learn_status: "Published" +learn_rel_path: "Alerts & Notifications/Notifications/Agent Dispatched Notifications" +message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE NOTIFICATION'S metadata.yaml FILE" +endmeta--> + +# ilert + + +<img src="https://netdata.cloud/img/ilert.svg" width="150"/> + + +ilert is an alerting and incident management tool. It helps teams reduce response times by enhancing monitoring and ticketing tools with reliable alerts, automatic escalations, on-call schedules, and features for incident response, communication, and status updates. +Sending notification to ilert via Netdata's Agent alert notification feature includes links, images and resolving of corresponding alerts. 
+ + + +<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> + +## Setup + +### Prerequisites + +#### + +- A Netdata alert source in ilert. You can create a [Netdata alert source](https://docs.ilert.com/inbound-integrations/netdata) in [ilert](https://www.ilert.com/). +- Access to the terminal where Netdata Agent is running + + + +### Configuration + +#### File + +The configuration file name for this integration is `health_alarm_notify.conf`. + + +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config health_alarm_notify.conf +``` +#### Options + +The following options can be defined for this notification + +<details open><summary>Config Options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| SEND_ILERT | Set `SEND_ILERT` to YES | YES | yes | +| ILERT_ALERT_SOURCE_URL | Set `ILERT_ALERT_SOURCE_URL` to your Netdata alert source url in ilert. 
| | yes | + +</details> + +#### Examples + +##### Basic Configuration + + + +```yaml +SEND_ILERT="YES" +ILERT_ALERT_SOURCE_URL="https://api.ilert.com/api/v1/events/netdata/{API-KEY}" + +``` + + +## Troubleshooting + +### Test Notification + +You can run the following command by hand, to test alerts configuration: + +```bash +# become user netdata +sudo su -s /bin/bash netdata + +# enable debugging info on the console +export NETDATA_ALARM_NOTIFY_DEBUG=1 + +# send test alarms to sysadmin +/usr/libexec/netdata/plugins.d/alarm-notify.sh test + +# send test alarms to any role +/usr/libexec/netdata/plugins.d/alarm-notify.sh test "ROLE" +``` + +Note that this will test _all_ alert mechanisms for the selected role. + + diff --git a/src/health/notifications/ilert/metadata.yaml b/src/health/notifications/ilert/metadata.yaml new file mode 100644 index 000000000..7e2454834 --- /dev/null +++ b/src/health/notifications/ilert/metadata.yaml @@ -0,0 +1,55 @@ +# yamllint disable rule:line-length +--- +- id: "notify-ilert" + meta: + name: "ilert" + link: "https://www.ilert.com/" + categories: + - notify.agent + icon_filename: "ilert.svg" + keywords: + - ilert + overview: + notification_description: | + ilert is an alerting and incident management tool. It helps teams reduce response times by enhancing monitoring and ticketing tools with reliable alerts, automatic escalations, on-call schedules, and features for incident response, communication, and status updates. + Sending notification to ilert via Netdata's Agent alert notification feature includes links, images and resolving of corresponding alerts. + notification_limitations: "" + setup: + prerequisites: + list: + - title: "" + description: | + - A Netdata alert source in ilert. You can create a [Netdata alert source](https://docs.ilert.com/inbound-integrations/netdata) in [ilert](https://www.ilert.com/). 
+ - Access to the terminal where Netdata Agent is running + configuration: + file: + name: "health_alarm_notify.conf" + options: + description: "The following options can be defined for this notification" + folding: + title: "Config Options" + enabled: true + list: + - name: "SEND_ILERT" + default_value: "YES" + description: "Set `SEND_ILERT` to YES" + required: true + - name: "ILERT_ALERT_SOURCE_URL" + default_value: "" + description: "Set `ILERT_ALERT_SOURCE_URL` to your Netdata alert source url in ilert." + required: true + examples: + folding: + enabled: true + title: "" + list: + - name: "Basic Configuration" + folding: + enabled: false + description: "" + config: | + SEND_ILERT="YES" + ILERT_ALERT_SOURCE_URL="https://api.ilert.com/api/v1/events/netdata/{API-KEY}" + troubleshooting: + problems: + list: [] diff --git a/src/health/notifications/irc/README.md b/src/health/notifications/irc/README.md index 76d3f5bc2..5674fb39d 100644 --- a/src/health/notifications/irc/README.md +++ b/src/health/notifications/irc/README.md @@ -37,8 +37,8 @@ Send notifications to IRC using Netdata's Agent alert notification feature, whic The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -76,7 +76,7 @@ nc="/usr/bin/nc" ##### DEFAULT_RECIPIENT_IRC The `DEFAULT_RECIPIENT_IRC` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_irc[sysadmin]="#systems" role_recipients_irc[domainadmin]="#domains" role_recipients_irc[dba]="#databases #systems" diff --git a/src/health/notifications/irc/metadata.yaml b/src/health/notifications/irc/metadata.yaml index aa2593f91..4a7585eef 100644 --- a/src/health/notifications/irc/metadata.yaml +++ b/src/health/notifications/irc/metadata.yaml @@ -69,7 +69,7 @@ required: true detailed_description: | The `DEFAULT_RECIPIENT_IRC` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_irc[sysadmin]="#systems" role_recipients_irc[domainadmin]="#domains" role_recipients_irc[dba]="#databases #systems" diff --git a/src/health/notifications/kavenegar/README.md b/src/health/notifications/kavenegar/README.md index eedd43a23..ff4479d6d 100644 --- a/src/health/notifications/kavenegar/README.md +++ b/src/health/notifications/kavenegar/README.md @@ -38,8 +38,8 @@ You can send notifications to Kavenegar using Netdata's Agent alert notification The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -63,7 +63,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. You can then have different SMS recipients per role, by editing `DEFAULT_RECIPIENT_KAVENEGAR` with the SMS recipients you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_kavenegar[sysadmin]="09100000000" role_recipients_kavenegar[domainadmin]="09111111111" role_recipients_kavenegar[dba]="0922222222" diff --git a/src/health/notifications/kavenegar/metadata.yaml b/src/health/notifications/kavenegar/metadata.yaml index 559dbac09..70c87b637 100644 --- a/src/health/notifications/kavenegar/metadata.yaml +++ b/src/health/notifications/kavenegar/metadata.yaml @@ -50,7 +50,7 @@ All roles will default to this variable if left unconfigured. You can then have different SMS recipients per role, by editing `DEFAULT_RECIPIENT_KAVENEGAR` with the SMS recipients you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_kavenegar[sysadmin]="09100000000" role_recipients_kavenegar[domainadmin]="09111111111" role_recipients_kavenegar[dba]="0922222222" diff --git a/src/health/notifications/matrix/README.md b/src/health/notifications/matrix/README.md index 3c01a9ef2..c0387d211 100644 --- a/src/health/notifications/matrix/README.md +++ b/src/health/notifications/matrix/README.md @@ -39,8 +39,8 @@ Send notifications to Matrix network rooms using Netdata's Agent alert notificat The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
+You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -77,7 +77,7 @@ All roles will default to this variable if left unconfigured. You can have different Rooms per role, by editing `DEFAULT_RECIPIENT_MATRIX` with the `!roomid:homeservername` you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_matrix[sysadmin]="!roomid1:homeservername" role_recipients_matrix[domainadmin]="!roomid2:homeservername" role_recipients_matrix[dba]="!roomid3:homeservername" diff --git a/src/health/notifications/matrix/metadata.yaml b/src/health/notifications/matrix/metadata.yaml index db7f92eb1..770e0905a 100644 --- a/src/health/notifications/matrix/metadata.yaml +++ b/src/health/notifications/matrix/metadata.yaml @@ -61,7 +61,7 @@ You can have different Rooms per role, by editing `DEFAULT_RECIPIENT_MATRIX` with the `!roomid:homeservername` you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_matrix[sysadmin]="!roomid1:homeservername" role_recipients_matrix[domainadmin]="!roomid2:homeservername" role_recipients_matrix[dba]="!roomid3:homeservername" diff --git a/src/health/notifications/messagebird/README.md b/src/health/notifications/messagebird/README.md index 4b668fce3..d961a3b4d 100644 --- a/src/health/notifications/messagebird/README.md +++ b/src/health/notifications/messagebird/README.md @@ -37,8 +37,8 @@ Send notifications to MessageBird using Netdata's Agent alert notification featu The configuration file name for this integration is `health_alarm_notify.conf`. 
-You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -62,7 +62,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. You can then have different recipients per role, by editing `DEFAULT_RECIPIENT_MESSAGEBIRD` with the number you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_messagebird[sysadmin]="+15555555555" role_recipients_messagebird[domainadmin]="+15555555556" role_recipients_messagebird[dba]="+15555555557" diff --git a/src/health/notifications/messagebird/metadata.yaml b/src/health/notifications/messagebird/metadata.yaml index a97cdc712..3ba65ecf3 100644 --- a/src/health/notifications/messagebird/metadata.yaml +++ b/src/health/notifications/messagebird/metadata.yaml @@ -49,7 +49,7 @@ All roles will default to this variable if left unconfigured. 
You can then have different recipients per role, by editing `DEFAULT_RECIPIENT_MESSAGEBIRD` with the number you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_messagebird[sysadmin]="+15555555555" role_recipients_messagebird[domainadmin]="+15555555556" role_recipients_messagebird[dba]="+15555555557" diff --git a/src/health/notifications/msteams/README.md b/src/health/notifications/msteams/README.md index e24730777..91fe7a081 100644 --- a/src/health/notifications/msteams/README.md +++ b/src/health/notifications/msteams/README.md @@ -38,8 +38,8 @@ You can send Netdata alerts to Microsoft Teams using Netdata's Agent alert notif The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -64,7 +64,7 @@ In Microsoft Teams the channel name is encoded in the URI after `/IncomingWebhoo All roles will default to this variable if left unconfigured. 
You can have different channels per role, by editing `DEFAULT_RECIPIENT_MSTEAMS` with the channel you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_msteams[sysadmin]="CHANNEL1" role_recipients_msteams[domainadmin]="CHANNEL2" role_recipients_msteams[dba]="databases CHANNEL3" diff --git a/src/health/notifications/msteams/metadata.yaml b/src/health/notifications/msteams/metadata.yaml index 72de507a4..d37c08c0a 100644 --- a/src/health/notifications/msteams/metadata.yaml +++ b/src/health/notifications/msteams/metadata.yaml @@ -50,7 +50,7 @@ All roles will default to this variable if left unconfigured. You can have different channels per role, by editing `DEFAULT_RECIPIENT_MSTEAMS` with the channel you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_msteams[sysadmin]="CHANNEL1" role_recipients_msteams[domainadmin]="CHANNEL2" role_recipients_msteams[dba]="databases CHANNEL3" diff --git a/src/health/notifications/ntfy/README.md b/src/health/notifications/ntfy/README.md index a03e30304..39613338b 100644 --- a/src/health/notifications/ntfy/README.md +++ b/src/health/notifications/ntfy/README.md @@ -39,8 +39,8 @@ You can send alerts to an ntfy server using Netdata's Agent alert notification f The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -67,7 +67,7 @@ You can define multiple recipient URLs like this: `https://SERVER1/TOPIC1` `http All roles will default to this variable if left unconfigured. You can then have different servers and/or topics per role, by editing DEFAULT_RECIPIENT_NTFY with the server-topic combination you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_ntfy[sysadmin]="https://SERVER1/TOPIC1" role_recipients_ntfy[domainadmin]="https://SERVER2/TOPIC2" role_recipients_ntfy[dba]="https://SERVER3/TOPIC3" diff --git a/src/health/notifications/ntfy/metadata.yaml b/src/health/notifications/ntfy/metadata.yaml index 0d6c0beac..90ce3f6cd 100644 --- a/src/health/notifications/ntfy/metadata.yaml +++ b/src/health/notifications/ntfy/metadata.yaml @@ -45,7 +45,7 @@ All roles will default to this variable if left unconfigured. You can then have different servers and/or topics per role, by editing DEFAULT_RECIPIENT_NTFY with the server-topic combination you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_ntfy[sysadmin]="https://SERVER1/TOPIC1" role_recipients_ntfy[domainadmin]="https://SERVER2/TOPIC2" role_recipients_ntfy[dba]="https://SERVER3/TOPIC3" diff --git a/src/health/notifications/opsgenie/README.md b/src/health/notifications/opsgenie/README.md index fa5859d7d..2a4dc1fd8 100644 --- a/src/health/notifications/opsgenie/README.md +++ b/src/health/notifications/opsgenie/README.md @@ -38,8 +38,8 @@ You can send notifications to Opsgenie using Netdata's Agent alert notification The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
+You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata diff --git a/src/health/notifications/pagerduty/README.md b/src/health/notifications/pagerduty/README.md index ae45e5385..d85dd46c9 100644 --- a/src/health/notifications/pagerduty/README.md +++ b/src/health/notifications/pagerduty/README.md @@ -40,8 +40,8 @@ You can send notifications to PagerDuty using Netdata's Agent alert notification The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -63,7 +63,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. 
The `DEFAULT_RECIPIENT_PD` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_pd[sysadmin]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxa" role_recipients_pd[domainadmin]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxb" role_recipients_pd[dba]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxc" diff --git a/src/health/notifications/pagerduty/metadata.yaml b/src/health/notifications/pagerduty/metadata.yaml index 6fc1d640e..3973825fc 100644 --- a/src/health/notifications/pagerduty/metadata.yaml +++ b/src/health/notifications/pagerduty/metadata.yaml @@ -44,7 +44,7 @@ All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_PD` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_pd[sysadmin]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxa" role_recipients_pd[domainadmin]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxb" role_recipients_pd[dba]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxc" diff --git a/src/health/notifications/prowl/README.md b/src/health/notifications/prowl/README.md index 0d206cee0..ba00b3212 100644 --- a/src/health/notifications/prowl/README.md +++ b/src/health/notifications/prowl/README.md @@ -43,8 +43,8 @@ Send notifications to Prowl using Netdata's Agent alert notification feature, wh The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -66,7 +66,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_PROWL` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_prowl[sysadmin]="AAAAAAAA" role_recipients_prowl[domainadmin]="BBBBBBBBB" role_recipients_prowl[dba]="CCCCCCCCC" diff --git a/src/health/notifications/prowl/metadata.yaml b/src/health/notifications/prowl/metadata.yaml index b3f0e0a1e..3142d155c 100644 --- a/src/health/notifications/prowl/metadata.yaml +++ b/src/health/notifications/prowl/metadata.yaml @@ -43,7 +43,7 @@ All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_PROWL` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_prowl[sysadmin]="AAAAAAAA" role_recipients_prowl[domainadmin]="BBBBBBBBB" role_recipients_prowl[dba]="CCCCCCCCC" diff --git a/src/health/notifications/pushbullet/README.md b/src/health/notifications/pushbullet/README.md index 1b30f4c97..0f22b5d54 100644 --- a/src/health/notifications/pushbullet/README.md +++ b/src/health/notifications/pushbullet/README.md @@ -37,8 +37,8 @@ Send notifications to Pushbullet using Netdata's Agent alert notification featur The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -63,7 +63,7 @@ You can define multiple entries like this: user1@email.com user2@email.com. All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_PUSHBULLET` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_pushbullet[sysadmin]="user1@email.com" role_recipients_pushbullet[domainadmin]="user2@mail.com" role_recipients_pushbullet[dba]="#channel1" diff --git a/src/health/notifications/pushbullet/metadata.yaml b/src/health/notifications/pushbullet/metadata.yaml index 430033cca..03ee41233 100644 --- a/src/health/notifications/pushbullet/metadata.yaml +++ b/src/health/notifications/pushbullet/metadata.yaml @@ -47,7 +47,7 @@ All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_PUSHBULLET` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_pushbullet[sysadmin]="user1@email.com" role_recipients_pushbullet[domainadmin]="user2@mail.com" role_recipients_pushbullet[dba]="#channel1" diff --git a/src/health/notifications/pushover/README.md b/src/health/notifications/pushover/README.md index 9d30dfa97..8a296691b 100644 --- a/src/health/notifications/pushover/README.md +++ b/src/health/notifications/pushover/README.md @@ -41,8 +41,8 @@ Send notification to Pushover using Netdata's Agent alert notification feature, The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
+You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -65,7 +65,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_PUSHOVER` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_pushover[sysadmin]="USERTOKEN1" role_recipients_pushover[domainadmin]="USERTOKEN2" role_recipients_pushover[dba]="USERTOKEN3 USERTOKEN4" diff --git a/src/health/notifications/pushover/metadata.yaml b/src/health/notifications/pushover/metadata.yaml index 9af729ea8..e45f909b3 100644 --- a/src/health/notifications/pushover/metadata.yaml +++ b/src/health/notifications/pushover/metadata.yaml @@ -49,7 +49,7 @@ All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_PUSHOVER` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_pushover[sysadmin]="USERTOKEN1" role_recipients_pushover[domainadmin]="USERTOKEN2" role_recipients_pushover[dba]="USERTOKEN3 USERTOKEN4" diff --git a/src/health/notifications/rocketchat/README.md b/src/health/notifications/rocketchat/README.md index b9b0d5687..f23032bd0 100644 --- a/src/health/notifications/rocketchat/README.md +++ b/src/health/notifications/rocketchat/README.md @@ -38,8 +38,8 @@ Send notifications to Rocket.Chat using Netdata's Agent alert notification featu The configuration file name for this integration is `health_alarm_notify.conf`. 
-You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -62,7 +62,7 @@ The following options can be defined for this notification All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_ROCKETCHAT` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_rocketchat[sysadmin]="systems" role_recipients_rocketchat[domainadmin]="domains" role_recipients_rocketchat[dba]="databases systems" diff --git a/src/health/notifications/rocketchat/metadata.yaml b/src/health/notifications/rocketchat/metadata.yaml index f644b93e1..17ee37acb 100644 --- a/src/health/notifications/rocketchat/metadata.yaml +++ b/src/health/notifications/rocketchat/metadata.yaml @@ -46,7 +46,7 @@ All roles will default to this variable if left unconfigured. 
The `DEFAULT_RECIPIENT_ROCKETCHAT` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_rocketchat[sysadmin]="systems" role_recipients_rocketchat[domainadmin]="domains" role_recipients_rocketchat[dba]="databases systems" diff --git a/src/health/notifications/slack/README.md b/src/health/notifications/slack/README.md index 35cb75a18..10fc707c9 100644 --- a/src/health/notifications/slack/README.md +++ b/src/health/notifications/slack/README.md @@ -38,8 +38,8 @@ Send notifications to a Slack workspace using Netdata's Agent alert notification The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata diff --git a/src/health/notifications/smstools3/README.md b/src/health/notifications/smstools3/README.md index dafc0b7f4..ee8eb6c8d 100644 --- a/src/health/notifications/smstools3/README.md +++ b/src/health/notifications/smstools3/README.md @@ -42,8 +42,8 @@ The SMS Server Tools 3 is a SMS Gateway software which can send and receive shor The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
+You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -74,7 +74,7 @@ sendsms="/usr/bin/sendsms" All roles will default to this variable if left unconfigured. You can then have different phone numbers per role, by editing `DEFAULT_RECIPIENT_SMS` with the phone number you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_sms[sysadmin]="PHONE1" role_recipients_sms[domainadmin]="PHONE2" role_recipients_sms[dba]="PHONE3" diff --git a/src/health/notifications/smstools3/metadata.yaml b/src/health/notifications/smstools3/metadata.yaml index 3a29183a5..e23e41c41 100644 --- a/src/health/notifications/smstools3/metadata.yaml +++ b/src/health/notifications/smstools3/metadata.yaml @@ -57,7 +57,7 @@ All roles will default to this variable if left unconfigured. You can then have different phone numbers per role, by editing `DEFAULT_RECIPIENT_SMS` with the phone number you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_sms[sysadmin]="PHONE1" role_recipients_sms[domainadmin]="PHONE2" role_recipients_sms[dba]="PHONE3" diff --git a/src/health/notifications/syslog/README.md b/src/health/notifications/syslog/README.md index 72534b1c8..0428533f5 100644 --- a/src/health/notifications/syslog/README.md +++ b/src/health/notifications/syslog/README.md @@ -37,8 +37,8 @@ Send notifications to Syslog using Netdata's Agent alert notification feature, w The configuration file name for this integration is `health_alarm_notify.conf`. 
-You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -78,7 +78,7 @@ All roles will default to this variable if left unconfigured. You can then have different recipients per role, by editing DEFAULT_RECIPIENT_SYSLOG with the recipient you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_syslog[sysadmin]="daemon.notice@loghost1:514/netdata" role_recipients_syslog[domainadmin]="daemon.notice@loghost2:514/netdata" role_recipients_syslog[dba]="daemon.notice@loghost3:514/netdata" diff --git a/src/health/notifications/syslog/metadata.yaml b/src/health/notifications/syslog/metadata.yaml index c5f241e76..2793b3ae2 100644 --- a/src/health/notifications/syslog/metadata.yaml +++ b/src/health/notifications/syslog/metadata.yaml @@ -59,7 +59,7 @@ detailed_description: | You can then have different recipients per role, by editing DEFAULT_RECIPIENT_SYSLOG with the recipient you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_syslog[sysadmin]="daemon.notice@loghost1:514/netdata" role_recipients_syslog[domainadmin]="daemon.notice@loghost2:514/netdata" role_recipients_syslog[dba]="daemon.notice@loghost3:514/netdata" diff --git a/src/health/notifications/telegram/README.md b/src/health/notifications/telegram/README.md index 90cca4214..f44cfcb9f 100644 --- a/src/health/notifications/telegram/README.md +++ 
b/src/health/notifications/telegram/README.md @@ -38,8 +38,8 @@ Send notifications to Telegram using Netdata's Agent alert notification feature, The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -63,7 +63,7 @@ All roles will default to this variable if left unconfigured. The `DEFAULT_RECIPIENT_CUSTOM` can be edited in the following entries at the bottom of the same file: -```conf +```text role_recipients_telegram[sysadmin]="-49999333324" role_recipients_telegram[domainadmin]="-49999333389" role_recipients_telegram[dba]="-10099992222" diff --git a/src/health/notifications/telegram/metadata.yaml b/src/health/notifications/telegram/metadata.yaml index daa45da72..7fd2f05b5 100644 --- a/src/health/notifications/telegram/metadata.yaml +++ b/src/health/notifications/telegram/metadata.yaml @@ -47,7 +47,7 @@ The `DEFAULT_RECIPIENT_CUSTOM` can be edited in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_telegram[sysadmin]="-49999333324" role_recipients_telegram[domainadmin]="-49999333389" role_recipients_telegram[dba]="-10099992222" diff --git a/src/health/notifications/twilio/README.md b/src/health/notifications/twilio/README.md index cd9b17e7f..18b9ffa2b 100644 --- a/src/health/notifications/twilio/README.md +++ b/src/health/notifications/twilio/README.md @@ -37,8 +37,8 @@ Send notifications to Twilio 
using Netdata's Agent alert notification feature, w The configuration file name for this integration is `health_alarm_notify.conf`. -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). +You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). ```bash cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata @@ -62,7 +62,7 @@ The following options can be defined for this notification You can then have different recipients per role, by editing DEFAULT_RECIPIENT_TWILIO with the recipient's number you want, in the following entries at the bottom of the same file: -```conf +```text role_recipients_twilio[sysadmin]="+15555555555" role_recipients_twilio[domainadmin]="+15555555556" role_recipients_twilio[dba]="+15555555557" diff --git a/src/health/notifications/twilio/metadata.yaml b/src/health/notifications/twilio/metadata.yaml index 35fc3f042..594936a1d 100644 --- a/src/health/notifications/twilio/metadata.yaml +++ b/src/health/notifications/twilio/metadata.yaml @@ -52,7 +52,7 @@ detailed_description: | You can then have different recipients per role, by editing DEFAULT_RECIPIENT_TWILIO with the recipient's number you want, in the following entries at the bottom of the same file: - ```conf + ```text role_recipients_twilio[sysadmin]="+15555555555" role_recipients_twilio[domainadmin]="+15555555556" role_recipients_twilio[dba]="+15555555557" diff --git a/src/health/notifications/web/README.md b/src/health/notifications/web/README.md index d7115be3d..baa0bfaaa 100644 --- a/src/health/notifications/web/README.md +++ 
b/src/health/notifications/web/README.md @@ -1,13 +1,3 @@ -<!-- -title: "Browser pop up agent alert notifications" -sidebar_label: "Browser pop ups" -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/health/notifications/web/README.md" -learn_status: "Published" -learn_topic_type: "Tasks" -learn_rel_path: "Integrations/Notify/Agent alert notifications" -learn_autogeneration_metadata: "{'part_of_cloud': False, 'part_of_agent': True}" ---> - # Browser pop up agent alert notifications The Netdata dashboard shows HTML notifications, when it is open. diff --git a/src/health/rrdcalc.c b/src/health/rrdcalc.c index bce709bf4..e5a26db07 100644 --- a/src/health/rrdcalc.c +++ b/src/health/rrdcalc.c @@ -80,7 +80,7 @@ uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint3 alarm_id = sql_get_alarm_id(host, chart, name, next_event_id); if (!alarm_id) { if (unlikely(!host->health_log.next_alarm_id)) - host->health_log.next_alarm_id = (uint32_t)now_realtime_sec(); + host->health_log.next_alarm_id = get_uint32_id(); alarm_id = host->health_log.next_alarm_id++; } } diff --git a/src/health/rrdvar.c b/src/health/rrdvar.c index 75cb9739b..5d6e3cf84 100644 --- a/src/health/rrdvar.c +++ b/src/health/rrdvar.c @@ -9,20 +9,6 @@ typedef struct rrdvar { // ---------------------------------------------------------------------------- // RRDVAR management -inline int rrdvar_fix_name(char *variable) { - int fixed = 0; - while(*variable) { - if (!isalnum((uint8_t)*variable) && *variable != '.' 
&& *variable != '_') { - *variable++ = '_'; - fixed++; - } - else - variable++; - } - - return fixed; -} - inline STRING *rrdvar_name_to_string(const char *name) { char *variable = strdupz(name); rrdvar_fix_name(variable); diff --git a/src/health/rrdvar.h b/src/health/rrdvar.h index 3297984cb..f61b04b4a 100644 --- a/src/health/rrdvar.h +++ b/src/health/rrdvar.h @@ -7,8 +7,6 @@ #define RRDVAR_MAX_LENGTH 1024 -int rrdvar_fix_name(char *variable); - #include "database/rrd.h" STRING *rrdvar_name_to_string(const char *name); diff --git a/src/health/schema.d/health%3Aalert%3Aprototype.json b/src/health/schema.d/health%3Aalert%3Aprototype.json index 309d052de..9d0f1eafd 100644 --- a/src/health/schema.d/health%3Aalert%3Aprototype.json +++ b/src/health/schema.d/health%3Aalert%3Aprototype.json @@ -380,7 +380,7 @@ "classification": { "$ref": "#/definitions/configClassification" }, "value": { "$ref": "#/definitions/configValue" }, "conditions": { "$ref": "#/definitions/configConditions" }, - "actions": { "$ref": "#/definitions/configAction" } + "action": { "$ref": "#/definitions/configAction" } }, "required": [] }, |