summaryrefslogtreecommitdiffstats
path: root/health
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2023-08-10 09:18:49 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2023-08-10 09:18:49 +0000
commitdd814a7c1a8de056a79f7238578b09236edd5506 (patch)
tree429e7eed5a634a4efe9a6877ce66da8e64aa1782 /health
parentAdding upstream version 1.41.0. (diff)
downloadnetdata-f2412a44d03144f2b402591befe2dbc00e842029.tar.xz
netdata-f2412a44d03144f2b402591befe2dbc00e842029.zip
Adding upstream version 1.42.0.upstream/1.42.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'health')
-rw-r--r--health/REFERENCE.md64
-rw-r--r--health/health.c30
-rw-r--r--health/health.d/bcache.conf4
-rw-r--r--health/health.d/btrfs.conf9
-rw-r--r--health/health.d/cgroups.conf155
-rw-r--r--health/health.d/cpu.conf10
-rw-r--r--health/health.d/disks.conf87
-rw-r--r--health/health.d/file_descriptors.conf10
-rw-r--r--health/health.d/ioping.conf2
-rw-r--r--health/health.d/ipmi.conf2
-rw-r--r--health/health.d/linux_power_supply.conf2
-rw-r--r--health/health.d/load.conf6
-rw-r--r--health/health.d/mdstat.conf16
-rw-r--r--health/health.d/memory.conf63
-rw-r--r--health/health.d/net.conf20
-rw-r--r--health/health.d/qos.conf21
-rw-r--r--health/health.d/ram.conf7
-rw-r--r--health/health.d/softnet.conf4
-rw-r--r--health/health.d/swap.conf6
-rw-r--r--health/health.d/synchronization.conf2
-rw-r--r--health/health.d/systemdunits.conf10
-rw-r--r--health/health.d/tcp_listen.conf8
-rw-r--r--health/health.d/tcp_mem.conf2
-rw-r--r--health/health.d/tcp_orphans.conf2
-rw-r--r--health/health.d/tcp_resets.conf4
-rw-r--r--health/health.d/udp_errors.conf4
-rw-r--r--health/health.d/windows.conf10
-rw-r--r--health/health.d/zfs.conf2
-rw-r--r--health/health.h1
-rw-r--r--health/health_log.c2
-rw-r--r--health/notifications/alerta/metadata.yaml90
-rw-r--r--health/notifications/awssns/metadata.yaml137
-rw-r--r--health/notifications/custom/metadata.yaml169
-rw-r--r--health/notifications/discord/metadata.yaml76
-rw-r--r--health/notifications/dynatrace/metadata.yaml92
-rw-r--r--health/notifications/email/metadata.yaml73
-rw-r--r--health/notifications/flock/metadata.yaml72
-rw-r--r--health/notifications/gotify/metadata.yaml60
-rw-r--r--health/notifications/irc/metadata.yaml100
-rw-r--r--health/notifications/kavenegar/metadata.yaml82
-rw-r--r--health/notifications/matrix/metadata.yaml91
-rw-r--r--health/notifications/messagebird/metadata.yaml79
-rw-r--r--health/notifications/msteams/metadata.yaml79
-rw-r--r--health/notifications/ntfy/metadata.yaml70
-rw-r--r--health/notifications/opsgenie/metadata.yaml60
-rw-r--r--health/notifications/pagerduty/metadata.yaml73
-rw-r--r--health/notifications/prowl/metadata.yaml71
-rw-r--r--health/notifications/pushbullet/metadata.yaml76
-rw-r--r--health/notifications/pushover/metadata.yaml78
-rw-r--r--health/notifications/rocketchat/metadata.yaml75
-rw-r--r--health/notifications/sample-metadata.yaml39
-rw-r--r--health/notifications/slack/metadata.yaml63
-rw-r--r--health/notifications/smstools3/metadata.yaml84
-rw-r--r--health/notifications/syslog/metadata.yaml88
-rw-r--r--health/notifications/telegram/metadata.yaml77
-rw-r--r--health/notifications/twilio/metadata.yaml83
56 files changed, 2439 insertions, 263 deletions
diff --git a/health/REFERENCE.md b/health/REFERENCE.md
index a36edd8c..e5179b4e 100644
--- a/health/REFERENCE.md
+++ b/health/REFERENCE.md
@@ -485,7 +485,7 @@ The full [database query API](https://github.com/netdata/netdata/blob/master/web
`,` or `|` instead of spaces)_ and the `match-ids` and `match-names` options affect the searches
for dimensions.
-- `foreach DIMENSIONS` is optional, will always be the last parameter, and uses the same `,`/`|`
+- `foreach DIMENSIONS` is optional and works only with [templates](#alarm-line-alarm-or-template), will always be the last parameter, and uses the same `,`/`|`
rules as the `of` parameter. Each dimension you specify in `foreach` will use the same rule
to trigger an alarm. If you set both `of` and `foreach`, Netdata will ignore the `of` parameter
and replace it with one of the dimensions you gave to `foreach`. This option allows you to
@@ -1068,18 +1068,18 @@ alarm to it.
Check if user or system dimension is using more than 50% of cpu:
```yaml
- alarm: dim_template
- on: system.cpu
- os: linux
-lookup: average -3s percentage foreach system,user
- units: %
- every: 10s
- warn: $this > 50
- crit: $this > 80
+template: cpu_template
+ on: system.cpu
+ os: linux
+ lookup: average -1m foreach system,user
+ units: %
+ every: 10s
+ warn: $this > 50
+ crit: $this > 80
```
-The `lookup` line will calculate the average CPU usage from system and user in the last 3 seconds. Because we have
-the foreach in the `lookup` line, Netdata will create two independent alarms called `dim_template_system`
+The `lookup` line will calculate the average CPU usage from system and user over the last minute. Because we have
+the foreach in the `lookup` line, Netdata will create two independent alarms called `cpu_template_system`
and `cpu_template_user` that will have all the other parameters shared among them.
### Example 6 - CPU usage
@@ -1087,17 +1087,17 @@ and `dim_template_user` that will have all the other parameters shared among the
Check if all dimensions are using more than 50% of cpu:
```yaml
- alarm: dim_template
- on: system.cpu
- os: linux
-lookup: average -3s percentage foreach *
- units: %
- every: 10s
- warn: $this > 50
- crit: $this > 80
+template: cpu_template
+ on: system.cpu
+ os: linux
+ lookup: average -1m foreach *
+ units: %
+ every: 10s
+ warn: $this > 50
+ crit: $this > 80
```
-The `lookup` line will calculate the average of CPU usage from system and user in the last 3 seconds. In this case
+The `lookup` line will calculate the average CPU usage of each dimension over the last minute. In this case
Netdata will create alarms for all dimensions of the chart.
### Example 7 - Z-Score based alarm
@@ -1199,6 +1199,8 @@ Dimension templates can condense many individual entities into one—no more cop
### The fundamentals of `foreach`
+> **Note**: works only with [templates](#alarm-line-alarm-or-template).
+
Our dimension templates update adds a new `foreach` parameter to the
existing [`lookup` line](#alarm-line-lookup). This
is where the magic happens.
@@ -1224,38 +1226,38 @@ Before dimension templates, you would need the following three entities:
```yaml
alarm: cpu_system
on: system.cpu
-lookup: average -10m percentage of system
+lookup: average -10m of system
every: 1m
warn: $this > 50
crit: $this > 80
alarm: cpu_user
on: system.cpu
-lookup: average -10m percentage of user
+lookup: average -10m of user
every: 1m
warn: $this > 50
crit: $this > 80
alarm: cpu_nice
on: system.cpu
-lookup: average -10m percentage of nice
+lookup: average -10m of nice
every: 1m
warn: $this > 50
crit: $this > 80
```
-With dimension templates, you can condense these into a single alarm. Take note of the `alarm` and `lookup` lines.
+With dimension templates, you can condense these into a single template. Take note of the `template` and `lookup` lines.
```yaml
- alarm: cpu_template
- on: system.cpu
-lookup: average -10m percentage foreach system,user,nice
- every: 1m
- warn: $this > 50
- crit: $this > 80
+template: cpu_template
+ on: system.cpu
+ lookup: average -10m foreach system,user,nice
+ every: 1m
+ warn: $this > 50
+ crit: $this > 80
```
-The `alarm` line specifies the naming scheme Netdata will use. You can use whatever naming scheme you'd like, with `.`
+The `template` line specifies the naming scheme Netdata will use. You can use whatever naming scheme you'd like, with `.`
and `_` being the only allowed symbols.
The `lookup` line has changed from `of` to `foreach`, and we're now passing three dimensions.
diff --git a/health/health.c b/health/health.c
index eeed3a67..d94c3316 100644
--- a/health/health.c
+++ b/health/health.c
@@ -433,14 +433,14 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
if(unlikely(ae->new_status < RRDCALC_STATUS_CLEAR)) {
// do not send notifications for internal statuses
- netdata_log_debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (internal statuses)", ae_chart_name(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
+ netdata_log_debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (internal statuses)", ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
goto done;
}
if(unlikely(ae->new_status <= RRDCALC_STATUS_CLEAR && (ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
// do not send notifications for disabled statuses
- netdata_log_debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (it has no-clear-notification enabled)", ae_chart_name(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
- netdata_log_health("[%s]: Health not sending notification for alarm '%s.%s' status %s (it has no-clear-notification enabled)", rrdhost_hostname(host), ae_chart_name(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
+ netdata_log_debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (it has no-clear-notification enabled)", ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
+ netdata_log_health("[%s]: Health not sending notification for alarm '%s.%s' status %s (it has no-clear-notification enabled)", rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
// mark it as run, so that we will send the same alarm if it happens again
goto done;
@@ -457,9 +457,9 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
// we have executed this alarm notification in the past
if(last_executed_status == ae->new_status && !(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING)) {
// don't send the notification for the same status again
- netdata_log_debug(D_HEALTH, "Health not sending again notification for alarm '%s.%s' status %s", ae_chart_name(ae), ae_name(ae)
+ netdata_log_debug(D_HEALTH, "Health not sending again notification for alarm '%s.%s' status %s", ae_chart_id(ae), ae_name(ae)
, rrdcalc_status2string(ae->new_status));
- netdata_log_health("[%s]: Health not sending again notification for alarm '%s.%s' status %s", rrdhost_hostname(host), ae_chart_name(ae), ae_name(ae)
+ netdata_log_health("[%s]: Health not sending again notification for alarm '%s.%s' status %s", rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae)
, rrdcalc_status2string(ae->new_status));
goto done;
}
@@ -470,7 +470,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
if(unlikely(ae->new_status == RRDCALC_STATUS_CLEAR)) {
if((!(ae->flags & HEALTH_ENTRY_RUN_ONCE)) || (ae->flags & HEALTH_ENTRY_RUN_ONCE && ae->old_status < RRDCALC_STATUS_RAISED) ) {
netdata_log_debug(D_HEALTH, "Health not sending notification for first initialization of alarm '%s.%s' status %s"
- , ae_chart_name(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
+ , ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
goto done;
}
}
@@ -479,11 +479,11 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
// Check if alarm notifications are silenced
if (ae->flags & HEALTH_ENTRY_FLAG_SILENCED) {
- netdata_log_health("[%s]: Health not sending notification for alarm '%s.%s' status %s (command API has disabled notifications)", rrdhost_hostname(host), ae_chart_name(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
+ netdata_log_health("[%s]: Health not sending notification for alarm '%s.%s' status %s (command API has disabled notifications)", rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
goto done;
}
- netdata_log_health("[%s]: Sending notification for alarm '%s.%s' status %s.", rrdhost_hostname(host), ae_chart_name(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
+ netdata_log_health("[%s]: Sending notification for alarm '%s.%s' status %s.", rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
const char *exec = (ae->exec) ? ae_exec(ae) : string2str(host->health.health_default_exec);
const char *recipient = (ae->recipient) ? ae_recipient(ae) : string2str(host->health.health_default_recipient);
@@ -562,7 +562,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
ae->alarm_event_id,
(unsigned long)ae->when,
ae_name(ae),
- ae->chart?ae_chart_name(ae):"NOCHART",
+ ae->chart?ae_chart_id(ae):"NOCHART",
ae->family?ae_family(ae):"NOFAMILY",
rrdcalc_status2string(ae->new_status),
rrdcalc_status2string(ae->old_status),
@@ -627,7 +627,7 @@ static inline void health_alarm_wait_for_execution(ALARM_ENTRY *ae) {
static inline void health_process_notifications(RRDHOST *host, ALARM_ENTRY *ae) {
netdata_log_debug(D_HEALTH, "Health alarm '%s.%s' = " NETDATA_DOUBLE_FORMAT_AUTO " - changed status from %s to %s",
- ae->chart?ae_chart_name(ae):"NOCHART", ae_name(ae),
+ ae->chart?ae_chart_id(ae):"NOCHART", ae_name(ae),
ae->new_value,
rrdcalc_status2string(ae->old_status),
rrdcalc_status2string(ae->new_status)
@@ -844,8 +844,9 @@ static void initialize_health(RRDHOST *host)
host->health.health_default_exec = string_strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
host->health.health_default_recipient = string_strdupz("root");
- // TODO: This needs to go to the metadata thread
- // Health should wait before accessing the table (needs to be created by the metadata thread)
+ //if (!is_chart_name_populated(&host->host_uuid))
+ // chart_name_populate(&host->host_uuid);
+
sql_health_alarm_log_load(host);
// ------------------------------------------------------------------------
@@ -1156,6 +1157,7 @@ void *health_main(void *ptr) {
rc->name,
rc->rrdset->id,
rc->rrdset->context,
+ rc->rrdset->name,
rc->rrdset->family,
rc->classification,
rc->component,
@@ -1422,6 +1424,7 @@ void *health_main(void *ptr) {
rc->name,
rc->rrdset->id,
rc->rrdset->context,
+ rc->rrdset->name,
rc->rrdset->family,
rc->classification,
rc->component,
@@ -1446,7 +1449,7 @@ void *health_main(void *ptr) {
health_alarm_log_add_entry(host, ae);
- netdata_log_health("[%s]: Alert event for [%s.%s], value [%s], status [%s].", rrdhost_hostname(host), ae_chart_name(ae), ae_name(ae), ae_new_value_string(ae), rrdcalc_status2string(ae->new_status));
+ netdata_log_health("[%s]: Alert event for [%s.%s], value [%s], status [%s].", rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), ae_new_value_string(ae), rrdcalc_status2string(ae->new_status));
rc->last_status_change_value = rc->value;
rc->last_status_change = now;
@@ -1508,6 +1511,7 @@ void *health_main(void *ptr) {
rc->name,
rc->rrdset->id,
rc->rrdset->context,
+ rc->rrdset->name,
rc->rrdset->family,
rc->classification,
rc->component,
diff --git a/health/health.d/bcache.conf b/health/health.d/bcache.conf
index 3f92e80d..8492bb6c 100644
--- a/health/health.d/bcache.conf
+++ b/health/health.d/bcache.conf
@@ -12,7 +12,7 @@ component: Disk
info: number of times data was read from the cache, \
the bucket was reused and invalidated in the last 10 minutes \
(when this occurs the data is reread from the backing device)
- to: sysadmin
+ to: silent
template: bcache_cache_dirty
on: disk.bcache_cache_alloc
@@ -26,4 +26,4 @@ component: Disk
delay: up 1m down 1h multiplier 1.5 max 2h
info: percentage of cache space used for dirty data and metadata \
(this usually means your SSD cache is too small)
- to: sysadmin
+ to: silent
diff --git a/health/health.d/btrfs.conf b/health/health.d/btrfs.conf
index 97b7a3a9..b2a50682 100644
--- a/health/health.d/btrfs.conf
+++ b/health/health.d/btrfs.conf
@@ -9,11 +9,10 @@ component: File system
calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free))
units: %
every: 10s
- warn: $this > (($status >= $WARNING) ? (90) : (95))
- crit: $this > (($status == $CRITICAL) ? (95) : (98))
+ warn: $this > (($status == $CRITICAL) ? (95) : (98))
delay: up 1m down 15m multiplier 1.5 max 1h
info: percentage of allocated BTRFS physical disk space
- to: sysadmin
+ to: silent
template: btrfs_data
on: btrfs.data
@@ -86,7 +85,7 @@ component: File system
hosts: *
units: errors
lookup: max -10m every 1m of write_errs
- warn: $this > 0
+ crit: $this > 0
delay: up 1m down 15m multiplier 1.5 max 1h
info: number of encountered BTRFS write errors
to: sysadmin
@@ -100,7 +99,7 @@ component: File system
hosts: *
units: errors
lookup: max -10m every 1m of flush_errs
- warn: $this > 0
+ crit: $this > 0
delay: up 1m down 15m multiplier 1.5 max 1h
info: number of encountered BTRFS flush errors
to: sysadmin
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
index f625e545..53a6ea00 100644
--- a/health/health.d/cgroups.conf
+++ b/health/health.d/cgroups.conf
@@ -11,11 +11,10 @@ component: CPU
lookup: average -10m unaligned
units: %
every: 1m
- warn: $this > (($status >= $WARNING) ? (75) : (85))
- crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ warn: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
info: average cgroup CPU utilization over the last 10 minutes
- to: sysadmin
+ to: silent
template: cgroup_ram_in_use
on: cgroup.mem_usage
@@ -31,44 +30,45 @@ component: Memory
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
info: cgroup memory utilization
- to: sysadmin
-
-# -----------------------------------------------------------------------------
-# check for packet storms
-
-# 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
-# 2. do the same for the last 10s
-# 3. raise an alarm if the later is 10x or 20x the first
-# we assume the minimum packet storm should at least have
-# 10000 packets/s, average of the last 10 seconds
-
- template: cgroup_1m_received_packets_rate
- on: cgroup.net_packets
- class: Workload
- type: Cgroups
-component: Network
- hosts: *
- lookup: average -1m unaligned of received
- units: packets
- every: 10s
- info: average number of packets received by the network interface ${label:device} over the last minute
-
- template: cgroup_10s_received_packets_storm
- on: cgroup.net_packets
- class: Workload
- type: Cgroups
-component: Network
- hosts: *
- lookup: average -10s unaligned of received
- calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
- every: 10s
- units: %
- warn: $this > (($status >= $WARNING)?(200):(5000))
- options: no-clear-notification
- info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
- compared to the rate over the last minute
- to: sysadmin
+ to: silent
+# FIXME COMMENTED DUE TO A BUG IN NETDATA
+## -----------------------------------------------------------------------------
+## check for packet storms
+#
+## 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
+## 2. do the same for the last 10s
+## 3. raise an alarm if the latter is 10x or 20x the first
+## we assume the minimum packet storm should at least have
+## 10000 packets/s, average of the last 10 seconds
+#
+# template: cgroup_1m_received_packets_rate
+# on: cgroup.net_packets
+# class: Workload
+# type: Cgroups
+#component: Network
+# hosts: *
+# lookup: average -1m unaligned of received
+# units: packets
+# every: 10s
+# info: average number of packets received by the network interface ${label:device} over the last minute
+#
+# template: cgroup_10s_received_packets_storm
+# on: cgroup.net_packets
+# class: Workload
+# type: Cgroups
+#component: Network
+# hosts: *
+# lookup: average -10s unaligned of received
+# calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
+# every: 10s
+# units: %
+# warn: $this > (($status >= $WARNING)?(200):(5000))
+# options: no-clear-notification
+# info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
+# compared to the rate over the last minute
+# to: sysadmin
+#
# ---------------------------------K8s containers--------------------------------------------
template: k8s_cgroup_10min_cpu_usage
@@ -83,8 +83,9 @@ component: CPU
every: 1m
warn: $this > (($status >= $WARNING) ? (75) : (85))
delay: down 15m multiplier 1.5 max 1h
- info: average cgroup CPU utilization over the last 10 minutes
- to: sysadmin
+ info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
+ average CPU utilization over the last 10 minutes
+ to: silent
template: k8s_cgroup_ram_in_use
on: k8s.cgroup.mem_usage
@@ -99,40 +100,42 @@ component: Memory
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
- info: cgroup memory utilization
- to: sysadmin
+ info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
+ memory utilization
+ to: silent
# check for packet storms
-# 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
-# 2. do the same for the last 10s
-# 3. raise an alarm if the later is 10x or 20x the first
-# we assume the minimum packet storm should at least have
-# 10000 packets/s, average of the last 10 seconds
-
- template: k8s_cgroup_1m_received_packets_rate
- on: k8s.cgroup.net_packets
- class: Workload
- type: Cgroups
-component: Network
- hosts: *
- lookup: average -1m unaligned of received
- units: packets
- every: 10s
- info: average number of packets received by the network interface ${label:device} over the last minute
-
- template: k8s_cgroup_10s_received_packets_storm
- on: k8s.cgroup.net_packets
- class: Workload
- type: Cgroups
-component: Network
- hosts: *
- lookup: average -10s unaligned of received
- calc: $this * 100 / (($k8s_cgroup_10s_received_packets_storm < 1000)?(1000):($k8s_cgroup_10s_received_packets_storm))
- every: 10s
- units: %
- warn: $this > (($status >= $WARNING)?(200):(5000))
- options: no-clear-notification
- info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
- compared to the rate over the last minute
- to: sysadmin
+# FIXME COMMENTED DUE TO A BUG IN NETDATA
+## 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
+## 2. do the same for the last 10s
+## 3. raise an alarm if the latter is 10x or 20x the first
+## we assume the minimum packet storm should at least have
+## 10000 packets/s, average of the last 10 seconds
+#
+# template: k8s_cgroup_1m_received_packets_rate
+# on: k8s.cgroup.net_packets
+# class: Workload
+# type: Cgroups
+#component: Network
+# hosts: *
+# lookup: average -1m unaligned of received
+# units: packets
+# every: 10s
+# info: average number of packets received by the network interface ${label:device} over the last minute
+#
+# template: k8s_cgroup_10s_received_packets_storm
+# on: k8s.cgroup.net_packets
+# class: Workload
+# type: Cgroups
+#component: Network
+# hosts: *
+# lookup: average -10s unaligned of received
+# calc: $this * 100 / (($k8s_cgroup_10s_received_packets_storm < 1000)?(1000):($k8s_cgroup_10s_received_packets_storm))
+# every: 10s
+# units: %
+# warn: $this > (($status >= $WARNING)?(200):(5000))
+# options: no-clear-notification
+# info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
+# compared to the rate over the last minute
+# to: sysadmin
diff --git a/health/health.d/cpu.conf b/health/health.d/cpu.conf
index 907d6ff8..4de5edd7 100644
--- a/health/health.d/cpu.conf
+++ b/health/health.d/cpu.conf
@@ -15,7 +15,7 @@ component: CPU
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
info: average CPU utilization over the last 10 minutes (excluding iowait, nice and steal)
- to: sysadmin
+ to: silent
template: 10min_cpu_iowait
on: system.cpu
@@ -28,9 +28,9 @@ component: CPU
units: %
every: 1m
warn: $this > (($status >= $WARNING) ? (20) : (40))
- delay: down 15m multiplier 1.5 max 1h
+ delay: up 30m down 30m multiplier 1.5 max 2h
info: average CPU iowait time over the last 10 minutes
- to: sysadmin
+ to: silent
template: 20min_steal_cpu
on: system.cpu
@@ -45,7 +45,7 @@ component: CPU
warn: $this > (($status >= $WARNING) ? (5) : (10))
delay: down 1h multiplier 1.5 max 2h
info: average CPU steal time over the last 20 minutes
- to: sysadmin
+ to: silent
## FreeBSD
template: 10min_cpu_usage
@@ -62,4 +62,4 @@ component: CPU
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
info: average CPU utilization over the last 10 minutes (excluding nice)
- to: sysadmin
+ to: silent
diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf
index 7bd4f120..27f5d669 100644
--- a/health/health.d/disks.conf
+++ b/health/health.d/disks.conf
@@ -21,7 +21,7 @@ chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
units: %
every: 1m
warn: $this > (($status >= $WARNING ) ? (80) : (90))
- crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5
delay: up 1m down 15m multiplier 1.5 max 1h
info: disk ${label:mount_point} space utilization
to: sysadmin
@@ -55,33 +55,32 @@ chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
# we will use it in the next template to find
# the hours remaining
-# template: disk_fill_rate
-# on: disk.space
-# os: linux freebsd
-# hosts: *
-# lookup: min -10m at -50m unaligned of avail
-# calc: ($this - $avail) / (($now - $after) / 3600)
-# every: 1m
-# units: GB/hour
-# info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
-
+template: disk_fill_rate
+ on: disk.space
+ os: linux freebsd
+ hosts: *
+ lookup: min -10m at -50m unaligned of avail
+ calc: ($this - $avail) / (($now - $after) / 3600)
+ every: 1m
+ units: GB/hour
+ info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
# calculate the hours remaining
# if the disk continues to fill
# in this rate
-# template: out_of_disk_space_time
-# on: disk.space
-# os: linux freebsd
-# hosts: *
-# calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
-# units: hours
-# every: 10s
-# warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
-# crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
-# delay: down 15m multiplier 1.2 max 1h
-# info: estimated time the disk will run out of space, if the system continues to add data with the rate of the last hour
-# to: sysadmin
+template: out_of_disk_space_time
+ on: disk.space
+ os: linux freebsd
+ hosts: *
+ calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
+ units: hours
+ every: 10s
+ warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
+ crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
+ delay: down 15m multiplier 1.2 max 1h
+ info: estimated time the disk will run out of space, if the system continues to add data with the rate of the last hour
+ to: silent
# -----------------------------------------------------------------------------
@@ -95,32 +94,32 @@ chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
# we will use it in the next template to find
# the hours remaining
-# template: disk_inode_rate
-# on: disk.inodes
-# os: linux freebsd
-# hosts: *
-# lookup: min -10m at -50m unaligned of avail
-# calc: ($this - $avail) / (($now - $after) / 3600)
-# every: 1m
-# units: inodes/hour
-# info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
+template: disk_inode_rate
+ on: disk.inodes
+ os: linux freebsd
+ hosts: *
+ lookup: min -10m at -50m unaligned of avail
+ calc: ($this - $avail) / (($now - $after) / 3600)
+ every: 1m
+ units: inodes/hour
+ info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
# calculate the hours remaining
# if the disk inodes are allocated
# in this rate
-# template: out_of_disk_inodes_time
-# on: disk.inodes
-# os: linux freebsd
-# hosts: *
-# calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
-# units: hours
-# every: 10s
-# warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
-# crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
-# delay: down 15m multiplier 1.2 max 1h
-# info: estimated time the disk will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
-# to: sysadmin
+template: out_of_disk_inodes_time
+ on: disk.inodes
+ os: linux freebsd
+ hosts: *
+ calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
+ units: hours
+ every: 10s
+ warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
+ crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
+ delay: down 15m multiplier 1.2 max 1h
+ info: estimated time the disk will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
+ to: silent
# -----------------------------------------------------------------------------
diff --git a/health/health.d/file_descriptors.conf b/health/health.d/file_descriptors.conf
index d136ea51..60bb8d38 100644
--- a/health/health.d/file_descriptors.conf
+++ b/health/health.d/file_descriptors.conf
@@ -20,12 +20,12 @@
type: System
component: Process
os: linux
- module: !* *
+ module: *
hosts: *
- lookup: max -1m unaligned foreach *
+ lookup: max -10s unaligned foreach *
units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (85) : (90))
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
- info: maximum utilization of open files among all application group PIDs
+ info: open files percentage against the processes limits, among all PIDs in application group
to: sysadmin
diff --git a/health/health.d/ioping.conf b/health/health.d/ioping.conf
index 47ac4453..5fd785b8 100644
--- a/health/health.d/ioping.conf
+++ b/health/health.d/ioping.conf
@@ -10,4 +10,4 @@ component: Disk
warn: $this > $green
delay: down 30m multiplier 1.5 max 2h
info: average I/O latency over the last 10 seconds
- to: sysadmin
+ to: silent
diff --git a/health/health.d/ipmi.conf b/health/health.d/ipmi.conf
index 4d6478cc..1775783d 100644
--- a/health/health.d/ipmi.conf
+++ b/health/health.d/ipmi.conf
@@ -23,4 +23,4 @@ component: IPMI
warn: $this > 0
delay: up 5m down 15m multiplier 1.5 max 1h
info: number of events in the IPMI System Event Log (SEL)
- to: sysadmin
+ to: silent
diff --git a/health/health.d/linux_power_supply.conf b/health/health.d/linux_power_supply.conf
index 4562122c..71a5be28 100644
--- a/health/health.d/linux_power_supply.conf
+++ b/health/health.d/linux_power_supply.conf
@@ -11,4 +11,4 @@ component: Battery
warn: $this < 10
delay: up 30s down 5m multiplier 1.2 max 1h
info: percentage of remaining power supply capacity
- to: sysadmin
+ to: silent
diff --git a/health/health.d/load.conf b/health/health.d/load.conf
index 75989c57..20f6781c 100644
--- a/health/health.d/load.conf
+++ b/health/health.d/load.conf
@@ -34,7 +34,7 @@ component: Load
warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 175 : 200)
delay: down 15m multiplier 1.5 max 1h
info: system fifteen-minute load average
- to: sysadmin
+ to: silent
alarm: load_average_5
on: system.load
@@ -50,7 +50,7 @@ component: Load
warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 350 : 400)
delay: down 15m multiplier 1.5 max 1h
info: system five-minute load average
- to: sysadmin
+ to: silent
alarm: load_average_1
on: system.load
@@ -66,4 +66,4 @@ component: Load
warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 700 : 800)
delay: down 15m multiplier 1.5 max 1h
info: system one-minute load average
- to: sysadmin
+ to: silent
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
index b90455a5..4dc0bf20 100644
--- a/health/health.d/mdstat.conf
+++ b/health/health.d/mdstat.conf
@@ -1,15 +1,3 @@
- template: mdstat_last_collected
- on: md.disks
- class: Latency
- type: System
-component: RAID
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- info: number of seconds since the last successful data collection
- to: sysadmin
template: mdstat_disks
on: md.disks
@@ -19,7 +7,7 @@ component: RAID
units: failed devices
every: 10s
calc: $down
- crit: $this > 0
+ warn: $this > 0
info: number of devices in the down state for the ${label:device} ${label:raid_level} array. \
Any number > 0 indicates that the array is degraded.
to: sysadmin
@@ -36,7 +24,7 @@ chart labels: raid_level=!raid1 !raid10 *
warn: $this > 1024
delay: up 30m
info: number of unsynchronized blocks for the ${label:device} ${label:raid_level} array
- to: sysadmin
+ to: silent
template: mdstat_nonredundant_last_collected
on: md.nonredundant
diff --git a/health/health.d/memory.conf b/health/health.d/memory.conf
index 010cbbd7..8badf09c 100644
--- a/health/health.d/memory.conf
+++ b/health/health.d/memory.conf
@@ -1,47 +1,80 @@
-
# you can disable an alarm notification by setting the 'to' line to: silent
- alarm: 1hour_ecc_memory_correctable
- on: mem.ecc_ce
+ alarm: 1hour_memory_hw_corrupted
+ on: mem.hwcorrupt
class: Errors
type: System
component: Memory
os: linux
hosts: *
- lookup: sum -10m unaligned
+ calc: $HardwareCorrupted
+ units: MB
+ every: 10s
+ warn: $this > 0
+ delay: down 1h multiplier 1.5 max 1h
+ info: amount of memory corrupted due to a hardware failure
+ to: sysadmin
+
+## ECC Controller
+
+ template: ecc_memory_mc_correctable
+ on: mem.edac_mc
+ class: Errors
+ type: System
+component: Memory
+ os: linux
+ hosts: *
+ lookup: sum -10m unaligned of correctable, correctable_noinfo
units: errors
every: 1m
warn: $this > 0
delay: down 1h multiplier 1.5 max 1h
- info: number of ECC correctable errors in the last 10 minutes
+ info: memory controller ${label:controller} ECC correctable errors in the last 10 minutes
to: sysadmin
- alarm: 1hour_ecc_memory_uncorrectable
- on: mem.ecc_ue
+ template: ecc_memory_mc_uncorrectable
+ on: mem.edac_mc
class: Errors
type: System
component: Memory
os: linux
hosts: *
- lookup: sum -10m unaligned
+ lookup: sum -10m unaligned of uncorrectable,uncorrectable_noinfo
units: errors
every: 1m
crit: $this > 0
delay: down 1h multiplier 1.5 max 1h
- info: number of ECC uncorrectable errors in the last 10 minutes
+ info: memory controller ${label:controller} ECC uncorrectable errors in the last 10 minutes
to: sysadmin
- alarm: 1hour_memory_hw_corrupted
- on: mem.hwcorrupt
+## ECC DIMM
+
+ template: ecc_memory_dimm_correctable
+ on: mem.edac_mc_dimm
class: Errors
type: System
component: Memory
os: linux
hosts: *
- calc: $HardwareCorrupted
- units: MB
- every: 10s
+ lookup: sum -10m unaligned of correctable
+ units: errors
+ every: 1m
warn: $this > 0
delay: down 1h multiplier 1.5 max 1h
- info: amount of memory corrupted due to a hardware failure
+ info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors in the last 10 minutes
+ to: sysadmin
+
+ template: ecc_memory_dimm_uncorrectable
+ on: mem.edac_mc_dimm
+ class: Errors
+ type: System
+component: Memory
+ os: linux
+ hosts: *
+ lookup: sum -10m unaligned of uncorrectable
+ units: errors
+ every: 1m
+ crit: $this > 0
+ delay: down 1h multiplier 1.5 max 1h
+ info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes
to: sysadmin
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
index 08a4eecb..095d488d 100644
--- a/health/health.d/net.conf
+++ b/health/health.d/net.conf
@@ -30,7 +30,7 @@ component: Network
warn: $this > (($status >= $WARNING) ? (85) : (90))
delay: up 1m down 1m multiplier 1.5 max 1h
info: average inbound utilization for the network interface ${label:device} over the last minute
- to: sysadmin
+ to: silent
template: 1m_sent_traffic_overflow
on: net.net
@@ -46,7 +46,7 @@ component: Network
warn: $this > (($status >= $WARNING) ? (85) : (90))
delay: up 1m down 1m multiplier 1.5 max 1h
info: average outbound utilization for the network interface ${label:device} over the last minute
- to: sysadmin
+ to: silent
# -----------------------------------------------------------------------------
# dropped packets
@@ -97,7 +97,7 @@ chart labels: device=!wl* *
warn: $this >= 2
delay: up 1m down 1h multiplier 1.5 max 2h
info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
- to: sysadmin
+ to: silent
template: outbound_packets_dropped_ratio
on: net.packets
@@ -114,7 +114,7 @@ chart labels: device=!wl* *
warn: $this >= 2
delay: up 1m down 1h multiplier 1.5 max 2h
info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
- to: sysadmin
+ to: silent
template: wifi_inbound_packets_dropped_ratio
on: net.packets
@@ -131,7 +131,7 @@ chart labels: device=wl*
warn: $this >= 10
delay: up 1m down 1h multiplier 1.5 max 2h
info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
- to: sysadmin
+ to: silent
template: wifi_outbound_packets_dropped_ratio
on: net.packets
@@ -148,7 +148,7 @@ chart labels: device=wl*
warn: $this >= 10
delay: up 1m down 1h multiplier 1.5 max 2h
info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
- to: sysadmin
+ to: silent
# -----------------------------------------------------------------------------
# interface errors
@@ -166,7 +166,7 @@ component: Network
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: number of inbound errors for the network interface ${label:device} in the last 10 minutes
- to: sysadmin
+ to: silent
template: interface_outbound_errors
on: net.errors
@@ -181,7 +181,7 @@ component: Network
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: number of outbound errors for the network interface ${label:device} in the last 10 minutes
- to: sysadmin
+ to: silent
# -----------------------------------------------------------------------------
# FIFO errors
@@ -204,7 +204,7 @@ component: Network
warn: $this > 0
delay: down 1h multiplier 1.5 max 2h
info: number of FIFO errors for the network interface ${label:device} in the last 10 minutes
- to: sysadmin
+ to: silent
# -----------------------------------------------------------------------------
# check for packet storms
@@ -243,4 +243,4 @@ component: Network
options: no-clear-notification
info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
compared to the rate over the last minute
- to: sysadmin
+ to: silent
diff --git a/health/health.d/qos.conf b/health/health.d/qos.conf
index 7290d15f..4b0a5cb9 100644
--- a/health/health.d/qos.conf
+++ b/health/health.d/qos.conf
@@ -5,14 +5,13 @@
# the alarm is checked every 10 seconds
# and examines the last minute of data
-#template: 10min_qos_packet_drops
-# on: tc.qos_dropped
-# os: linux
-# hosts: *
-# lookup: sum -10m unaligned absolute
-# every: 30s
-# warn: $this > 0
-# delay: up 0 down 30m multiplier 1.5 max 1h
-# units: packets
-# info: dropped packets in the last 30 minutes
-# to: sysadmin
+template: 10min_qos_packet_drops
+ on: tc.qos_dropped
+ os: linux
+ hosts: *
+ lookup: sum -5m unaligned absolute
+ every: 30s
+ warn: $this > 0
+ units: packets
+ info: dropped packets in the last 5 minutes
+ to: silent
diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf
index 34e5431a..c121264f 100644
--- a/health/health.d/ram.conf
+++ b/health/health.d/ram.conf
@@ -30,7 +30,7 @@ component: Memory
warn: $this < (($status >= $WARNING) ? (15) : (10))
delay: down 15m multiplier 1.5 max 1h
info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
- to: sysadmin
+ to: silent
alarm: oom_kill
on: mem.oom_kill
@@ -41,9 +41,8 @@ component: Memory
every: 5m
warn: $this > 0
delay: down 10m
-host labels: _is_k8s_node = false
info: number of out of memory kills in the last 30 minutes
- to: sysadmin
+ to: silent
## FreeBSD
alarm: ram_in_use
@@ -75,4 +74,4 @@ component: Memory
warn: $this < (($status >= $WARNING) ? (15) : (10))
delay: down 15m multiplier 1.5 max 1h
info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
- to: sysadmin
+ to: silent
diff --git a/health/health.d/softnet.conf b/health/health.d/softnet.conf
index 345f8750..b621d969 100644
--- a/health/health.d/softnet.conf
+++ b/health/health.d/softnet.conf
@@ -17,7 +17,7 @@ component: Network
delay: down 1h multiplier 1.5 max 2h
info: average number of dropped packets in the last minute \
due to exceeded net.core.netdev_max_backlog
- to: sysadmin
+ to: silent
alarm: 1min_netdev_budget_ran_outs
on: system.softnet_stat
@@ -51,4 +51,4 @@ component: Network
info: average number of drops in the last minute \
due to exceeded sysctl net.route.netisr_maxqlen \
(this can be a cause for dropped packets)
- to: sysadmin
+ to: silent
diff --git a/health/health.d/swap.conf b/health/health.d/swap.conf
index d30c74ce..3adcae9d 100644
--- a/health/health.d/swap.conf
+++ b/health/health.d/swap.conf
@@ -2,7 +2,7 @@
# you can disable an alarm notification by setting the 'to' line to: silent
alarm: 30min_ram_swapped_out
- on: system.swapio
+ on: mem.swapio
class: Workload
type: System
component: Memory
@@ -16,10 +16,10 @@ component: Memory
warn: $this > (($status >= $WARNING) ? (20) : (30))
delay: down 15m multiplier 1.5 max 1h
info: percentage of the system RAM swapped in the last 30 minutes
- to: sysadmin
+ to: silent
alarm: used_swap
- on: system.swap
+ on: mem.swap
class: Utilization
type: System
component: Memory
diff --git a/health/health.d/synchronization.conf b/health/health.d/synchronization.conf
index 417624ad..837bb1b3 100644
--- a/health/health.d/synchronization.conf
+++ b/health/health.d/synchronization.conf
@@ -9,4 +9,4 @@
info: number of sync() system calls. \
Every call causes all pending modifications to filesystem metadata and \
cached file data to be written to the underlying filesystems.
- to: sysadmin
+ to: silent
diff --git a/health/health.d/systemdunits.conf b/health/health.d/systemdunits.conf
index 531d62fa..aadf8452 100644
--- a/health/health.d/systemdunits.conf
+++ b/health/health.d/systemdunits.conf
@@ -6,6 +6,7 @@
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -20,6 +21,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -34,6 +36,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -48,6 +51,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -62,6 +66,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -76,6 +81,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -90,6 +96,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -104,6 +111,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -118,6 +126,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
@@ -132,6 +141,7 @@ component: Systemd units
class: Errors
type: Linux
component: Systemd units
+ module: !* *
calc: $failed
units: state
every: 10s
diff --git a/health/health.d/tcp_listen.conf b/health/health.d/tcp_listen.conf
index d4bcfa24..00ee055d 100644
--- a/health/health.d/tcp_listen.conf
+++ b/health/health.d/tcp_listen.conf
@@ -32,7 +32,7 @@ component: Network
crit: $this > (($status == $CRITICAL) ? (1) : (5))
delay: up 0 down 5m multiplier 1.5 max 1h
info: average number of overflows in the TCP accept queue over the last minute
- to: sysadmin
+ to: silent
# THIS IS TOO GENERIC
# CHECK: https://github.com/netdata/netdata/issues/3234#issuecomment-423935842
@@ -50,7 +50,7 @@ component: Network
crit: $this > (($status == $CRITICAL) ? (1) : (5))
delay: up 0 down 5m multiplier 1.5 max 1h
info: average number of dropped packets in the TCP accept queue over the last minute
- to: sysadmin
+ to: silent
# -----------------------------------------------------------------------------
@@ -76,7 +76,7 @@ component: Network
delay: up 10 down 5m multiplier 1.5 max 1h
info: average number of SYN requests was dropped due to the full TCP SYN queue over the last minute \
(SYN cookies were not enabled)
- to: sysadmin
+ to: silent
alarm: 1m_tcp_syn_queue_cookies
on: ip.tcp_syn_queue
@@ -92,5 +92,5 @@ component: Network
crit: $this > (($status == $CRITICAL) ? (0) : (5))
delay: up 10 down 5m multiplier 1.5 max 1h
info: average number of sent SYN cookies due to the full TCP SYN queue over the last minute
- to: sysadmin
+ to: silent
diff --git a/health/health.d/tcp_mem.conf b/health/health.d/tcp_mem.conf
index 318be20a..f472d953 100644
--- a/health/health.d/tcp_mem.conf
+++ b/health/health.d/tcp_mem.conf
@@ -20,4 +20,4 @@ component: Network
crit: ${mem} > (($status == $CRITICAL ) ? ( ${tcp_mem_pressure} ) : ( ${tcp_mem_high} * 0.9 ))
delay: up 0 down 5m multiplier 1.5 max 1h
info: TCP memory utilization
- to: sysadmin
+ to: silent
diff --git a/health/health.d/tcp_orphans.conf b/health/health.d/tcp_orphans.conf
index cbd628da..07022af3 100644
--- a/health/health.d/tcp_orphans.conf
+++ b/health/health.d/tcp_orphans.conf
@@ -21,4 +21,4 @@ component: Network
crit: $this > (($status == $CRITICAL) ? ( 25 ) : ( 50 ))
delay: up 0 down 5m multiplier 1.5 max 1h
info: orphan IPv4 TCP sockets utilization
- to: sysadmin
+ to: silent
diff --git a/health/health.d/tcp_resets.conf b/health/health.d/tcp_resets.conf
index ff116db6..089ac988 100644
--- a/health/health.d/tcp_resets.conf
+++ b/health/health.d/tcp_resets.conf
@@ -33,7 +33,7 @@ component: Network
This can indicate a port scan, \
or that a service running on this host has crashed. \
Netdata will not send a clear notification for this alarm.
- to: sysadmin
+ to: silent
# -----------------------------------------------------------------------------
# tcp resets this host receives
@@ -66,4 +66,4 @@ component: Network
info: average number of received TCP RESETS over the last 10 seconds. \
This can be an indication that a service this host needs has crashed. \
Netdata will not send a clear notification for this alarm.
- to: sysadmin
+ to: silent
diff --git a/health/health.d/udp_errors.conf b/health/health.d/udp_errors.conf
index 64f47dfa..00593c58 100644
--- a/health/health.d/udp_errors.conf
+++ b/health/health.d/udp_errors.conf
@@ -17,7 +17,7 @@ component: Network
warn: $this > (($status >= $WARNING) ? (0) : (10))
info: average number of UDP receive buffer errors over the last minute
delay: up 1m down 60m multiplier 1.2 max 2h
- to: sysadmin
+ to: silent
# -----------------------------------------------------------------------------
# UDP send buffer errors
@@ -35,4 +35,4 @@ component: Network
warn: $this > (($status >= $WARNING) ? (0) : (10))
info: average number of UDP send buffer errors over the last minute
delay: up 1m down 60m multiplier 1.2 max 2h
- to: sysadmin
+ to: silent
diff --git a/health/health.d/windows.conf b/health/health.d/windows.conf
index 28a88638..9ef4c202 100644
--- a/health/health.d/windows.conf
+++ b/health/health.d/windows.conf
@@ -15,7 +15,7 @@ component: CPU
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
info: average CPU utilization over the last 10 minutes
- to: sysadmin
+ to: silent
## Memory
@@ -52,7 +52,7 @@ component: Network
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: number of inbound discarded packets for the network interface in the last 10 minutes
- to: sysadmin
+ to: silent
template: windows_outbound_packets_discarded
on: windows.net_nic_discarded
@@ -67,7 +67,7 @@ component: Network
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: number of outbound discarded packets for the network interface in the last 10 minutes
- to: sysadmin
+ to: silent
template: windows_inbound_packets_errors
on: windows.net_nic_errors
@@ -82,7 +82,7 @@ component: Network
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: number of inbound errors for the network interface in the last 10 minutes
- to: sysadmin
+ to: silent
template: windows_outbound_packets_errors
on: windows.net_nic_errors
@@ -97,7 +97,7 @@ component: Network
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
info: number of outbound errors for the network interface in the last 10 minutes
- to: sysadmin
+ to: silent
## Disk
diff --git a/health/health.d/zfs.conf b/health/health.d/zfs.conf
index 7f8ea279..40ec4ce8 100644
--- a/health/health.d/zfs.conf
+++ b/health/health.d/zfs.conf
@@ -10,7 +10,7 @@ component: File system
warn: $this > 0
delay: down 1h multiplier 1.5 max 2h
info: number of times ZFS had to limit the ARC growth in the last 10 minutes
- to: sysadmin
+ to: silent
# ZFS pool state
diff --git a/health/health.h b/health/health.h
index 543bc56a..7ec966ff 100644
--- a/health/health.h
+++ b/health/health.h
@@ -70,6 +70,7 @@ ALARM_ENTRY* health_create_alarm_entry(
STRING *name,
STRING *chart,
STRING *chart_context,
+ STRING *chart_id,
STRING *family,
STRING *classification,
STRING *component,
diff --git a/health/health_log.c b/health/health_log.c
index 4cfbee60..933a452a 100644
--- a/health/health_log.c
+++ b/health/health_log.c
@@ -20,6 +20,7 @@ inline ALARM_ENTRY* health_create_alarm_entry(
STRING *name,
STRING *chart,
STRING *chart_context,
+ STRING *chart_name,
STRING *family,
STRING *class,
STRING *component,
@@ -43,6 +44,7 @@ inline ALARM_ENTRY* health_create_alarm_entry(
ae->name = string_dup(name);
ae->chart = string_dup(chart);
ae->chart_context = string_dup(chart_context);
+ ae->chart_name = string_dup(chart_name);
uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
diff --git a/health/notifications/alerta/metadata.yaml b/health/notifications/alerta/metadata.yaml
new file mode 100644
index 00000000..f815032b
--- /dev/null
+++ b/health/notifications/alerta/metadata.yaml
@@ -0,0 +1,90 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-alerta'
+ meta:
+ name: 'Alerta'
+ link: 'https://alerta.io/'
+ categories:
+ - notify.agent
+ icon_filename: 'alerta.png'
+ keywords:
+ - Alerta
+ overview:
+ notification_description: |
+ The [Alerta](https://alerta.io/) monitoring system is a tool used to consolidate and de-duplicate alerts from multiple sources for quick ‘at-a-glance’ visualization. With just one system you can monitor alerts from many other monitoring tools on a single screen.
+ You can send Netdata alerts to Alerta to see alerts coming from many Netdata hosts or also from a multi-host Netdata configuration.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A working Alerta instance
+ - An Alerta API key (if authentication in Alerta is enabled)
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_ALERTA'
+ default_value: ''
+ description: "Set `SEND_ALERTA` to YES"
+ required: true
+ - name: 'ALERTA_WEBHOOK_URL'
+ default_value: ''
+ description: "Set `ALERTA_WEBHOOK_URL` to the API URL you defined when you installed the Alerta server."
+ required: true
+ - name: 'ALERTA_API_KEY'
+ default_value: ''
+ description: "Set `ALERTA_API_KEY` to your API key."
+ required: true
+ detailed_description: |
+ You will need an API key to send messages from any source, if Alerta is configured to use authentication (recommended). To create a new API key:
+ 1. Go to Configuration > API Keys.
+ 2. Create a new API key called "netdata" with `write:alerts` permission.
+ - name: 'DEFAULT_RECIPIENT_ALERTA'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_ALERTA` to the default recipient environment you want the alert notifications to be sent to. All roles will default to this variable if left unconfigured."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_CUSTOM'
+ default_value: ''
+ description: "Set different recipient environments per role, by editing `DEFAULT_RECIPIENT_CUSTOM` with the environment name of your choice"
+ required: false
+ detailed_description: |
+ The `DEFAULT_RECIPIENT_CUSTOM` can be edited in the following entries at the bottom of the same file:
+
+ ```conf
+ role_recipients_alerta[sysadmin]="Systems"
+ role_recipients_alerta[domainadmin]="Domains"
+ role_recipients_alerta[dba]="Databases Systems"
+ role_recipients_alerta[webmaster]="Marketing Development"
+ role_recipients_alerta[proxyadmin]="Proxy"
+ role_recipients_alerta[sitemgr]="Sites"
+ ```
+
+ The values you provide should be defined as environments in `/etc/alertad.conf` with `ALLOWED_ENVIRONMENTS` option.
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # alerta (alerta.io) global notification options
+
+ SEND_ALERTA="YES"
+ ALERTA_WEBHOOK_URL="http://yourserver/alerta/api"
+ ALERTA_API_KEY="INSERT_YOUR_API_KEY_HERE"
+ DEFAULT_RECIPIENT_ALERTA="Production"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/awssns/metadata.yaml b/health/notifications/awssns/metadata.yaml
new file mode 100644
index 00000000..524a5f48
--- /dev/null
+++ b/health/notifications/awssns/metadata.yaml
@@ -0,0 +1,137 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-awssns'
+ meta:
+ name: 'AWS SNS'
+ link: 'https://aws.amazon.com/sns/'
+ categories:
+ - notify.agent
+ icon_filename: 'aws.svg'
+ keywords:
+ - AWS SNS
+ overview:
+ notification_description: |
+ As part of its AWS suite, Amazon provides a notification broker service called 'Simple Notification Service' (SNS). Amazon SNS works similarly to Netdata's own notification system, allowing you to dispatch a single notification to multiple subscribers of different types. Among other things, SNS supports sending notifications to:
+ - Email addresses
+ - Mobile Phones via SMS
+ - HTTP or HTTPS web hooks
+ - AWS Lambda functions
+ - AWS SQS queues
+ - Mobile applications via push notifications
+ You can send notifications through Amazon SNS using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: |
+ - While Amazon SNS supports sending differently formatted messages for different delivery methods, Netdata does not currently support this functionality.
+ - For email notification support, we recommend using Netdata's email notifications, as it has the following benefits:
+ - In most cases, it requires less configuration.
+ - Netdata's emails are nicely pre-formatted and support features like threading, which requires a lot of manual effort in SNS.
+ - It is less resource intensive and more cost-efficient than SNS.
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The [Amazon Web Services CLI tools](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) (awscli).
+ - An actual home directory for the user you run Netdata as, instead of just using `/` as a home directory. The setup depends on the distribution, but `/var/lib/netdata` is the recommended directory. If you are using Netdata as a dedicated user, the permissions will already be correct.
+ - An Amazon SNS topic to send notifications to with one or more subscribers. The Getting Started section of the Amazon SNS documentation covers the basics of how to set this up. Make note of the Topic ARN when you create the topic.
+ - While not mandatory, it is highly recommended to create a dedicated IAM user on your account for Netdata to send notifications. This user needs to have programmatic access, and should only allow access to SNS. For an additional layer of security, you can create one for each system or group of systems.
+ - Terminal access to the Agent you wish to configure.
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'aws path'
+ default_value: ''
+ description: "The full path of the aws command. If empty, the system `$PATH` will be searched for it. If not found, Amazon SNS notifications will be silently disabled."
+ required: true
+ - name: 'SEND_AWSSNS'
+ default_value: 'YES'
+ description: "Set `SEND_AWSSNS` to YES"
+ required: true
+ - name: 'AWSSNS_MESSAGE_FORMAT'
+ default_value: '${status} on ${host} at ${date}: ${chart} ${value_string}'
+ description: "Set `AWSSNS_MESSAGE_FORMAT` to the string that you want the alert to be sent into."
+ required: true
+ detailed_description: |
+ The supported variables are:
+
+ | Variable name | Description |
+ |:---------------------------:|:---------------------------------------------------------------------------------|
+ | `${alarm}` | Like "name = value units" |
+ | `${status_message}` | Like "needs attention", "recovered", "is critical" |
+ | `${severity}` | Like "Escalated to CRITICAL", "Recovered from WARNING" |
+ | `${raised_for}` | Like "(alarm was raised for 10 minutes)" |
+ | `${host}` | The host that generated this event |
+ | `${url_host}` | Same as ${host} but URL encoded |
+ | `${unique_id}` | The unique id of this event |
+ | `${alarm_id}` | The unique id of the alarm that generated this event |
+ | `${event_id}` | The incremental id of the event, for this alarm id |
+ | `${when}` | The timestamp this event occurred |
+ | `${name}` | The name of the alarm, as given in netdata health.d entries |
+ | `${url_name}` | Same as ${name} but URL encoded |
+ | `${chart}` | The name of the chart (type.id) |
+ | `${url_chart}` | Same as ${chart} but URL encoded |
+ | `${family}` | The family of the chart |
+ | `${url_family}` | Same as ${family} but URL encoded |
+ | `${status}` | The current status : REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL |
+ | `${old_status}` | The previous status: REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL |
+ | `${value}` | The current value of the alarm |
+ | `${old_value}` | The previous value of the alarm |
+ | `${src}` | The line number and file where the alarm has been configured |
+ | `${duration}` | The duration in seconds of the previous alarm state |
+ | `${duration_txt}` | Same as ${duration} for humans |
+ | `${non_clear_duration}` | The total duration in seconds this is/was non-clear |
+ | `${non_clear_duration_txt}` | Same as ${non_clear_duration} for humans |
+ | `${units}` | The units of the value |
+ | `${info}` | A short description of the alarm |
+ | `${value_string}` | Friendly value (with units) |
+ | `${old_value_string}` | Friendly old value (with units) |
+ | `${image}` | The URL of an image to represent the status of the alarm |
+ | `${color}` | A color in AABBCC format for the alarm |
+ | `${goto_url}` | The URL the user can click to see the netdata dashboard |
+ | `${calc_expression}` | The expression evaluated to provide the value for the alarm |
+ | `${calc_param_values}` | The value of the variables in the evaluated expression |
+ | `${total_warnings}` | The total number of alarms in WARNING state on the host |
+ | `${total_critical}` | The total number of alarms in CRITICAL state on the host |
+ - name: 'DEFAULT_RECIPIENT_AWSSNS'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_AWSSNS` to the Topic ARN you noted down upon creating the Topic."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ You can have different recipient Topics per **role**, by editing `DEFAULT_RECIPIENT_AWSSNS` with the Topic ARN you want, in the following entries at the bottom of the same file:
+
+ ```conf
+ role_recipients_awssns[sysadmin]="arn:aws:sns:us-east-2:123456789012:Systems"
+ role_recipients_awssns[domainadmin]="arn:aws:sns:us-east-2:123456789012:Domains"
+ role_recipients_awssns[dba]="arn:aws:sns:us-east-2:123456789012:Databases"
+ role_recipients_awssns[webmaster]="arn:aws:sns:us-east-2:123456789012:Development"
+ role_recipients_awssns[proxyadmin]="arn:aws:sns:us-east-2:123456789012:Proxy"
+ role_recipients_awssns[sitemgr]="arn:aws:sns:us-east-2:123456789012:Sites"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: 'An example working configuration would be:'
+ config: |
+ ```conf
+ #------------------------------------------------------------------------------
+ # Amazon SNS notifications
+
+ SEND_AWSSNS="YES"
+ AWSSNS_MESSAGE_FORMAT="${status} on ${host} at ${date}: ${chart} ${value_string}"
+ DEFAULT_RECIPIENT_AWSSNS="arn:aws:sns:us-east-2:123456789012:MyTopic"
+ ```
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/custom/metadata.yaml b/health/notifications/custom/metadata.yaml
new file mode 100644
index 00000000..c785fa2a
--- /dev/null
+++ b/health/notifications/custom/metadata.yaml
@@ -0,0 +1,169 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-custom'
+ meta:
+ name: 'Custom'
+ link: ''
+ categories:
+ - notify.agent
+ icon_filename: 'custom.png'
+ keywords:
+ - custom
+ overview:
+ notification_description: |
+ Netdata Agent's alert notification feature allows you to send custom notifications to any endpoint you choose.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_CUSTOM'
+ default_value: 'YES'
+ description: "Set `SEND_CUSTOM` to YES"
+ required: true
+ - name: 'DEFAULT_RECIPIENT_CUSTOM'
+ default_value: ''
+ description: "This value is dependent on how you handle the `${to}` variable inside the `custom_sender()` function."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured. You can edit `DEFAULT_RECIPIENT_CUSTOM` with the variable you want, in the following entries at the bottom of the same file:
+ ```
+ role_recipients_custom[sysadmin]="systems"
+ role_recipients_custom[domainadmin]="domains"
+ role_recipients_custom[dba]="databases systems"
+ role_recipients_custom[webmaster]="marketing development"
+ role_recipients_custom[proxyadmin]="proxy-admin"
+ role_recipients_custom[sitemgr]="sites"
+ ```
+ - name: 'custom_sender()'
+ default_value: ''
+ description: "You can look at the other senders in `/usr/libexec/netdata/plugins.d/alarm-notify.sh` for examples of how to modify the function in this configuration file."
+ required: false
+ detailed_description: |
+ The following is a sample custom_sender() function in health_alarm_notify.conf, to send an SMS via an imaginary HTTPS endpoint to the SMS gateway:
+ ```
+ custom_sender() {
+ # example human readable SMS
+ local msg="${host} ${status_message}: ${alarm} ${raised_for}"
+
+ # limit it to 160 characters and encode it for use in a URL
+ urlencode "${msg:0:160}" >/dev/null; msg="${REPLY}"
+
+ # a space separated list of the recipients to send alarms to
+ to="${1}"
+
+ for phone in ${to}; do
+ httpcode=$(docurl -X POST \
+ --data-urlencode "From=XXX" \
+ --data-urlencode "To=${phone}" \
+ --data-urlencode "Body=${msg}" \
+ -u "${accountsid}:${accounttoken}" \
+ https://domain.website.com/)
+
+ if [ "${httpcode}" = "200" ]; then
+ info "sent custom notification ${msg} to ${phone}"
+ sent=$((sent + 1))
+ else
+ error "failed to send custom notification ${msg} to ${phone} with HTTP error code ${httpcode}."
+ fi
+ done
+ }
+ ```
+
+ The supported variables that you can use for the function's `msg` variable are:
+
+ | Variable name | Description |
+ |:---------------------------:|:---------------------------------------------------------------------------------|
+ | `${alarm}` | Like "name = value units" |
+ | `${status_message}` | Like "needs attention", "recovered", "is critical" |
+ | `${severity}` | Like "Escalated to CRITICAL", "Recovered from WARNING" |
+ | `${raised_for}` | Like "(alarm was raised for 10 minutes)" |
+ | `${host}` | The host that generated this event |
+ | `${url_host}` | Same as ${host} but URL encoded |
+ | `${unique_id}` | The unique id of this event |
+ | `${alarm_id}` | The unique id of the alarm that generated this event |
+ | `${event_id}` | The incremental id of the event, for this alarm id |
+ | `${when}` | The timestamp this event occurred |
+ | `${name}` | The name of the alarm, as given in netdata health.d entries |
+ | `${url_name}` | Same as ${name} but URL encoded |
+ | `${chart}` | The name of the chart (type.id) |
+ | `${url_chart}` | Same as ${chart} but URL encoded |
+ | `${family}` | The family of the chart |
+ | `${url_family}` | Same as ${family} but URL encoded |
+ | `${status}` | The current status : REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL |
+ | `${old_status}` | The previous status: REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL |
+ | `${value}` | The current value of the alarm |
+ | `${old_value}` | The previous value of the alarm |
+ | `${src}` | The line number and file where the alarm has been configured |
+ | `${duration}` | The duration in seconds of the previous alarm state |
+ | `${duration_txt}` | Same as ${duration} for humans |
+ | `${non_clear_duration}` | The total duration in seconds this is/was non-clear |
+ | `${non_clear_duration_txt}` | Same as ${non_clear_duration} for humans |
+ | `${units}` | The units of the value |
+ | `${info}` | A short description of the alarm |
+ | `${value_string}` | Friendly value (with units) |
+ | `${old_value_string}` | Friendly old value (with units) |
+ | `${image}` | The URL of an image to represent the status of the alarm |
+ | `${color}` | A color in AABBCC format for the alarm |
+ | `${goto_url}` | The URL the user can click to see the netdata dashboard |
+ | `${calc_expression}` | The expression evaluated to provide the value for the alarm |
+ | `${calc_param_values}` | The value of the variables in the evaluated expression |
+ | `${total_warnings}` | The total number of alarms in WARNING state on the host |
+ | `${total_critical}` | The total number of alarms in CRITICAL state on the host |
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # custom notifications
+
+ SEND_CUSTOM="YES"
+ DEFAULT_RECIPIENT_CUSTOM=""
+
+ # The custom_sender() is a custom function to do whatever you need to do
+ custom_sender() {
+ # example human readable SMS
+ local msg="${host} ${status_message}: ${alarm} ${raised_for}"
+
+ # limit it to 160 characters and encode it for use in a URL
+ urlencode "${msg:0:160}" >/dev/null; msg="${REPLY}"
+
+ # a space separated list of the recipients to send alarms to
+ to="${1}"
+
+ for phone in ${to}; do
+ httpcode=$(docurl -X POST \
+ --data-urlencode "From=XXX" \
+ --data-urlencode "To=${phone}" \
+ --data-urlencode "Body=${msg}" \
+ -u "${accountsid}:${accounttoken}" \
+ https://domain.website.com/)
+
+ if [ "${httpcode}" = "200" ]; then
+ info "sent custom notification ${msg} to ${phone}"
+ sent=$((sent + 1))
+ else
+ error "failed to send custom notification ${msg} to ${phone} with HTTP error code ${httpcode}."
+ fi
+ done
+ }
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/discord/metadata.yaml b/health/notifications/discord/metadata.yaml
new file mode 100644
index 00000000..a46a8ec9
--- /dev/null
+++ b/health/notifications/discord/metadata.yaml
@@ -0,0 +1,76 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-discord'
+ meta:
+ name: 'Discord'
+ link: 'https://discord.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'discord.png'
+ keywords:
+ - Discord
+ overview:
+ notification_description: |
+ Send notifications to Discord using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The incoming webhook URL as given by Discord. Create a webhook by following the official [Discord documentation](https://support.discord.com/hc/en-us/articles/228383668-Intro-to-Webhooks). You can use the same on all your Netdata servers (or you can have multiple if you like - your decision).
+ - One or more Discord channels to post the messages to
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_DISCORD'
+ default_value: 'YES'
+ description: "Set `SEND_DISCORD` to YES"
+ required: true
+ - name: 'DISCORD_WEBHOOK_URL'
+ default_value: ''
+ description: "Set `DISCORD_WEBHOOK_URL` to your webhook URL."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_DISCORD'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_DISCORD` to the channel you want the alert notifications to be sent to. You can define multiple channels like this: `alerts` `systems`. "
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+ You can then have different channels per role, by editing `DEFAULT_RECIPIENT_DISCORD` with the channel you want, in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_discord[sysadmin]="systems"
+ role_recipients_discord[domainadmin]="domains"
+ role_recipients_discord[dba]="databases systems"
+ role_recipients_discord[webmaster]="marketing development"
+ role_recipients_discord[proxyadmin]="proxy-admin"
+ role_recipients_discord[sitemgr]="sites"
+ ```
+
+ The values you provide should already exist as Discord channels in your server.
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # discord (discordapp.com) global notification options
+
+ SEND_DISCORD="YES"
+ DISCORD_WEBHOOK_URL="https://discord.com/api/webhooks/XXXXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ DEFAULT_RECIPIENT_DISCORD="alerts"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/dynatrace/metadata.yaml b/health/notifications/dynatrace/metadata.yaml
new file mode 100644
index 00000000..a88c766f
--- /dev/null
+++ b/health/notifications/dynatrace/metadata.yaml
@@ -0,0 +1,92 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-dynatrace'
+ meta:
+ name: 'Dynatrace'
+ link: 'https://dynatrace.com'
+ categories:
+ - notify.agent
+ icon_filename: 'dynatrace.svg'
+ keywords:
+ - Dynatrace
+ overview:
+ notification_description: |
+ Dynatrace allows you to receive notifications using their Events REST API. See the [Dynatrace documentation](https://www.dynatrace.com/support/help/dynatrace-api/environment-api/events-v2/post-event) about POSTing an event in the Events API for more details.
+ You can send notifications to Dynatrace using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A Dynatrace Server. You can use the same on all your Netdata servers but make sure the server is network visible from your Netdata hosts. The Dynatrace server URL should include the protocol prefix (http:// or https://), for example: https://monitor.example.com.
+ - An API Token. Generate a secure access API token that enables access to your Dynatrace monitoring data via the REST-based API. See [Dynatrace API - Authentication](https://www.dynatrace.com/support/help/extend-dynatrace/dynatrace-api/basics/dynatrace-api-authentication/) for more details.
+ - An API Space. This is the URL part of the page you have access in order to generate the API Token. For example, the URL for a generated API token might look like: https://monitor.illumineit.com/e/2a93fe0e-4cd5-469a-9d0d-1a064235cfce/#settings/integration/apikeys;gf=all In that case, the Space is 2a93fe0e-4cd5-469a-9d0d-1a064235cfce.
+ - A Server Tag. To generate one on your Dynatrace Server, go to Settings --> Tags --> Manually applied tags and create the Tag. The Netdata alarm is sent as a Dynatrace Event to be correlated with all those hosts tagged with this Tag you have created.
+ - Terminal access to the Agent you wish to configure
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_DYNATRACE'
+ default_value: 'YES'
+ description: "Set `SEND_DYNATRACE` to YES"
+ required: true
+ - name: 'DYNATRACE_SERVER'
+ default_value: ''
+ description: "Set `DYNATRACE_SERVER` to the Dynatrace server with the protocol prefix, for example `https://monitor.example.com`."
+ required: true
+ - name: 'DYNATRACE_TOKEN'
+ default_value: ''
+ description: "Set `DYNATRACE_TOKEN` to your Dynatrace API authentication token"
+ required: true
+ - name: 'DYNATRACE_SPACE'
+ default_value: ''
+ description: "Set `DYNATRACE_SPACE` to the API Space, which is the part of the URL of the page you access in order to generate the API Token."
+ required: true
+ detailed_description: |
+ For example, the URL for a generated API token might look like: https://monitor.illumineit.com/e/2a93fe0e-4cd5-469a-9d0d-1a064235cfce/#settings/integration/apikeys;gf=all In that case, the Space is 2a93fe0e-4cd5-469a-9d0d-1a064235cfce.
+ - name: 'DYNATRACE_TAG_VALUE'
+ default_value: ''
+ description: "Set `DYNATRACE_TAG_VALUE` to your Dynatrace Server Tag."
+ required: true
+ - name: 'DYNATRACE_ANNOTATION_TYPE'
+ default_value: 'Netdata Alarm'
+ description: "`DYNATRACE_ANNOTATION_TYPE` can be left to its default value Netdata Alarm, but you can change it to better fit your needs."
+ required: false
+ - name: 'DYNATRACE_EVENT'
+ default_value: 'Netdata Alarm'
+ description: "Set `DYNATRACE_EVENT` to the Dynatrace eventType you want."
+ required: false
+ detailed_description: |
+ `AVAILABILITY_EVENT`, `CUSTOM_ALERT`, `CUSTOM_ANNOTATION`, `CUSTOM_CONFIGURATION`, `CUSTOM_DEPLOYMENT`, `CUSTOM_INFO`, `ERROR_EVENT`,
+ `MARKED_FOR_TERMINATION`, `PERFORMANCE_EVENT`, `RESOURCE_CONTENTION_EVENT`.
+ You can read more [here](https://www.dynatrace.com/support/help/dynatrace-api/environment-api/events-v2/post-event#request-body-objects).
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # Dynatrace global notification options
+
+ SEND_DYNATRACE="YES"
+ DYNATRACE_SERVER="https://monitor.example.com"
+ DYNATRACE_TOKEN="XXXXXXX"
+ DYNATRACE_SPACE="2a93fe0e-4cd5-469a-9d0d-1a064235cfce"
+ DYNATRACE_TAG_VALUE="SERVERTAG"
+ DYNATRACE_ANNOTATION_TYPE="Netdata Alert"
+ DYNATRACE_EVENT="AVAILABILITY_EVENT"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/email/metadata.yaml b/health/notifications/email/metadata.yaml
new file mode 100644
index 00000000..f0d4a62a
--- /dev/null
+++ b/health/notifications/email/metadata.yaml
@@ -0,0 +1,73 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-email'
+ meta:
+ name: 'Email'
+ link: ''
+ categories:
+ - notify.agent
+ icon_filename: 'email.png'
+ keywords:
+ - email
+ overview:
+ notification_description: |
+ Send notifications via Email using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A working sendmail command is required for email alerts to work. Almost all MTAs provide a sendmail interface. Netdata sends all emails as user netdata, so make sure your sendmail works for local users.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'EMAIL_SENDER'
+ default_value: 'netdata'
+ description: "You can change `EMAIL_SENDER` to the email address sending the notifications."
+ required: false
+ - name: 'SEND_EMAIL'
+ default_value: 'YES'
+ description: "Set `SEND_EMAIL` to YES"
+ required: true
+ - name: 'DEFAULT_RECIPIENT_EMAIL'
+ default_value: 'root'
+ description: "Set `DEFAULT_RECIPIENT_EMAIL` to the email address you want the email to be sent to by default. You can define multiple email addresses like this: `alarms@example.com` `systems@example.com`."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+ The `DEFAULT_RECIPIENT_EMAIL` can be edited in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_email[sysadmin]="systems@example.com"
+ role_recipients_email[domainadmin]="domains@example.com"
+ role_recipients_email[dba]="databases@example.com systems@example.com"
+ role_recipients_email[webmaster]="marketing@example.com development@example.com"
+ role_recipients_email[proxyadmin]="proxy-admin@example.com"
+ role_recipients_email[sitemgr]="sites@example.com"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # email global notification options
+
+ EMAIL_SENDER="example@domain.com"
+ SEND_EMAIL="YES"
+ DEFAULT_RECIPIENT_EMAIL="recipient@example.com"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/flock/metadata.yaml b/health/notifications/flock/metadata.yaml
new file mode 100644
index 00000000..62e7f499
--- /dev/null
+++ b/health/notifications/flock/metadata.yaml
@@ -0,0 +1,72 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-flock'
+ meta:
+ name: 'Flock'
+ link: 'https://support.flock.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'flock.png'
+ keywords:
+ - Flock
+ overview:
+ notification_description: |
+ Send notifications to Flock using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The incoming webhook URL as given by flock.com. You can use the same on all your Netdata servers (or you can have multiple if you like). Read more about flock webhooks and how to get one [here](https://admin.flock.com/webhooks).
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_FLOCK'
+ default_value: 'YES'
+ description: "Set `SEND_FLOCK` to YES"
+ required: true
+ - name: 'FLOCK_WEBHOOK_URL'
+ default_value: ''
+ description: "Set `FLOCK_WEBHOOK_URL` to your webhook URL."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_FLOCK'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_FLOCK` to the Flock channel you want the alert notifications to be sent to. All roles will default to this variable if left unconfigured."
+ required: true
+ detailed_description: |
+ You can have different channels per role, by editing DEFAULT_RECIPIENT_FLOCK with the channel you want, in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_flock[sysadmin]="systems"
+ role_recipients_flock[domainadmin]="domains"
+ role_recipients_flock[dba]="databases systems"
+ role_recipients_flock[webmaster]="marketing development"
+ role_recipients_flock[proxyadmin]="proxy-admin"
+ role_recipients_flock[sitemgr]="sites"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # flock (flock.com) global notification options
+
+ SEND_FLOCK="YES"
+ FLOCK_WEBHOOK_URL="https://api.flock.com/hooks/sendMessage/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ DEFAULT_RECIPIENT_FLOCK="alarms"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/gotify/metadata.yaml b/health/notifications/gotify/metadata.yaml
new file mode 100644
index 00000000..4552de1c
--- /dev/null
+++ b/health/notifications/gotify/metadata.yaml
@@ -0,0 +1,60 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-gotify'
+ meta:
+ name: 'Gotify'
+ link: 'https://gotify.net/'
+ categories:
+ - notify.agent
+ icon_filename: 'gotify.png'
+ keywords:
+ - gotify
+ overview:
+ notification_description: |
+ [Gotify](https://gotify.net/) is a self-hosted push notification service created for sending and receiving messages in real time.
+ You can send alerts to your Gotify instance using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - An application token. You can generate a new token in the Gotify Web UI.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_GOTIFY'
+ default_value: 'YES'
+ description: "Set `SEND_GOTIFY` to YES"
+ required: true
+ - name: 'GOTIFY_APP_TOKEN'
+ default_value: ''
+ description: "Set `GOTIFY_APP_TOKEN` to the app token you generated."
+ required: true
+ - name: 'GOTIFY_APP_URL'
+ default_value: ''
+ description: "Set `GOTIFY_APP_URL` to point to your Gotify instance, for example `https://push.example.domain/`"
+ required: true
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ SEND_GOTIFY="YES"
+ GOTIFY_APP_TOKEN="XXXXXXXXXXXXXXX"
+ GOTIFY_APP_URL="https://push.example.domain/"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/irc/metadata.yaml b/health/notifications/irc/metadata.yaml
new file mode 100644
index 00000000..aa2593f9
--- /dev/null
+++ b/health/notifications/irc/metadata.yaml
@@ -0,0 +1,100 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-irc'
+ meta:
+ name: 'IRC'
+ link: ''
+ categories:
+ - notify.agent
+ icon_filename: 'irc.png'
+ keywords:
+ - IRC
+ overview:
+ notification_description: |
+ Send notifications to IRC using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The `nc` utility. You can set the path to it, or Netdata will search for it in your system `$PATH`.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'nc path'
+ default_value: ''
+ description: "Set the path for nc, otherwise Netdata will search for it in your system $PATH"
+ required: true
+ detailed_description: |
+ ```sh
+ #------------------------------------------------------------------------------
+ # external commands
+ #
+ # The full path of the nc command.
+ # If empty, the system $PATH will be searched for it.
+ # If not found, irc notifications will be silently disabled.
+ nc="/usr/bin/nc"
+ ```
+ - name: 'SEND_IRC'
+ default_value: 'YES'
+ description: "Set `SEND_IRC` to YES."
+ required: true
+ - name: 'IRC_NETWORK'
+ default_value: ''
+ description: "Set `IRC_NETWORK` to the IRC network which your preferred channels belong to."
+ required: true
+ - name: 'IRC_PORT'
+ default_value: ''
+ description: "Set `IRC_PORT` to the IRC port to which a connection will occur."
+ required: false
+ - name: 'IRC_NICKNAME'
+ default_value: ''
+ description: "Set `IRC_NICKNAME` to the IRC nickname which is required to send the notification. It must not be an already registered name as the connection's MODE is defined as a guest."
+ required: true
+ - name: 'IRC_REALNAME'
+ default_value: ''
+ description: "Set `IRC_REALNAME` to the IRC realname which is required in order to make the connection."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_IRC'
+ default_value: ''
+ description: "You can have different channels per role, by editing `DEFAULT_RECIPIENT_IRC` with the channel you want"
+ required: true
+ detailed_description: |
+ The `DEFAULT_RECIPIENT_IRC` can be edited in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_irc[sysadmin]="#systems"
+ role_recipients_irc[domainadmin]="#domains"
+ role_recipients_irc[dba]="#databases #systems"
+ role_recipients_irc[webmaster]="#marketing #development"
+ role_recipients_irc[proxyadmin]="#proxy-admin"
+ role_recipients_irc[sitemgr]="#sites"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # irc notification options
+ #
+ SEND_IRC="YES"
+ DEFAULT_RECIPIENT_IRC="#system-alarms"
+ IRC_NETWORK="irc.freenode.net"
+ IRC_NICKNAME="netdata-alarm-user"
+ IRC_REALNAME="netdata-user"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/kavenegar/metadata.yaml b/health/notifications/kavenegar/metadata.yaml
new file mode 100644
index 00000000..559dbac0
--- /dev/null
+++ b/health/notifications/kavenegar/metadata.yaml
@@ -0,0 +1,82 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-kavenegar'
+ meta:
+ name: 'Kavenegar'
+ link: 'https://kavenegar.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'kavenegar.png'
+ keywords:
+ - Kavenegar
+ overview:
+ notification_description: |
+ [Kavenegar](https://kavenegar.com/) is a service for software developers, based in Iran, that provides SMS sending and receiving and voice calling through its APIs.
+ You can send notifications to Kavenegar using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The APIKEY and Sender from http://panel.kavenegar.com/client/setting/account
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_KAVENEGAR'
+ default_value: 'YES'
+ description: "Set `SEND_KAVENEGAR` to YES"
+ required: true
+ - name: 'KAVENEGAR_API_KEY'
+ default_value: ''
+ description: "Set `KAVENEGAR_API_KEY` to your API key."
+ required: true
+ - name: 'KAVENEGAR_SENDER'
+ default_value: ''
+ description: "Set `KAVENEGAR_SENDER` to the value of your Sender."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_KAVENEGAR'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_KAVENEGAR` to the SMS recipient you want the alert notifications to be sent to. You can define multiple recipients like this: 09155555555 09177777777."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ You can then have different SMS recipients per role, by editing `DEFAULT_RECIPIENT_KAVENEGAR` with the SMS recipients you want, in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_kavenegar[sysadmin]="09100000000"
+ role_recipients_kavenegar[domainadmin]="09111111111"
+ role_recipients_kavenegar[dba]="0922222222"
+ role_recipients_kavenegar[webmaster]="0933333333"
+ role_recipients_kavenegar[proxyadmin]="0944444444"
+ role_recipients_kavenegar[sitemgr]="0955555555"
+ ```
+
+ The values you provide should be defined as environments in `/etc/alertad.conf` with `ALLOWED_ENVIRONMENTS` option.
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # Kavenegar (Kavenegar.com) SMS options
+
+ SEND_KAVENEGAR="YES"
+ KAVENEGAR_API_KEY="XXXXXXXXXXXX"
+ KAVENEGAR_SENDER="YYYYYYYY"
+ DEFAULT_RECIPIENT_KAVENEGAR="0912345678"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/matrix/metadata.yaml b/health/notifications/matrix/metadata.yaml
new file mode 100644
index 00000000..17135aa3
--- /dev/null
+++ b/health/notifications/matrix/metadata.yaml
@@ -0,0 +1,91 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-matrix'
+ meta:
+ name: 'Matrix'
+ link: 'https://spec.matrix.org/unstable/push-gateway-api/'
+ categories:
+ - notify.agent
+ icon_filename: 'matrix.svg'
+ keywords:
+ - Matrix
+ overview:
+ notification_description: |
+ Send notifications to Matrix network rooms using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The url of the homeserver (`https://homeserver:port`).
+ - Credentials for connecting to the homeserver, in the form of a valid access token for your account (or for a dedicated notification account). These tokens usually don't expire.
+ - The room ids that you want to send the notification to.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_MATRIX'
+ default_value: 'YES'
+ description: "Set `SEND_MATRIX` to YES"
+ required: true
+ - name: 'MATRIX_HOMESERVER'
+ default_value: ''
+ description: "set `MATRIX_HOMESERVER` to the URL of the Matrix homeserver."
+ required: true
+ - name: 'MATRIX_ACCESSTOKEN'
+ default_value: ''
+ description: "Set `MATRIX_ACCESSTOKEN` to the access token from your Matrix account."
+ required: true
+ detailed_description: |
+ To obtain the access token, you can use the following curl command:
+ ```
+ curl -XPOST -d '{"type":"m.login.password", "user":"example", "password":"wordpass"}' "https://homeserver:8448/_matrix/client/r0/login"
+ ```
+ - name: 'DEFAULT_RECIPIENT_MATRIX'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_MATRIX` to the rooms you want the alert notifications to be sent to. The format is `!roomid:homeservername`."
+ required: true
+ detailed_description: |
+ The room ids are unique identifiers and can be obtained from the room settings in a Matrix client (e.g. Riot).
+
+ You can define multiple rooms like this: `!roomid1:homeservername` `!roomid2:homeservername`.
+
+ All roles will default to this variable if left unconfigured.
+
+ You can have different rooms per role, by editing `DEFAULT_RECIPIENT_MATRIX` with the `!roomid:homeservername` you want, in the following entries at the bottom of the same file:
+
+ ```conf
+ role_recipients_matrix[sysadmin]="!roomid1:homeservername"
+ role_recipients_matrix[domainadmin]="!roomid2:homeservername"
+ role_recipients_matrix[dba]="!roomid3:homeservername"
+ role_recipients_matrix[webmaster]="!roomid4:homeservername"
+ role_recipients_matrix[proxyadmin]="!roomid5:homeservername"
+ role_recipients_matrix[sitemgr]="!roomid6:homeservername"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # Matrix notifications
+
+ SEND_MATRIX="YES"
+ MATRIX_HOMESERVER="https://matrix.org:8448"
+ MATRIX_ACCESSTOKEN="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ DEFAULT_RECIPIENT_MATRIX="!XXXXXXXXXXXX:matrix.org"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/messagebird/metadata.yaml b/health/notifications/messagebird/metadata.yaml
new file mode 100644
index 00000000..a97cdc71
--- /dev/null
+++ b/health/notifications/messagebird/metadata.yaml
@@ -0,0 +1,79 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-messagebird'
+ meta:
+ name: 'MessageBird'
+ link: 'https://messagebird.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'messagebird.svg'
+ keywords:
+ - MessageBird
+ overview:
+ notification_description: |
+ Send notifications to MessageBird using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - An access key under 'API ACCESS (REST)' (you will want a live key). You can read more [here](https://developers.messagebird.com/quickstarts/sms/test-credits-api-keys/).
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_MESSAGEBIRD'
+ default_value: 'YES'
+ description: "Set `SEND_MESSAGEBIRD` to YES"
+ required: true
+ - name: 'MESSAGEBIRD_ACCESS_KEY'
+ default_value: ''
+ description: "Set `MESSAGEBIRD_ACCESS_KEY` to your API key."
+ required: true
+ - name: 'MESSAGEBIRD_NUMBER'
+ default_value: ''
+ description: "Set `MESSAGEBIRD_NUMBER` to the MessageBird number you want to use for the alert."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_MESSAGEBIRD'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_MESSAGEBIRD` to the number you want the alert notifications to be sent to as an SMS. You can define multiple recipients like this: +15555555555 +17777777777."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ You can then have different recipients per role, by editing `DEFAULT_RECIPIENT_MESSAGEBIRD` with the number you want, in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_messagebird[sysadmin]="+15555555555"
+ role_recipients_messagebird[domainadmin]="+15555555556"
+ role_recipients_messagebird[dba]="+15555555557"
+ role_recipients_messagebird[webmaster]="+15555555558"
+ role_recipients_messagebird[proxyadmin]="+15555555559"
+ role_recipients_messagebird[sitemgr]="+15555555550"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # Messagebird (messagebird.com) SMS options
+
+ SEND_MESSAGEBIRD="YES"
+ MESSAGEBIRD_ACCESS_KEY="XXXXXXXX"
+ MESSAGEBIRD_NUMBER="XXXXXXX"
+ DEFAULT_RECIPIENT_MESSAGEBIRD="+15555555555"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/msteams/metadata.yaml b/health/notifications/msteams/metadata.yaml
new file mode 100644
index 00000000..72de507a
--- /dev/null
+++ b/health/notifications/msteams/metadata.yaml
@@ -0,0 +1,79 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-teams'
+ meta:
+ name: 'Microsoft Teams'
+ link: 'https://www.microsoft.com/en-us/microsoft-teams/log-in'
+ categories:
+ - notify.agent
+ icon_filename: 'msteams.svg'
+ keywords:
+ - Microsoft
+ - Teams
+ - MS teams
+ overview:
+ notification_description: |
+ You can send Netdata alerts to Microsoft Teams using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The incoming webhook URL as given by Microsoft Teams. You can use the same on all your Netdata servers (or you can have multiple if you like).
+ - One or more channels to post the messages to
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_MSTEAMS'
+ default_value: 'YES'
+ description: "Set `SEND_MSTEAMS` to YES"
+ required: true
+ - name: 'MSTEAMS_WEBHOOK_URL'
+ default_value: ''
+ description: "Set `MSTEAMS_WEBHOOK_URL` to the incoming webhook URL as given by Microsoft Teams."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_MSTEAMS'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_MSTEAMS` to the encoded Microsoft Teams channel name you want the alert notifications to be sent to."
+ required: true
+ detailed_description: |
+ In Microsoft Teams the channel name is encoded in the URI after `/IncomingWebhook/`. You can define multiple channels like this: `CHANNEL1` `CHANNEL2`.
+
+ All roles will default to this variable if left unconfigured.
+
+ You can have different channels per role, by editing `DEFAULT_RECIPIENT_MSTEAMS` with the channel you want, in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_msteams[sysadmin]="CHANNEL1"
+ role_recipients_msteams[domainadmin]="CHANNEL2"
+ role_recipients_msteams[dba]="databases CHANNEL3"
+ role_recipients_msteams[webmaster]="CHANNEL4"
+ role_recipients_msteams[proxyadmin]="CHANNEL5"
+ role_recipients_msteams[sitemgr]="CHANNEL6"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # Microsoft Teams (office.com) global notification options
+
+ SEND_MSTEAMS="YES"
+ MSTEAMS_WEBHOOK_URL="https://outlook.office.com/webhook/XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/IncomingWebhook/CHANNEL/XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
+ DEFAULT_RECIPIENT_MSTEAMS="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/ntfy/metadata.yaml b/health/notifications/ntfy/metadata.yaml
new file mode 100644
index 00000000..cde57fd4
--- /dev/null
+++ b/health/notifications/ntfy/metadata.yaml
@@ -0,0 +1,70 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-ntfy'
+ meta:
+ name: 'ntfy'
+ link: 'https://ntfy.sh/'
+ categories:
+ - notify.agent
+ icon_filename: 'ntfy.svg'
+ keywords:
+ - ntfy
+ overview:
+ notification_description: |
+ [ntfy](https://ntfy.sh/) (pronounced: notify) is a simple HTTP-based [pub-sub](https://en.wikipedia.org/wiki/Publish%E2%80%93subscribe_pattern) notification service. It allows you to send notifications to your phone or desktop via scripts from any computer, entirely without signup, cost or setup. It's also [open source](https://github.com/binwiederhier/ntfy) if you want to run your own server.
+ You can send alerts to an ntfy server using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - (Optional) A [self-hosted ntfy server](https://docs.ntfy.sh/faq/#can-i-self-host-it), in case you don't want to use https://ntfy.sh
+ - A new [topic](https://ntfy.sh/#subscribe) for the notifications to be published to
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_NTFY'
+ default_value: 'YES'
+ description: "Set `SEND_NTFY` to YES"
+ required: true
+ - name: 'DEFAULT_RECIPIENT_NTFY'
+ default_value: ''
+ description: "URL formed by the server-topic combination you want the alert notifications to be sent to. Unless hosting your own server, the server should always be set to https://ntfy.sh."
+ required: true
+ detailed_description: |
+ You can define multiple recipient URLs like this: `https://SERVER1/TOPIC1` `https://SERVER2/TOPIC2`
+
+ All roles will default to this variable if left unconfigured.
+
+ You can then have different servers and/or topics per role, by editing `DEFAULT_RECIPIENT_NTFY` with the server-topic combination you want, in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_ntfy[sysadmin]="https://SERVER1/TOPIC1"
+ role_recipients_ntfy[domainadmin]="https://SERVER2/TOPIC2"
+ role_recipients_ntfy[dba]="https://SERVER3/TOPIC3"
+ role_recipients_ntfy[webmaster]="https://SERVER4/TOPIC4"
+ role_recipients_ntfy[proxyadmin]="https://SERVER5/TOPIC5"
+ role_recipients_ntfy[sitemgr]="https://SERVER6/TOPIC6"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ SEND_NTFY="YES"
+ DEFAULT_RECIPIENT_NTFY="https://ntfy.sh/netdata-X7seHg7d3Tw9zGOk https://ntfy.sh/netdata-oIPm4IK1IlUtlA30"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/opsgenie/metadata.yaml b/health/notifications/opsgenie/metadata.yaml
new file mode 100644
index 00000000..78bd8c2b
--- /dev/null
+++ b/health/notifications/opsgenie/metadata.yaml
@@ -0,0 +1,60 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-opsgenie'
+ meta:
+ name: 'OpsGenie'
+ link: 'https://www.atlassian.com/software/opsgenie'
+ categories:
+ - notify.agent
+ icon_filename: 'opsgenie.png'
+ keywords:
+ - OpsGenie
+ overview:
+ notification_description: |
+ Opsgenie is an alerting and incident response tool. It is designed to group and filter alarms, build custom routing rules for on-call teams, and correlate deployments and commits to incidents.
+ You can send notifications to Opsgenie using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - An Opsgenie integration. You can create an [integration](https://docs.opsgenie.com/docs/api-integration) in the [Opsgenie](https://www.atlassian.com/software/opsgenie) dashboard.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_OPSGENIE'
+ default_value: 'YES'
+ description: "Set `SEND_OPSGENIE` to YES"
+ required: true
+ - name: 'OPSGENIE_API_KEY'
+ default_value: ''
+ description: "Set `OPSGENIE_API_KEY` to your API key."
+ required: true
+ - name: 'OPSGENIE_API_URL'
+ default_value: 'https://api.opsgenie.com'
+ description: "Set `OPSGENIE_API_URL` to the corresponding URL if required, for example there are region-specific API URLs such as `https://eu.api.opsgenie.com`."
+ required: false
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ SEND_OPSGENIE="YES"
+ OPSGENIE_API_KEY="11111111-2222-3333-4444-555555555555"
+ OPSGENIE_API_URL=""
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/pagerduty/metadata.yaml b/health/notifications/pagerduty/metadata.yaml
new file mode 100644
index 00000000..6fc1d640
--- /dev/null
+++ b/health/notifications/pagerduty/metadata.yaml
@@ -0,0 +1,73 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-pagerduty'
+ meta:
+ name: 'PagerDuty'
+ link: 'https://www.pagerduty.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'pagerduty.png'
+ keywords:
+ - PagerDuty
+ overview:
+ notification_description: |
+ PagerDuty is an enterprise incident resolution service that integrates with ITOps and DevOps monitoring stacks to improve operational reliability and agility. From enriching and aggregating events to correlating them into incidents, PagerDuty streamlines the incident management process by reducing alert noise and resolution times.
+ You can send notifications to PagerDuty using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - An installation of the [PagerDuty](https://www.pagerduty.com/docs/guides/agent-install-guide/) agent on the node running the Netdata Agent
+ - A PagerDuty Generic API service using either the `Events API v2` or `Events API v1`
+ - [Add a new service](https://support.pagerduty.com/docs/services-and-integrations#section-configuring-services-and-integrations) to PagerDuty. Click Use our API directly and select either `Events API v2` or `Events API v1`. Once you finish creating the service, click on the Integrations tab to find your Integration Key.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_PD'
+ default_value: 'YES'
+ description: "Set `SEND_PD` to YES"
+ required: true
+ - name: 'DEFAULT_RECIPIENT_PD'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_PD` to the PagerDuty service key you want the alert notifications to be sent to. You can define multiple service keys like this: `pd_service_key_1` `pd_service_key_2`."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ The `DEFAULT_RECIPIENT_PD` can be edited in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_pd[sysadmin]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxa"
+ role_recipients_pd[domainadmin]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxb"
+ role_recipients_pd[dba]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxc"
+ role_recipients_pd[webmaster]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxd"
+ role_recipients_pd[proxyadmin]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxe"
+ role_recipients_pd[sitemgr]="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxf"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # pagerduty.com notification options
+
+ SEND_PD="YES"
+ DEFAULT_RECIPIENT_PD="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ USE_PD_VERSION="2"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/prowl/metadata.yaml b/health/notifications/prowl/metadata.yaml
new file mode 100644
index 00000000..b3f0e0a1
--- /dev/null
+++ b/health/notifications/prowl/metadata.yaml
@@ -0,0 +1,71 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-prowl'
+ meta:
+ name: 'Prowl'
+ link: 'https://www.prowlapp.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'prowl.png'
+ keywords:
+ - Prowl
+ overview:
+ notification_description: |
+ Send notifications to Prowl using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: |
+ - Because of how Netdata integrates with Prowl, there is a hard limit of at most 1000 notifications per hour (starting from the first notification sent). Any alerts beyond the first thousand in an hour will be dropped.
+ - Warning messages will be sent with the 'High' priority, critical messages will be sent with the 'Emergency' priority, and all other messages will be sent with the normal priority. Opening the notification's associated URL will take you to the Netdata dashboard of the system that issued the alert, directly to the chart that it triggered on.
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A Prowl API key, which can be requested through the Prowl website after registering
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_PROWL'
+ default_value: 'YES'
+ description: "Set `SEND_PROWL` to YES"
+ required: true
+ - name: 'DEFAULT_RECIPIENT_PROWL'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_PROWL` to the Prowl API key you want the alert notifications to be sent to. You can define multiple API keys like this: `APIKEY1`, `APIKEY2`."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ The `DEFAULT_RECIPIENT_PROWL` can be edited in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_prowl[sysadmin]="AAAAAAAA"
+ role_recipients_prowl[domainadmin]="BBBBBBBBB"
+ role_recipients_prowl[dba]="CCCCCCCCC"
+ role_recipients_prowl[webmaster]="DDDDDDDDDD"
+ role_recipients_prowl[proxyadmin]="EEEEEEEEEE"
+ role_recipients_prowl[sitemgr]="FFFFFFFFFF"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # iOS Push Notifications
+
+ SEND_PROWL="YES"
+ DEFAULT_RECIPIENT_PROWL="XXXXXXXXXX"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/pushbullet/metadata.yaml b/health/notifications/pushbullet/metadata.yaml
new file mode 100644
index 00000000..430033cc
--- /dev/null
+++ b/health/notifications/pushbullet/metadata.yaml
@@ -0,0 +1,76 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-pushbullet'
+ meta:
+ name: 'Pushbullet'
+ link: 'https://www.pushbullet.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'pushbullet.png'
+ keywords:
+ - Pushbullet
+ overview:
+ notification_description: |
+ Send notifications to Pushbullet using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A Pushbullet access token that can be created in your [account settings](https://www.pushbullet.com/#settings/account).
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_PUSHBULLET'
+ default_value: 'YES'
+ description: "Set `SEND_PUSHBULLET` to YES"
+ required: true
+ - name: 'PUSHBULLET_ACCESS_TOKEN'
+ default_value: ''
+ description: "Set `PUSHBULLET_ACCESS_TOKEN` to the access token you generated."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_PUSHBULLET'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_PUSHBULLET` to the email (e.g. `example@domain.com`) or the channel tag (e.g. `#channel`) you want the alert notifications to be sent to."
+ required: true
+ detailed_description: |
+ You can define multiple entries like this: user1@email.com user2@email.com.
+
+ All roles will default to this variable if left unconfigured.
+
+ The `DEFAULT_RECIPIENT_PUSHBULLET` can be edited in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_pushbullet[sysadmin]="user1@email.com"
+ role_recipients_pushbullet[domainadmin]="user2@mail.com"
+ role_recipients_pushbullet[dba]="#channel1"
+ role_recipients_pushbullet[webmaster]="#channel2"
+ role_recipients_pushbullet[proxyadmin]="user3@mail.com"
+ role_recipients_pushbullet[sitemgr]="user4@mail.com"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # pushbullet (pushbullet.com) push notification options
+
+ SEND_PUSHBULLET="YES"
+ PUSHBULLET_ACCESS_TOKEN="XXXXXXXXX"
+ DEFAULT_RECIPIENT_PUSHBULLET="admin1@example.com admin3@somemail.com #examplechanneltag #anotherchanneltag"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/pushover/metadata.yaml b/health/notifications/pushover/metadata.yaml
new file mode 100644
index 00000000..9af729ea
--- /dev/null
+++ b/health/notifications/pushover/metadata.yaml
@@ -0,0 +1,78 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-pushover'
+ meta:
+ name: 'PushOver'
+ link: 'https://pushover.net/'
+ categories:
+ - notify.agent
+ icon_filename: 'pushover.png'
+ keywords:
+ - PushOver
+ overview:
+ notification_description: |
+ Send notifications to Pushover using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ - Netdata will send warning messages with priority 0 and critical messages with priority 1.
+ - Pushover allows you to select do-not-disturb hours. The way this is configured, critical notifications will ring and vibrate your phone, even during the do-not-disturb-hours.
+ - All other notifications will be delivered silently.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - An Application token. You can use the same on all your Netdata servers.
+ - A User token for each user you are going to send notifications to. This is the actual recipient of the notification.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_PUSHOVER'
+ default_value: 'YES'
+ description: "Set `SEND_PUSHOVER` to YES"
+ required: true
+ - name: 'PUSHOVER_APP_TOKEN'
+ default_value: ''
+ description: "Set `PUSHOVER_APP_TOKEN` to your Pushover Application token."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_PUSHOVER'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_PUSHOVER` to the Pushover User token you want the alert notifications to be sent to. You can define multiple User tokens like this: `USERTOKEN1` `USERTOKEN2`."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ The `DEFAULT_RECIPIENT_PUSHOVER` can be edited in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_pushover[sysadmin]="USERTOKEN1"
+ role_recipients_pushover[domainadmin]="USERTOKEN2"
+ role_recipients_pushover[dba]="USERTOKEN3 USERTOKEN4"
+ role_recipients_pushover[webmaster]="USERTOKEN5"
+ role_recipients_pushover[proxyadmin]="USERTOKEN6"
+ role_recipients_pushover[sitemgr]="USERTOKEN7"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # pushover (pushover.net) global notification options
+
+ SEND_PUSHOVER="YES"
+ PUSHOVER_APP_TOKEN="XXXXXXXXX"
+ DEFAULT_RECIPIENT_PUSHOVER="USERTOKEN"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/rocketchat/metadata.yaml b/health/notifications/rocketchat/metadata.yaml
new file mode 100644
index 00000000..f644b93e
--- /dev/null
+++ b/health/notifications/rocketchat/metadata.yaml
@@ -0,0 +1,75 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-rocketchat'
+ meta:
+ name: 'RocketChat'
+ link: 'https://rocket.chat/'
+ categories:
+ - notify.agent
+ icon_filename: 'rocketchat.png'
+ keywords:
+ - RocketChat
+ overview:
+ notification_description: |
+ Send notifications to Rocket.Chat using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - The incoming webhook URL as given by RocketChat. You can use the same on all your Netdata servers (or you can have multiple if you like - your decision).
+ - One or more channels to post the messages to
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_ROCKETCHAT'
+ default_value: 'YES'
+ description: "Set `SEND_ROCKETCHAT` to `YES`"
+ required: true
+ - name: 'ROCKETCHAT_WEBHOOK_URL'
+ default_value: ''
+ description: "Set `ROCKETCHAT_WEBHOOK_URL` to your webhook URL."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_ROCKETCHAT'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_ROCKETCHAT` to the channel you want the alert notifications to be sent to. You can define multiple channels like this: `alerts` `systems`."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ The `DEFAULT_RECIPIENT_ROCKETCHAT` can be edited in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_rocketchat[sysadmin]="systems"
+ role_recipients_rocketchat[domainadmin]="domains"
+ role_recipients_rocketchat[dba]="databases systems"
+ role_recipients_rocketchat[webmaster]="marketing development"
+ role_recipients_rocketchat[proxyadmin]="proxy_admin"
+ role_recipients_rocketchat[sitemgr]="sites"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # rocketchat (rocket.chat) global notification options
+
+ SEND_ROCKETCHAT="YES"
+ ROCKETCHAT_WEBHOOK_URL="<your_incoming_webhook_url>"
+ DEFAULT_RECIPIENT_ROCKETCHAT="monitoring_alarms"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/sample-metadata.yaml b/health/notifications/sample-metadata.yaml
new file mode 100644
index 00000000..41a287ae
--- /dev/null
+++ b/health/notifications/sample-metadata.yaml
@@ -0,0 +1,39 @@
+id: ''
+meta:
+ name: ''
+ link: ''
+ categories: []
+ icon_filename: ''
+keywords: []
+overview:
+ notification_description: ''
+ notification_limitations: ''
+setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: ''
+ configuration:
+ file:
+ name: ''
+ description: ''
+ options:
+ description: ''
+ folding:
+ title: ''
+ enabled: true
+ list:
+ - name: ''
+ default_value: ''
+ description: ''
+ required: false
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: ''
+ folding:
+ enabled: false
+ description: ''
+ config: ''
diff --git a/health/notifications/slack/metadata.yaml b/health/notifications/slack/metadata.yaml
new file mode 100644
index 00000000..226c7ca3
--- /dev/null
+++ b/health/notifications/slack/metadata.yaml
@@ -0,0 +1,63 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-slack'
+ meta:
+ name: 'Slack'
+ link: 'https://slack.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'slack.png'
+ keywords:
+ - Slack
+ overview:
+ notification_description: |
+ Send notifications to a Slack workspace using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A Slack app along with an incoming webhook. Read Slack's guide on the topic [here](https://api.slack.com/messaging/webhooks).
+ - One or more channels to post the messages to
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_SLACK'
+ default_value: 'YES'
+ description: "Set `SEND_SLACK` to YES"
+ required: true
+ - name: 'SLACK_WEBHOOK_URL'
+ default_value: ''
+ description: "Set `SLACK_WEBHOOK_URL` to your Slack app's webhook URL."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_SLACK'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_SLACK` to the Slack channel your Slack app is set to send messages to. The syntax for channels is `#channel` or `channel`."
+ required: true
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # slack (slack.com) global notification options
+
+ SEND_SLACK="YES"
+ SLACK_WEBHOOK_URL="https://hooks.slack.com/services/XXXXXXXX/XXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ DEFAULT_RECIPIENT_SLACK="#alarms"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/smstools3/metadata.yaml b/health/notifications/smstools3/metadata.yaml
new file mode 100644
index 00000000..3a29183a
--- /dev/null
+++ b/health/notifications/smstools3/metadata.yaml
@@ -0,0 +1,84 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-sms'
+ meta:
+ name: 'SMS'
+ link: 'http://smstools3.kekekasvi.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'sms.svg'
+ keywords:
+ - SMS tools 3
+ - SMS
+ - Messaging
+ overview:
+ notification_description: |
+ Send notifications to `smstools3` using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ SMS Server Tools 3 is SMS gateway software that can send and receive short messages through GSM modems and mobile phones.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - [Install](http://smstools3.kekekasvi.com/index.php?p=compiling) and [configure](http://smstools3.kekekasvi.com/index.php?p=configure) `smsd`
+ - Ensure that the user `netdata` can execute `sendsms`. Any user executing `sendsms` needs to:
+ - Have write permissions to /tmp and /var/spool/sms/outgoing
+ - Be a member of group smsd
+ - To verify that the steps above were successful, run `su netdata` and execute `sendsms phone message`.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'sendsms'
+ default_value: ''
+ description: "Set the path for `sendsms`; otherwise Netdata will search for it in your system `$PATH`."
+ required: true
+ detailed_description: |
+ # The full path of the sendsms command (smstools3).
+ # If empty, the system $PATH will be searched for it.
+ # If not found, SMS notifications will be silently disabled.
+ sendsms="/usr/bin/sendsms"
+ - name: 'SEND_SMS'
+ default_value: 'YES'
+ description: "Set `SEND_SMS` to `YES`."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_SMS'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_SMS` to the phone number you want the alert notifications to be sent to. You can define multiple phone numbers like this: `PHONE1` `PHONE2`."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ You can then have different phone numbers per role, by editing `DEFAULT_RECIPIENT_SMS` with the phone number you want, in the following entries at the bottom of the same file:
+ ```conf
+ role_recipients_sms[sysadmin]="PHONE1"
+ role_recipients_sms[domainadmin]="PHONE2"
+ role_recipients_sms[dba]="PHONE3"
+ role_recipients_sms[webmaster]="PHONE4"
+ role_recipients_sms[proxyadmin]="PHONE5"
+ role_recipients_sms[sitemgr]="PHONE6"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # SMS Server Tools 3 (smstools3) global notification options
+ SEND_SMS="YES"
+ DEFAULT_RECIPIENT_SMS="1234567890"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/syslog/metadata.yaml b/health/notifications/syslog/metadata.yaml
new file mode 100644
index 00000000..c5f241e7
--- /dev/null
+++ b/health/notifications/syslog/metadata.yaml
@@ -0,0 +1,88 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-syslog'
+ meta:
+ name: 'syslog'
+ link: ''
+ categories:
+ - notify.agent
+ icon_filename: 'syslog.png'
+ keywords:
+ - syslog
+ overview:
+ notification_description: |
+ Send notifications to Syslog using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A working `logger` command. This is the case on pretty much every Linux system in existence, and most BSD systems.
+ - Access to the terminal where Netdata Agent is running
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SYSLOG_FACILITY'
+ default_value: ''
+ description: "Set `SYSLOG_FACILITY` to the facility used for logging. By default, this value is set to `local6`."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_SYSLOG'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_SYSLOG` to the recipient you want the alert notifications to be sent to."
+ required: true
+ detailed_description: |
+ Targets are defined as follows:
+
+ ```
+ [[facility.level][@host[:port]]/]prefix
+ ```
+
+ prefix defines what the log messages are prefixed with. By default, all lines are prefixed with 'netdata'.
+
+ The facility and level are the standard syslog facility and level options, for more info on them see your local logger and syslog documentation. By default, Netdata will log to the local6 facility, with a log level dependent on the type of message (crit for CRITICAL, warning for WARNING, and info for everything else).
+
+ You can configure sending directly to remote log servers by specifying a host (and optionally a port). However, this has a somewhat high overhead, so it is much preferred to use your local syslog daemon to handle the forwarding of messages to remote systems (pretty much all of them allow at least simple forwarding, and most of the really popular ones support complex queueing and routing of messages to remote log servers).
+
+ You can define multiple recipients like this: daemon.notice@loghost:514/netdata daemon.notice@loghost2:514/netdata.
+ All roles will default to this variable if left unconfigured.
+ - name: 'SEND_SYSLOG'
+ default_value: ''
+ description: "Set SEND_SYSLOG to YES, make sure you have everything else configured before turning this on."
+ required: true
+ detailed_description: |
+ You can then have different recipients per role, by editing DEFAULT_RECIPIENT_SYSLOG with the recipient you want, in the following entries at the bottom of the same file:
+
+ ```conf
+ role_recipients_syslog[sysadmin]="daemon.notice@loghost1:514/netdata"
+ role_recipients_syslog[domainadmin]="daemon.notice@loghost2:514/netdata"
+ role_recipients_syslog[dba]="daemon.notice@loghost3:514/netdata"
+ role_recipients_syslog[webmaster]="daemon.notice@loghost4:514/netdata"
+ role_recipients_syslog[proxyadmin]="daemon.notice@loghost5:514/netdata"
+ role_recipients_syslog[sitemgr]="daemon.notice@loghost6:514/netdata"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # syslog notifications
+
+ SEND_SYSLOG="YES"
+ SYSLOG_FACILITY='local6'
+ DEFAULT_RECIPIENT_SYSLOG="daemon.notice@loghost6:514/netdata"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/telegram/metadata.yaml b/health/notifications/telegram/metadata.yaml
new file mode 100644
index 00000000..23fce2a8
--- /dev/null
+++ b/health/notifications/telegram/metadata.yaml
@@ -0,0 +1,77 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-telegram'
+ meta:
+ name: 'Telegram'
+ link: 'https://telegram.org/'
+ categories:
+ - notify.agent
+ icon_filename: 'telegram.svg'
+ keywords:
+ - Telegram
+ overview:
+ notification_description: |
+ Send notifications to Telegram using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - A bot token. To get one, contact the [@BotFather](https://t.me/BotFather) bot and send the command `/newbot` and follow the instructions. Start a conversation with your bot or invite it into a group where you want it to send messages.
+ - The chat ID for every chat you want to send messages to. Contact the [@myidbot](https://t.me/myidbot) bot and send the `/getid` command to get your personal chat ID or invite it into a group and use the `/getgroupid` command to get the group chat ID. Group IDs start with a hyphen, supergroup IDs start with `-100`.
+ - Alternatively, you can get the chat ID directly from the bot API. Send your bot a command in the chat you want to use, then check `https://api.telegram.org/bot{YourBotToken}/getUpdates`, e.g. `https://api.telegram.org/bot111122223:7OpFlFFRzRBbrUUmIjj5HF9Ox2pYJZy5/getUpdates`
+ - Terminal access to the Agent you wish to configure
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_TELEGRAM'
+ default_value: 'YES'
+ description: "Set `SEND_TELEGRAM` to YES"
+ required: true
+ - name: 'TELEGRAM_BOT_TOKEN'
+ default_value: ''
+ description: "Set `TELEGRAM_BOT_TOKEN` to your bot token."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_TELEGRAM'
+ default_value: ''
+ description: "Set `DEFAULT_RECIPIENT_TELEGRAM` to the chat ID you want the alert notifications to be sent to. You can define multiple chat IDs like this: 49999333322 -1009999222255."
+ required: true
+ detailed_description: |
+ All roles will default to this variable if left unconfigured.
+
+ The `DEFAULT_RECIPIENT_TELEGRAM` can be edited in the following entries at the bottom of the same file:
+
+ ```conf
+ role_recipients_telegram[sysadmin]="49999333324"
+ role_recipients_telegram[domainadmin]="49999333389"
+ role_recipients_telegram[dba]="-1009999222255"
+ role_recipients_telegram[webmaster]="-1009999222255 49999333389"
+ role_recipients_telegram[proxyadmin]="49999333344"
+ role_recipients_telegram[sitemgr]="49999333876"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # telegram (telegram.org) global notification options
+
+ SEND_TELEGRAM="YES"
+ TELEGRAM_BOT_TOKEN="111122223:7OpFlFFRzRBbrUUmIjj5HF9Ox2pYJZy5"
+ DEFAULT_RECIPIENT_TELEGRAM="-100233335555"
+ troubleshooting:
+ problems:
+ list: []
diff --git a/health/notifications/twilio/metadata.yaml b/health/notifications/twilio/metadata.yaml
new file mode 100644
index 00000000..35fc3f04
--- /dev/null
+++ b/health/notifications/twilio/metadata.yaml
@@ -0,0 +1,83 @@
+# yamllint disable rule:line-length
+---
+- id: 'notify-twilio'
+ meta:
+ name: 'Twilio'
+ link: 'https://www.twilio.com/'
+ categories:
+ - notify.agent
+ icon_filename: 'twilio.png'
+ keywords:
+ - Twilio
+ overview:
+ notification_description: |
+ Send notifications to Twilio using Netdata's Agent alert notification feature, which supports dozens of endpoints, user roles, and more.
+ notification_limitations: ''
+ setup:
+ prerequisites:
+ list:
+ - title: ''
+ description: |
+ - Get your SID and Token from https://www.twilio.com/console
+ - Terminal access to the Agent you wish to configure
+ configuration:
+ file:
+ name: 'health_alarm_notify.conf'
+ options:
+ description: 'The following options can be defined for this notification'
+ folding:
+ title: 'Config Options'
+ enabled: true
+ list:
+ - name: 'SEND_TWILIO'
+ default_value: 'YES'
+ description: "Set `SEND_TWILIO` to YES"
+ required: true
+ - name: 'TWILIO_ACCOUNT_SID'
+ default_value: ''
+ description: "Set `TWILIO_ACCOUNT_SID` to your account SID."
+ required: true
+ - name: 'TWILIO_ACCOUNT_TOKEN'
+ default_value: ''
+ description: "Set `TWILIO_ACCOUNT_TOKEN` to your account token."
+ required: true
+ - name: 'TWILIO_NUMBER'
+ default_value: ''
+ description: "Set `TWILIO_NUMBER` to your account's number."
+ required: true
+ - name: 'DEFAULT_RECIPIENT_TWILIO'
+ default_value: ''
+ description: "Set DEFAULT_RECIPIENT_TWILIO to the number you want the alert notifications to be sent to. You can define multiple numbers like this: +15555555555 +17777777777."
+ required: true
+ detailed_description: |
+ You can then have different recipients per role, by editing DEFAULT_RECIPIENT_TWILIO with the recipient's number you want, in the following entries at the bottom of the same file:
+
+ ```conf
+ role_recipients_twilio[sysadmin]="+15555555555"
+ role_recipients_twilio[domainadmin]="+15555555556"
+ role_recipients_twilio[dba]="+15555555557"
+ role_recipients_twilio[webmaster]="+15555555558"
+ role_recipients_twilio[proxyadmin]="+15555555559"
+ role_recipients_twilio[sitemgr]="+15555555550"
+ ```
+ examples:
+ folding:
+ enabled: true
+ title: ''
+ list:
+ - name: 'Basic Configuration'
+ folding:
+ enabled: false
+ description: ''
+ config: |
+ #------------------------------------------------------------------------------
+ # Twilio (twilio.com) SMS options
+
+ SEND_TWILIO="YES"
+ TWILIO_ACCOUNT_SID="xxxxxxxxx"
+ TWILIO_ACCOUNT_TOKEN="xxxxxxxxxx"
+ TWILIO_NUMBER="xxxxxxxxxxx"
+ DEFAULT_RECIPIENT_TWILIO="+15555555555"
+ troubleshooting:
+ problems:
+ list: []