summary | refs | log | tree | commit | diff | stats
path: root/health
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-02-18 15:22:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-02-18 15:22:14 +0000
commit: 970987c0d6e99ef0db866a09396d5613042a94d1 (patch)
tree: af21bdbc146083814d19b8d553f15e671ae7fd24 /health
parent: Releasing debian version 1.29.1-3. (diff)
download: netdata-970987c0d6e99ef0db866a09396d5613042a94d1.tar.xz
netdata-970987c0d6e99ef0db866a09396d5613042a94d1.zip
Merging upstream version 1.29.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'health')
-rw-r--r-- health/health.d/tcp_resets.conf 4
-rw-r--r-- health/health_config.c 8
-rw-r--r-- health/health_json.c 15
-rwxr-xr-x health/notifications/alarm-notify.sh.in 8
-rwxr-xr-x health/notifications/health_alarm_notify.conf 1
-rw-r--r-- health/notifications/opsgenie/README.md 6
6 files changed, 28 insertions, 14 deletions
diff --git a/health/health.d/tcp_resets.conf b/health/health.d/tcp_resets.conf
index 91dad3c6..36a550a5 100644
--- a/health/health.d/tcp_resets.conf
+++ b/health/health.d/tcp_resets.conf
@@ -36,7 +36,7 @@
units: tcp resets/s
every: 10s
warn: $this > ((($1m_ipv4_tcp_resets_sent < 5)?(5):($1m_ipv4_tcp_resets_sent)) * (($status >= $WARNING) ? (1) : (20)))
- delay: up 0 down 60m multiplier 1.2 max 2h
+ delay: up 20s down 60m multiplier 1.2 max 2h
options: no-clear-notification
info: average TCP RESETS this host is sending, over the last 10 seconds (this can be an indication that a port scan is made, or that a service running on this host has crashed; clear notification for this alarm will not be sent)
to: sysadmin
@@ -61,7 +61,7 @@
units: tcp resets/s
every: 10s
warn: $this > ((($1m_ipv4_tcp_resets_received < 5)?(5):($1m_ipv4_tcp_resets_received)) * (($status >= $WARNING) ? (1) : (10)))
- delay: up 0 down 60m multiplier 1.2 max 2h
+ delay: up 20s down 60m multiplier 1.2 max 2h
options: no-clear-notification
info: average TCP RESETS this host is receiving, over the last 10 seconds (this can be an indication that a service this host needs, has crashed; clear notification for this alarm will not be sent)
to: sysadmin
diff --git a/health/health_config.c b/health/health_config.c
index a200a0db..1acf3693 100644
--- a/health/health_config.c
+++ b/health/health_config.c
@@ -1023,5 +1023,13 @@ void health_readdir(RRDHOST *host, const char *user_path, const char *stock_path
return;
}
+ int stock_enabled = (int)config_get_boolean(CONFIG_SECTION_HEALTH, "enable stock health configuration",
+ CONFIG_BOOLEAN_YES);
+
+ if (!stock_enabled) {
+ info("Netdata will not load stock alarms.");
+ stock_path = user_path;
+ }
+
recursive_config_double_dir_load(user_path, stock_path, subpath, health_readfile, (void *) host, 0);
}
diff --git a/health/health_json.c b/health/health_json.c
index d068b542..7b5a1e3c 100644
--- a/health/health_json.c
+++ b/health/health_json.c
@@ -352,14 +352,15 @@ void health_active_log_alarms_2json(RRDHOST *host, BUFFER *wb) {
unsigned int count = 0;
ALARM_ENTRY *ae;
for(ae = host->health_log.alarms; ae && count < max ; ae = ae->next) {
-
- if(likely(!((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL)
- && !ae->updated_by_id)))
- continue;
-
- if(likely(count)) buffer_strcat(wb, ",");
+ if (!ae->updated_by_id &&
+ ((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) ||
+ ((ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL) &&
+ ae->new_status == RRDCALC_STATUS_REMOVED))) {
+ if (likely(count))
+ buffer_strcat(wb, ",");
health_alarm_entry2json_nolock(wb, ae, host);
- count++;
+ count++;
+ }
}
buffer_strcat(wb, "]");
diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in
index 456e20cc..3bf8db5f 100755
--- a/health/notifications/alarm-notify.sh.in
+++ b/health/notifications/alarm-notify.sh.in
@@ -411,6 +411,8 @@ else
done
fi
+OPSGENIE_API_URL=${OPSGENIE_API_URL:-"https://api.opsgenie.com"}
+
# If we didn't autodetect the character set for e-mail and it wasn't
# set by the user, we need to set it to a reasonable default. UTF-8
# should be correct for almost all modern UNIX systems.
@@ -853,7 +855,7 @@ send_email() {
fi
[ -n "${sender_email}" ] && opts+=(-f "${sender_email}")
- [ -n "${sender_name}" ] && opts+=(-F "${sender_name}")
+ [ -n "${sender_name}" ] && sendmail --help 2>&1 | grep -q "\-F " && opts+=(-F "${sender_name}")
if [ "${debug}" = "1" ]; then
echo >&2 "--- BEGIN sendmail command ---"
@@ -2052,7 +2054,7 @@ send_dynatrace() {
local dynatrace_url="${DYNATRACE_SERVER}/e/${DYNATRACE_SPACE}/api/v1/events"
local description="NetData Notification for: ${host} ${chart}.${name} is ${status}"
local payload=""
-
+
payload=$(cat <<EOF
{
"title": "NetData Alarm from ${host}",
@@ -2179,7 +2181,7 @@ send_opsgenie() {
EOF
)
- httpcode=$(docurl -X POST -H "Content-Type: application/json" -d "${payload}" "https://api.opsgenie.com/v1/json/integrations/webhooks/netdata?apiKey=${OPSGENIE_API_KEY}")
+ httpcode=$(docurl -X POST -H "Content-Type: application/json" -d "${payload}" "${OPSGENIE_API_URL}/v1/json/integrations/webhooks/netdata?apiKey=${OPSGENIE_API_KEY}")
# https://docs.opsgenie.com/docs/alert-api#create-alert
if [ "${httpcode}" = "200" ]; then
info "sent opsgenie notification for: ${host} ${chart}.${name} is ${status}"
diff --git a/health/notifications/health_alarm_notify.conf b/health/notifications/health_alarm_notify.conf
index 827a47d9..be669e13 100755
--- a/health/notifications/health_alarm_notify.conf
+++ b/health/notifications/health_alarm_notify.conf
@@ -284,6 +284,7 @@ SEND_OPSGENIE="YES"
# Api key
OPSGENIE_API_KEY=""
+OPSGENIE_API_URL=""
DEFAULT_RECIPIENT_OPSGENIE=""
diff --git a/health/notifications/opsgenie/README.md b/health/notifications/opsgenie/README.md
index aeb31548..7ae409df 100644
--- a/health/notifications/opsgenie/README.md
+++ b/health/notifications/opsgenie/README.md
@@ -20,14 +20,16 @@ directory](/docs/configure/nodes.md):
./edit-config health_alarm_notify.conf
```
-Change the variable `OPSGENIE_API_KEY` with the API key you got from Opsgenie.
+Change the variable `OPSGENIE_API_KEY` with the API key you got from Opsgenie.
+`OPSGENIE_API_URL` defaults to https://api.opsgenie.com, however there are region specific API URLs such as https://eu.api.opsgenie.com, so set this if required.
```
SEND_OPSGENIE="YES"
# Api key
-# Default Opsgenie APi
+# Default Opsgenie API
OPSGENIE_API_KEY="11111111-2222-3333-4444-555555555555"
+OPSGENIE_API_URL=""
```
Changes to `health_alarm_notify.conf` do not require a Netdata restart. You can test your Opsgenie notifications