From e970e0b37b8bd7f246feb3f70c4136418225e434 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 1 Dec 2021 07:15:04 +0100 Subject: Adding upstream version 1.32.0. Signed-off-by: Daniel Baumann --- health/notifications/alarm-notify.sh.in | 888 ++++++++++++++++++++++++++++---- 1 file changed, 787 insertions(+), 101 deletions(-) (limited to 'health/notifications/alarm-notify.sh.in') diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in index 9a3a80ad6..08a32ff10 100755 --- a/health/notifications/alarm-notify.sh.in +++ b/health/notifications/alarm-notify.sh.in @@ -239,6 +239,11 @@ else calc_param_values="${22}" # the values of the parameters in the expression, at the time of the evaluation total_warnings="${23}" # Total number of alarms in WARNING state total_critical="${24}" # Total number of alarms in CRITICAL state + total_warn_alarms="${25}" # List of alarms in warning state + total_crit_alarms="${26}" # List of alarms in critical state + classification="${27}" # The class field from .conf files + edit_command_line="${28}" # The command to edit the alarm, with the line number + sender_host="${29}" # The host sending this notification fi # ----------------------------------------------------------------------------- @@ -252,6 +257,17 @@ else host="${args_host}" fi +# ----------------------------------------------------------------------------- +# Do the same for sender_host (find a suitable hostname to use, if netdata did not supply a hostname) + +if [ -z ${sender_host} ]; then + this_host=$(hostname -s 2>/dev/null) + s_host="${this_host}" + sender_host="${this_host}" +else + s_host="${sender_host}" +fi + # ----------------------------------------------------------------------------- # screen statuses we don't need to send a notification @@ -303,7 +319,7 @@ SLACK_WEBHOOK_URL= # Microsoft Teams configs MSTEAMS_WEBHOOK_URL= -# Legacy Microsoft Teams configs for backwards compatability: +# Legacy Microsoft Teams configs for backwards compatibility: declare -A role_recipients_msteam # rocketchat configs @@ -810,6 +826,14 @@ date=$(date --date=@${when} "${date_format}" 2>/dev/null) [ -z "${date}" ] && date=$(date --date=@${when} 2>/dev/null) [ -z "${date}" ] && date=$(date 2>/dev/null) +# ----------------------------------------------------------------------------- +# get the date in utc the alarm happened + +date_utc=$(date --date=@${when} "${date_format}" -u 2>/dev/null) +[ -z "${date_utc}" ] && date_utc=$(date -u "${date_format}" 2>/dev/null) +[ -z "${date_utc}" ] && date_utc=$(date -u --date=@${when} 2>/dev/null) +[ -z "${date_utc}" ] && date_utc=$(date -u 2>/dev/null) + # ---------------------------------------------------------------------------- # prepare some extra headers if we've been asked to thread e-mails if [ "${SEND_EMAIL}" == "YES" ] && [ "${EMAIL_THREADING}" != "NO" ]; then @@ -915,7 +939,7 @@ send_email() { fi [ -n "${sender_email}" ] && opts+=(-f "${sender_email}") - [ -n "${sender_name}" ] && sendmail --help 2>&1 | grep -q "\-F " && opts+=(-F "${sender_name}") + [ -n "${sender_name}" ] && ${sendmail} -F 2>&1 | head -1 | grep -qv "sendmail: unrecognized option: F" && opts+=(-F "${sender_name}") if [ "${debug}" = "1" ]; then echo >&2 "--- BEGIN sendmail command ---" @@ -1364,15 +1388,15 @@ EOF )" # Replacing in the webhook CHANNEL string by the MS Teams channel name from conf file. 
- webhook="${webhook//CHANNEL/${channel}}" + cur_webhook="${webhook//CHANNEL/${channel}}" - httpcode=$(docurl -H "Content-Type: application/json" -d "${payload}" "${webhook}") + httpcode=$(docurl -H "Content-Type: application/json" -d "${payload}" "${cur_webhook}") if [ "${httpcode}" = "200" ]; then - info "sent Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${webhook}'" + info "sent Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${cur_webhook}'" sent=$((sent + 1)) else - error "failed to send Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${webhook}', with HTTP response status code ${httpcode}." + error "failed to send Microsoft team notification for: ${host} ${chart}.${name} is ${status} to '${cur_webhook}', with HTTP response status code ${httpcode}." fi done @@ -2113,12 +2137,12 @@ send_dynatrace() { [ "${SEND_DYNATRACE}" != "YES" ] && return 1 local dynatrace_url="${DYNATRACE_SERVER}/e/${DYNATRACE_SPACE}/api/v1/events" - local description="NetData Notification for: ${host} ${chart}.${name} is ${status}" + local description="Netdata Notification for: ${host} ${chart}.${name} is ${status}" local payload="" payload=$(cat </dev/null url_family="${REPLY}" urlencode "${name}" >/dev/null url_name="${REPLY}" +urlencode "${value_string}" >/dev/null +url_value_string="${REPLY}" -redirect_params="host=${url_host}&chart=${url_chart}&family=${url_family}&alarm=${url_name}&alarm_unique_id=${unique_id}&alarm_id=${alarm_id}&alarm_event_id=${event_id}&alarm_when=${when}" +redirect_params="host=${url_host}&chart=${url_chart}&family=${url_family}&alarm=${url_name}&alarm_unique_id=${unique_id}&alarm_id=${alarm_id}&alarm_event_id=${event_id}&alarm_when=${when}&alarm_status=${status}&alarm_chart=${chart}&alarm_value=${url_value_string}" GOTOCLOUD=0 if [ "${NETDATA_REGISTRY_URL}" == "https://registry.my-netdata.io" ]; then @@ -2284,9 +2310,9 @@ fi if [ ${GOTOCLOUD} -eq 0 ]; then goto_url="${NETDATA_REGISTRY_URL}/goto-host-from-alarm.html?${redirect_params}" else - # Temporarily disable alarm redirection, as the cloud endpoint no longer exists. This functionality will be restored after discussion on #9487. For now, just lead to netdata.cloud - #goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentID=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}" - goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}" + # Temporarily disable alarm redirection, as the cloud endpoint no longer exists. This functionality will be restored after discussion on #9487. 
For now, just lead to netdata.cloud + # Re-allow alarm redirection, for alarms 2.0, new template + goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentId=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}" fi # the severity of the alarm @@ -2311,48 +2337,79 @@ alarm="${name//_/ } = ${value_string}" # the image of the alarm image="${images_base_url}/images/banner-icon-144x144.png" +# have a default email status, in case the following case does not catch it +status_email_subject="${status}" + # prepare the title based on status case "${status}" in CRITICAL) image="${images_base_url}/images/alert-128-red.png" + alarm_badge="${NETDATA_REGISTRY_CLOUD_BASE_URL}/static/email/img/label_critical.png" status_message="is critical" + status_email_subject="Critical" color="#ca414b" + rich_status_raised_for="Raised to critical, for ${non_clear_duration_txt}" + background_color="#FFEBEF" + border_color="#FF4136" + text_color="#FF4136" + action_text_color="#FFFFFF" ;; WARNING) image="${images_base_url}/images/alert-128-orange.png" + alarm_badge="${NETDATA_REGISTRY_CLOUD_BASE_URL}/static/email/img/label_warning.png" status_message="needs attention" + status_email_subject="Warning" color="#ffc107" + rich_status_raised_for="Raised to warning, for ${non_clear_duration_txt}" + background_color="#FFF8E1" + border_color="#FFC300" + text_color="#536775" + action_text_color="#35414A" ;; CLEAR) image="${images_base_url}/images/check-mark-2-128-green.png" + alarm_badge="${NETDATA_REGISTRY_CLOUD_BASE_URL}/static/email/img/label_recovered.png" status_message="recovered" + status_email_subject="Clear" color="#77ca6d" + rich_status_raised_for= + background_color="#E5F5E8" + border_color="#68C47D" + text_color="#00AB44" + action_text_color="#FFFFFF" ;; esac +# the html email subject +html_email_subject="${status_email_subject}, ${name} = ${value_string}, on ${host}" + if [ "${status}" = "CLEAR" ]; then severity="Recovered from ${old_status}" if [ ${non_clear_duration} -gt ${duration} ]; then raised_for="(alarm was raised for ${non_clear_duration_txt})" fi + rich_status_raised_for="Recovered from ${old_status,,}, ${raised_for}" # don't show the value when the status is CLEAR # for certain alarms, this value might not have any meaning alarm="${name//_/ } ${raised_for}" + html_email_subject="${status_email_subject}, ${name} ${raised_for}, on ${host}" elif { [ "${old_status}" = "WARNING" ] && [ "${status}" = "CRITICAL" ]; }; then severity="Escalated to ${status}" if [ ${non_clear_duration} -gt ${duration} ]; then raised_for="(alarm is raised for ${non_clear_duration_txt})" fi + rich_status_raised_for="Escalated to critical, ${raised_for}" elif { [ "${old_status}" = "CRITICAL" ] && [ "${status}" = "WARNING" ]; }; then severity="Demoted to ${status}" if [ ${non_clear_duration} -gt ${duration} ]; then raised_for="(alarm is raised for ${non_clear_duration_txt})" fi + rich_status_raised_for="Demoted to warning, ${raised_for}" else raised_for= @@ -2628,6 +2685,13 @@ Subject: ${host} ${status_message} - ${name//_/ } - ${chart} MIME-Version: 1.0 Content-Type: multipart/alternative; boundary="multipart-boundary" ${email_thread_headers} +X-Netdata-Severity: ${status,,} +X-Netdata-Alert-Name: $name +X-Netdata-Chart: $chart +X-Netdata-Family: $family +X-Netdata-Classification: $classification +X-Netdata-Host: $host +X-Netdata-Role: $roles This is a MIME-encoded multipart message @@ -2638,120 +2702,742 @@ EOF else +now=$(date "+%s") + +if [ -n "$total_warn_alarms" ]; then + while read -d, -r pair; do + IFS='=' read -r key val 
<<<"$pair" + + date_w=$(date --date=@${val} "${date_format}" 2>/dev/null) + [ -z "${date_w}" ] && date_w=$(date "${date_format}" 2>/dev/null) + [ -z "${date_w}" ] && date_w=$(date --date=@${val} 2>/dev/null) + [ -z "${date_w}" ] && date_w=$(date 2>/dev/null) + + elapsed=$((now - val)) + + duration4human ${elapsed} >/dev/null + elapsed_txt="${REPLY}" + + WARN_ALARMS+=" +
                <tr>
                    <td>
                        <table role='presentation' border='0' cellpadding='0' cellspacing='0' width='100%'>
                            <tr>
                                <td><b>${key}</b></td>
                            </tr>
                            <tr>
                                <td>${date_w}</td>
                            </tr>
                            <tr>
                                <td>
                                    <span>Warning for ${elapsed_txt}</span>
                                </td>
                            </tr>
                        </table>
                    </td>
                </tr>
        "

	done <<<"$total_warn_alarms,"
fi
if [ -n "$total_crit_alarms" ]; then
	while read -d, -r pair; do
		IFS='=' read -r key val <<<"$pair"

		date_c=$(date --date=@${val} "${date_format}" 2>/dev/null)
		[ -z "${date_c}" ] && date_c=$(date "${date_format}" 2>/dev/null)
		[ -z "${date_c}" ] && date_c=$(date --date=@${val} 2>/dev/null)
		[ -z "${date_c}" ] && date_c=$(date 2>/dev/null)

		elapsed=$((now - val))

		duration4human ${elapsed} >/dev/null
		elapsed_txt="${REPLY}"

		CRIT_ALARMS+="
                <tr>
                    <td>
                        <table role='presentation' border='0' cellpadding='0' cellspacing='0' width='100%'>
                            <tr>
                                <td><b>${key}</b></td>
                            </tr>
                            <tr>
                                <td>${date_c}</td>
                            </tr>
                            <tr>
                                <td>
                                    <span>Critical for ${elapsed_txt}</span>
                                </td>
                            </tr>
                        </table>
                    </td>
                </tr>
        "

	done <<<"$total_crit_alarms,"
fi

if [ -n "$edit_command_line" ]; then
	IFS='=' read -r edit_command line <<<"$edit_command_line"
fi
IFS='' read -r -d '' email_html_part <<EOF
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8"/>
    <meta name="viewport" content="width=device-width, initial-scale=1"/>
</head>
<body>
<table role="presentation" border="0" cellpadding="0" cellspacing="0" width="100%">
    <tr>
        <td align="center">
            <table role="presentation" border="0" cellpadding="0" cellspacing="0" width="100%">
                <tr>
                    <td align="center">
                        <img src="${images_base_url}/images/banner-icon-144x144.png" height="32" alt="Netdata Logo"/>
                    </td>
                </tr>
                <tr>
                    <td align="center">
                        Notification
                    </td>
                </tr>
                <tr>
                    <td align="center" style="background-color: ${background_color}; border: 1px solid ${border_color};">
                        <table role="presentation" border="0" cellpadding="0" cellspacing="0" width="100%">
                            <tr>
                                <td align="center"><b>${name}</b></td>
                            </tr>
                            <tr>
                                <td align="center">
                                    <img src="${alarm_badge}" height="23" alt="${status}"/>
                                </td>
                            </tr>
                            <tr>
                                <td align="center">on ${host}</td>
                            </tr>
                            <tr>
                                <td align="center" style="color: ${text_color};">${value_string}</td>
                            </tr>
                            <tr>
                                <td align="center">Details: ${info}</td>
                            </tr>
                            <tr>
                                <td align="center">
                                    <a href="${goto_url}" style="background-color: ${border_color}; color: ${action_text_color};">GO TO CHART</a>
                                </td>
                            </tr>
                        </table>
                    </td>
                </tr>
                <tr>
                    <td>
                        <table role="presentation" border="0" cellpadding="0" cellspacing="0" width="100%">
                            <tr>
                                <td>Chart: <b>${chart}</b></td>
                            </tr>
                            <tr>
                                <td>Family: <b>${family}</b></td>
                            </tr>
                            <tr>
                                <td>${rich_status_raised_for}</td>
                            </tr>
                            <tr>
                                <td>On <b>${date}</b></td>
                            </tr>
                            <tr>
                                <td>By: <b>${host}</b></td>
                            </tr>
                            <tr>
                                <td>Global time: <b>${date_utc}</b></td>
                            </tr>
                            <tr>
                                <td>Classification: <b>${classification}</b></td>
                            </tr>
                            <tr>
                                <td>Role: <b>${roles}</b></td>
                            </tr>
                        </table>
                    </td>
                </tr>
                <tr>
                    <td align="center">
                        <b>Want to know more about this alert?</b><br/>
                        Discuss and troubleshoot with others on the <a href="https://community.netdata.cloud">Netdata community forums</a>
                    </td>
                </tr>
-                <table align="center" border="0" cellpadding="0" cellspacing="0">
-                    <tr>
-                        <td>netdata notification</td>
-                    </tr>
-                    <tr>
-                        <td>${host} ${status_message}</td>
-                    </tr>
-                    <tr>
-                        <td>${chart} <small>Chart</small></td>
-                    </tr>
-                    <tr>
-                        <td><b>${alarm}</b>${info_html} <small>Alarm</small></td>
-                    </tr>
-                    <tr>
-                        <td>${family} <small>Family</small></td>
-                    </tr>
-                    <tr>
-                        <td>${severity} <small>Severity</small></td>
-                    </tr>
-                    <tr>
-                        <td>${date} ${raised_for_html} <small>Time</small></td>
-                    </tr>
-                    <tr>
-                        <td>${calc_expression} <small>Evaluated Expression</small></td>
-                    </tr>
-                    <tr>
-                        <td>${calc_param_values} <small>Expression Variables</small></td>
-                    </tr>
-                    <tr>
-                        <td>The host has ${total_warnings} WARNING and ${total_critical} CRITICAL alarm(s) raised.</td>
-                    </tr>
-                    <tr>
-                        <td><a href="${goto_url}">View Netdata</a></td>
-                    </tr>
-                    <tr>
-                        <td>The source of this alarm is line ${src}<br/>(alarms are configurable, edit this file to adapt the alarm to your needs)</td>
-                    </tr>
-                    <tr>
-                        <td>Sent by netdata, the real-time performance and health monitoring, on ${host}.</td>
-                    </tr>
-                </table>
                <tr>
                    <td align="center">
                        <b>Need to configure this alert?</b><br/>
                        Edit this alert's configuration file by logging into $s_host and running the following command:<br/>
                        <code>${edit_command}</code><br/>
                        The alarm to edit is at line {${line}}
                    </td>
                </tr>
                <tr>
                    <td align="center">
                        The node has <b>${total_warnings} warning</b> and <b>${total_critical} critical</b> additional active alert(s)
                    </td>
                </tr>
                ${CRIT_ALARMS}
                ${WARN_ALARMS}
                <tr>
                    <td align="center">
                        © Netdata 2021 - The real-time performance and health monitoring
                    </td>
                </tr>
            </table>
        </td>
    </tr>
</table>
</body>
</html>
EOF

send_email <