summaryrefslogtreecommitdiffstats
path: root/plugins.d
diff options
context:
space:
mode:
Diffstat (limited to 'plugins.d')
-rw-r--r--plugins.d/Makefile.am1
-rw-r--r--plugins.d/Makefile.in13
-rwxr-xr-xplugins.d/alarm-email.sh264
-rwxr-xr-xplugins.d/alarm-notify.sh767
-rwxr-xr-xplugins.d/cgroup-name.sh2
-rwxr-xr-xplugins.d/charts.d.plugin99
-rw-r--r--plugins.d/loopsleepms.sh.inc175
-rwxr-xr-xplugins.d/tc-qos-helper.sh7
8 files changed, 964 insertions, 364 deletions
diff --git a/plugins.d/Makefile.am b/plugins.d/Makefile.am
index b8a28610a..4bc0dc447 100644
--- a/plugins.d/Makefile.am
+++ b/plugins.d/Makefile.am
@@ -9,6 +9,7 @@ dist_plugins_DATA = \
dist_plugins_SCRIPTS = \
alarm-email.sh \
+ alarm-notify.sh \
cgroup-name.sh \
charts.d.dryrun-helper.sh \
charts.d.plugin \
diff --git a/plugins.d/Makefile.in b/plugins.d/Makefile.in
index 06211d51c..1854ea861 100644
--- a/plugins.d/Makefile.in
+++ b/plugins.d/Makefile.in
@@ -83,8 +83,13 @@ subdir = plugins.d
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(dist_plugins_SCRIPTS) $(dist_plugins_DATA)
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_pthread.m4 \
- $(top_srcdir)/configure.ac
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \
+ $(top_srcdir)/m4/ax_c__generic.m4 \
+ $(top_srcdir)/m4/ax_c_mallinfo.m4 \
+ $(top_srcdir)/m4/ax_c_mallopt.m4 \
+ $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/jemalloc.m4 \
+ $(top_srcdir)/m4/tcmalloc.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
@@ -208,6 +213,7 @@ PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
+SSE_CANDIDATE = @SSE_CANDIDATE@
STRIP = @STRIP@
UUID_CFLAGS = @UUID_CFLAGS@
UUID_LIBS = @UUID_LIBS@
@@ -240,6 +246,8 @@ datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
+has_jemalloc = @has_jemalloc@
+has_tcmalloc = @has_tcmalloc@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
@@ -286,6 +294,7 @@ dist_plugins_DATA = \
dist_plugins_SCRIPTS = \
alarm-email.sh \
+ alarm-notify.sh \
cgroup-name.sh \
charts.d.dryrun-helper.sh \
charts.d.plugin \
diff --git a/plugins.d/alarm-email.sh b/plugins.d/alarm-email.sh
index 78c79ccdb..df083c655 100755
--- a/plugins.d/alarm-email.sh
+++ b/plugins.d/alarm-email.sh
@@ -1,264 +1,6 @@
#!/usr/bin/env bash
-me="${0}"
+# OBSOLETE - REPLACED WITH
+# alarm-notify.sh
-sendmail="$(which sendmail 2>/dev/null || command -v sendmail 2>/dev/null)"
-if [ -z "${sendmail}" ]
-then
- echo >&2 "I cannot send emails - there is no sendmail command available."
-fi
-
-sendmail_from_pipe() {
- "${sendmail}" -t
-
- if [ $? -eq 0 ]
- then
- echo >&2 "${me}: Sent notification email for ${status} on '${chart}.${name}'"
- return 0
- else
- echo >&2 "${me}: FAILED to send notification email for ${status} on '${chart}.${name}'"
- return 1
- fi
-}
-
-name="${1}" # the name of the alarm, as given in netdata health.d entries
-chart="${2}" # the name of the chart (type.id)
-family="${3}" # the family of the chart
-status="${4}" # the current status : UNITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
-old_status="${5}" # the previous status: UNITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
-value="${6}" # the current value
-old_value="${7}" # the previous value
-src="${8}" # the line number and file the alarm has been configured
-duration="${9}" # the duration in seconds the previous state took
-non_clear_duration="${10}" # the total duration in seconds this is non-clear
-units="${11}" # the units of the value
-info="${12}" # a short description of the alarm
-
-[ ! -z "${info}" ] && info=" <small><br/>${info}</small>"
-
-# get the system hostname
-hostname="${NETDATA_HOSTNAME}"
-[ -z "${hostname}" ] && hostname="${NETDATA_REGISTRY_HOSTNAME}"
-[ -z "${hostname}" ] && hostname="$(hostname)"
-
-goto_url="${NETDATA_REGISTRY_URL}/goto-host-from-alarm.html?machine_guid=${NETDATA_REGISTRY_UNIQUE_ID}&chart=${chart}&family=${family}"
-
-# get the current date
-date="$(date)"
-
-duration4human() {
- local s="${1}" d=0 h=0 m=0 ds="day" hs="hour" ms="minute" ss="second"
- d=$(( s / 86400 ))
- s=$(( s - (d * 86400) ))
- h=$(( s / 3600 ))
- s=$(( s - (h * 3600) ))
- m=$(( s / 60 ))
- s=$(( s - (m * 60) ))
-
- if [ ${d} -gt 0 ]
- then
- [ ${m} -ge 30 ] && h=$(( h + 1 ))
- [ ${d} -gt 1 ] && ds="days"
- [ ${h} -gt 1 ] && hs="hours"
- if [ ${h} -gt 0 ]
- then
- echo "${d} ${ds} and ${h} ${hs}"
- else
- echo "${d} ${ds}"
- fi
- elif [ ${h} -gt 0 ]
- then
- [ ${s} -ge 30 ] && m=$(( m + 1 ))
- [ ${h} -gt 1 ] && hs="hours"
- [ ${m} -gt 1 ] && ms="minutes"
- if [ ${m} -gt 0 ]
- then
- echo "${h} ${hs} and ${m} ${ms}"
- else
- echo "${h} ${hs}"
- fi
- elif [ ${m} -gt 0 ]
- then
- [ ${m} -gt 1 ] && ms="minutes"
- [ ${s} -gt 1 ] && ss="seconds"
- if [ ${s} -gt 0 ]
- then
- echo "${m} ${ms} and ${s} ${ss}"
- else
- echo "${m} ${ms}"
- fi
- else
- [ ${s} -gt 1 ] && ss="seconds"
- echo "${s} ${ss}"
- fi
-}
-
-severity="${status}"
-raised_for="<br/>(was ${old_status,,} for $(duration4human ${duration}))"
-status_message="status unknown"
-color="grey"
-alarm="${name} = ${value} ${units}"
-
-# prepare the title based on status
-case "${status}" in
- CRITICAL)
- status_message="is critical"
- color="#ca414b"
- ;;
-
- WARNING)
- status_message="needs attention"
- color="#caca4b"
- ;;
-
- CLEAR)
- status_message="recovered"
- color="#77ca6d"
-
- # don't show the value when the status is CLEAR
- # for certain alarms, this value might not have any meaning
- alarm="${name}"
- ;;
-esac
-
-if [ "${status}" != "WARNING" -a "${status}" != "CRITICAL" -a "${status}" != "CLEAR" ]
-then
- # don't do anything if this is not WARNING, CRITICAL or CLEAR
- echo >&2 "${me}: not sending notification email for ${status} on '${chart}.${name}'"
- exit 0
-elif [ "${old_status}" != "WARNING" -a "${old_status}" != "CRITICAL" -a "${status}" = "CLEAR" ]
-then
- # don't do anything if this is CLEAR, but it was not WARNING or CRITICAL
- echo >&2 "${me}: not sending notification email for ${status} on '${chart}.${name}' (last status was ${old_status})"
- exit 0
-elif [ "${status}" = "CLEAR" ]
-then
- severity="Recovered from ${old_status}"
- if [ $non_clear_duration -gt $duration ]
- then
- raised_for="<br/>(had issues for $(duration4human ${non_clear_duration}))"
- fi
-
-elif [ "${old_status}" = "WARNING" -a "${status}" = "CRITICAL" ]
-then
- severity="Escalated to ${status}"
- if [ $non_clear_duration -gt $duration ]
- then
- raised_for="<br/>(has issues for $(duration4human ${non_clear_duration}))"
- fi
-
-elif [ "${old_status}" = "CRITICAL" -a "${status}" = "WARNING" ]
-then
- severity="Demoted to ${status}"
- if [ $non_clear_duration -gt $duration ]
- then
- raised_for="<br/>(has issues for $(duration4human ${non_clear_duration}))"
- fi
-
-else
- raised_for=
-fi
-
-# send the email
-cat <<EOF | sendmail_from_pipe
-To: root
-Subject: ${hostname} ${status_message} - ${chart}.${name}
-Content-Type: text/html
-
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0; padding: 0;">
-<body style="font-family:'Helvetica Neue','Helvetica',Helvetica,Arial,sans-serif;font-size:14px;width:100%!important;min-height:100%;line-height:1.6;background:#f6f6f6;margin:0;padding:0">
-<table>
- <tbody>
- <tr>
- <td style="vertical-align:top;" valign="top"></td>
- <td width="700" style="vertical-align:top;display:block!important;max-width:700px!important;clear:both!important;margin:0 auto;padding:0" valign="top">
- <div style="max-width:700px;display:block;margin:0 auto;padding:20px">
- <table width="100%" cellpadding="0" cellspacing="0"
- style="background:#fff;border:1px solid #e9e9e9">
- <tbody>
- <tr>
- <td bgcolor="#eee"
- style="padding: 5px 20px 5px 20px;background-color:#eee;">
- <div style="font-size:20px;color:#777;font-weight: bold;">netdata notification</div>
- </td>
- </tr>
- <tr>
- <td bgcolor="${color}"
- style="font-size:16px;vertical-align:top;font-weight:400;text-align:center;margin:0;padding:10px;color:#ffffff;background:${color}!important;border:1px solid ${color};border-top-color:${color}" align="center" valign="top">
- <h1 style="font-weight:400;margin:0">${hostname} ${status_message}</h1>
- </td>
- </tr>
- <tr>
- <td style="vertical-align:top" valign="top">
- <div style="margin:0;padding:20px;max-width:700px">
- <table width="100%" cellpadding="0" cellspacing="0" style="max-width:700px">
- <tbody>
- <tr>
- <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px"
- align="left" valign="top">
- <span>${chart}</span>
- <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Chart</span>
- </td>
- </tr>
- <tr style="margin:0;padding:0">
- <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px"
- align="left" valign="top">
- <span><b>${alarm}</b>${info}</span>
- <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Alarm</span>
- </td>
- </tr>
- <tr>
- <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px"
- align="left" valign="top">
- <span>${family}</span>
- <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Family</span>
- </td>
- </tr>
- <tr style="margin:0;padding:0">
- <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px"
- align="left" valign="top">
- <span>${severity}</span>
- <span style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Severity</span>
- </td>
- </tr>
- <tr style="margin:0;padding:0">
- <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px"
- align="left" valign="top"><span>${date}</span>
- <span>${raised_for}</span> <span
- style="display:block;color:#666666;font-size:12px;font-weight:300;line-height:1;text-transform:uppercase">Time</span>
- </td>
- </tr>
- <!--
- <tr style="margin:0;padding:0">
- <td style="font-size:18px;vertical-align:top;margin:0;padding:0 0 20px">
- <a href="${goto_url}" style="font-size:14px;color:#ffffff;text-decoration:none;line-height:1.5;font-weight:bold;text-align:center;display:inline-block;text-transform:capitalize;background:#35568d;border-width:1px;border-style:solid;border-color:#2b4c86;margin:0;padding:10px 15px" target="_blank">View Netdata</a>
- </td>
- </tr>
- -->
- <tr style="text-align:center;margin:0;padding:0">
- <td style="font-size:11px;vertical-align:top;margin:0;padding:10px 0 0 0;color:#666666"
- align="center" valign="bottom">The source of this alarm is line <code>${src}</code>
- </td>
- </tr>
- <tr style="text-align:center;margin:0;padding:0">
- <td style="font-size:12px;vertical-align:top;margin:0;padding:20px 0 0 0;color:#666666;border-top:1px solid #f0f0f0"
- align="center" valign="bottom">Sent by
- <a href="https://mynetdata.io/" target="_blank">netdata</a>, the real-time performance monitoring.
- </td>
- </tr>
- </tbody>
- </table>
- </div>
- </td>
- </tr>
- </tbody>
- </table>
- </div>
- </td>
- </tr>
- </tbody>
-</table>
-</body>
-</html>
-EOF
+"${0/alarm-email.sh/alarm-notify.sh}" "${@}" # quoted: the install path may contain spaces
diff --git a/plugins.d/alarm-notify.sh b/plugins.d/alarm-notify.sh
new file mode 100755
index 000000000..feec6ceae
--- /dev/null
+++ b/plugins.d/alarm-notify.sh
@@ -0,0 +1,767 @@
+#!/usr/bin/env bash
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+#
+# Script to send alarm notifications for netdata
+#
+# Features:
+# - multiple notification methods
+# - multiple roles per alarm
+# - multiple recipients per role
+# - severity filtering per recipient
+#
+# Supported notification methods:
+# - emails
+# - pushover.net notifications
+# - slack.com notifications
+# - telegram.org notifications
+#
+
+me="${0}"
+
+# check for BASH v4+ (required for associative arrays)
+[ $(( ${BASH_VERSINFO[0]} )) -lt 4 ] && \
+ echo >&2 "${me}: BASH version 4 or later is required (this is ${BASH_VERSION})." && \
+ exit 1
+
+# defaults to allow running this script by hand
+NETDATA_CONFIG_DIR="${NETDATA_CONFIG_DIR-/etc/netdata}"
+NETDATA_CACHE_DIR="${NETDATA_CACHE_DIR-/var/cache/netdata}"
+[ -z "${NETDATA_REGISTRY_URL}" ] && NETDATA_REGISTRY_URL="https://registry.my-netdata.io"
+[ -z "${NETDATA_HOSTNAME}" ] && NETDATA_HOSTNAME="$(hostname)"
+[ -z "${NETDATA_REGISTRY_HOSTNAME}" ] && NETDATA_REGISTRY_HOSTNAME="${NETDATA_HOSTNAME}"
+
+# -----------------------------------------------------------------------------
+# parse command line parameters
+
+roles="${1}" # the roles that should be notified for this event
+host="${2}" # the host generated this event
+unique_id="${3}" # the unique id of this event
+alarm_id="${4}" # the unique id of the alarm that generated this event
+event_id="${5}" # the incremental id of the event, for this alarm id
+when="${6}" # the timestamp this event occurred
+name="${7}" # the name of the alarm, as given in netdata health.d entries
+chart="${8}" # the name of the chart (type.id)
+family="${9}" # the family of the chart
+status="${10}" # the current status : REMOVED, UNITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
+old_status="${11}" # the previous status: REMOVED, UNITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
+value="${12}" # the current value of the alarm
+old_value="${13}" # the previous value of the alarm
+src="${14}" # the line number and file the alarm has been configured
+duration="${15}" # the duration in seconds of the previous alarm state
+non_clear_duration="${16}" # the total duration in seconds this is/was non-clear
+units="${17}" # the units of the value
+info="${18}" # a short description of the alarm
+
+# -----------------------------------------------------------------------------
+# screen statuses we don't need to send a notification
+
+# don't do anything if this is not WARNING, CRITICAL or CLEAR
+if [ "${status}" != "WARNING" -a "${status}" != "CRITICAL" -a "${status}" != "CLEAR" ]
+then
+ echo >&2 "${me}: not sending notification for ${status} on '${chart}.${name}'"
+ exit 1
+fi
+
+# don't do anything if this is CLEAR, but it was not WARNING or CRITICAL
+if [ "${old_status}" != "WARNING" -a "${old_status}" != "CRITICAL" -a "${status}" = "CLEAR" ]
+then
+ echo >&2 "${me}: not sending notification for ${status} on '${chart}.${name}' (last status was ${old_status})"
+ exit 1
+fi
+
+# -----------------------------------------------------------------------------
+# load configuration
+
+# By default fetch images from the global public registry.
+# This is required by default, since all notification methods need to download
+# images via the Internet, and private registries might not be reachable.
+# This can be overwritten at the configuration file.
+images_base_url="https://registry.my-netdata.io"
+
+# needed commands
+# if empty they will be searched in the system path
+curl=
+sendmail=
+
+# enable / disable features
+SEND_SLACK="YES"
+SEND_PUSHOVER="YES"
+SEND_TELEGRAM="YES"
+SEND_EMAIL="YES"
+
+# slack configs
+SLACK_WEBHOOK_URL=
+DEFAULT_RECIPIENT_SLACK=
+declare -A role_recipients_slack=()
+
+# pushover configs
+PUSHOVER_APP_TOKEN=
+DEFAULT_RECIPIENT_PUSHOVER=
+declare -A role_recipients_pushover=()
+
+# telegram configs
+TELEGRAM_BOT_TOKEN=
+DEFAULT_RECIPIENT_TELEGRAM=
+declare -A role_recipients_telegram=()
+
+# email configs
+DEFAULT_RECIPIENT_EMAIL="root"
+declare -A role_recipients_email=()
+
+# load the user configuration
+# this will overwrite the variables above
+if [ -f "${NETDATA_CONFIG_DIR}/health_alarm_notify.conf" ]
+ then
+ source "${NETDATA_CONFIG_DIR}/health_alarm_notify.conf"
+fi
+
+# -----------------------------------------------------------------------------
+# filter a recipient ("name" or "name|severity") based on alarm event severity
+# $1: notification method, $2: recipient spec - returns 0 to notify, 1 to skip
+filter_recipient_by_criticality() {
+    local method="${1}" x="${2}" r s marker
+    r="${x/|*/}" # the recipient
+    s="${x/*|/}" # the severity required for notifying this recipient
+    marker="${NETDATA_CACHE_DIR}/alarm-notify/${method}/${r}/${alarm_id}"
+
+    # no severity filtering for this person
+    [ "${r}" = "${s}" ] && return 0
+
+    # only 'critical' is a supported severity; anything else means no filtering
+    s="${s^^}"
+    [ "${s}" != "CRITICAL" ] && return 0
+
+    # the new or the old status matches the severity
+    if [ "${s}" = "${status}" -o "${s}" = "${old_status}" ]
+    then
+        # keep a marker file while the alarm is raised, so that the recovery is sent even
+        # if old_status does not match; drop it on recovery so it cannot outlive the alarm
+        if [ "${status}" = "CLEAR" ]
+        then
+            rm -f "${marker}"
+        else
+            mkdir -p "${marker%/*}" && touch "${marker}"
+        fi
+        return 0
+    fi
+
+    # it is a cleared alarm we have sent notification for
+    if [ "${status}" != "WARNING" -a "${status}" != "CRITICAL" -a -f "${marker}" ]
+    then
+        rm "${marker}"
+        return 0
+    fi
+
+    return 1
+}
+
+# -----------------------------------------------------------------------------
+# find the recipients' addresses per method
+
+declare -A arr_slack=()
+declare -A arr_pushover=()
+declare -A arr_telegram=()
+declare -A arr_email=()
+
+# netdata may call us with multiple roles, and roles may have multiple but
+# overlapping recipients - so, here we find the unique recipients.
+for x in ${roles//,/ }
+do
+ # the roles 'silent' and 'disabled' mean:
+ # don't send a notification for this role
+ [ "${x}" = "silent" -o "${x}" = "disabled" ] && continue
+
+ # email
+ a="${role_recipients_email[${x}]}"
+ [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_EMAIL}"
+ for r in ${a//,/ }
+ do
+ [ "${r}" != "disabled" ] && filter_recipient_by_criticality email "${r}" && arr_email[${r/|*/}]="1"
+ done
+
+ # pushover
+ a="${role_recipients_pushover[${x}]}"
+ [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PUSHOVER}"
+ for r in ${a//,/ }
+ do
+ [ "${r}" != "disabled" ] && filter_recipient_by_criticality pushover "${r}" && arr_pushover[${r/|*/}]="1"
+ done
+
+ # telegram
+ a="${role_recipients_telegram[${x}]}"
+ [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_TELEGRAM}"
+ for r in ${a//,/ }
+ do
+ [ "${r}" != "disabled" ] && filter_recipient_by_criticality telegram "${r}" && arr_telegram[${r/|*/}]="1"
+ done
+
+ # slack
+ a="${role_recipients_slack[${x}]}"
+ [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_SLACK}"
+ for r in ${a//,/ }
+ do
+ [ "${r}" != "disabled" ] && filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1"
+ done
+done
+
+# build the list of slack recipients (channels)
+to_slack="${!arr_slack[*]}"
+[ -z "${to_slack}" ] && SEND_SLACK="NO"
+
+# build the list of pushover recipients (user tokens)
+to_pushover="${!arr_pushover[*]}"
+[ -z "${to_pushover}" ] && SEND_PUSHOVER="NO"
+
+# check array of telegram recipients (chat ids)
+to_telegram="${!arr_telegram[*]}"
+[ -z "${to_telegram}" ] && SEND_TELEGRAM="NO"
+
+# build the list of email recipients (email addresses)
+to_email=
+for x in "${!arr_email[@]}"
+do
+ [ ! -z "${to_email}" ] && to_email="${to_email}, "
+ to_email="${to_email}${x}"
+done
+[ -z "${to_email}" ] && SEND_EMAIL="NO"
+
+
+# -----------------------------------------------------------------------------
+# verify the delivery methods supported
+
+# check slack
+[ -z "${SLACK_WEBHOOK_URL}" ] && SEND_SLACK="NO"
+
+# check pushover
+[ -z "${PUSHOVER_APP_TOKEN}" ] && SEND_PUSHOVER="NO"
+
+# check telegram
+[ -z "${TELEGRAM_BOT_TOKEN}" ] && SEND_TELEGRAM="NO"
+
+if [ \( "${SEND_PUSHOVER}" = "YES" -o "${SEND_SLACK}" = "YES" -o "${SEND_TELEGRAM}" = "YES" \) -a -z "${curl}" ]
+ then
+ curl="$(which curl 2>/dev/null || command -v curl 2>/dev/null)"
+ if [ -z "${curl}" ]
+ then
+ SEND_PUSHOVER="NO"
+ SEND_TELEGRAM="NO"
+ SEND_SLACK="NO"
+ fi
+fi
+
+if [ "${SEND_EMAIL}" = "YES" -a -z "${sendmail}" ]
+ then
+ sendmail="$(which sendmail 2>/dev/null || command -v sendmail 2>/dev/null)"
+ [ -z "${sendmail}" ] && SEND_EMAIL="NO"
+fi
+
+# check that we have at least a method enabled
+if [ "${SEND_EMAIL}" != "YES" -a "${SEND_PUSHOVER}" != "YES" -a "${SEND_TELEGRAM}" != "YES" -a "${SEND_SLACK}" != "YES" ]
+ then
+ echo >&2 "All notification methods are disabled. Not sending a notification."
+ exit 1
+fi
+
+# -----------------------------------------------------------------------------
+# get the system hostname
+
+[ -z "${host}" ] && host="${NETDATA_HOSTNAME}"
+[ -z "${host}" ] && host="${NETDATA_REGISTRY_HOSTNAME}"
+[ -z "${host}" ] && host="$(hostname 2>/dev/null)"
+
+# -----------------------------------------------------------------------------
+# get the date the alarm happened
+
+date="$(date --date=@${when} 2>/dev/null)"
+[ -z "${date}" ] && date="$(date 2>/dev/null)"
+
+# -----------------------------------------------------------------------------
+# URL encode a string: prints the result and also leaves it in REPLY
+# $1: the string to encode (every byte except [-_.~a-zA-Z0-9] becomes %xx)
+urlencode() {
+    # LC_ALL=C (local to this function) makes bash index the string per byte and
+    # keeps the ranges below ASCII-only, so multi-byte characters become %xx per byte
+    local string="${1}" strlen encoded pos c o LC_ALL=C
+    strlen=${#string}
+    for (( pos=0 ; pos<strlen ; pos++ ))
+    do
+        c=${string:$pos:1}
+        case "$c" in
+            [-_.~a-zA-Z0-9])
+                o="${c}"
+                ;;
+            *)
+                printf -v o '%%%02x' "'$c"
+                ;;
+        esac
+        encoded+="${o}"
+    done
+
+    REPLY="${encoded}"
+    echo "${REPLY}"
+}
+
+# -----------------------------------------------------------------------------
+# convert a duration in seconds, to a human readable duration
+# using DAYS, HOURS, MINUTES, SECONDS (at most the two most significant units)
+# $1: seconds (empty counts as 0) - prints the text and also leaves it in REPLY
+# e.g. 90061 -> "1 day and 1 hour"
+duration4human() {
+    local s="${1}" d=0 h=0 m=0 ds="day" hs="hour" ms="minute" ss="second" ret
+
+    # round to the smallest unit that will be shown BEFORE splitting, so that the
+    # carry propagates (never "1 day and 24 hours" or "1 hour and 60 minutes")
+    if (( s >= 86400 ))
+    then
+        s=$(( (s + 1800) / 3600 * 3600 ))
+    elif (( s >= 3600 ))
+    then
+        s=$(( (s + 30) / 60 * 60 ))
+    fi
+
+    # split into days, hours, minutes and seconds
+    d=$(( s / 86400 ))
+    s=$(( s - (d * 86400) ))
+    h=$(( s / 3600 ))
+    s=$(( s - (h * 3600) ))
+    m=$(( s / 60 ))
+    s=$(( s - (m * 60) ))
+
+    # pluralize the unit names
+    [ ${d} -gt 1 ] && ds="days"
+    [ ${h} -gt 1 ] && hs="hours"
+    [ ${m} -gt 1 ] && ms="minutes"
+    [ ${s} -gt 1 ] && ss="seconds"
+
+    # report only the two most significant units
+    if [ ${d} -gt 0 ]
+    then
+        ret="${d} ${ds}"
+        [ ${h} -gt 0 ] && ret="${ret} and ${h} ${hs}"
+    elif [ ${h} -gt 0 ]
+    then
+        ret="${h} ${hs}"
+        [ ${m} -gt 0 ] && ret="${ret} and ${m} ${ms}"
+    elif [ ${m} -gt 0 ]
+    then
+        ret="${m} ${ms}"
+        [ ${s} -gt 0 ] && ret="${ret} and ${s} ${ss}"
+    else
+        ret="${s} ${ss}"
+    fi
+
+    # hand the result back both in REPLY and on stdout
+    REPLY="${ret}"
+    echo "${REPLY}"
+}
+
+# -----------------------------------------------------------------------------
+# email sender
+
+send_email() {
+ local ret=
+ if [ "${SEND_EMAIL}" = "YES" ]
+ then
+
+ "${sendmail}" -t
+ ret=$?
+
+ if [ $ret -eq 0 ]
+ then
+ echo >&2 "${me}: Sent email notification for: ${host} ${chart}.${name} is ${status} to '${to_email}'"
+ return 0
+ else
+ echo >&2 "${me}: Failed to send email notification for: ${host} ${chart}.${name} is ${status} to '${to_email}' with error code ${ret}."
+ return 1
+ fi
+ fi
+
+ return 1
+}
+
+# -----------------------------------------------------------------------------
+# pushover sender: posts one message per user token to the pushover.net API
+# returns 0 if at least one message got HTTP 200, 1 otherwise (or when disabled)
+send_pushover() {
+    local apptoken="${1}" usertokens="${2}" when="${3}" url="${4}" status="${5}" title="${6}" message="${7}" httpcode sent=0 user priority
+
+    if [ "${SEND_PUSHOVER}" = "YES" -a ! -z "${apptoken}" -a ! -z "${usertokens}" -a ! -z "${title}" -a ! -z "${message}" ]
+    then
+        # map the alarm status to a pushover priority
+        # https://pushover.net/api
+        priority=-2
+        case "${status}" in
+            CLEAR) priority=-1;; # low priority: no sound or vibration
+            WARNING) priority=0;; # normal priority: respect quiet hours
+            CRITICAL) priority=1;; # high priority: bypass quiet hours
+            *) priority=-2;; # lowest priority: no notification at all
+        esac
+
+        for user in ${usertokens}
+        do
+            httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null \
+                --form-string "token=${apptoken}" \
+                --form-string "user=${user}" \
+                --form-string "html=1" \
+                --form-string "title=${title}" \
+                --form-string "message=${message}" \
+                --form-string "timestamp=${when}" \
+                --form-string "url=${url}" \
+                --form-string "url_title=Open netdata dashboard to view the alarm" \
+                --form-string "priority=${priority}" \
+                https://api.pushover.net/1/messages.json)
+
+            if [ "${httpcode}" == "200" ]
+            then
+                echo >&2 "${me}: Sent pushover notification for: ${host} ${chart}.${name} is ${status} to '${user}'"
+                sent=$((sent + 1))
+            else
+                echo >&2 "${me}: Failed to send pushover notification for: ${host} ${chart}.${name} is ${status} to '${user}' with HTTP error code ${httpcode}."
+            fi
+        done
+
+        [ ${sent} -gt 0 ] && return 0
+    fi
+
+    return 1
+}
+
+
+# -----------------------------------------------------------------------------
+# telegram sender
+
+send_telegram() {
+ local bottoken="${1}" chatids="${2}" message="${3}" httpcode sent=0 chatid disableNotification=""
+
+ if [ "${status}" = "CLEAR" ]; then disableNotification="--data-urlencode disable_notification=true"; fi
+
+ if [ "${SEND_TELEGRAM}" = "YES" -a ! -z "${bottoken}" -a ! -z "${chatids}" -a ! -z "${message}" ];
+ then
+ for chatid in ${chatids}
+ do
+ # https://core.telegram.org/bots/api#sendmessage
+ httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null ${disableNotification} \
+ --data-urlencode "parse_mode=HTML" \
+ --data-urlencode "disable_web_page_preview=true" \
+ --data-urlencode "text=$message" \
+ "https://api.telegram.org/bot${bottoken}/sendMessage?chat_id=$chatid")
+
+ if [ "${httpcode}" == "200" ]
+ then
+ echo >&2 "${me}: Sent telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}'"
+ sent=$((sent + 1))
+ elif [ "${httpcode}" == "401" ]
+ then
+ echo >&2 "${me}: Failed to send telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}': Wrong bot token."
+ else
+ echo >&2 "${me}: Failed to send telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}' with HTTP error code ${httpcode}."
+ fi
+ done
+
+ [ ${sent} -gt 0 ] && return 0
+ fi
+
+ return 1
+}
+
+# -----------------------------------------------------------------------------
+# slack sender
+
+send_slack() {
+ local webhook="${1}" channels="${2}" httpcode sent=0 channel color payload
+
+ [ "${SEND_SLACK}" != "YES" ] && return 1
+
+ case "${status}" in
+ WARNING) color="warning" ;;
+ CRITICAL) color="danger" ;;
+ CLEAR) color="good" ;;
+ *) color="#777777" ;;
+ esac
+
+ for channel in ${channels}
+ do
+ payload="$(cat <<EOF
+ {
+ "channel": "#${channel}",
+ "username": "netdata on ${host}",
+ "icon_url": "${images_base_url}/images/seo-performance-128.png",
+ "text": "${host} ${status_message}, \`${chart}\` (_${family}_), *${alarm}*",
+ "attachments": [
+ {
+ "fallback": "${alarm} - ${chart} (${family}) - ${info}",
+ "color": "${color}",
+ "title": "${alarm}",
+ "title_link": "${goto_url}",
+ "text": "${info}",
+ "fields": [
+ {
+ "title": "${chart}",
+ "short": true
+ },
+ {
+ "title": "${family}",
+ "short": true
+ }
+ ],
+ "thumb_url": "${image}",
+ "footer": "<${goto_url}|${host}>",
+ "ts": ${when}
+ }
+ ]
+ }
+EOF
+ )"
+
+ httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null -X POST --data-urlencode "payload=${payload}" "${webhook}")
+ if [ "${httpcode}" == "200" ]
+ then
+ echo >&2 "${me}: Sent slack notification for: ${host} ${chart}.${name} is ${status} to '${channel}'"
+ sent=$((sent + 1))
+ else
+ echo >&2 "${me}: Failed to send slack notification for: ${host} ${chart}.${name} is ${status} to '${channel}', with HTTP error code ${httpcode}."
+ fi
+ done
+
+ [ ${sent} -gt 0 ] && return 0
+
+ return 1
+}
+
+
+# -----------------------------------------------------------------------------
+# prepare the content of the notification
+
+# the url to send the user on click
+urlencode "${NETDATA_REGISTRY_HOSTNAME}" >/dev/null; url_host="${REPLY}"
+urlencode "${chart}" >/dev/null; url_chart="${REPLY}"
+urlencode "${family}" >/dev/null; url_family="${REPLY}"
+urlencode "${name}" >/dev/null; url_name="${REPLY}"
+goto_url="${NETDATA_REGISTRY_URL}/goto-host-from-alarm.html?host=${url_host}&chart=${url_chart}&family=${url_family}&alarm=${url_name}&alarm_unique_id=${unique_id}&alarm_id=${alarm_id}&alarm_event_id=${event_id}"
+
+# the severity of the alarm
+severity="${status}"
+
+# the time the alarm was raised
+duration4human ${duration} >/dev/null; duration_txt="${REPLY}"
+duration4human ${non_clear_duration} >/dev/null; non_clear_duration_txt="${REPLY}"
+raised_for="(was ${old_status,,} for ${duration_txt})"
+
+# the key status message
+status_message="status unknown"
+
+# the color of the alarm
+color="grey"
+
+# the alarm value
+alarm="${name//_/ } = ${value} ${units}"
+
+# the image of the alarm
+image="${images_base_url}/images/seo-performance-128.png"
+
+# prepare the title based on status
+case "${status}" in
+ CRITICAL)
+ image="${images_base_url}/images/alert-128-red.png"
+ status_message="is critical"
+ color="#ca414b"
+ ;;
+
+ WARNING)
+ image="${images_base_url}/images/alert-128-orange.png"
+ status_message="needs attention"
+ color="#caca4b"
+ ;;
+
+ CLEAR)
+ image="${images_base_url}/images/check-mark-2-128-green.png"
+ status_message="recovered"
+ color="#77ca6d"
+
+ # don't show the value when the status is CLEAR
+ # for certain alarms, this value might not have any meaning
+ alarm="${name//_/ } ${raised_for}"
+ ;;
+esac
+
+if [ "${status}" = "CLEAR" ]
+then
+ severity="Recovered from ${old_status}"
+ if [ $non_clear_duration -gt $duration ]
+ then
+ raised_for="(alarm was raised for ${non_clear_duration_txt})"
+ fi
+
+elif [ "${old_status}" = "WARNING" -a "${status}" = "CRITICAL" ]
+then
+ severity="Escalated to ${status}"
+ if [ $non_clear_duration -gt $duration ]
+ then
+ raised_for="(alarm is raised for ${non_clear_duration_txt})"
+ fi
+
+elif [ "${old_status}" = "CRITICAL" -a "${status}" = "WARNING" ]
+then
+ severity="Demoted to ${status}"
+ if [ $non_clear_duration -gt $duration ]
+ then
+ raised_for="(alarm is raised for ${non_clear_duration_txt})"
+ fi
+
+else
+ raised_for=
+fi
+
+# prepare HTML versions of elements
+info_html=
+[ ! -z "${info}" ] && info_html=" <small><br/>${info}</small>"
+
+raised_for_html=
+[ ! -z "${raised_for}" ] && raised_for_html="<br/><small>${raised_for}</small>"
+
+# -----------------------------------------------------------------------------
+# send the slack notification
+
+# slack aggregates posts from the same username
+# so we use "${host} ${status}" as the bot username, to make them diff
+
+send_slack "${SLACK_WEBHOOK_URL}" "${to_slack}"
+SENT_SLACK=$?
+
+# -----------------------------------------------------------------------------
+# send the pushover notification
+
+send_pushover "${PUSHOVER_APP_TOKEN}" "${to_pushover}" "${when}" "${goto_url}" "${status}" "${host} ${status_message} - ${name//_/ } - ${chart}" "
+<font color=\"${color}\"><b>${alarm}</b></font>${info_html}<br/>&nbsp;
+<small><b>${chart}</b><br/>Chart<br/>&nbsp;</small>
+<small><b>${family}</b><br/>Family<br/>&nbsp;</small>
+<small><b>${severity}</b><br/>Severity<br/>&nbsp;</small>
+<small><b>${date}${raised_for_html}</b><br/>Time<br/>&nbsp;</small>
+<a href=\"${goto_url}\">View Netdata</a><br/>&nbsp;
+<small><small>The source of this alarm is line ${src}</small></small>
+"
+
+SENT_PUSHOVER=$?
+
+# -----------------------------------------------------------------------------
+# send the telegram.org message
+
+# https://core.telegram.org/bots/api#formatting-options
+telegram_message="<b>${severity}"
+[ "${status_message}" != "recovered" ] && telegram_message="${telegram_message}, ${status_message}"
+telegram_message="${telegram_message}
+${chart} (${family})</b>
+<a href=\"${goto_url}\">${alarm}</a>
+<i>${info}</i>"
+
+send_telegram "${TELEGRAM_BOT_TOKEN}" "${to_telegram}" "${telegram_message}"
+
+SENT_TELEGRAM=$?
+
+# -----------------------------------------------------------------------------
+# send the email
+
+send_email <<EOF
+To: ${to_email}
+Subject: ${host} ${status_message} - ${name//_/ } - ${chart}
+Content-Type: text/html
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0; padding: 0;">
+<body style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 14px; width: 100% !important; min-height: 100%; line-height: 1.6; background: #f6f6f6; margin:0; padding: 0;">
+<table>
+ <tbody>
+ <tr>
+ <td style="vertical-align: top;" valign="top"></td>
+ <td width="700" style="vertical-align: top; display: block !important; max-width: 700px !important; clear: both !important; margin: 0 auto; padding: 0;" valign="top">
+ <div style="max-width: 700px; display: block; margin: 0 auto; padding: 20px;">
+ <table width="100%" cellpadding="0" cellspacing="0" style="background: #fff; border: 1px solid #e9e9e9;">
+ <tbody>
+ <tr>
+ <td bgcolor="#eee" style="padding: 5px 20px 5px 20px; background-color: #eee;">
+ <div style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 20px; color: #777; font-weight: bold;">netdata notification</div>
+ </td>
+ </tr>
+ <tr>
+ <td bgcolor="${color}" style="font-size: 16px; vertical-align: top; font-weight: 400; text-align: center; margin: 0; padding: 10px; color: #ffffff; background: ${color} !important; border: 1px solid ${color}; border-top-color: ${color};" align="center" valign="top">
+ <h1 style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-weight: 400; margin: 0;">${host} ${status_message}</h1>
+ </td>
+ </tr>
+ <tr>
+ <td style="vertical-align: top;" valign="top">
+ <div style="margin: 0; padding: 20px; max-width: 700px;">
+ <table width="100%" cellpadding="0" cellspacing="0" style="max-width:700px">
+ <tbody>
+ <tr>
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding:0 0 20px;" align="left" valign="top">
+ <span>${chart}</span>
+ <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Chart</span>
+ </td>
+ </tr>
+ <tr style="margin: 0; padding: 0;">
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
+ <span><b>${alarm}</b>${info_html}</span>
+ <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Alarm</span>
+ </td>
+ </tr>
+ <tr>
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
+ <span>${family}</span>
+ <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Family</span>
+ </td>
+ </tr>
+ <tr style="margin: 0; padding: 0;">
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top">
+ <span>${severity}</span>
+ <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Severity</span>
+ </td>
+ </tr>
+ <tr style="margin: 0; padding: 0;">
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;" align="left" valign="top"><span>${date}</span>
+ <span>${raised_for_html}</span> <span style="display: block; color: #666666; font-size: 12px; font-weight: 300; line-height: 1; text-transform: uppercase;">Time</span>
+ </td>
+ </tr>
+ <tr style="margin: 0; padding: 0;">
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 18px; vertical-align: top; margin: 0; padding: 0 0 20px;">
+ <a href="${goto_url}" style="font-size: 14px; color: #ffffff; text-decoration: none; line-height: 1.5; font-weight: bold; text-align: center; display: inline-block; text-transform: capitalize; background: #35568d; border-width: 1px; border-style: solid; border-color: #2b4c86; margin: 0; padding: 10px 15px;" target="_blank">View Netdata</a>
+ </td>
+ </tr>
+ <tr style="text-align: center; margin: 0; padding: 0;">
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 11px; vertical-align: top; margin: 0; padding: 10px 0 0 0; color: #666666;" align="center" valign="bottom">The source of this alarm is line <code>${src}</code>
+ </td>
+ </tr>
+ <tr style="text-align: center; margin: 0; padding: 0;">
+ <td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 12px; vertical-align: top; margin:0; padding: 20px 0 0 0; color: #666666; border-top: 1px solid #f0f0f0;" align="center" valign="bottom">Sent by
+ <a href="https://mynetdata.io/" target="_blank">netdata</a>, the real-time performance monitoring.
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </td>
+ </tr>
+ </tbody>
+</table>
+</body>
+</html>
+EOF
+
+SENT_EMAIL=$?
+
+# -----------------------------------------------------------------------------
+# let netdata know
+
+# we did send something: any method that returned 0 is enough (separate tests joined with ||)
+if [ "${SENT_EMAIL}" -eq 0 ] || [ "${SENT_PUSHOVER}" -eq 0 ] || [ "${SENT_TELEGRAM}" -eq 0 ] || [ "${SENT_SLACK}" -eq 0 ]; then exit 0; fi
+
+# we did not send anything
+exit 1
diff --git a/plugins.d/cgroup-name.sh b/plugins.d/cgroup-name.sh
index 8bfc984c2..1c6f564b4 100755
--- a/plugins.d/cgroup-name.sh
+++ b/plugins.d/cgroup-name.sh
@@ -71,7 +71,7 @@ if [ -z "${NAME}" ]
fi
[ -z "${NAME}" ] && NAME="${CGROUP}"
- [ ${#NAME} -gt 50 ] && NAME="${NAME:0:50}"
+ [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}"
fi
echo >&2 "${0}: cgroup '${CGROUP}' is called '${NAME}'"
diff --git a/plugins.d/charts.d.plugin b/plugins.d/charts.d.plugin
index 9aaadc168..df9998ece 100755
--- a/plugins.d/charts.d.plugin
+++ b/plugins.d/charts.d.plugin
@@ -48,10 +48,6 @@ require_cmd awk || exit 1
# insternal defaults
# netdata exposes a few environment variables for us
-pause_method="sleep" # use either "suspend" or "sleep"
- # DO NOT USE SUSPEND - LINUX WILL SUSPEND NETDATA TOO
- # THE WHOLE PROCESS GROUP - NOT JUST THE SHELL
-
pluginsd="${NETDATA_PLUGINS_DIR}"
[ -z "$pluginsd" ] && pluginsd="$( dirname $PROGRAM_FILE )"
@@ -154,6 +150,24 @@ done
# -----------------------------------------------------------------------------
+# loop control
+
+# defaults (1 second resolution), used unless loopsleepms.sh.inc overrides them
+LOOPSLEEPMS_HIGHRES=0
+now_ms=
+current_time_ms_default() {
+ now_ms="$(date +'%s')000"
+}
+current_time_ms="current_time_ms_default"
+current_time_ms_accuracy=1
+mysleep="sleep"
+
+# if found and included, this file sets ${current_time_ms}, ${mysleep},
+# ${current_time_ms_accuracy} and LOOPSLEEPMS_HIGHRES for high resolution
+# timing, and defines loopsleepms() for precise looping.
+. "$pluginsd/loopsleepms.sh.inc"
+
+# -----------------------------------------------------------------------------
# load my configuration
if [ -f "$myconfig" ]
@@ -172,13 +186,6 @@ else
echo >&2 "$PROGRAM_NAME: configuration file '$myconfig' not found. Using defaults."
fi
-if [ "$pause_method" = "suspend" ]
-then
- # enable bash job control
- # this is required for suspend to work
- set -m
-fi
-
# we check for the timeout command, after we load our
# configuration, so that the user may overwrite the
# timeout command we use, providing a function that
@@ -205,27 +212,6 @@ fi
# -----------------------------------------------------------------------------
-# loop control
-
-# default sleep function
-LOOPSLEEPMS_HIGHRES=0
-loopsleepms() {
- [ "$1" = "tellwork" ] && shift
- sleep $1
-}
-
-now_ms=
-current_time_ms() {
- now_ms="$(date +'%s')000"
-}
-
-# if found and included, this file overwrites loopsleepms()
-# and current_time_ms() with a high resolution timer function
-# for precise looping.
-. "$pluginsd/loopsleepms.sh.inc"
-
-
-# -----------------------------------------------------------------------------
# library functions
fixid() {
@@ -537,7 +523,7 @@ global_update() {
next_ms x seconds millis
# return the current time in ms in $now_ms
- current_time_ms
+ ${current_time_ms}
exit_at=$(( now_ms + (restart_timeout * 1000) ))
@@ -562,16 +548,16 @@ global_update() {
next_charts=()
# return the current time in ms in $now_ms
- current_time_ms
+ ${current_time_ms}
for chart in "${now_charts[@]}"
do
- # echo >&2 "DEBUG: chart: $chart last: ${charts_last_update[$chart]}, next: ${charts_next_update[$chart]}, now: ${now_ms}"
+ #echo >&2 " DEBUG: chart: $chart last: ${charts_last_update[$chart]}, next: ${charts_next_update[$chart]}, now: ${now_ms}"
if [ ${now_ms} -ge ${charts_next_update[$chart]} ]
then
last_ms=${charts_last_update[$chart]}
dt=$(( (now_ms - last_ms) ))
- # echo >&2 "DEBUG: chart: $chart last: ${charts_last_update[$chart]}, next: ${charts_next_update[$chart]}, now: ${now_ms}, dt: ${dt}"
+ #echo >&2 " DEBUG: chart: $chart last: ${charts_last_update[$chart]}, next: ${charts_next_update[$chart]}, now: ${now_ms}, dt: ${dt}"
charts_last_update[$chart]=${now_ms}
@@ -590,11 +576,12 @@ global_update() {
fi
exec_start_ms=$now_ms
+ #echo >&2 " EXEC: $chart$charts_update $dt"
$chart$charts_update $dt
ret=$?
# return the current time in ms in $now_ms
- current_time_ms; exec_end_ms=$now_ms
+ ${current_time_ms}; exec_end_ms=$now_ms
echo "BEGIN netdata.plugin_chartsd_$chart $dt"
echo "SET run_time = $(( exec_end_ms - exec_start_ms ))"
@@ -620,27 +607,29 @@ global_update() {
fi
done
- if [ "$pause_method" = "suspend" ]
- then
- echo "STOPPING_WAKE_ME_UP_PLEASE"
- suspend || ( echo >&2 "$PROGRAM_NAME: suspend returned error $?, falling back to sleep."; loopsleepms $debug_time $update_every $time_divisor)
- else
- # wait the time you are required to
- next_ms=$((now_ms + (update_every * 1000 * 100) ))
- for x in "${charts_next_update[@]}"; do [ ${x} -lt ${next_ms} ] && next_ms=${x}; done
- next_ms=$((next_ms - now_ms))
+ # wait the time you are required to
+ next_ms=$((now_ms + (update_every * 1000 * 100) ))
+ for x in "${charts_next_update[@]}"; do [ ${x} -lt ${next_ms} ] && next_ms=${x}; done
+ next_ms=$((next_ms - now_ms))
- if [ ${LOOPSLEEPMS_HIGHRES} -eq 1 -a ${next_ms} -gt 0 ]
+ if [ ${LOOPSLEEPMS_HIGHRES} -eq 1 -a ${next_ms} -gt 0 ]
+ then
+ next_ms=$(( next_ms + current_time_ms_accuracy ))
+ seconds=$(( next_ms / 1000 ))
+ millis=$(( next_ms % 1000 ))
+ if [ ${millis} -lt 10 ]
then
- seconds=$(( next_ms / 1000 ))
- millis=$(( next_ms % 1000 ))
- [ ${millis} -lt 10 ] && millis="0${millis}"
- [ ${millis} -lt 100 ] && millis="0${millis}"
- [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: sleeping for ${seconds}.${millis} seconds."
- sleep ${seconds}.${millis}
- else
- sleep $update_every
+ millis="00${millis}"
+ elif [ ${millis} -lt 100 ]
+ then
+ millis="0${millis}"
fi
+
+ [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: sleeping for ${seconds}.${millis} seconds."
+ ${mysleep} ${seconds}.${millis}
+ else
+ [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: sleeping for ${update_every} seconds."
+ ${mysleep} $update_every
fi
test ${now_ms} -ge ${exit_at} && exit 0
diff --git a/plugins.d/loopsleepms.sh.inc b/plugins.d/loopsleepms.sh.inc
index 02ab694d2..6de93043c 100644
--- a/plugins.d/loopsleepms.sh.inc
+++ b/plugins.d/loopsleepms.sh.inc
@@ -1,10 +1,5 @@
# no need for shebang - this file is included from other scripts
-# this function is used to sleep a fraction of a second
-# it calculates the difference between every time is called
-# and tries to align the sleep time to give you exactly the
-# loop you need.
-
LOOPSLEEP_DATE="$(which date)"
if [ -z "$LOOPSLEEP_DATE" ]
then
@@ -12,17 +7,14 @@ if [ -z "$LOOPSLEEP_DATE" ]
exit 1
fi
-LOOPSLEEPMS_LASTRUN=0
-LOOPSLEEPMS_LASTSLEEP=0
-LOOPSLEEPMS_LASTWORK=0
+# -----------------------------------------------------------------------------
+# use the date command as a high resolution timer
+now_ms=
LOOPSLEEPMS_HIGHRES=1
test "$($LOOPSLEEP_DATE +%N)" = "%N" && LOOPSLEEPMS_HIGHRES=0
-
-now_ms=
-current_time_ms() {
- # if high resolution is not supported
- # just sleep the time requested, in seconds
+test -z "$($LOOPSLEEP_DATE +%N)" && LOOPSLEEPMS_HIGHRES=0
+current_time_ms_from_date() {
if [ $LOOPSLEEPMS_HIGHRES -eq 0 ]
then
now_ms="$($LOOPSLEEP_DATE +'%s')000"
@@ -31,60 +23,167 @@ current_time_ms() {
fi
}
+# -----------------------------------------------------------------------------
+# use /proc/uptime as a high resolution timer
+
+current_time_ms_from_date
+current_time_ms_from_uptime_started="${now_ms}"
+current_time_ms_from_uptime_last="${now_ms}"
+current_time_ms_from_uptime_first=0
+# set ${now_ms} (milliseconds) from the first field of /proc/uptime, which is
+# "seconds.fraction", rebased as:
+#   current_time_ms_from_uptime_started - current_time_ms_from_uptime_first + uptime_ms
+# if /proc/uptime cannot be read, or the new time is older than the last one,
+# ${current_time_ms} is repointed to current_time_ms_from_date().
+current_time_ms_from_uptime() {
+    local up rest arr=() frac n
+
+    read -r up rest </proc/uptime
+    if [ $? -ne 0 ]
+    then
+        echo >&2 "$0: Cannot read /proc/uptime - falling back to current_time_ms_from_date()."
+        current_time_ms="current_time_ms_from_date"
+        current_time_ms_from_date
+        current_time_ms_accuracy=1
+        return
+    fi
+
+    # split into arr[0]=seconds and arr[1]=fraction
+    arr=(${up//./ })
+
+    # scale the fraction to exactly 3 digits: pad short ones with zeros and
+    # cut longer ones, so that any precision yields milliseconds
+    frac="${arr[1]}000"
+    n="${arr[0]}${frac:0:3}"
+
+    # 10# forces base 10: with a leading zero (uptime below one second)
+    # bash arithmetic would otherwise parse ${n} as an octal number
+    now_ms=$((current_time_ms_from_uptime_started - current_time_ms_from_uptime_first + 10#${n}))
+
+    if [ "${now_ms}" -lt "${current_time_ms_from_uptime_last}" ]
+    then
+        echo >&2 "$0: Cannot use current_time_ms_from_uptime() - new time ${now_ms} is older than the last ${current_time_ms_from_uptime_last} - falling back to current_time_ms_from_date()."
+        current_time_ms="current_time_ms_from_date"
+        current_time_ms_from_date
+        current_time_ms_accuracy=1
+    fi
+
+    current_time_ms_from_uptime_last="${now_ms}"
+}
+current_time_ms_from_uptime
+current_time_ms_from_uptime_first="$((now_ms - current_time_ms_from_uptime_started))"
+current_time_ms_from_uptime_last="${current_time_ms_from_uptime_first}"
+current_time_ms="current_time_ms_from_uptime"
+current_time_ms_accuracy=10
+if [ "${current_time_ms_from_uptime_first}" -eq 0 ]
+ then
+ echo >&2 "$0: Invalid setup for current_time_ms_from_uptime() - falling back to current_time_ms_from_date()."
+ current_time_ms="current_time_ms_from_date"
+ current_time_ms_accuracy=1
+fi
+
+# -----------------------------------------------------------------------------
+# use read with timeout for sleep
+
+mysleep="mysleep_read"
+# ':-' so that an empty NETDATA_CACHE_DIR also falls back to /tmp; -p: the path must be a fifo
+mysleep_fifo="${NETDATA_CACHE_DIR:-/tmp}/.netdata_bash_sleep_timer_fifo"
+[ ! -p "${mysleep_fifo}" ] && mkfifo "${mysleep_fifo}"
+[ ! -p "${mysleep_fifo}" ] && mysleep="sleep"
+# sleep ${1} seconds via the timeout of read (status > 128); ret is local so a caller's ${ret} survives
+mysleep_read() {
+    read -t "${1}" <>"${mysleep_fifo}"
+    local ret=$?
+    if [ ${ret} -le 128 ]
+    then
+        echo >&2 "$0: Cannot use read for sleeping (return code ${ret})."
+        mysleep="sleep"
+        ${mysleep} "${1}"
+    fi
+}
+
+
+# -----------------------------------------------------------------------------
+# this function is used to sleep a fraction of a second
+# it calculates the difference between every time is called
+# and tries to align the sleep time to give you exactly the
+# loop you need.
+
+LOOPSLEEPMS_LASTRUN=0
+LOOPSLEEPMS_NEXTRUN=0
+LOOPSLEEPMS_LASTSLEEP=0
+LOOPSLEEPMS_LASTWORK=0
+
loopsleepms() {
- local tellwork=0 t="$1" div s m now mstosleep
+ local tellwork=0 t="${1}" div s m now mstosleep
- if [ "$t" = "tellwork" ]
+ if [ "${t}" = "tellwork" ]
then
tellwork=1
shift
- t="$1"
+ t="${1}"
fi
- div="${2-100}"
# $t = the time in seconds to wait
# if high resolution is not supported
# just sleep the time requested, in seconds
- if [ $LOOPSLEEPMS_HIGHRES -eq 0 ]
+ if [ ${LOOPSLEEPMS_HIGHRES} -eq 0 ]
then
- sleep $t
+ sleep ${t}
return
fi
- # get the current time, in ms
- # milliseconds since epoch (1-1-1970)
- now="$(( $( $LOOPSLEEP_DATE +'%s * 1000 + %-N / 1000000' ) ))"
-
- # calculate required sleep in ms
- t=$((t * 1000 * div / 100))
-
- # this is our first run
- # just wait the requested time
- test $LOOPSLEEPMS_LASTRUN -eq 0 && LOOPSLEEPMS_LASTRUN=$now
+ # get the current time, in ms in ${now_ms}
+ ${current_time_ms}
# calculate ms since last run
- LOOPSLEEPMS_LASTWORK=$((now - LOOPSLEEPMS_LASTRUN - LOOPSLEEPMS_LASTSLEEP))
+ [ ${LOOPSLEEPMS_LASTRUN} -gt 0 ] && \
+ LOOPSLEEPMS_LASTWORK=$((now_ms - LOOPSLEEPMS_LASTRUN - LOOPSLEEPMS_LASTSLEEP))
# echo "# last loop's work took $LOOPSLEEPMS_LASTWORK ms"
+
+ # remember this run
+ LOOPSLEEPMS_LASTRUN=${now_ms}
+
+ # calculate the next run
+ LOOPSLEEPMS_NEXTRUN=$(( ( now_ms - ( now_ms % ( t * 1000 ) ) ) + ( t * 1000 ) ))
# calculate ms to sleep
- mstosleep=$(( t - LOOPSLEEPMS_LASTWORK ))
+ mstosleep=$(( LOOPSLEEPMS_NEXTRUN - now_ms + current_time_ms_accuracy ))
# echo "# mstosleep is $mstosleep ms"
# if we are too slow, sleep some time
- test $mstosleep -lt 200 && mstosleep=200
+ test ${mstosleep} -lt 200 && mstosleep=200
- s=$((mstosleep / 1000))
- m=$((mstosleep - (s * 1000)))
+ s=$(( mstosleep / 1000 ))
+ m=$(( mstosleep - (s * 1000) ))
+ [ "${m}" -lt 100 ] && m="0${m}"
+ [ "${m}" -lt 10 ] && m="0${m}"
- test $tellwork -eq 1 && echo >&2 " >>> PERFORMANCE >>> WORK TOOK $LOOPSLEEPMS_LASTWORK ms ( $((LOOPSLEEPMS_LASTWORK * 100 / 1000)).$((LOOPSLEEPMS_LASTWORK % 10))% cpu ) >>> SLEEPING $mstosleep ms"
+ test $tellwork -eq 1 && echo >&2 " >>> PERFORMANCE >>> WORK TOOK ${LOOPSLEEPMS_LASTWORK} ms ( $((LOOPSLEEPMS_LASTWORK * 100 / 1000)).$((LOOPSLEEPMS_LASTWORK % 10))% cpu ) >>> SLEEPING ${mstosleep} ms"
- # echo "# sleeping $s.$m"
+ # echo "# sleeping ${s}.${m}"
# echo
- sleep $s.$m
+ ${mysleep} ${s}.${m}
# keep the values we need
# for our next run
- LOOPSLEEPMS_LASTRUN=$now
LOOPSLEEPMS_LASTSLEEP=$mstosleep
}
+
+# test it
+#while [ 1 ]
+#do
+# r=$(( (RANDOM * 2000 / 32767) ))
+# s=$((r / 1000))
+# m=$((r - (s * 1000)))
+# [ "${m}" -lt 100 ] && m="0${m}"
+# [ "${m}" -lt 10 ] && m="0${m}"
+# echo "${r} = ${s}.${m}"
+#
+# # the work
+# ${mysleep} ${s}.${m}
+#
+# # the alignment loop
+# loopsleepms tellwork 1
+#done
diff --git a/plugins.d/tc-qos-helper.sh b/plugins.d/tc-qos-helper.sh
index bff5217d2..9caef85f7 100755
--- a/plugins.d/tc-qos-helper.sh
+++ b/plugins.d/tc-qos-helper.sh
@@ -27,16 +27,9 @@ if [ -f "${config_dir}/tc-qos-helper.conf" ]
source "${config_dir}/tc-qos-helper.conf"
fi
-# default time function
-now_ms=
-current_time_ms() {
- now_ms="$(date +'%s')000"
-}
-
# default sleep function
LOOPSLEEPMS_LASTWORK=0
loopsleepms() {
- [ "$1" = "tellwork" ] && shift
sleep $1
}