summary refs log tree commit diff stats
path: root/plugins.d
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-x plugins.d/alarm-notify.sh 172
-rwxr-xr-x plugins.d/cgroup-name.sh 52
-rwxr-xr-x plugins.d/cgroup-network-helper.sh 13
-rwxr-xr-x plugins.d/charts.d.plugin 11
-rwxr-xr-x plugins.d/python.d.plugin 6
5 files changed, 223 insertions, 31 deletions
diff --git a/plugins.d/alarm-notify.sh b/plugins.d/alarm-notify.sh
index 0af98095..3e23a164 100755
--- a/plugins.d/alarm-notify.sh
+++ b/plugins.d/alarm-notify.sh
@@ -16,6 +16,7 @@
# Supported notification methods:
# - emails by @ktsaou
# - slack.com notifications by @ktsaou
+# - alerta.io notifications by @kattunga
# - discordapp.com notifications by @lowfive
# - pushover.net notifications by @ktsaou
# - pushbullet.com push notifications by Tiago Peralta @tperalta82 #1070
@@ -119,7 +120,7 @@ docurl() {
echo >&2 "--- END curl command ---"
local out=$(mktemp /tmp/netdata-health-alarm-notify-XXXXXXXX)
- local code=$(${curl} --write-out %{http_code} --output "${out}" --silent --show-error "${@}")
+ local code=$(${curl} ${curl_options} --write-out %{http_code} --output "${out}" --silent --show-error "${@}")
local ret=$?
echo >&2 "--- BEGIN received response ---"
cat >&2 "${out}"
@@ -131,7 +132,7 @@ docurl() {
return ${ret}
fi
- ${curl} --write-out %{http_code} --output /dev/null --silent --show-error "${@}"
+ ${curl} ${curl_options} --write-out %{http_code} --output /dev/null --silent --show-error "${@}"
return $?
}
@@ -212,6 +213,9 @@ fi
# This can be overwritten at the configuration file.
images_base_url="https://registry.my-netdata.io"
+# curl options to use
+curl_options=
+
# needed commands
# if empty they will be searched in the system path
curl=
@@ -219,6 +223,7 @@ sendmail=
# enable / disable features
SEND_SLACK="YES"
+SEND_ALERTA="YES"
SEND_FLOCK="YES"
SEND_DISCORD="YES"
SEND_PUSHOVER="YES"
@@ -231,6 +236,7 @@ SEND_EMAIL="YES"
SEND_PUSHBULLET="YES"
SEND_KAFKA="YES"
SEND_PD="YES"
+SEND_IRC="YES"
SEND_CUSTOM="YES"
# slack configs
@@ -238,6 +244,12 @@ SLACK_WEBHOOK_URL=
DEFAULT_RECIPIENT_SLACK=
declare -A role_recipients_slack=()
+# alerta configs
+ALERTA_WEBHOOK_URL=
+ALERTA_API_KEY=
+DEFAULT_RECIPIENT_ALERTA=
+declare -A role_recipients_alerta=()
+
# flock configs
FLOCK_WEBHOOK_URL=
DEFAULT_RECIPIENT_FLOCK=
@@ -308,6 +320,13 @@ DEFAULT_RECIPIENT_EMAIL="root"
EMAIL_CHARSET=$(locale charmap 2>/dev/null)
declare -A role_recipients_email=()
+# irc configs
+IRC_NICKNAME=
+IRC_REALNAME=
+DEFAULT_RECIPIENT_IRC=
+IRC_NETWORK=
+declare -A role_recipients_irc=()
+
# load the user configuration
# this will overwrite the variables above
if [ -f "${NETDATA_CONFIG_DIR}/health_alarm_notify.conf" ]
@@ -386,6 +405,7 @@ filter_recipient_by_criticality() {
# find the recipients' addresses per method
declare -A arr_slack=()
+declare -A arr_alerta=()
declare -A arr_flock=()
declare -A arr_discord=()
declare -A arr_pushover=()
@@ -398,6 +418,7 @@ declare -A arr_email=()
declare -A arr_custom=()
declare -A arr_messagebird=()
declare -A arr_kavenegar=()
+declare -A arr_irc=()
# netdata may call us with multiple roles, and roles may have multiple but
# overlapping recipients - so, here we find the unique recipients.
@@ -479,6 +500,14 @@ do
[ "${r}" != "disabled" ] && filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1"
done
+ # alerta
+ a="${role_recipients_alerta[${x}]}"
+ [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_ALERTA}"
+ for r in ${a//,/ }
+ do
+ [ "${r}" != "disabled" ] && filter_recipient_by_criticality alerta "${r}" && arr_alerta[${r/|*/}]="1"
+ done
+
# flock
a="${role_recipients_flock[${x}]}"
[ -z "${a}" ] && a="${DEFAULT_RECIPIENT_FLOCK}"
@@ -502,6 +531,14 @@ do
do
[ "${r}" != "disabled" ] && filter_recipient_by_criticality pd "${r}" && arr_pd[${r/|*/}]="1"
done
+
+ # irc
+ a="${role_recipients_irc[${x}]}"
+ [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_IRC}"
+ for r in ${a//,/ }
+ do
+ [ "${r}" != "disabled" ] && filter_recipient_by_criticality irc "${r}" && arr_irc[${r/|*/}]="1"
+ done
# custom
a="${role_recipients_custom[${x}]}"
@@ -517,6 +554,10 @@ done
to_slack="${!arr_slack[*]}"
[ -z "${to_slack}" ] && SEND_SLACK="NO"
+# build the list of alerta recipients (channels)
+to_alerta="${!arr_alerta[*]}"
+[ -z "${to_alerta}" ] && SEND_ALERTA="NO"
+
# build the list of flock recipients (channels)
to_flock="${!arr_flock[*]}"
[ -z "${to_flock}" ] && SEND_FLOCK="NO"
@@ -570,6 +611,9 @@ do
done
[ -z "${to_email}" ] && SEND_EMAIL="NO"
+# build the list of irc recipients (channels)
+to_irc="${!arr_irc[*]}"
+[ -z "${to_irc}" ] && SEND_IRC="NO"
# -----------------------------------------------------------------------------
# verify the delivery methods supported
@@ -577,6 +621,9 @@ done
# check slack
[ -z "${SLACK_WEBHOOK_URL}" ] && SEND_SLACK="NO"
+# check alerta
+[ -z "${ALERTA_WEBHOOK_URL}" ] && SEND_ALERTA="NO"
+
# check flock
[ -z "${FLOCK_WEBHOOK_URL}" ] && SEND_FLOCK="NO"
@@ -607,6 +654,9 @@ done
# check kafka
[ -z "${KAFKA_URL}" -o -z "${KAFKA_SENDER_IP}" ] && SEND_KAFKA="NO"
+# check irc
+[ -z "${IRC_NETWORK}" ] && SEND_IRC="NO"
+
# check pagerduty.com
# if we need pd-send, check for the pd-send command
# https://www.pagerduty.com/docs/guides/agent-install-guide/
@@ -624,6 +674,7 @@ fi
if [ \( \
"${SEND_PUSHOVER}" = "YES" \
-o "${SEND_SLACK}" = "YES" \
+ -o "${SEND_ALERTA}" = "YES" \
-o "${SEND_FLOCK}" = "YES" \
-o "${SEND_DISCORD}" = "YES" \
-o "${SEND_HIPCHAT}" = "YES" \
@@ -644,6 +695,7 @@ if [ \( \
SEND_PUSHBULLET="NO"
SEND_TELEGRAM="NO"
SEND_SLACK="NO"
+ SEND_ALERTA="NO"
SEND_FLOCK="NO"
SEND_DISCORD="NO"
SEND_TWILIO="NO"
@@ -671,6 +723,7 @@ if [ "${SEND_EMAIL}" != "YES" \
-a "${SEND_PUSHOVER}" != "YES" \
-a "${SEND_TELEGRAM}" != "YES" \
-a "${SEND_SLACK}" != "YES" \
+ -a "${SEND_ALERTA}" != "YES" \
-a "${SEND_FLOCK}" != "YES" \
-a "${SEND_DISCORD}" != "YES" \
-a "${SEND_TWILIO}" != "YES" \
@@ -681,6 +734,7 @@ if [ "${SEND_EMAIL}" != "YES" \
-a "${SEND_KAFKA}" != "YES" \
-a "${SEND_PD}" != "YES" \
-a "${SEND_CUSTOM}" != "YES" \
+ -a "${SEND_IRC}" != "YES" \
]
then
fatal "All notification methods are disabled. Not sending notification for host '${host}', chart '${chart}' to '${roles}' for '${name}' = '${value}' for status '${status}'."
@@ -954,7 +1008,7 @@ send_pd() {
${pd_send} -k ${PD_SERVICE_KEY} \
-t ${t} \
-d "${d}" \
- -i ${alarm_id} \
+ -i ${host}:${chart}:${name} \
-f 'info'="${info}" \
-f 'value_w_units'="${value_string}" \
-f 'when'="${when}" \
@@ -1029,6 +1083,10 @@ send_twilio() {
send_hipchat() {
local authtoken="${1}" recipients="${2}" message="${3}" httpcode sent=0 room color sender msg_format notify
+ # remove <small></small> from the message
+ message="${message//<small>/}"
+ message="${message//<\/small>/}"
+
if [ "${SEND_HIPCHAT}" = "YES" -a ! -z "${HIPCHAT_SERVER}" -a ! -z "${authtoken}" -a ! -z "${recipients}" -a ! -z "${message}" ]
then
# A label to be shown in addition to the sender's name
@@ -1248,6 +1306,53 @@ EOF
}
# -----------------------------------------------------------------------------
+# alerta sender
+
+send_alerta() {
+ local webhook="${1}" channels="${2}" httpcode sent=0 channel severity content
+
+ [ "${SEND_ALERTA}" != "YES" ] && return 1
+
+ case "${status}" in
+ WARNING) severity="warning" ;;
+ CRITICAL) severity="critical" ;;
+ CLEAR) severity="cleared" ;;
+ *) severity="unknown" ;;
+ esac
+
+ info=$( echo -n ${info})
+
+ # the "event" property must be unique and repetible between states to let alerta do automatic correlation using severity value
+ for channel in ${channels}
+ do
+ content="{"
+ content="$content \"environment\": \"${channel}\","
+ content="$content \"service\": [\"${host}\"],"
+ content="$content \"resource\": \"${host}\","
+ content="$content \"event\": \"${name}.${chart} (${family})\","
+ content="$content \"severity\": \"${severity}\","
+ content="$content \"value\": \"${alarm}\","
+ content="$content \"text\": \"${info}\""
+ content="$content }"
+
+
+ httpcode=$(docurl -X POST "${webhook}/alert" -H "Content-Type: application/json" -H "Authorization: Key $ALERTA_API_KEY" -d "$content" )
+
+ if [[ "${httpcode}" = "200" || "${httpcode}" = "201" ]]
+ then
+ info "sent alerta notification for: ${host} ${chart}.${name} is ${status} to '${channel}'"
+ sent=$((sent + 1))
+ else
+ error "failed to send alerta notification for: ${host} ${chart}.${name} is ${status} to '${channel}', with HTTP error code ${httpcode}."
+ fi
+ done
+
+ [ ${sent} -gt 0 ] && return 0
+
+ return 1
+}
+
+# -----------------------------------------------------------------------------
# flock sender
send_flock() {
@@ -1365,6 +1470,46 @@ EOF
return 1
}
+# -----------------------------------------------------------------------------
+# irc sender
+
+send_irc() {
+ local NICKNAME="${1}" REALNAME="${2}" CHANNELS="${3}" NETWORK="${4}" SERVERNAME="${5}" MESSAGE="${6}" sent=0 channel color send_alarm reply_codes error
+
+ if [ "${SEND_IRC}" = "YES" -a ! -z "${NICKNAME}" -a ! -z "${REALNAME}" -a ! -z "${CHANNELS}" -a ! -z "${NETWORK}" -a ! -z "${SERVERNAME}" ]
+ then
+ case "${status}" in
+ WARNING) color="warning" ;;
+ CRITICAL) color="danger" ;;
+ CLEAR) color="good" ;;
+ *) color="#777777" ;;
+ esac
+
+ for CHANNEL in ${CHANNELS}
+ do
+ error=0
+ send_alarm=$(echo -e "USER ${NICKNAME} guest ${REALNAME} ${SERVERNAME}\nNICK ${NICKNAME}\nJOIN ${CHANNEL}\nPRIVMSG ${CHANNEL} :${MESSAGE}\nQUIT\n" \ | nc ${NETWORK} 6667)
+ reply_codes=$(echo ${send_alarm} | cut -d ' ' -f 2 | grep -o '[0-9]*')
+ for code in ${reply_codes}
+ do
+ [ "${code}" -ge 400 -a "${code}" -le 599 ] && error=1 && break
+ done
+
+ if [ "${error}" -eq 0 ]
+ then
+ info "sent irc notification for: ${host} ${chart}.${name} is ${status} to '${CHANNEL}'"
+ sent=$((sent + 1))
+ else
+ error "failed to send irc notification for: ${host} ${chart}.${name} is ${status} to '${CHANNEL}', with error code ${code}."
+ fi
+ done
+ fi
+
+ [ ${sent} -gt 0 ] && return 0
+
+ return 1
+}
+
# -----------------------------------------------------------------------------
# prepare the content of the notification
@@ -1466,6 +1611,15 @@ send_slack "${SLACK_WEBHOOK_URL}" "${to_slack}"
SENT_SLACK=$?
# -----------------------------------------------------------------------------
+# send the alerta notification
+
+# every recipient in ${to_alerta} gets its own alert, posted to alerta
+# with the recipient used as the "environment" of the alert
+
+send_alerta "${ALERTA_WEBHOOK_URL}" "${to_alerta}"
+SENT_ALERTA=$?
+
+# -----------------------------------------------------------------------------
# send the flock notification
# flock aggregates posts from the same username
@@ -1570,6 +1724,16 @@ SENT_KAFKA=$?
send_pd "${to_pd}"
SENT_PD=$?
+# -----------------------------------------------------------------------------
+# send the irc message
+
+send_irc "${IRC_NICKNAME}" "${IRC_REALNAME}" "${to_irc}" "${IRC_NETWORK}" "${host}" "${host} ${status_message} - ${name//_/ } - ${chart} ----- ${alarm}
+Severity: ${severity}
+Chart: ${chart}
+Family: ${family}
+${info}"
+
+SENT_IRC=$?
# -----------------------------------------------------------------------------
# send the custom message
@@ -1733,6 +1897,7 @@ if [ ${SENT_EMAIL} -eq 0 \
-o ${SENT_PUSHOVER} -eq 0 \
-o ${SENT_TELEGRAM} -eq 0 \
-o ${SENT_SLACK} -eq 0 \
+ -o ${SENT_ALERTA} -eq 0 \
-o ${SENT_FLOCK} -eq 0 \
-o ${SENT_DISCORD} -eq 0 \
-o ${SENT_TWILIO} -eq 0 \
@@ -1742,6 +1907,7 @@ if [ ${SENT_EMAIL} -eq 0 \
-o ${SENT_PUSHBULLET} -eq 0 \
-o ${SENT_KAFKA} -eq 0 \
-o ${SENT_PD} -eq 0 \
+ -o ${SENT_IRC} -eq 0 \
-o ${SENT_CUSTOM} -eq 0 \
]
then
diff --git a/plugins.d/cgroup-name.sh b/plugins.d/cgroup-name.sh
index acdd6f4f..3c8ad720 100755
--- a/plugins.d/cgroup-name.sh
+++ b/plugins.d/cgroup-name.sh
@@ -94,6 +94,23 @@ function docker_get_name_api {
return 0
}
+function docker_get_name {
+ local id="${1}"
+ if hash docker 2>/dev/null
+ then
+ docker_get_name_classic "${id}"
+ else
+ docker_get_name_api "${id}" || docker_get_name_classic "${id}"
+ fi
+ if [ -z "${NAME}" ]
+ then
+ warning "cannot find the name of docker container '${id}'"
+ NAME="${id:0:12}"
+ else
+ info "docker container '${id}' is named '${NAME}'"
+ fi
+}
+
if [ -z "${NAME}" ]
then
if [[ "${CGROUP}" =~ ^.*docker[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]
@@ -105,20 +122,29 @@ if [ -z "${NAME}" ]
if [ ! -z "${DOCKERID}" -a \( ${#DOCKERID} -eq 64 -o ${#DOCKERID} -eq 12 \) ]
then
- if hash docker 2>/dev/null
- then
- docker_get_name_classic ${DOCKERID}
- else
- docker_get_name_api ${DOCKERID} || docker_get_name_classic ${DOCKERID}
- fi
- if [ -z "${NAME}" ]
- then
- warning "cannot find the name of docker container '${DOCKERID}'"
- NAME="${DOCKERID:0:12}"
- else
- info "docker container '${DOCKERID}' is named '${NAME}'"
- fi
+ docker_get_name "${DOCKERID}"
+ else
+ error "a docker id cannot be extracted from docker cgroup '${CGROUP}'."
+ fi
+ elif [[ "${CGROUP}" =~ ^.*kubepods[_/].*[_/]pod[a-fA-F0-9-]+[_/][a-fA-F0-9]+$ ]]
+ then
+ # kubernetes
+
+ DOCKERID="$( echo "${CGROUP}" | sed "s|^.*kubepods[_/].*[_/]pod[a-fA-F0-9-]\+[_/]\([a-fA-F0-9]\+\)$|\1|" )"
+ # echo "DOCKERID=${DOCKERID}"
+
+ if [ ! -z "${DOCKERID}" -a \( ${#DOCKERID} -eq 64 -o ${#DOCKERID} -eq 12 \) ]
+ then
+ docker_get_name "${DOCKERID}"
+ else
+ error "a docker id cannot be extracted from kubernetes cgroup '${CGROUP}'."
fi
+ elif [[ "${CGROUP}" =~ machine.slice[_/].*\.service ]]
+ then
+ # systemd-nspawn
+
+ NAME="$(echo ${CGROUP} | sed 's/.*machine.slice[_\/]\(.*\)\.service/\1/g')"
+
elif [[ "${CGROUP}" =~ machine.slice_machine.*-qemu ]]
then
# libvirtd / qemu virtual machines
diff --git a/plugins.d/cgroup-network-helper.sh b/plugins.d/cgroup-network-helper.sh
index d93fe356..f0705998 100755
--- a/plugins.d/cgroup-network-helper.sh
+++ b/plugins.d/cgroup-network-helper.sh
@@ -22,7 +22,9 @@
# -----------------------------------------------------------------------------
-export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin"
+# the system path is cleared by cgroup-network
+[ -f /etc/profile ] && source /etc/profile
+
export LC_ALL=C
PROGRAM_NAME="$(basename "${0}")"
@@ -68,13 +70,6 @@ debug() {
fatal "BASH version 4 or later is required (this is ${BASH_VERSION})."
# -----------------------------------------------------------------------------
-# defaults to allow running this script by hand
-
-[ -z "${NETDATA_PLUGINS_DIR}" ] && NETDATA_PLUGINS_DIR="$(dirname "${0}")"
-[ -z "${NETDATA_CONFIG_DIR}" ] && NETDATA_CONFIG_DIR="$(dirname "${0}")/../../../../etc/netdata"
-[ -z "${NETDATA_CACHE_DIR}" ] && NETDATA_CACHE_DIR="$(dirname "${0}")/../../../../var/cache/netdata"
-
-# -----------------------------------------------------------------------------
# parse the arguments
pid=
@@ -172,7 +167,7 @@ virsh_find_all_interfaces_for_cgroup() {
# match only 'network' interfaces from virsh output
set_source "virsh"
- "${virsh}" domiflist ${d} |\
+ "${virsh}" -r domiflist ${d} |\
sed -n \
-e "s|^\([^[:space:]]\+\)[[:space:]]\+network[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" \
-e "s|^\([^[:space:]]\+\)[[:space:]]\+bridge[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p"
diff --git a/plugins.d/charts.d.plugin b/plugins.d/charts.d.plugin
index c36a0cde..9bd03fd4 100755
--- a/plugins.d/charts.d.plugin
+++ b/plugins.d/charts.d.plugin
@@ -477,6 +477,7 @@ all_enabled_charts() {
# -----------------------------------------------------------------------------
# load the charts
+suffix_retries="_retries"
suffix_update_every="_update_every"
active_charts=
for chart in $( all_enabled_charts )
@@ -582,7 +583,7 @@ debug "run_charts='$run_charts'"
[ -z "$run_charts" ] && fatal "No charts to collect data from."
-declare -A charts_last_update=() charts_update_every=() charts_next_update=() charts_run_counter=() charts_serial_failures=()
+declare -A charts_last_update=() charts_update_every=() charts_retries=() charts_next_update=() charts_run_counter=() charts_serial_failures=()
global_update() {
local exit_at \
c=0 dt ret last_ms exec_start_ms exec_end_ms \
@@ -597,7 +598,11 @@ global_update() {
for chart in $run_charts
do
eval "charts_update_every[$chart]=\$$chart$suffix_update_every"
- test -z "${charts_update_every[$chart]}" && charts_update_every[$charts]=$update_every
+ test -z "${charts_update_every[$chart]}" && charts_update_every[$chart]=$update_every
+
+ eval "charts_retries[$chart]=\$$chart$suffix_retries"
+ test -z "${charts_retries[$chart]}" && charts_retries[$chart]=10
+
charts_last_update[$chart]=$((now_ms - (now_ms % (charts_update_every[$chart] * 1000) ) ))
charts_next_update[$chart]=$(( charts_last_update[$chart] + (charts_update_every[$chart] * 1000) ))
charts_run_counter[$chart]=0
@@ -660,7 +665,7 @@ global_update() {
else
charts_serial_failures[$chart]=$(( charts_serial_failures[$chart] + 1 ))
- if [ ${charts_serial_failures[$chart]} -gt 10 ]
+ if [ ${charts_serial_failures[$chart]} -gt ${charts_retries[$chart]} ]
then
error "module's '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it."
else
diff --git a/plugins.d/python.d.plugin b/plugins.d/python.d.plugin
index 855080e8..c9b26016 100755
--- a/plugins.d/python.d.plugin
+++ b/plugins.d/python.d.plugin
@@ -196,7 +196,7 @@ class Plugin(object):
self.runs_counter = 0
self.config, error = self.loader.load_config_from_file(PLUGIN_CONFIG_DIR + 'python.d.conf')
if error:
- run_and_exit(Logger.error)(error)
+ Logger.error('"python.d.conf" configuration file not found. Using defaults.')
if not self.config.get('enabled', True):
run_and_exit(Logger.info)('DISABLED in configuration file.')
@@ -316,10 +316,10 @@ class Plugin(object):
job.checked = True
continue
if not job.is_autodetect() or ok is None:
- job.error('check() => [FAILED]')
+ job.info('check() => [FAILED]')
self.delete_job(job)
else:
- job.error('check() => [RECHECK] (autodetection_retry: {0})'.format(job.recheck_every))
+ job.info('check() => [RECHECK] (autodetection_retry: {0})'.format(job.recheck_every))
def run_create(self):
for job in self.jobs: