diff options
author | Lennart Weller <lhw@ring0.de> | 2017-01-24 15:21:16 +0000 |
---|---|---|
committer | Lennart Weller <lhw@ring0.de> | 2017-01-24 15:21:16 +0000 |
commit | ef0c127e7f95d2db2715b9e99fe758eebc7dabd3 (patch) | |
tree | ea5d62342aba06f376f3be63aab898503b56f3ec /plugins.d | |
parent | update watch file and files-exclude (diff) | |
parent | New upstream version 1.5.0+dfsg (diff) | |
download | netdata-ef0c127e7f95d2db2715b9e99fe758eebc7dabd3.tar.xz netdata-ef0c127e7f95d2db2715b9e99fe758eebc7dabd3.zip |
Merge tag 'upstream/1.5.0+dfsg'
Upstream version 1.5.0+dfsg
Diffstat (limited to 'plugins.d')
-rw-r--r-- | plugins.d/Makefile.am | 2 | ||||
-rw-r--r-- | plugins.d/Makefile.in | 27 | ||||
-rwxr-xr-x | plugins.d/alarm-notify.sh | 652 | ||||
-rwxr-xr-x | plugins.d/alarm-test.sh | 12 | ||||
-rwxr-xr-x | plugins.d/cgroup-name.sh | 69 | ||||
-rwxr-xr-x | plugins.d/charts.d.plugin | 271 | ||||
-rwxr-xr-x | plugins.d/fping.plugin | 178 | ||||
-rw-r--r-- | plugins.d/loopsleepms.sh.inc | 4 | ||||
-rwxr-xr-x | plugins.d/node.d.plugin | 19 | ||||
-rwxr-xr-x | plugins.d/python.d.plugin | 39 | ||||
-rwxr-xr-x | plugins.d/tc-qos-helper.sh | 127 |
11 files changed, 1197 insertions, 203 deletions
diff --git a/plugins.d/Makefile.am b/plugins.d/Makefile.am index 4bc0dc447..7d3bc44f7 100644 --- a/plugins.d/Makefile.am +++ b/plugins.d/Makefile.am @@ -10,9 +10,11 @@ dist_plugins_DATA = \ dist_plugins_SCRIPTS = \ alarm-email.sh \ alarm-notify.sh \ + alarm-test.sh \ cgroup-name.sh \ charts.d.dryrun-helper.sh \ charts.d.plugin \ + fping.plugin \ node.d.plugin \ python.d.plugin \ tc-qos-helper.sh \ diff --git a/plugins.d/Makefile.in b/plugins.d/Makefile.in index 1854ea861..7e90c9808 100644 --- a/plugins.d/Makefile.in +++ b/plugins.d/Makefile.in @@ -1,7 +1,7 @@ -# Makefile.in generated by automake 1.14.1 from Makefile.am. +# Makefile.in generated by automake 1.15 from Makefile.am. # @configure_input@ -# Copyright (C) 1994-2013 Free Software Foundation, Inc. +# Copyright (C) 1994-2014 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -16,7 +16,17 @@ VPATH = @srcdir@ -am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ @@ -80,18 +90,19 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = plugins.d -DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ - $(dist_plugins_SCRIPTS) $(dist_plugins_DATA) ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \ $(top_srcdir)/m4/ax_c__generic.m4 \ $(top_srcdir)/m4/ax_c_mallinfo.m4 \ $(top_srcdir)/m4/ax_c_mallopt.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/jemalloc.m4 \ $(top_srcdir)/m4/tcmalloc.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(dist_plugins_SCRIPTS) \ + $(dist_plugins_DATA) $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = @@ -146,6 +157,7 @@ am__can_run_installinfo = \ esac DATA = $(dist_plugins_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ @@ -295,9 +307,11 @@ dist_plugins_DATA = \ dist_plugins_SCRIPTS = \ alarm-email.sh \ alarm-notify.sh \ + alarm-test.sh \ cgroup-name.sh \ charts.d.dryrun-helper.sh \ charts.d.plugin \ + fping.plugin \ node.d.plugin \ python.d.plugin \ tc-qos-helper.sh \ @@ -319,7 +333,6 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__confi echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu plugins.d/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu plugins.d/Makefile -.PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ @@ -549,6 +562,8 @@ uninstall-am: uninstall-dist_pluginsDATA uninstall-dist_pluginsSCRIPTS pdf-am ps ps-am tags-am uninstall uninstall-am \ uninstall-dist_pluginsDATA uninstall-dist_pluginsSCRIPTS +.PRECIOUS: Makefile + # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/plugins.d/alarm-notify.sh b/plugins.d/alarm-notify.sh index feec6ceae..d6f3d8b2a 100755 --- a/plugins.d/alarm-notify.sh +++ b/plugins.d/alarm-notify.sh @@ -5,7 +5,7 @@ # (C) 2016 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # -# Script the send alarm notifications for netdata +# Script to send alarm notifications for netdata # # Features: # - multiple notification methods @@ -14,20 +14,103 @@ # - severity filtering per recipient # # Supported notification methods: -# - emails -# - pushover.net notifications -# - slack.com notifications -# - telegram.org notifications -# +# - emails by @ktsaou +# - slack.com notifications by @ktsaou +# - pushover.net notifications by @ktsaou +# - pushbullet.com push notifications by Tiago Peralta @tperalta82 PR #1070 +# - telegram.org notifications by @hashworks PR #1002 +# - twilio.com notifications by Levi Blaney @shadycuz PR #1211 +# - kafka notifications by @ktsaou #1342 +# - pagerduty.com notifications by Jim Cooley @jimcooley PR #1373 +# - messagebird.com notifications by @tech_no_logical #1453 +# - hipchart notifications by @ktsaou #1561 + +# ----------------------------------------------------------------------------- +# testing notifications + +if [ \( "${1}" = "test" -o "${2}" = "test" \) -a "${#}" -le 2 ] +then + if [ "${2}" = "test" ] + then + recipient="${1}" + else + recipient="${2}" + fi + + [ -z "${recipient}" ] && recipient="sysadmin" + + id=1 + last="CLEAR" + for x in "CRITICAL" "WARNING" "CLEAR" + do + echo >&2 + echo >&2 "# SENDING TEST ${x} ALARM TO ROLE: ${recipient}" + + "${0}" "${recipient}" "$(hostname)" 1 1 "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" 100 90 "${0}" 1 $((0 + id)) "units" "this is a test alarm to verify notifications work" + if [ $? -ne 0 ] + then + echo >&2 "# FAILED" + else + echo >&2 "# OK" + fi + + last="${x}" + id=$((id + 1)) + done + + exit 1 +fi + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" -me="${0}" +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +debug=0 +debug() { + [ ${debug} -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- # check for BASH v4+ (required for associative arrays) [ $(( ${BASH_VERSINFO[0]} )) -lt 4 ] && \ - echo >&2 "${me}: BASH version 4 or later is required (this is ${BASH_VERSION})." && \ - exit 1 + fatal "BASH version 4 or later is required (this is ${BASH_VERSION})." +# ----------------------------------------------------------------------------- # defaults to allow running this script by hand + NETDATA_CONFIG_DIR="${NETDATA_CONFIG_DIR-/etc/netdata}" NETDATA_CACHE_DIR="${NETDATA_CACHE_DIR-/var/cache/netdata}" [ -z "${NETDATA_REGISTRY_URL}" ] && NETDATA_REGISTRY_URL="https://registry.my-netdata.io" @@ -62,14 +145,14 @@ info="${18}" # a short description of the alarm # don't do anything if this is not WARNING, CRITICAL or CLEAR if [ "${status}" != "WARNING" -a "${status}" != "CRITICAL" -a "${status}" != "CLEAR" ] then - echo >&2 "${me}: not sending notification for ${status} on '${chart}.${name}'" + info "not sending notification for ${status} on '${chart}.${name}'" exit 1 fi # don't do anything if this is CLEAR, but it was not WARNING or CRITICAL if [ "${old_status}" != "WARNING" -a "${old_status}" != "CRITICAL" -a "${status}" = "CLEAR" ] then - echo >&2 "${me}: not sending notification for ${status} on '${chart}.${name}' (last status was ${old_status})" + info "not sending notification for ${status} on '${chart}.${name}' (last status was ${old_status})" exit 1 fi @@ -90,8 +173,14 @@ sendmail= # enable / disable features SEND_SLACK="YES" SEND_PUSHOVER="YES" +SEND_TWILIO="YES" +SEND_HIPCHAT="YES" +SEND_MESSAGEBIRD="YES" SEND_TELEGRAM="YES" SEND_EMAIL="YES" +SEND_PUSHBULLET="YES" +SEND_KAFKA="YES" +SEND_PD="YES" # slack configs SLACK_WEBHOOK_URL= @@ -103,11 +192,42 @@ PUSHOVER_APP_TOKEN= DEFAULT_RECIPIENT_PUSHOVER= declare -A role_recipients_pushover=() +# pushbullet configs +PUSHBULLET_ACCESS_TOKEN= +DEFAULT_RECIPIENT_PUSHBULLET= +declare -A role_recipients_pushbullet=() + +# twilio configs +TWILIO_ACCOUNT_SID= +TWILIO_ACCOUNT_TOKEN= +TWILIO_NUMBER= +DEFAULT_RECIPIENT_TWILIO= +declare -A role_recipients_twilio=() + +# hipchat configs +HIPCHAT_AUTH_TOKEN= +DEFAULT_RECIPIENT_HIPCHAT= +declare -A role_recipients_hipchat=() + +# messagebird configs +MESSAGEBIRD_ACCESS_KEY= +MESSAGEBIRD_NUMBER= +DEFAULT_RECIPIENT_MESSAGEBIRD= +declare -A role_recipients_messagebird=() + # telegram configs TELEGRAM_BOT_TOKEN= DEFAULT_RECIPIENT_TELEGRAM= declare -A role_recipients_telegram=() +# kafka configs +KAFKA_URL= +KAFKA_SENDER_IP= + +# pagerduty.com configs +PD_SERVICE_KEY= +declare -A role_recipients_pd=() + # email configs DEFAULT_RECIPIENT_EMAIL="root" declare -A role_recipients_email=() @@ -164,7 +284,11 @@ filter_recipient_by_criticality() { declare -A arr_slack=() declare -A arr_pushover=() +declare -A arr_pushbullet=() +declare -A arr_twilio=() +declare -A arr_hipchat=() declare -A arr_telegram=() +declare -A arr_pd=() declare -A arr_email=() # netdata may call us with multiple roles, and roles may have multiple but @@ -191,6 +315,38 @@ do [ "${r}" != "disabled" ] && filter_recipient_by_criticality pushover "${r}" && arr_pushover[${r/|*/}]="1" done + # pushbullet + a="${role_recipients_pushbullet[${x}]}" + [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PUSHBULLET}" + for r in ${a//,/ } + do + [ "${r}" != "disabled" ] && filter_recipient_by_criticality pushbullet "${r}" && arr_pushbullet[${r/|*/}]="1" + done + + # twilio + a="${role_recipients_twilio[${x}]}" + [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_TWILIO}" + for r in ${a//,/ } + do + [ "${r}" != "disabled" ] && filter_recipient_by_criticality twilio "${r}" && arr_twilio[${r/|*/}]="1" + done + + # hipchat + a="${role_recipients_hipchat[${x}]}" + [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_HIPCHAT}" + for r in ${a//,/ } + do + [ "${r}" != "disabled" ] && filter_recipient_by_criticality hipchat "${r}" && arr_hipchat[${r/|*/}]="1" + done + + # messagebird + a="${role_recipients_messagebird[${x}]}" + [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_MESSAGEBIRD}" + for r in ${a//,/ } + do + [ "${r}" != "disabled" ] && filter_recipient_by_criticality messagebird "${r}" && arr_messagebird[${r/|*/}]="1" + done + # telegram a="${role_recipients_telegram[${x}]}" [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_TELEGRAM}" @@ -206,6 +362,14 @@ do do [ "${r}" != "disabled" ] && filter_recipient_by_criticality slack "${r}" && arr_slack[${r/|*/}]="1" done + + # pagerduty.com + a="${role_recipients_pd[${x}]}" + [ -z "${a}" ] && a="${DEFAULT_RECIPIENT_PD}" + for r in ${a//,/ } + do + [ "${r}" != "disabled" ] && filter_recipient_by_criticality pd "${r}" && arr_pd[${r/|*/}]="1" + done done # build the list of slack recipients (channels) @@ -216,10 +380,30 @@ to_slack="${!arr_slack[*]}" to_pushover="${!arr_pushover[*]}" [ -z "${to_pushover}" ] && SEND_PUSHOVER="NO" +# build the list of pushbulet recipients (user tokens) +to_pushbullet="${!arr_pushbullet[*]}" +[ -z "${to_pushbullet}" ] && SEND_PUSHBULLET="NO" + +# build the list of twilio recipients (phone numbers) +to_twilio="${!arr_twilio[*]}" +[ -z "${to_twilio}" ] && SEND_TWILIO="NO" + +# build the list of hipchat recipients (rooms) +to_hipchat="${!arr_hipchat[*]}" +[ -z "${to_hipchat}" ] && SEND_HIPCHAT="NO" + +# build the list of messagebird recipients (phone numbers) +to_messagebird="${!arr_messagebird[*]}" +[ -z "${to_messagebird}" ] && SEND_MESSAGEBIRD="NO" + # check array of telegram recipients (chat ids) to_telegram="${!arr_telegram[*]}" [ -z "${to_telegram}" ] && SEND_TELEGRAM="NO" +# build the list of pagerduty recipients (service keys) +to_pd="${!arr_pd[*]}" +[ -z "${to_pd}" ] && SEND_PD="NO" + # build the list of email recipients (email addresses) to_email= for x in "${!arr_email[@]}" @@ -239,20 +423,67 @@ done # check pushover [ -z "${PUSHOVER_APP_TOKEN}" ] && SEND_PUSHOVER="NO" +# check pushbullet +[ -z "${PUSHBULLET_ACCESS_TOKEN}" ] && SEND_PUSHBULLET="NO" + +# check twilio +[ -z "${TWILIO_ACCOUNT_TOKEN}" -o -z "${TWILIO_ACCOUNT_SID}" -o -z "${TWILIO_NUMBER}" ] && SEND_TWILIO="NO" + +# check hipchat +[ -z "${HIPCHAT_AUTH_TOKEN}" ] && SEND_HIPCHAT="NO" + +# check messagebird +[ -z "${MESSAGEBIRD_ACCESS_KEY}" -o -z "${MESSAGEBIRD_NUMBER}" ] && SEND_MESSAGEBIRD="NO" + # check telegram [ -z "${TELEGRAM_BOT_TOKEN}" ] && SEND_TELEGRAM="NO" -if [ \( "${SEND_PUSHOVER}" = "YES" -o "${SEND_SLACK}" = "YES" -o "${SEND_TELEGRAM}" = "YES" \) -a -z "${curl}" ] +# check kafka +[ -z "${KAFKA_URL}" -o -z "${KAFKA_SENDER_IP}" ] && SEND_KAFKA="NO" + +# check pagerduty.com +# if we need pd-send, check for the pd-send command +# https://www.pagerduty.com/docs/guides/agent-install-guide/ +if [ "${SEND_PD}" = "YES" ] + then + pd_send="$(which pd-send 2>/dev/null || command -v pd-send 2>/dev/null)" + if [ -z "${pd_send}" ] + then + # no pd-send available + # disable pagerduty.com + SEND_PD="NO" + fi +fi + +# if we need curl, check for the curl command +if [ \( \ + "${SEND_PUSHOVER}" = "YES" \ + -o "${SEND_SLACK}" = "YES" \ + -o "${SEND_HIPCHAT}" = "YES" \ + -o "${SEND_TWILIO}" = "YES" \ + -o "${SEND_MESSAGEBIRD}" = "YES" \ + -o "${SEND_TELEGRAM}" = "YES" \ + -o "${SEND_PUSHBULLET}" = "YES" \ + -o "${SEND_KAFKA}" = "YES" \ + \) -a -z "${curl}" ] then curl="$(which curl 2>/dev/null || command -v curl 2>/dev/null)" if [ -z "${curl}" ] then + # no curl available + # disable all curl based methods SEND_PUSHOVER="NO" + SEND_PUSHBULLET="NO" SEND_TELEGRAM="NO" SEND_SLACK="NO" + SEND_TWILIO="NO" + SEND_HIPCHAT="NO" + SEND_MESSAGEBIRD="NO" + SEND_KAFKA="NO" fi fi +# if we need sendmail, check for the sendmail command if [ "${SEND_EMAIL}" = "YES" -a -z "${sendmail}" ] then sendmail="$(which sendmail 2>/dev/null || command -v sendmail 2>/dev/null)" @@ -260,14 +491,23 @@ if [ "${SEND_EMAIL}" = "YES" -a -z "${sendmail}" ] fi # check that we have at least a method enabled -if [ "${SEND_EMAIL}" != "YES" -a "${SEND_PUSHOVER}" != "YES" -a "${SEND_TELEGRAM}" != "YES" -a "${SEND_SLACK}" != "YES" ] +if [ "${SEND_EMAIL}" != "YES" \ + -a "${SEND_PUSHOVER}" != "YES" \ + -a "${SEND_TELEGRAM}" != "YES" \ + -a "${SEND_SLACK}" != "YES" \ + -a "${SEND_TWILIO}" != "YES" \ + -a "${SEND_HIPCHAT}" != "YES" \ + -a "${SEND_MESSAGEBIRD}" != "YES" \ + -a "${SEND_PUSHBULLET}" != "YES" \ + -a "${SEND_KAFKA}" != "YES" \ + -a "${SEND_PD}" != "YES" \ + ] then - echo >&2 "All notification methods are disabled. Not sending a notification." - exit 1 + fatal "All notification methods are disabled. Not sending notification to '${roles}' for '${name}' = '${value}' of chart '${chart}' for status '${status}'." fi # ----------------------------------------------------------------------------- -# get the system hostname +# find a suitable hostname to use, if netdata did not supply a hostname [ -z "${host}" ] && host="${NETDATA_HOSTNAME}" [ -z "${host}" ] && host="${NETDATA_REGISTRY_HOSTNAME}" @@ -280,7 +520,7 @@ date="$(date --date=@${when} 2>/dev/null)" [ -z "${date}" ] && date="$(date 2>/dev/null)" # ----------------------------------------------------------------------------- -# URL encode a string +# function to URL encode a string urlencode() { local string="${1}" strlen encoded pos c o @@ -288,14 +528,14 @@ urlencode() { strlen=${#string} for (( pos=0 ; pos<strlen ; pos++ )) do - c=${string:$pos:1} - case "$c" in + c=${string:${pos}:1} + case "${c}" in [-_.~a-zA-Z0-9]) o="${c}" ;; *) - printf -v o '%%%02x' "'$c" + printf -v o '%%%02x' "'${c}" ;; esac encoded+="${o}" @@ -306,7 +546,7 @@ urlencode() { } # ----------------------------------------------------------------------------- -# convert a duration in seconds, to a human readable duration +# function to convert a duration in seconds, to a human readable duration # using DAYS, MINUTES, SECONDS duration4human() { @@ -370,12 +610,12 @@ send_email() { "${sendmail}" -t ret=$? - if [ $ret -eq 0 ] + if [ ${ret} -eq 0 ] then - echo >&2 "${me}: Sent email notification for: ${host} ${chart}.${name} is ${status} to '${to_email}'" + info "sent email notification for: ${host} ${chart}.${name} is ${status} to '${to_email}'" return 0 else - echo >&2 "${me}: Failed to send email notification for: ${host} ${chart}.${name} is ${status} to '${to_email}' with error code ${ret}." + error "failed to send email notification for: ${host} ${chart}.${name} is ${status} to '${to_email}' with error code ${ret}." return 1 fi fi @@ -417,10 +657,10 @@ send_pushover() { if [ "${httpcode}" == "200" ] then - echo >&2 "${me}: Sent pushover notification for: ${host} ${chart}.${name} is ${status} to '${user}'" + info "sent pushover notification for: ${host} ${chart}.${name} is ${status} to '${user}'" sent=$((sent + 1)) else - echo >&2 "${me}: Failed to send pushover notification for: ${host} ${chart}.${name} is ${status} to '${user}' with HTTP error code ${httpcode}." + error "failed to send pushover notification for: ${host} ${chart}.${name} is ${status} to '${user}' with HTTP error code ${httpcode}." fi done @@ -430,6 +670,242 @@ send_pushover() { return 1 } +# ----------------------------------------------------------------------------- +# pushbullet sender + +send_pushbullet() { + local userapikey="${1}" recipients="${2}" title="${3}" message="${4}" httpcode sent=0 user + if [ "${SEND_PUSHBULLET}" = "YES" -a ! -z "${userapikey}" -a ! -z "${recipients}" -a ! -z "${message}" -a ! -z "${title}" ] + then + #https://docs.pushbullet.com/#create-push + for user in ${recipients} + do + httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null \ + --header 'Access-Token: '${userapikey}'' \ + --header 'Content-Type: application/json' \ + --data-binary @<(cat <<EOF + {"title": "${title}", + "type": "note", + "email": "${user}", + "body": "$( echo -n ${message})"} +EOF + ) "https://api.pushbullet.com/v2/pushes" -X POST) + + if [ "${httpcode}" == "200" ] + then + info "sent pushbullet notification for: ${host} ${chart}.${name} is ${status} to '${user}'" + sent=$((sent + 1)) + else + error "failed to send pushbullet notification for: ${host} ${chart}.${name} is ${status} to '${user}' with HTTP error code ${httpcode}." + fi + done + + [ ${sent} -gt 0 ] && return 0 + fi + + return 1 +} + +# ----------------------------------------------------------------------------- +# kafka sender + +send_kafka() { + local httpcode sent=0 + if [ "${SEND_KAFKA}" = "YES" ] + then + httpcode=$(${curl} -X POST --write-out %{http_code} --silent --output /dev/null \ + --data "{host_ip:\"${KAFKA_SENDER_IP}\",when:${when},name:\"${name}\",chart:\"${chart}\",family:\"${family}\",status:\"${status}\",old_status:\"${old_status}\",value:${value},old_value:${old_value},duration:${duration},non_clear_duration:${non_clear_duration},units:\"${units}\",info:\"${info}\"}" \ + "${KAFKA_URL}") + + if [ "${httpcode}" == "204" ] + then + info "sent kafka data for: ${host} ${chart}.${name} is ${status} and ip '${KAFKA_SENDER_IP}'" + sent=$((sent + 1)) + else + error "failed to send kafka data for: ${host} ${chart}.${name} is ${status} and ip '${KAFKA_SENDER_IP}' with HTTP error code ${httpcode}." + fi + + [ ${sent} -gt 0 ] && return 0 + fi + + return 1 +} + +# ----------------------------------------------------------------------------- +# pagerduty.com sender + +send_pd() { + local recipients="${1}" sent=0 + unset t + case ${status} in + CLEAR) t='resolve';; + WARNING) t='trigger';; + CRITICAL) t='trigger';; + esac + + if [ ${SEND_PD} = "YES" -a ! -z "${t}" ] + then + for PD_SERVICE_KEY in ${recipients} + do + d="${status} ${name}=${value} ${units} - ${host}, ${family}" + ${pd_send} -k ${PD_SERVICE_KEY} \ + -t ${t} \ + -d "${d}" \ + -i ${alarm_id} \ + -f 'info'="${info}" \ + -f 'value_w_units'="${value} ${units}" \ + -f 'when'="${when}" \ + -f 'duration'="${duration}" \ + -f 'roles'="${roles}" \ + -f 'host'="${host}" \ + -f 'unique_id'="${unique_id}" \ + -f 'alarm_id'="${alarm_id}" \ + -f 'event_id'="${event_id}" \ + -f 'name'="${name}" \ + -f 'chart'="${chart}" \ + -f 'family'="${family}" \ + -f 'status'="${status}" \ + -f 'old_status'="${old_status}" \ + -f 'value'="${value}" \ + -f 'old_value'="${old_value}" \ + -f 'src'="${src}" \ + -f 'non_clear_duration'="${non_clear_duration}" \ + -f 'units'="${units}" + retval=$? + if [ ${retval} -eq 0 ] + then + info "sent pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}....: ${d}" + sent=$((sent + 1)) + else + error "failed to send pagerduty.com notification using service key ${PD_SERVICE_KEY::-26}.... (error code ${retval}): ${d}" + fi + done + + [ ${sent} -gt 0 ] && return 0 + fi + + return 1 +} + +# ----------------------------------------------------------------------------- +# twilio sender + +send_twilio() { + local accountsid="${1}" accounttoken="${2}" twilionumber="${3}" recipients="${4}" title="${5}" message="${6}" httpcode sent=0 user + if [ "${SEND_TWILIO}" = "YES" -a ! -z "${accountsid}" -a ! -z "${accounttoken}" -a ! -z "${twilionumber}" -a ! -z "${recipients}" -a ! -z "${message}" -a ! -z "${title}" ] + then + #https://www.twilio.com/packages/labs/code/bash/twilio-sms + for user in ${recipients} + do + httpcode=$(${curl} -X POST --write-out %{http_code} --silent --output /dev/null \ + --data-urlencode "From=${twilionumber}" \ + --data-urlencode "To=${user}" \ + --data-urlencode "Body=${title} ${message}" \ + -u "${accountsid}:${accounttoken}" \ + "https://api.twilio.com/2010-04-01/Accounts/${accountsid}/Messages.json") + + if [ "${httpcode}" == "201" ] + then + info "sent Twilio SMS for: ${host} ${chart}.${name} is ${status} to '${user}'" + sent=$((sent + 1)) + else + error "failed to send Twilio SMS for: ${host} ${chart}.${name} is ${status} to '${user}' with HTTP error code ${httpcode}." + fi + done + + [ ${sent} -gt 0 ] && return 0 + fi + + return 1 +} + + +# ----------------------------------------------------------------------------- +# hipchat sender + +send_hipchat() { + local authtoken="${1}" recipients="${2}" message="${3}" httpcode sent=0 room color sender msg_format notify + + if [ "${SEND_HIPCHAT}" = "YES" -a ! -z "${authtoken}" -a ! -z "${recipients}" -a ! -z "${message}" ] + then + + # A label to be shown in addition to the sender's name + # Valid length range: 0 - 64. + sender="netdata" + + # Valid values: html, text. + # Defaults to 'html'. + msg_format="text" + + # Background color for message. Valid values: yellow, green, red, purple, gray, random. Defaults to 'yellow'. + case "${status}" in + WARNING) color="yellow" ;; + CRITICAL) color="red" ;; + CLEAR) color="green" ;; + *) color="gray" ;; + esac + + # Whether this message should trigger a user notification (change the tab color, play a sound, notify mobile phones, etc). + # Each recipient's notification preferences are taken into account. + # Defaults to false. + notify="true" + + for room in ${recipients} + do + httpcode=$(${curl} -X POST --write-out %{http_code} --silent --output /dev/null \ + -H "Content-type: application/json" \ + -H "Authorization: Bearer ${authtoken}" \ + -d "{\"color\": \"${color}\", \"from\": \"${netdata}\", \"message_format\": \"${msg_format}\", \"message\": \"${message}\", \"notify\": \"${notify}\"}" \ + "https://api.hipchat.com/v2/room/${room}/notification") + + if [ "${httpcode}" == "200" ] + then + info "sent HipChat notification for: ${host} ${chart}.${name} is ${status} to '${room}'" + sent=$((sent + 1)) + else + error "failed to send HipChat notification for: ${host} ${chart}.${name} is ${status} to '${room}' with HTTP error code ${httpcode}." + fi + done + + [ ${sent} -gt 0 ] && return 0 + fi + + return 1 +} + + +# ----------------------------------------------------------------------------- +# messagebird sender + +send_messagebird() { + local accesskey="${1}" messagebirdnumber="${2}" recipients="${3}" title="${4}" message="${5}" httpcode sent=0 user + if [ "${SEND_MESSAGEBIRD}" = "YES" -a ! -z "${accesskey}" -a ! -z "${messagebirdnumber}" -a ! -z "${recipients}" -a ! -z "${message}" -a ! -z "${title}" ] + then + #https://developers.messagebird.com/docs/messaging + for user in ${recipients} + do + httpcode=$(${curl} -X POST --write-out %{http_code} --silent --output /dev/null \ + --data-urlencode "originator=${messagebirdnumber}" \ + --data-urlencode "recipients=${user}" \ + --data-urlencode "body=${title} ${message}" \ + --data-urlencode "datacoding=auto" \ + -H "Authorization: AccessKey ${accesskey}" \ + "https://rest.messagebird.com/messages") + + if [ "${httpcode}" == "201" ] + then + info "sent Messagebird SMS for: ${host} ${chart}.${name} is ${status} to '${user}'" + sent=$((sent + 1)) + else + error "failed to send Messagebird SMS for: ${host} ${chart}.${name} is ${status} to '${user}' with HTTP error code ${httpcode}." + fi + done + + [ ${sent} -gt 0 ] && return 0 + fi + + return 1 +} # ----------------------------------------------------------------------------- # telegram sender @@ -447,18 +923,18 @@ send_telegram() { httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null ${disableNotification} \ --data-urlencode "parse_mode=HTML" \ --data-urlencode "disable_web_page_preview=true" \ - --data-urlencode "text=$message" \ - "https://api.telegram.org/bot${bottoken}/sendMessage?chat_id=$chatid") + --data-urlencode "text=${message}" \ + "https://api.telegram.org/bot${bottoken}/sendMessage?chat_id=${chatid}") if [ "${httpcode}" == "200" ] then - echo >&2 "${me}: Sent telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}'" + info "sent telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}'" sent=$((sent + 1)) elif [ "${httpcode}" == "401" ] then - echo >&2 "${me}: Failed to send telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}': Wrong bot token." + error "failed to send telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}': Wrong bot token." else - echo >&2 "${me}: Failed to send telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}' with HTTP error code ${httpcode}." + error "failed to send telegram notification for: ${host} ${chart}.${name} is ${status} to '${chatid}' with HTTP error code ${httpcode}." fi done @@ -477,10 +953,10 @@ send_slack() { [ "${SEND_SLACK}" != "YES" ] && return 1 case "${status}" in - WARNING) color="warning" ;; + WARNING) color="warning" ;; CRITICAL) color="danger" ;; - CLEAR) color="good" ;; - *) color="#777777" ;; + CLEAR) color="good" ;; + *) color="#777777" ;; esac for channel in ${channels} @@ -520,10 +996,10 @@ EOF httpcode=$(${curl} --write-out %{http_code} --silent --output /dev/null -X POST --data-urlencode "payload=${payload}" "${webhook}") if [ "${httpcode}" == "200" ] then - echo >&2 "${me}: Sent slack notification for: ${host} ${chart}.${name} is ${status} to '${channel}'" + info "sent slack notification for: ${host} ${chart}.${name} is ${status} to '${channel}'" sent=$((sent + 1)) else - echo >&2 "${me}: Failed to send slack notification for: ${host} ${chart}.${name} is ${status} to '${channel}', with HTTP error code ${httpcode}." + error "failed to send slack notification for: ${host} ${chart}.${name} is ${status} to '${channel}', with HTTP error code ${httpcode}." fi done @@ -581,25 +1057,25 @@ case "${status}" in image="${images_base_url}/images/check-mark-2-128-green.png" status_message="recovered" color="#77ca6d" - - # don't show the value when the status is CLEAR - # for certain alarms, this value might not have any meaning - alarm="${name//_/ } ${raised_for}" ;; esac if [ "${status}" = "CLEAR" ] then severity="Recovered from ${old_status}" - if [ $non_clear_duration -gt $duration ] + if [ ${non_clear_duration} -gt ${duration} ] then raised_for="(alarm was raised for ${non_clear_duration_txt})" fi + # don't show the value when the status is CLEAR + # for certain alarms, this value might not have any meaning + alarm="${name//_/ } ${raised_for}" + elif [ "${old_status}" = "WARNING" -a "${status}" = "CRITICAL" ] then severity="Escalated to ${status}" - if [ $non_clear_duration -gt $duration ] + if [ ${non_clear_duration} -gt ${duration} ] then raised_for="(alarm is raised for ${non_clear_duration_txt})" fi @@ -607,7 +1083,7 @@ then elif [ "${old_status}" = "CRITICAL" -a "${status}" = "WARNING" ] then severity="Demoted to ${status}" - if [ $non_clear_duration -gt $duration ] + if [ ${non_clear_duration} -gt ${duration} ] then raised_for="(alarm is raised for ${non_clear_duration_txt})" fi @@ -648,20 +1124,81 @@ send_pushover "${PUSHOVER_APP_TOKEN}" "${to_pushover}" "${when}" "${goto_url}" " SENT_PUSHOVER=$? # ----------------------------------------------------------------------------- +# send the pushbullet notification + +send_pushbullet "${PUSHBULLET_ACCESS_TOKEN}" "${to_pushbullet}" "${host} ${status_message} - ${name//_/ } - ${chart}" "${alarm}\n +Severity: ${severity}\n +Chart: ${chart}\n +Family: ${family}\n +To View Netdata go to: ${goto_url}\n +The source of this alarm is line ${src}" + +SENT_PUSHBULLET=$? + +# ----------------------------------------------------------------------------- +# send the twilio SMS + +send_twilio "${TWILIO_ACCOUNT_SID}" "${TWILIO_ACCOUNT_TOKEN}" "${TWILIO_NUMBER}" "${to_twilio}" "${host} ${status_message} - ${name//_/ } - ${chart}" "${alarm} +Severity: ${severity} +Chart: ${chart} +Family: ${family} +${info}" + +SENT_TWILIO=$? + +# ----------------------------------------------------------------------------- +# send the messagebird SMS + +send_messagebird "${MESSAGEBIRD_ACCESS_KEY}" "${MESSAGEBIRD_NUMBER}" "${to_messagebird}" "${host} ${status_message} - ${name//_/ } - ${chart}" "${alarm} +Severity: ${severity} +Chart: ${chart} +Family: ${family} +${info}" + +SENT_MESSAGEBIRD=$? + + +# ----------------------------------------------------------------------------- # send the telegram.org message # https://core.telegram.org/bots/api#formatting-options -telegram_message="<b>${severity}" -[ "${status_message}" != "recovered" ] && telegram_message="${telegram_message}, ${status_message}" -telegram_message="${telegram_message} -${chart} (${family})</b> +send_telegram "${TELEGRAM_BOT_TOKEN}" "${to_telegram}" "${host} ${status_message} - <b>${name//_/ }</b> +${chart} (${family}) <a href=\"${goto_url}\">${alarm}</a> <i>${info}</i>" -send_telegram "${TELEGRAM_BOT_TOKEN}" "${to_telegram}" "${telegram_message}" - SENT_TELEGRAM=$? + +# ----------------------------------------------------------------------------- +# send the kafka message + +send_kafka +SENT_KAFKA=$? + + +# ----------------------------------------------------------------------------- +# send the pagerduty.com message + +send_pd "${to_pd}" +SENT_PD=$? + + +# ----------------------------------------------------------------------------- +# send hipchat message + +send_hipchat "${HIPCHAT_AUTH_TOKEN}" "${to_hipchat}" " +<b>${alarm}</b> ${info_html}<br/> +<small><b>${chart}</b><br/>Chart<br/> </small> +<small><b>${family}</b><br/>Family<br/> </small> +<small><b>${severity}</b><br/>Severity<br/> </small> +<small><b>${date}${raised_for_html}</b><br/>Time<br/> </small> +<a href=\"${goto_url}\">View Netdata</a><br/> +<small><small>The source of this alarm is line ${src}</small></small> +" + +SENT_HIPCHAT=$? + # ----------------------------------------------------------------------------- # send the email @@ -760,8 +1297,21 @@ SENT_EMAIL=$? # ----------------------------------------------------------------------------- # let netdata know -# we did send something -[ ${SENT_EMAIL} -eq 0 -o ${SENT_PUSHOVER} -eq 0 -o ${SENT_TELEGRAM} -eq 0 -o ${SENT_SLACK} -eq 0 ] && exit 0 +if [ ${SENT_EMAIL} -eq 0 \ + -o ${SENT_PUSHOVER} -eq 0 \ + -o ${SENT_TELEGRAM} -eq 0 \ + -o ${SENT_SLACK} -eq 0 \ + -o ${SENT_TWILIO} -eq 0 \ + -o ${SENT_HIPCHAT} -eq 0 \ + -o ${SENT_MESSAGEBIRD} -eq 0 \ + -o ${SENT_PUSHBULLET} -eq 0 \ + -o ${SENT_KAFKA} -eq 0 \ + -o ${SENT_PD} -eq 0 \ + ] + then + # we did send something + exit 0 +fi # we did not send anything exit 1 diff --git a/plugins.d/alarm-test.sh b/plugins.d/alarm-test.sh new file mode 100755 index 000000000..1963111a5 --- /dev/null +++ b/plugins.d/alarm-test.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# GPL v3+ +# +# Script to test alarm notifications for netdata + +dir="$(dirname "${0}")" +${dir}/alarm-notify.sh test "${1}" +exit $? diff --git a/plugins.d/cgroup-name.sh b/plugins.d/cgroup-name.sh index 1c6f564b4..9bb3bcabb 100755 --- a/plugins.d/cgroup-name.sh +++ b/plugins.d/cgroup-name.sh @@ -1,17 +1,66 @@ #!/usr/bin/env bash +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# GPL v3+ +# +# Script to find a better name for cgroups +# + export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" export LC_ALL=C +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +debug=0 +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- + NETDATA_CONFIG_DIR="${NETDATA_CONFIG_DIR-/etc/netdata}" CONFIG="${NETDATA_CONFIG_DIR}/cgroups-names.conf" CGROUP="${1}" NAME= +# ----------------------------------------------------------------------------- + if [ -z "${CGROUP}" ] then - echo >&2 "${0}: called without a cgroup name. Nothing to do." - exit 1 + fatal "called without a cgroup name. Nothing to do." fi if [ -f "${CONFIG}" ] @@ -19,15 +68,15 @@ if [ -f "${CONFIG}" ] NAME="$(grep "^${CGROUP} " "${CONFIG}" | sed "s/[[:space:]]\+/ /g" | cut -d ' ' -f 2)" if [ -z "${NAME}" ] then - echo >&2 "${0}: cannot find cgroup '${CGROUP}' in '${CONFIG}'." + info "cannot find cgroup '${CGROUP}' in '${CONFIG}'." fi #else -# echo >&2 "${0}: configuration file '${CONFIG}' is not available." +# info "configuration file '${CONFIG}' is not available." fi function get_name_classic { local DOCKERID="$1" - echo >&2 "Running command: docker ps --filter=id=\"${DOCKERID}\" --format=\"{{.Names}}\"" + info "Running command: docker ps --filter=id=\"${DOCKERID}\" --format=\"{{.Names}}\"" NAME="$( docker ps --filter=id="${DOCKERID}" --format="{{.Names}}" )" return 0 } @@ -36,10 +85,10 @@ function get_name_api { local DOCKERID="$1" if [ ! -S "/var/run/docker.sock" ] then - echo >&2 "Can't find /var/run/docker.sock" + warning "Can't find /var/run/docker.sock" return 1 fi - echo >&2 "Running API command: /containers/${DOCKERID}/json" + info "Running API command: /containers/${DOCKERID}/json" JSON=$(echo -e "GET /containers/${DOCKERID}/json HTTP/1.0\r\n" | nc -U /var/run/docker.sock | egrep '^{.*') NAME=$(echo $JSON | jq -r .Name,.Config.Hostname | grep -v null | head -n1 | sed 's|^/||') return 0 @@ -62,10 +111,10 @@ if [ -z "${NAME}" ] fi if [ -z "${NAME}" ] then - echo >&2 "Cannot find the name of docker container '${DOCKERID}'" + warning "cannot find the name of docker container '${DOCKERID}'" NAME="${DOCKERID:0:12}" else - echo >&2 "Docker container '${DOCKERID}' is named '${NAME}'" + info "docker container '${DOCKERID}' is named '${NAME}'" fi fi fi @@ -74,5 +123,5 @@ if [ -z "${NAME}" ] [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}" fi -echo >&2 "${0}: cgroup '${CGROUP}' is called '${NAME}'" +info "cgroup '${CGROUP}' is called '${NAME}'" echo "${NAME}" diff --git a/plugins.d/charts.d.plugin b/plugins.d/charts.d.plugin index df9998ece..00206f95f 100755 --- a/plugins.d/charts.d.plugin +++ b/plugins.d/charts.d.plugin @@ -1,36 +1,95 @@ #!/usr/bin/env bash +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# GPL v3+ +# +# charts.d.plugin allows easy development of BASH plugins +# +# if you need to run parallel charts.d processes, link this file to a different name +# in the same directory, with a .plugin suffix and netdata will start both of them, +# each will have a different config file and modules configuration directory. +# + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin" + PROGRAM_FILE="$0" PROGRAM_NAME="$(basename $0)" PROGRAM_NAME="${PROGRAM_NAME/.plugin}" +MODULE_NAME="main" -# if you need to run parallel charts.d processes -# just link this files with a different name -# in the same directory, with a .plugin suffix -# netdata will start multiple of them -# each will have a different config file +# ----------------------------------------------------------------------------- +# create temp dir -echo >&2 "$PROGRAM_NAME: started from '$PROGRAM_FILE' with options: $*" +debug=0 +TMP_DIR= +chartsd_cleanup() { + if [ ! -z "$TMP_DIR" -a -d "$TMP_DIR" ] + then + [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: cleaning up temporary directory $TMP_DIR ..." + rm -rf "$TMP_DIR" + fi + exit 0 +} +trap chartsd_cleanup EXIT +trap chartsd_cleanup SIGHUP +trap chartsd_cleanup INT -if [ $(( ${BASH_VERSINFO[0]} )) -lt 4 ] +if [ $UID = "0" ] then - echo >&2 - echo >&2 "$PROGRAM_NAME: ERROR" - echo >&2 "BASH version 4 or later is required." - echo >&2 "You are running version: ${BASH_VERSION}" - echo >&2 "Please upgrade." - echo >&2 - exit 1 + TMP_DIR="$( mktemp -d /var/run/netdata-${PROGRAM_NAME}-XXXXXXXXXX )" +else + TMP_DIR="$( mktemp -d /tmp/.netdata-${PROGRAM_NAME}-XXXXXXXXXX )" fi +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + echo "DISABLE" + exit 1 +} + +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- # check a few commands + require_cmd() { - which "$1" >/dev/null - if [ $? -ne 0 ] + local x=$(which "${1}" 2>/dev/null || command -v "${1}" 2>/dev/null) + if [ -z "${x}" -o ! -x "${x}" ] then - echo >&2 "$PROGRAM_NAME: ERROR: Command '$1' is not found in the system path." + warning "command '${1}' is not found in ${PATH}." + eval "${1^^}_CMD=\"\"" return 1 fi + + eval "${1^^}_CMD=\"${x}\"" return 0 } @@ -43,9 +102,17 @@ require_cmd grep || exit 1 require_cmd egrep || exit 1 require_cmd mktemp || exit 1 require_cmd awk || exit 1 +require_cmd timeout || exit 1 +require_cmd curl || exit 1 + +# ----------------------------------------------------------------------------- + +[ $(( ${BASH_VERSINFO[0]} )) -lt 4 ] && fatal "BASH version 4 or later is required, but found version: ${BASH_VERSION}. Please upgrade." + +info "started from '$PROGRAM_FILE' with options: $*" # ----------------------------------------------------------------------------- -# insternal defaults +# internal defaults # netdata exposes a few environment variables for us pluginsd="${NETDATA_PLUGINS_DIR}" @@ -97,7 +164,6 @@ enable_all_charts="yes" # ----------------------------------------------------------------------------- # parse parameters -debug=0 check=0 chart_only= while [ ! -z "$1" ] @@ -143,9 +209,7 @@ do continue fi - echo >&2 "Cannot understand parameter $1. Aborting." - echo "DISABLE" - exit 1 + fatal "Cannot understand parameter $1. Aborting." done @@ -173,17 +237,13 @@ mysleep="sleep" if [ -f "$myconfig" ] then . "$myconfig" - if [ $? -ne 0 ] - then - echo >&2 "$PROGRAM_NAME: cannot load $myconfig" - echo "DISABLE" - exit 1 - fi + [ $? -ne 0 ] && fatal "cannot load $myconfig" + time_divisor=$((time_divisor)) [ $time_divisor -lt 10 ] && time_divisor=10 [ $time_divisor -gt 100 ] && time_divisor=100 else - echo >&2 "$PROGRAM_NAME: configuration file '$myconfig' not found. Using defaults." + info "configuration file '$myconfig' not found. Using defaults." fi # we check for the timeout command, after we load our @@ -204,12 +264,7 @@ update_every=$(( update_every + 1 - 1)) # makes sure it is a number test $update_every -eq 0 && update_every=1 # if it is zero, make it 1 # check the charts.d directory -if [ ! -d "$chartsd" ] - then - echo >&2 "$PROGRAM_NAME: cannot find charts directory '$chartsd'" - echo "DISABLE" -fi - +[ ! -d "$chartsd" ] && fatal "cannot find charts directory '$chartsd'" # ----------------------------------------------------------------------------- # library functions @@ -221,6 +276,35 @@ fixid() { tr "[A-Z]" "[a-z]" } +run() { + local ret pid="${BASHPID}" t + + if [ "z${1}" = "z-t" -a "${2}" != "0" ] + then + t="${2}" + shift 2 + timeout ${t} "${@}" 2>"${TMP_DIR}/run.${pid}" + ret=$? + else + "${@}" 2>"${TMP_DIR}/run.${pid}" + ret=$? + fi + + if [ ${ret} -ne 0 ] + then + { + printf "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: command '" + printf "%q " "${@}" + printf "' failed:\n --- BEGIN TRACE ---\n" + cat "${TMP_DIR}/run.${pid}" + printf " --- END TRACE ---\n" + } >&2 + fi + rm "${TMP_DIR}/run.${pid}" + + return ${ret} +} + # convert any floating point number # to integer, give a multiplier # the result is stored in ${FLOAT2INT_RESULT} @@ -230,8 +314,6 @@ float2int() { local f m="$2" a b l v=($1) f=${v[0]} - # echo >&2 "value='${1}' f='${f}', m='${m}'" - # the length of the multiplier - 1 l=$(( ${#m} - 1 )) @@ -277,7 +359,6 @@ float2int() { # store the result FLOAT2INT_RESULT=$(( (a * m) + b )) - #echo >&2 "FLOAT2INT_RESULT='${FLOAT2INT_RESULT}'" } @@ -286,7 +367,7 @@ float2int() { all_charts() { cd "$chartsd" - [ $? -ne 0 ] && echo >&2 "$PROGRAM_NAME: Cannot cd to $chartsd" && return 1 + [ $? -ne 0 ] && error "cannot cd to $chartsd" && return 1 ls *.chart.sh | sed "s/\.chart\.sh$//g" } @@ -316,6 +397,8 @@ all_enabled_charts() { for chart in $( all_charts ) do + MODULE_NAME="${chart}" + eval "enabled=\$$chart" if [ -z "${enabled}" ] then @@ -327,35 +410,38 @@ all_enabled_charts() { if [ ! "${enabled}" = "${required}" ] then - echo >&2 "$PROGRAM_NAME: '$chart' is NOT enabled. Add a line with $chart=$required in $myconfig to enable it (or remove the line that disables it)." + info "is disabled. Add a line with $chart=$required in $myconfig to enable it (or remove the line that disables it)." else - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: '$chart' is enabled." + debug "is enabled for auto-detection." local charts="$charts $chart" fi done + MODULE_NAME="main" local charts2= for chart in $charts do + MODULE_NAME="${chart}" + # check the enabled charts local check="$( cat "$chartsd/$chart.chart.sh" | sed "s/^ \+//g" | grep "^$chart$charts_check()" )" if [ -z "$check" ] then - echo >&2 "$PROGRAM_NAME: chart '$chart' does not seem to have a $chart$charts_check() function. Disabling it." + error "module '$chart' does not seem to have a $chart$charts_check() function. Disabling it." continue fi local create="$( cat "$chartsd/$chart.chart.sh" | sed "s/^ \+//g" | grep "^$chart$charts_create()" )" if [ -z "$create" ] then - echo >&2 "$PROGRAM_NAME: chart '$chart' does not seem to have a $chart$charts_create() function. Disabling it." + error "module '$chart' does not seem to have a $chart$charts_create() function. Disabling it." continue fi local update="$( cat "$chartsd/$chart.chart.sh" | sed "s/^ \+//g" | grep "^$chart$charts_update()" )" if [ -z "$update" ] then - echo >&2 "$PROGRAM_NAME: chart '$chart' does not seem to have a $chart$charts_update() function. Disabling it." + error "module '$chart' does not seem to have a $chart$charts_update() function. Disabling it." continue fi @@ -364,7 +450,7 @@ all_enabled_charts() { #then # if [ ! -z "$( cat "$confd/$chart.conf" | sed "s/^ \+//g" | grep -v "^$" | grep -v "^#" | grep -v "^$chart$charts_undescore" )" ] # then - # echo >&2 "$PROGRAM_NAME: chart's $chart config $confd/$chart.conf should only have lines starting with $chart$charts_undescore . Disabling it." + # error "module's $chart config $confd/$chart.conf should only have lines starting with $chart$charts_undescore . Disabling it." # continue # fi #fi @@ -374,19 +460,19 @@ all_enabled_charts() { # "$pluginsd/charts.d.dryrun-helper.sh" "$chart" "$chartsd/$chart.chart.sh" "$confd/$chart.conf" >/dev/null # if [ $? -ne 0 ] # then - # echo >&2 "$PROGRAM_NAME: chart's $chart did not pass the dry run check. This means it uses global variables not starting with $chart. Disabling it." + # error "module's $chart did not pass the dry run check. This means it uses global variables not starting with $chart. Disabling it." # continue # fi #fi local charts2="$charts2 $chart" done + MODULE_NAME="main" echo $charts2 - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: enabled charts: $charts2" + debug "enabled charts: $charts2" } - # ----------------------------------------------------------------------------- # load the charts @@ -394,19 +480,22 @@ suffix_update_every="_update_every" active_charts= for chart in $( all_enabled_charts ) do - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: loading chart: '$chartsd/$chart.chart.sh'" + MODULE_NAME="${chart}" + + debug "loading module: '$chartsd/$chart.chart.sh'" + . "$chartsd/$chart.chart.sh" - if [ -f "$confd/charts.d/$chart.conf" ] + if [ -f "$confd/$PROGRAM_NAME/$chart.conf" ] then - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: loading chart options: '$confd/charts.d/$chart.conf'" - . "$confd/charts.d/$chart.conf" + debug "loading module configuration: '$confd/$PROGRAM_NAME/$chart.conf'" + . "$confd/$PROGRAM_NAME/$chart.conf" elif [ -f "$confd/$chart.conf" ] then - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: loading chart options: '$confd/$chart.conf'" + debug "loading module configuration: '$confd/$chart.conf'" . "$confd/$chart.conf" else - echo >&2 "$PROGRAM_NAME: $chart: configuration file '$confd/charts.d/$chart.conf' not found. Using defaults." + warning "configuration file '$confd/$PROGRAM_NAME/$chart.conf' not found. Using defaults." fi eval "dt=\$$chart$suffix_update_every" @@ -419,13 +508,14 @@ do $chart$charts_check if [ $? -eq 0 ] then - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: '$chart' activated" + debug "module '$chart' activated" active_charts="$active_charts $chart" else - echo >&2 "$PROGRAM_NAME: chart '$chart' check() function reports failure." + error "module's '$chart' check() function reports failure." fi done -[ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: activated charts: $active_charts" +MODULE_NAME="main" +debug "activated modules: $active_charts" # ----------------------------------------------------------------------------- @@ -438,7 +528,7 @@ test $debug -eq 1 && debug_time=tellwork # if we only need a specific chart, remove all the others if [ ! -z "${chart_only}" ] then - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: requested to run only for: '${chart_only}'" + debug "requested to run only for: '${chart_only}'" check_charts= for chart in $active_charts do @@ -450,41 +540,19 @@ then done active_charts="$check_charts" fi -[ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: activated charts: $active_charts" +debug "activated charts: $active_charts" # stop if we just need a pre-check if [ $check -eq 1 ] then - echo >&2 "CHECK RESULT" - echo >&2 "Will run the charts: $active_charts" + info "CHECK RESULT" + info "Will run the charts: $active_charts" exit 0 fi # ----------------------------------------------------------------------------- -# create temp dir - -TMP_DIR= -chartsd_cleanup() { - cd /tmp - if [ ! -z "$TMP_DIR" -a -d "$TMP_DIR" ] - then - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: cleaning up temporary directory $TMP_DIR ..." - rm -rf "$TMP_DIR" - fi - exit 0 -} -trap chartsd_cleanup EXIT -trap chartsd_cleanup SIGHUP -trap chartsd_cleanup INT -if [ $UID = "0" ] -then - TMP_DIR="$( mktemp -d /var/run/netdata-${PROGRAM_NAME}-XXXXXXXXXX )" -else - TMP_DIR="$( mktemp -d /tmp/.netdata-${PROGRAM_NAME}-XXXXXXXXXX )" -fi - -cd "$TMP_DIR" || exit 1 +cd "${TMP_DIR}" || exit 1 # ----------------------------------------------------------------------------- # create charts @@ -492,28 +560,26 @@ cd "$TMP_DIR" || exit 1 run_charts= for chart in $active_charts do - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: Calling '$chart$charts_create()'..." + MODULE_NAME="${chart}" + + debug "calling '$chart$charts_create()'..." $chart$charts_create if [ $? -eq 0 ] then run_charts="$run_charts $chart" - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: '$chart' has initialized." + debug "'$chart' initialized." else - echo >&2 "$PROGRAM_NAME: chart '$chart' function '$chart$charts_create()' reports failure." + error "module's '$chart' function '$chart$charts_create()' reports failure." fi done -[ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: run_charts='$run_charts'" +MODULE_NAME="main" +debug "run_charts='$run_charts'" # ----------------------------------------------------------------------------- # update dimensions -if [ -z "$run_charts" ] - then - echo >&2 "$PROGRAM_NAME: No charts to collect data from." - echo "DISABLE" - exit 1 -fi +[ -z "$run_charts" ] && fatal "No charts to collect data from." declare -A charts_last_update=() charts_update_every=() charts_next_update=() charts_run_counter=() charts_serial_failures=() global_update() { @@ -552,12 +618,12 @@ global_update() { for chart in "${now_charts[@]}" do - #echo >&2 " DEBUG: chart: $chart last: ${charts_last_update[$chart]}, next: ${charts_next_update[$chart]}, now: ${now_ms}" + MODULE_NAME="${chart}" + if [ ${now_ms} -ge ${charts_next_update[$chart]} ] then last_ms=${charts_last_update[$chart]} dt=$(( (now_ms - last_ms) )) - #echo >&2 " DEBUG: chart: $chart last: ${charts_last_update[$chart]}, next: ${charts_next_update[$chart]}, now: ${now_ms}, dt: ${dt}" charts_last_update[$chart]=${now_ms} @@ -576,7 +642,6 @@ global_update() { fi exec_start_ms=$now_ms - #echo >&2 " EXEC: $chart$charts_update $dt" $chart$charts_update $dt ret=$? @@ -596,9 +661,9 @@ global_update() { if [ ${charts_serial_failures[$chart]} -gt 10 ] then - echo >&2 "$PROGRAM_NAME: chart '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it." + error "module's '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it." else - echo >&2 "$PROGRAM_NAME: chart '$chart' update() function reports failure. Will keep trying for a while." + error "module's '$chart' update() function reports failure. Will keep trying for a while." next_charts+=($chart) fi fi @@ -606,6 +671,7 @@ global_update() { next_charts+=($chart) fi done + MODULE_NAME="${chart}" # wait the time you are required to next_ms=$((now_ms + (update_every * 1000 * 100) )) @@ -625,18 +691,17 @@ global_update() { millis="0${millis}" fi - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: sleeping for ${seconds}.${millis} seconds." + debug "sleeping for ${seconds}.${millis} seconds." ${mysleep} ${seconds}.${millis} else - [ $debug -eq 1 ] && echo >&2 "$PROGRAM_NAME: sleeping for ${update_every} seconds." + debug "sleeping for ${update_every} seconds." ${mysleep} $update_every fi test ${now_ms} -ge ${exit_at} && exit 0 done - echo >&2 "$PROGRAM_NAME: Nothing left to do. Disabling charts.d.plugin." - echo "DISABLE" + fatal "nothing left to do, exiting..." } global_update diff --git a/plugins.d/fping.plugin b/plugins.d/fping.plugin new file mode 100755 index 000000000..d523f4474 --- /dev/null +++ b/plugins.d/fping.plugin @@ -0,0 +1,178 @@ +#!/usr/bin/env bash + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# GPL v3+ +# +# This plugin requires a latest version of fping. +# You can compile it from source, by running me with option: install + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +if [ "${1}" = "install" ] + then + [ "${UID}" != 0 ] && echo >&2 "Please run me as root. This will install a single binary file: /usr/local/bin/fping." && exit 1 + + run() { + printf >&2 " > " + printf >&2 "%q " "${@}" + printf >&2 "\n" + "${@}" || exit 1 + } + + [ ! -d /usr/src ] && run mkdir -p /usr/src + [ ! -d /usr/local/bin ] && run mkdir -p /usr/local/bin + + run cd /usr/src + + if [ -d fping-ktsaou.git ] + then + run cd fping-ktsaou.git + run git pull + else + run git clone https://github.com/ktsaou/fping.git fping-ktsaou.git + run cd fping-ktsaou.git + fi + + run ./autogen.sh + run ./configure --prefix=/usr/local + run make clean + run make + if [ -f /usr/local/bin/fping ] + then + run mv -f /usr/local/bin/fping /usr/local/bin/fping.old + fi + run mv src/fping /usr/local/bin/fping + run chown root:root /usr/local/bin/fping + run chmod 4755 /usr/local/bin/fping + echo >&2 + echo >&2 "All done, you have a compatible fping now at /usr/local/bin/fping." + echo >&2 + + fping="$(which fping 2>/dev/null || command -v fping 2>/dev/null)" + if [ "${fping}" != "/usr/local/bin/fping" ] + then + echo >&2 "You have another fping installed at: ${fping}." + echo >&2 "Please set:" + echo >&2 + echo >&2 " fping=\"/usr/local/bin/fping\"" + echo >&2 + echo >&2 "at /etc/netdata/fping.conf" + echo >&2 + fi + exit 0 +fi + +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + echo "DISABLE" + exit 1 +} + +debug=0 +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- + +# store in ${plugin} the name we run under +# this allows us to copy/link fping.plugin under a different name +# to have multiple fping plugins running with different settings +plugin="${PROGRAM_NAME/.plugin/}" + + +# ----------------------------------------------------------------------------- + +# the frequency to send info to netdata +# passed by netdata as the first parameter +update_every="${1-1}" + +# the netdata configuration directory +# passed by netdata as an environment variable +NETDATA_CONFIG_DIR="${NETDATA_CONFIG_DIR-/etc/netdata}" + +# ----------------------------------------------------------------------------- +# configuration options +# can be overwritten at /etc/netdata/fping.conf + +# the fping binary to use +# we need one that can output netdata friendly info (supporting: -N) +# if you have multiple versions, put here the full filename of the right one +fping="$( which fping 2>/dev/null || command -v fping 2>/dev/null )" + +# a space separated list of hosts to fping +# we suggest to put names here and the IPs of these names in /etc/hosts +hosts="" + +# the time in milliseconds (1 sec = 1000 ms) +# to ping the hosts - by default 5 pings per host per iteration +ping_every="$((update_every * 1000 / 5))" + +# fping options +fping_opts="-R -b 56 -i 1 -r 0 -t 5000" + +# ----------------------------------------------------------------------------- +# load the configuration file + +if [ ! -f "${NETDATA_CONFIG_DIR}/${plugin}.conf" ] +then + fatal "configuration file '${NETDATA_CONFIG_DIR}/${plugin}.conf' not found - nothing to do." +fi + +source "${NETDATA_CONFIG_DIR}/${plugin}.conf" + +if [ -z "${hosts}" ] +then + fatal "no hosts configued in '${NETDATA_CONFIG_DIR}/${plugin}.conf' - nothing to do." +fi + +if [ -z "${fping}" -o ! -x "${fping}" ] +then + fatal "command '${fping}' is not found or is not executable - cannot proceed." +fi + +if [ ${ping_every} -lt 20 ] + then + warning "ping every was set to ${ping_every} but 20 is the minimum for non-root users. Setting it to 20 ms." + ping_every=20 +fi + +# the fping options we will use +options=( -N -l -Q ${update_every} -p ${ping_every} ${fping_opts} ${hosts} ) + +# execute fping +exec "${fping}" "${options[@]}" + +# if we cannot execute fping, stop +fatal "command '${fping} ${options[@]}' failed to be executed." diff --git a/plugins.d/loopsleepms.sh.inc b/plugins.d/loopsleepms.sh.inc index 6de93043c..ef3db192d 100644 --- a/plugins.d/loopsleepms.sh.inc +++ b/plugins.d/loopsleepms.sh.inc @@ -1,6 +1,6 @@ # no need for shebang - this file is included from other scripts -LOOPSLEEP_DATE="$(which date)" +LOOPSLEEP_DATE="$(which date 2>/dev/null || command -v date 2>/dev/null)" if [ -z "$LOOPSLEEP_DATE" ] then echo >&2 "$0: ERROR: Cannot find the command 'date' in the system path." @@ -139,7 +139,7 @@ loopsleepms() { # calculate ms since last run [ ${LOOPSLEEPMS_LASTRUN} -gt 0 ] && \ - LOOPSLEEPMS_LASTWORK=$((now_ms - LOOPSLEEPMS_LASTRUN - LOOPSLEEPMS_LASTSLEEP)) + LOOPSLEEPMS_LASTWORK=$((now_ms - LOOPSLEEPMS_LASTRUN - LOOPSLEEPMS_LASTSLEEP + current_time_ms_accuracy)) # echo "# last loop's work took $LOOPSLEEPMS_LASTWORK ms" # remember this run diff --git a/plugins.d/node.d.plugin b/plugins.d/node.d.plugin index 21b04384e..8b7047fcb 100755 --- a/plugins.d/node.d.plugin +++ b/plugins.d/node.d.plugin @@ -8,6 +8,11 @@ // Then, the second line, finds nodejs or node or js in the system path // and executes it with the shell parameters. +// netdata +// real-time performance and health monitoring, done right! +// (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +// GPL v3+ + // -------------------------------------------------------------------------------------------------------------------- 'use strict'; @@ -30,6 +35,7 @@ process.mainModule.paths.unshift(NODE_D_DIR); var fs = require('fs'); var url = require('url'); +var util = require('util'); var http = require('http'); var path = require('path'); var extend = require('extend'); @@ -61,8 +67,6 @@ extend(true, netdata.options, { update_every: NETDATA_UPDATE_EVERY, - exit_after_ms: 3600 * 4 * 1000, - paths: { plugins: NETDATA_PLUGINS_DIR, config: NETDATA_CONFIG_DIR, @@ -79,8 +83,15 @@ netdata.options.config_filename = pluginConfig(__filename); try { netdata.options_loaded = JSON.parse(fs.readFileSync(netdata.options.config_filename, 'utf8')); extend(true, netdata.options, netdata.options_loaded); - console.error('merged netdata object:'); - console.error(netdata); + + if(!netdata.options.paths.plugins) + netdata.options.paths.plugins = NETDATA_PLUGINS_DIR; + + if(!netdata.options.paths.config) + netdata.options.paths.config = NETDATA_CONFIG_DIR; + + // console.error('merged netdata object:'); + // console.error(util.inspect(netdata, {depth: 10})); } catch(e) { netdata.error('Cannot read configuration file ' + netdata.options.config_filename + ': ' + e.message + ', using internal defaults.'); diff --git a/plugins.d/python.d.plugin b/plugins.d/python.d.plugin index 5e81fb263..b4e6473a6 100755 --- a/plugins.d/python.d.plugin +++ b/plugins.d/python.d.plugin @@ -9,6 +9,7 @@ import os import sys import time import threading +from re import sub # ----------------------------------------------------------------------------- # globals & environment setup @@ -28,6 +29,7 @@ sys.path.append(MODULES_DIR + "python_modules") PROGRAM = os.path.basename(__file__).replace(".plugin", "") DEBUG_FLAG = False +TRACE_FLAG = False OVERRIDE_UPDATE_EVERY = False # ----------------------------------------------------------------------------- @@ -292,8 +294,8 @@ class PythonCharts(object): if job.name is not None and len(job.name) != 0: prefix += "/" + job.name try: + msg.error("DISABLED:", prefix) self.jobs.remove(job) - msg.info("Disabled", prefix) except Exception as e: msg.debug("This shouldn't happen. NO " + prefix + " IN LIST:" + str(self.jobs) + " ERROR: " + str(e)) @@ -332,21 +334,21 @@ class PythonCharts(object): job = self.jobs[i] try: if not job.check(): - msg.error(job.chart_name, "check function failed.") + msg.error(job.chart_name, "check() failed - disabling job") self._stop(job) else: - msg.debug(job.chart_name, "check succeeded") + msg.info("CHECKED OK:", job.chart_name) i += 1 try: if job.override_name is not None: - new_name = job.__module__ + '_' + job.override_name + new_name = job.__module__ + '_' + sub(r'\s+', '_', job.override_name) if new_name in overridden: - msg.error(job.override_name + " already exists. Stopping '" + job.name + "'") + msg.info("DROPPED:", job.name, ", job '" + job.override_name + "' is already served by another job.") self._stop(job) i -= 1 else: job.name = job.override_name - msg.debug(job.chart_name + " changing chart name to: '" + new_name + "'") + msg.info("RENAMED:", new_name, ", from " + job.chart_name) job.chart_name = new_name overridden.append(job.chart_name) except Exception: @@ -435,7 +437,7 @@ def parse_cmdline(directory, *commands): :param commands: list of str :return: dict """ - global DEBUG_FLAG + global DEBUG_FLAG, TRACE_FLAG global OVERRIDE_UPDATE_EVERY global BASE_CONFIG @@ -447,6 +449,8 @@ def parse_cmdline(directory, *commands): elif cmd == "debug" or cmd == "all": DEBUG_FLAG = True # redirect stderr to stdout? + elif cmd == "trace" or cmd == "all": + TRACE_FLAG = True elif os.path.isfile(directory + cmd + ".chart.py") or os.path.isfile(directory + cmd): #DEBUG_FLAG = True mods.append(cmd.replace(".chart.py", "")) @@ -470,14 +474,14 @@ def run(): """ Main program. """ - global DEBUG_FLAG, BASE_CONFIG + global DEBUG_FLAG, TRACE_FLAG, BASE_CONFIG # read configuration file disabled = [] configfile = CONFIG_DIR + "python.d.conf" msg.PROGRAM = PROGRAM msg.info("reading configuration file:", configfile) - log_counter = 200 + log_throttle = 200 log_interval = 3600 conf = read_config(configfile) @@ -488,23 +492,33 @@ def run(): msg.fatal('disabled in configuration file.\n') except (KeyError, TypeError): pass + try: for param in BASE_CONFIG: BASE_CONFIG[param] = conf[param] except (KeyError, TypeError): pass # use default update_every from NETDATA_UPDATE_EVERY + try: DEBUG_FLAG = conf['debug'] except (KeyError, TypeError): pass + try: - log_counter = conf['logs_per_interval'] + TRACE_FLAG = conf['trace'] except (KeyError, TypeError): pass + + try: + log_throttle = conf['logs_per_interval'] + except (KeyError, TypeError): + pass + try: log_interval = conf['log_interval'] except (KeyError, TypeError): pass + for k, v in conf.items(): if k in ("update_every", "debug", "enabled"): continue @@ -514,8 +528,11 @@ def run(): # parse passed command line arguments modules = parse_cmdline(MODULES_DIR, *sys.argv) msg.DEBUG_FLAG = DEBUG_FLAG - msg.LOG_COUNTER = log_counter + msg.TRACE_FLAG = TRACE_FLAG + msg.LOG_THROTTLE = log_throttle msg.LOG_INTERVAL = log_interval + msg.LOG_COUNTER = 0 + msg.LOG_NEXT_CHECK = 0 msg.info("MODULES_DIR='" + MODULES_DIR + "', CONFIG_DIR='" + CONFIG_DIR + "', UPDATE_EVERY=" + str(BASE_CONFIG['update_every']) + diff --git a/plugins.d/tc-qos-helper.sh b/plugins.d/tc-qos-helper.sh index 9caef85f7..e9253c8f2 100755 --- a/plugins.d/tc-qos-helper.sh +++ b/plugins.d/tc-qos-helper.sh @@ -1,16 +1,64 @@ #!/usr/bin/env bash +# netdata +# real-time performance and health monitoring, done right! +# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# GPL v3+ +# +# This script is a helper to allow netdata collect tc data. +# tc output parsing has been implemented in C, inside netdata +# This script allows setting names to dimensions. + export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C PROGRAM_FILE="$0" PROGRAM_NAME="$(basename $0)" PROGRAM_NAME="${PROGRAM_NAME/.plugin}" +# ----------------------------------------------------------------------------- + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +debug=0 +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- + plugins_dir="${NETDATA_PLUGINS_DIR}" [ -z "$plugins_dir" ] && plugins_dir="$( dirname $PROGRAM_FILE )" config_dir=${NETDATA_CONFIG_DIR-/etc/netdata} -tc="$(which tc 2>/dev/null)" +tc="$(which tc 2>/dev/null || command -v tc 2>/dev/null)" fireqos_run_dir="/var/run/fireqos" qos_get_class_names_every=120 qos_exit_every=3600 @@ -39,25 +87,37 @@ loopsleepms() { if [ -z "${tc}" -o ! -x "${tc}" ] then - echo >&2 "${PROGRAM_NAME}: Cannot find command 'tc' in this system." - exit 1 + fatal "cannot find command 'tc' in this system." fi -devices= +tc_devices= fix_names= setclassname() { echo "SETCLASSNAME $3 $2" } -show_tc() { - local x="${1}" interface_dev interface_classes interface_classes_monitor +show_tc_cls() { + local x="${1}" - echo "BEGIN ${x}" - ${tc} -s class show dev ${x} + if [ -f /etc/iproute2/tc_cls ] + then + local classid name rest + while read classid name rest + do + [ -z "${classid}" -o -z "${name}" -o "${classid}" = "#" -o "${name}" = "#" -o "${classid:0:1}" = "#" -o "${name:0:1}" = "#" ] && continue + setclassname "" "${name}" "${classid}" + done </etc/iproute2/tc_cls + return 0 + fi - # check FireQOS names for classes - if [ ! -z "${fix_names}" -a -f "${fireqos_run_dir}/ifaces/${x}" ] + return 1 +} + +show_fireqos_names() { + local x="${1}" name n interface_dev interface_classes interface_classes_monitor + + if [ -f "${fireqos_run_dir}/ifaces/${x}" ] then name="$(<"${fireqos_run_dir}/ifaces/${x}")" echo "SETDEVICENAME ${name}" @@ -71,15 +131,50 @@ show_tc() { setclassname ${n//|/ } done [ ! -z "${interface_dev}" ] && echo "SETDEVICEGROUP ${interface_dev}" + + return 0 fi + + return 1 +} + +show_tc() { + local x="${1}" + + echo "BEGIN ${x}" + + # netdata can parse the output of tc + ${tc} -s class show dev ${x} + + # check FireQOS names for classes + if [ ! -z "${fix_names}" ] + then + show_fireqos_names "${x}" || show_tc_cls "${x}" + fi + echo "END ${x}" } -all_devices() { - cat /proc/net/dev | grep ":" | cut -d ':' -f 1 | while read dev +find_tc_devices() { + local count=0 devs= dev rest l + + # find all the devices in the system + # without forking + while IFS=":| " read dev rest + do + count=$((count + 1)) + [ ${count} -le 2 ] && continue + devs="${devs} ${dev}" + done </proc/net/dev + + # from all the devices find the ones + # that have QoS defined + # unfortunately, one fork per device cannot be avoided + tc_devices= + for dev in ${devs} do - l=$(${tc} class show dev ${dev} | wc -l) - [ $l -ne 0 ] && echo ${dev} + l="$(${tc} class show dev ${dev} 2>/dev/null)" + [ ! -z "${l}" ] && tc_devices="${tc_devices} ${dev}" done } @@ -103,10 +198,10 @@ do then c=1 fix_names="YES" - devices="$( all_devices )" + find_tc_devices fi - for d in ${devices} + for d in ${tc_devices} do show_tc ${d} done |