diff options
Diffstat (limited to 'ctdb/config/events/legacy/05.system.script')
-rwxr-xr-x | ctdb/config/events/legacy/05.system.script | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/ctdb/config/events/legacy/05.system.script b/ctdb/config/events/legacy/05.system.script new file mode 100755 index 0000000..56a07c7 --- /dev/null +++ b/ctdb/config/events/legacy/05.system.script @@ -0,0 +1,196 @@ +#!/bin/sh +# ctdb event script for checking local file system utilization + +[ -n "$CTDB_BASE" ] || + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +ctdb_setup_state_dir "service" "system-monitoring" + +validate_percentage() +{ + case "$1" in + "") return 1 ;; # A failure that doesn't need a warning + [0-9] | [0-9][0-9] | 100) return 0 ;; + *) + echo "WARNING: ${1} is an invalid percentage in \"${2}\" check" + return 1 + ;; + esac +} + +check_thresholds() +{ + _thing="$1" + _thresholds="$2" + _usage="$3" + _unhealthy_callout="$4" + + case "$_thresholds" in + *:*) + _warn_threshold="${_thresholds%:*}" + _unhealthy_threshold="${_thresholds#*:}" + ;; + *) + _warn_threshold="$_thresholds" + _unhealthy_threshold="" + ;; + esac + + _t=$(echo "$_thing" | sed -e 's@/@SLASH_@g' -e 's@ @_@g') + # script_state_dir set by ctdb_setup_state_dir() + # shellcheck disable=SC2154 + _cache="${script_state_dir}/cache_${_t}" + if [ -r "$_cache" ]; then + read -r _prev <"$_cache" + else + _prev=0 + fi + if validate_percentage "$_unhealthy_threshold" "$_thing"; then + if [ "$_usage" -ge "$_unhealthy_threshold" ]; then + printf 'ERROR: %s utilization %d%% >= threshold %d%%\n' \ + "$_thing" \ + "$_usage" \ + "$_unhealthy_threshold" + # Only run unhealthy callout if passing the + # unhealthy threshold. That is, if the + # previous usage was below the threshold. + if [ "$_prev" -lt "$_unhealthy_threshold" ]; then + eval "$_unhealthy_callout" + fi + echo "$_usage" >"$_cache" + exit 1 + fi + fi + + if validate_percentage "$_warn_threshold" "$_thing"; then + if [ "$_usage" -ge "$_warn_threshold" ]; then + if [ "$_usage" = "$_prev" ]; then + return + fi + printf 'WARNING: %s utilization %d%% >= threshold %d%%\n' \ + "$_thing" \ + "$_usage" \ + "$_warn_threshold" + echo "$_usage" >"$_cache" + else + if [ ! -r "$_cache" ]; then + return + fi + printf 'NOTICE: %s utilization %d%% < threshold %d%%\n' \ + "$_thing" \ + "$_usage" \ + "$_warn_threshold" + rm -f "$_cache" + fi + fi +} + +set_monitor_filsystem_usage_defaults() +{ + _fs_defaults_cache="${script_state_dir}/cache_filsystem_usage_defaults" + + if [ ! -r "$_fs_defaults_cache" ]; then + # Determine filesystem for each database directory, generate + # an entry to warn at 90%, de-duplicate entries, put all items + # on 1 line (so the read below gets everything) + for _t in "${CTDB_DBDIR:-${CTDB_VARDIR}}" \ + "${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" \ + "${CTDB_DBDIR_STATE:-${CTDB_VARDIR}/state}"; do + df -kP "$_t" | awk 'NR == 2 { printf "%s:90\n", $6 }' + done | sort -u | xargs >"$_fs_defaults_cache" + fi + + read -r CTDB_MONITOR_FILESYSTEM_USAGE <"$_fs_defaults_cache" +} + +monitor_filesystem_usage() +{ + if [ -z "$CTDB_MONITOR_FILESYSTEM_USAGE" ]; then + set_monitor_filsystem_usage_defaults + fi + + # Check each specified filesystem, specified in format + # <fs_mount>:<fs_warn_threshold>[:fs_unhealthy_threshold] + for _fs in $CTDB_MONITOR_FILESYSTEM_USAGE; do + _fs_mount="${_fs%%:*}" + _fs_thresholds="${_fs#*:}" + + if [ ! -d "$_fs_mount" ]; then + echo "WARNING: Directory ${_fs_mount} does not exist" + continue + fi + + # Get current utilization + _fs_usage=$(df -kP "$_fs_mount" | + sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p') + if [ -z "$_fs_usage" ]; then + printf 'WARNING: Unable to get FS utilization for %s\n' \ + "$_fs_mount" + continue + fi + + check_thresholds "Filesystem ${_fs_mount}" \ + "$_fs_thresholds" \ + "$_fs_usage" + done +} + +dump_memory_info() +{ + get_proc "meminfo" + ps auxfww + set_proc "sysrq-trigger" "m" +} + +monitor_memory_usage() +{ + # Defaults + if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ]; then + CTDB_MONITOR_MEMORY_USAGE=80 + fi + + _meminfo=$(get_proc "meminfo") + # Intentional word splitting here + # shellcheck disable=SC2046 + set -- $(echo "$_meminfo" | awk ' +$1 == "MemAvailable:" { memavail += $2 } +$1 == "MemFree:" { memfree += $2 } +$1 == "Cached:" { memfree += $2 } +$1 == "Buffers:" { memfree += $2 } +$1 == "MemTotal:" { memtotal = $2 } +$1 == "SwapFree:" { swapfree = $2 } +$1 == "SwapTotal:" { swaptotal = $2 } +END { + if (memavail != 0) { memfree = memavail ; } + if (memtotal + swaptotal != 0) { + usedtotal = memtotal - memfree + swaptotal - swapfree + print int(usedtotal / (memtotal + swaptotal) * 100) + } else { + print 0 + } +}') + _mem_usage="$1" + + check_thresholds "System memory" \ + "$CTDB_MONITOR_MEMORY_USAGE" \ + "$_mem_usage" \ + dump_memory_info +} + +case "$1" in +monitor) + # Load/cache database options from configuration file + ctdb_get_db_options + + rc=0 + monitor_filesystem_usage || rc=$? + monitor_memory_usage || rc=$? + exit $rc + ;; +esac + +exit 0 |