#!/bin/sh
# ctdb event script for checking local file system utilization
#
# On the "monitor" event this script checks filesystem utilization and
# system memory utilization against configurable thresholds:
#
#   CTDB_MONITOR_FILESYSTEM_USAGE
#       Whitespace-separated list of
#       <fs_mount>:<fs_warn_threshold>[:<fs_unhealthy_threshold>]
#   CTDB_MONITOR_MEMORY_USAGE
#       <warn_threshold>[:<unhealthy_threshold>], defaults to 80
#
# All thresholds are integer percentages in the range 0-100.

[ -n "$CTDB_BASE" ] ||
	CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")

. "${CTDB_BASE}/functions"

load_script_options

ctdb_setup_state_dir "service" "system-monitoring"

# Check that a threshold is an integer percentage between 0 and 100.
#
# Arguments: $1 - candidate percentage
#            $2 - name of the check, only used in the warning message
# Returns:   0 if $1 is valid; 1 otherwise.  An empty $1 is rejected
#            silently, anything else invalid also prints a warning.
validate_percentage()
{
	case "$1" in
	"") return 1 ;; # A failure that doesn't need a warning
	[0-9] | [0-9][0-9] | 100) return 0 ;;
	*)
		echo "WARNING: ${1} is an invalid percentage in \"${2}\" check"
		return 1
		;;
	esac
}

# Compare a utilization figure against warning/unhealthy thresholds.
#
# Arguments: $1 - description of the thing being checked (used in
#                 messages and to derive the cache file name)
#            $2 - thresholds: <warn>[:<unhealthy>]
#            $3 - current utilization (integer percentage)
#            $4 - optional command string, eval'ed when utilization
#                 first crosses the unhealthy threshold
#
# The last reported utilization is kept in a per-thing cache file so
# that an unchanged warning is not repeated and the unhealthy callout
# only runs on the transition into the unhealthy state.
#
# NOTE: This exits the whole script with status 1 when the unhealthy
# threshold is reached.
check_thresholds()
{
	_thing="$1"
	_thresholds="$2"
	_usage="$3"
	_unhealthy_callout="$4"

	case "$_thresholds" in
	*:*)
		_warn_threshold="${_thresholds%:*}"
		_unhealthy_threshold="${_thresholds#*:}"
		;;
	*)
		_warn_threshold="$_thresholds"
		_unhealthy_threshold=""
		;;
	esac

	# Make the description safe to use as part of a file name
	_t=$(echo "$_thing" | sed -e 's@/@SLASH_@g' -e 's@ @_@g')
	# script_state_dir set by ctdb_setup_state_dir()
	# shellcheck disable=SC2154
	_cache="${script_state_dir}/cache_${_t}"
	if [ -r "$_cache" ]; then
		read -r _prev <"$_cache"
	else
		_prev=0
	fi
	# An empty or corrupt cache file must not break the integer
	# comparison below (which would silently skip the unhealthy
	# callout), so treat it like a missing cache.
	case "$_prev" in
	"" | *[!0-9]*) _prev=0 ;;
	esac

	if validate_percentage "$_unhealthy_threshold" "$_thing"; then
		if [ "$_usage" -ge "$_unhealthy_threshold" ]; then
			printf 'ERROR: %s utilization %d%% >= threshold %d%%\n' \
				"$_thing" \
				"$_usage" \
				"$_unhealthy_threshold"
			# Only run unhealthy callout if passing the
			# unhealthy threshold.  That is, if the
			# previous usage was below the threshold.
			if [ "$_prev" -lt "$_unhealthy_threshold" ]; then
				eval "$_unhealthy_callout"
			fi
			echo "$_usage" >"$_cache"
			exit 1
		fi
	fi

	if validate_percentage "$_warn_threshold" "$_thing"; then
		if [ "$_usage" -ge "$_warn_threshold" ]; then
			# Don't repeat the warning if nothing has changed
			if [ "$_usage" = "$_prev" ]; then
				return
			fi
			printf 'WARNING: %s utilization %d%% >= threshold %d%%\n' \
				"$_thing" \
				"$_usage" \
				"$_warn_threshold"
			echo "$_usage" >"$_cache"
		else
			# No cache means no previous warning, so there
			# is no recovery to report
			if [ ! -r "$_cache" ]; then
				return
			fi
			printf 'NOTICE: %s utilization %d%% < threshold %d%%\n' \
				"$_thing" \
				"$_usage" \
				"$_warn_threshold"
			rm -f "$_cache"
		fi
	fi
}

# Set CTDB_MONITOR_FILESYSTEM_USAGE to a default that warns at 90% for
# each filesystem containing a database directory.  The computed
# default is cached in the script state directory and reused on
# subsequent calls.
set_monitor_filsystem_usage_defaults()
{
	_fs_defaults_cache="${script_state_dir}/cache_filsystem_usage_defaults"

	if [ ! -r "$_fs_defaults_cache" ]; then
		# Determine filesystem for each database directory, generate
		# an entry to warn at 90%, de-duplicate entries, put all items
		# on 1 line (so the read below gets everything)
		for _t in "${CTDB_DBDIR:-${CTDB_VARDIR}}" \
			"${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" \
			"${CTDB_DBDIR_STATE:-${CTDB_VARDIR}/state}"; do
			df -kP "$_t" | awk 'NR == 2 { printf "%s:90\n", $6 }'
		done | sort -u | xargs >"$_fs_defaults_cache"
	fi

	read -r CTDB_MONITOR_FILESYSTEM_USAGE <"$_fs_defaults_cache"
}

# Check utilization of every filesystem listed in
# CTDB_MONITOR_FILESYSTEM_USAGE, falling back to defaults derived from
# the database directories when that variable is empty.
monitor_filesystem_usage()
{
	if [ -z "$CTDB_MONITOR_FILESYSTEM_USAGE" ]; then
		set_monitor_filsystem_usage_defaults
	fi

	# Check each specified filesystem, specified in format
	# <fs_mount>:<fs_warn_threshold>[:<fs_unhealthy_threshold>]
	for _fs in $CTDB_MONITOR_FILESYSTEM_USAGE; do
		_fs_mount="${_fs%%:*}"
		_fs_thresholds="${_fs#*:}"

		if [ ! -d "$_fs_mount" ]; then
			echo "WARNING: Directory ${_fs_mount} does not exist"
			continue
		fi

		# Get current utilization
		_fs_usage=$(df -kP "$_fs_mount" |
			sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p')
		if [ -z "$_fs_usage" ]; then
			printf 'WARNING: Unable to get FS utilization for %s\n' \
				"$_fs_mount"
			continue
		fi

		check_thresholds "Filesystem ${_fs_mount}" \
			"$_fs_thresholds" \
			"$_fs_usage"
	done
}

# Unhealthy callout for memory monitoring: output memory information
# and the process list for diagnosis.
#
# NOTE(review): get_proc and set_proc come from the sourced functions
# file - presumably they read/write the named file under /proc, so the
# last line would ask the kernel to log memory info via sysrq; confirm
# against that file.
dump_memory_info()
{
	get_proc "meminfo"
	ps auxfww
	set_proc "sysrq-trigger" "m"
}

# Check combined memory + swap utilization against
# CTDB_MONITOR_MEMORY_USAGE, running dump_memory_info when the
# unhealthy threshold is first crossed.
monitor_memory_usage()
{
	# Defaults
	if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ]; then
		CTDB_MONITOR_MEMORY_USAGE=80
	fi

	_meminfo=$(get_proc "meminfo")

	# Free memory is taken from MemAvailable when it is reported
	# (non-zero), otherwise it is approximated as
	# MemFree + Cached + Buffers.  The result is the used percentage
	# of memory plus swap, or 0 if no totals were found.
	_mem_usage=$(echo "$_meminfo" | awk '
$1 == "MemAvailable:" { memavail += $2 }
$1 == "MemFree:"      { memfree  += $2 }
$1 == "Cached:"       { memfree  += $2 }
$1 == "Buffers:"      { memfree  += $2 }
$1 == "MemTotal:"     { memtotal  = $2 }
$1 == "SwapFree:"     { swapfree  = $2 }
$1 == "SwapTotal:"    { swaptotal = $2 }
END {
    if (memavail != 0) { memfree = memavail ; }
    if (memtotal + swaptotal != 0) {
        usedtotal = memtotal - memfree + swaptotal - swapfree
        print int(usedtotal / (memtotal + swaptotal) * 100)
    } else {
        print 0
    }
}')

	check_thresholds "System memory" \
		"$CTDB_MONITOR_MEMORY_USAGE" \
		"$_mem_usage" \
		dump_memory_info
}

case "$1" in
monitor)
	# Load/cache database options from configuration file
	ctdb_get_db_options

	# check_thresholds exits with status 1 as soon as an unhealthy
	# threshold is reached, so the memory check only runs if no
	# filesystem is unhealthy.
	rc=0
	monitor_filesystem_usage || rc=$?
	monitor_memory_usage || rc=$?
	exit $rc
	;;
esac

exit 0