summary refs log tree commit diff stats
path: root/ctdb/config/events/legacy/05.system.script
diff options
context:
space:
mode:
Diffstat (limited to 'ctdb/config/events/legacy/05.system.script')
-rwxr-xr-x ctdb/config/events/legacy/05.system.script 196
1 files changed, 196 insertions, 0 deletions
diff --git a/ctdb/config/events/legacy/05.system.script b/ctdb/config/events/legacy/05.system.script
new file mode 100755
index 0000000..56a07c7
--- /dev/null
+++ b/ctdb/config/events/legacy/05.system.script
@@ -0,0 +1,196 @@
+#!/bin/sh
+# ctdb event script for checking local file system utilization
+
+[ -n "$CTDB_BASE" ] ||
+ CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
+
+. "${CTDB_BASE}/functions"
+
+load_script_options
+
+ctdb_setup_state_dir "service" "system-monitoring"
+
+# Check whether a threshold looks like a percentage.
+#
+# $1 - candidate value
+# $2 - name of the check, used only in the warning text
+#
+# Accepts one or two decimal digits, or exactly "100", and returns 0.
+# Anything else returns 1: silently when $1 is empty, otherwise after
+# printing a WARNING line on stdout.
+validate_percentage()
+{
+	# A failure that doesn't need a warning
+	if [ -z "$1" ]; then
+		return 1
+	fi
+
+	case "$1" in
+	100 | [0-9][0-9] | [0-9])
+		return 0
+		;;
+	esac
+
+	echo "WARNING: ${1} is an invalid percentage in \"${2}\" check"
+	return 1
+}
+
+check_thresholds()
+{
+ _thing="$1"
+ _thresholds="$2"
+ _usage="$3"
+ _unhealthy_callout="$4"
+
+ case "$_thresholds" in
+ *:*)
+ _warn_threshold="${_thresholds%:*}"
+ _unhealthy_threshold="${_thresholds#*:}"
+ ;;
+ *)
+ _warn_threshold="$_thresholds"
+ _unhealthy_threshold=""
+ ;;
+ esac
+
+ _t=$(echo "$_thing" | sed -e 's@/@SLASH_@g' -e 's@ @_@g')
+ # script_state_dir set by ctdb_setup_state_dir()
+ # shellcheck disable=SC2154
+ _cache="${script_state_dir}/cache_${_t}"
+ if [ -r "$_cache" ]; then
+ read -r _prev <"$_cache"
+ else
+ _prev=0
+ fi
+ if validate_percentage "$_unhealthy_threshold" "$_thing"; then
+ if [ "$_usage" -ge "$_unhealthy_threshold" ]; then
+ printf 'ERROR: %s utilization %d%% >= threshold %d%%\n' \
+ "$_thing" \
+ "$_usage" \
+ "$_unhealthy_threshold"
+ # Only run unhealthy callout if passing the
+ # unhealthy threshold. That is, if the
+ # previous usage was below the threshold.
+ if [ "$_prev" -lt "$_unhealthy_threshold" ]; then
+ eval "$_unhealthy_callout"
+ fi
+ echo "$_usage" >"$_cache"
+ exit 1
+ fi
+ fi
+
+ if validate_percentage "$_warn_threshold" "$_thing"; then
+ if [ "$_usage" -ge "$_warn_threshold" ]; then
+ if [ "$_usage" = "$_prev" ]; then
+ return
+ fi
+ printf 'WARNING: %s utilization %d%% >= threshold %d%%\n' \
+ "$_thing" \
+ "$_usage" \
+ "$_warn_threshold"
+ echo "$_usage" >"$_cache"
+ else
+ if [ ! -r "$_cache" ]; then
+ return
+ fi
+ printf 'NOTICE: %s utilization %d%% < threshold %d%%\n' \
+ "$_thing" \
+ "$_usage" \
+ "$_warn_threshold"
+ rm -f "$_cache"
+ fi
+ fi
+}
+
+# Set CTDB_MONITOR_FILESYSTEM_USAGE to a default value.
+#
+# The default is computed once and kept in a cache file under
+# script_state_dir; later calls just read that file back.
+set_monitor_filsystem_usage_defaults()
+{
+	_fs_defaults_cache="${script_state_dir}/cache_filsystem_usage_defaults"
+
+	# Generate the cache only when there is no readable one yet.
+	# For each database directory, take the last column of df's
+	# data line (the mount point) and emit "<mount>:90", i.e. a
+	# warning threshold of 90 and no unhealthy threshold.  Drop
+	# duplicates, then let xargs join everything onto 1 line so the
+	# read below gets all of the entries.
+	[ -r "$_fs_defaults_cache" ] || {
+		for _t in "${CTDB_DBDIR:-${CTDB_VARDIR}}" \
+			"${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" \
+			"${CTDB_DBDIR_STATE:-${CTDB_VARDIR}/state}"; do
+			df -kP "$_t" | awk 'NR == 2 { print $6 ":90" }'
+		done | sort -u | xargs >"$_fs_defaults_cache"
+	}
+
+	read -r CTDB_MONITOR_FILESYSTEM_USAGE <"$_fs_defaults_cache"
+}
+
+# Check utilization of every filesystem listed in
+# CTDB_MONITOR_FILESYSTEM_USAGE, a whitespace-separated list of
+# <fs_mount>:<fs_warn_threshold>[:fs_unhealthy_threshold] entries.
+# When the variable is empty, defaults are generated first.
+monitor_filesystem_usage()
+{
+	[ -n "$CTDB_MONITOR_FILESYSTEM_USAGE" ] ||
+		set_monitor_filsystem_usage_defaults
+
+	# The list is deliberately unquoted so that it splits into
+	# one word per entry
+	for _fs in $CTDB_MONITOR_FILESYSTEM_USAGE; do
+		# Everything before the first ":" is the mount point,
+		# everything after it is the threshold specification
+		_fs_mount="${_fs%%:*}"
+		_fs_thresholds="${_fs#*:}"
+
+		[ -d "$_fs_mount" ] || {
+			echo "WARNING: Directory ${_fs_mount} does not exist"
+			continue
+		}
+
+		# Current utilization: the digits in front of the "%"
+		# in df's output
+		_fs_usage=$(df -kP "$_fs_mount" |
+			sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p')
+		case "$_fs_usage" in
+		"")
+			printf 'WARNING: Unable to get FS utilization for %s\n' \
+				"$_fs_mount"
+			continue
+			;;
+		esac
+
+		check_thresholds "Filesystem ${_fs_mount}" \
+			"$_fs_thresholds" \
+			"$_fs_usage"
+	done
+}
+
+# Print memory diagnostics.  Passed to check_thresholds as the unhealthy
+# callout for the system memory check.
+dump_memory_info()
+{
+ # get_proc is defined outside this script; presumably it prints the
+ # contents of the named /proc file - confirm against the helper
+ get_proc "meminfo"
+ # Process listing in BSD-style user-oriented format, with a process
+ # tree and unlimited output width
+ ps auxfww
+ # set_proc is defined outside this script; presumably it writes "m"
+ # to /proc/sysrq-trigger, asking the kernel to log its memory state
+ # - confirm against the helper
+ set_proc "sysrq-trigger" "m"
+}
+
+# Check combined memory and swap utilization against the thresholds in
+# CTDB_MONITOR_MEMORY_USAGE, which defaults to 80 when empty or unset.
+# dump_memory_info is passed as the unhealthy callout.
+monitor_memory_usage()
+{
+	# Default: assigned only when the variable is empty or unset
+	: "${CTDB_MONITOR_MEMORY_USAGE:=80}"
+
+	_meminfo=$(get_proc "meminfo")
+
+	# Utilization is (used memory + used swap) as a truncated
+	# percentage of (total memory + total swap).  Free memory is
+	# MemAvailable when that is non-zero, otherwise the sum of
+	# MemFree, Cached and Buffers.  The awk program always prints
+	# exactly one number, 0 if no totals were found.
+	_mem_usage=$(echo "$_meminfo" | awk '
+		$1 == "MemAvailable:" { memavail += $2 }
+		$1 == "MemFree:" || $1 == "Cached:" || $1 == "Buffers:" {
+			memfree += $2
+		}
+		$1 == "MemTotal:" { memtotal = $2 }
+		$1 == "SwapFree:" { swapfree = $2 }
+		$1 == "SwapTotal:" { swaptotal = $2 }
+		END {
+			if (memavail != 0) {
+				memfree = memavail
+			}
+			if (memtotal + swaptotal == 0) {
+				print 0
+			} else {
+				usedtotal = memtotal - memfree + swaptotal - swapfree
+				print int(usedtotal / (memtotal + swaptotal) * 100)
+			}
+		}')
+
+	check_thresholds "System memory" \
+		"$CTDB_MONITOR_MEMORY_USAGE" \
+		"$_mem_usage" \
+		dump_memory_info
+}
+
+case "$1" in
+monitor)
+ # Load/cache database options from configuration file
+ ctdb_get_db_options
+
+ rc=0
+ monitor_filesystem_usage || rc=$?
+ monitor_memory_usage || rc=$?
+ exit $rc
+ ;;
+esac
+
+exit 0