From 7de03e4e519705301265c0415b3c0af85263a7ac Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 09:52:36 +0200 Subject: Adding upstream version 1:4.13.0. Signed-off-by: Daniel Baumann --- heartbeat/storage-mon.in | 399 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 399 insertions(+) create mode 100644 heartbeat/storage-mon.in (limited to 'heartbeat/storage-mon.in') diff --git a/heartbeat/storage-mon.in b/heartbeat/storage-mon.in new file mode 100644 index 0000000..284dec3 --- /dev/null +++ b/heartbeat/storage-mon.in @@ -0,0 +1,399 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 2021 Red Hat, Inc. All rights reserved. +# +# Authors: Christine Caulfield +# Fabio M. Di Nitto +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +# +# Checks storage I/O status of all given drives and writes the #health-storage +# status into the CIB +# Implementation is heavily based on ocf:pacemaker:HealthSMART +# +# It sends a single block of IO to a random location on the device and reports any errors returned. 
+# If the IO hangs, that will also be returned. (bear in mind that may also hang the C app in some +# instances). +# +# It's worth making a note in the RA description that the smartmon RA is also recommended (this +# does not replace it), and that Pacemaker health checking should be configured. +# +# https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Explained/singlehtml/index.html#tracking-node-health + +####################################################################### + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# +STORAGEMON=${HA_BIN}/storage_mon +ATTRDUP=${HA_SBIN_DIR}/attrd_updater +PIDFILE=${HA_VARRUN}/storage-mon-${OCF_RESOURCE_INSTANCE}.pid +ATTRNAME="#health-${OCF_RESOURCE_INSTANCE}" + +OCF_RESKEY_CRM_meta_interval_default="0" +OCF_RESKEY_io_timeout_default="10" +OCF_RESKEY_check_interval_default="30" +OCF_RESKEY_inject_errors_default="" +OCF_RESKEY_state_file_default="${HA_RSCTMP%%/}/storage-mon-${OCF_RESOURCE_INSTANCE}.state" +OCF_RESKEY_daemonize_default="false" + +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_CRM_meta_interval:=${OCF_RESKEY_CRM_meta_interval_default}} +: ${OCF_RESKEY_drives:=""} +: ${OCF_RESKEY_io_timeout:=${OCF_RESKEY_io_timeout_default}} +: ${OCF_RESKEY_check_interval:=${OCF_RESKEY_check_interval_default}} +: ${OCF_RESKEY_inject_errors:=${OCF_RESKEY_inject_errors_default}} +: ${OCF_RESKEY_state_file:=${OCF_RESKEY_state_file_default}} +: ${OCF_RESKEY_daemonize:=${OCF_RESKEY_daemonize_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +System health agent that checks the storage I/O status of the given drives and +updates the #health-storage attribute. Usage is highly recommended in combination +with the HealthSMART monitoring agent. 
The agent currently support a maximum of 25 +devices per instance. + +storage I/O health status + + + + + +Location to store the resource state in. + +State file + + + + + +The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". + +Drives to check + + + + + +Specify disk I/O timeout in seconds. Minimum 1, recommended 10 (default). + +Disk I/O timeout + + + + + +Specify interval between I/O checks in seconds.(Only supported with the damonize option.) + +I/O check interval + + + + + +Used only for testing! Specify % of I/O errors to simulate drives failures. + +Specify % of I/O errors to simulate drives failures + + + + + +Specifies to start storage-mon as a daemon and check for devices. + +start storage-mon with daemon + + + + + + + + + + + + + +END + return $OCF_SUCCESS +} + +####################################################################### + +storage-mon_usage() { + cat < /dev/null 2>&1 + case "$?" in + 0) rc=$OCF_SUCCESS;; + 1|2) rc=$OCF_NOT_RUNNING;; + *) rc=$OCF_ERR_GENERIC;; + esac + + if [ $rc -ne $OCF_SUCCESS ]; then + return "$rc" + fi + if [ "$1" = "pid_check_only" ]; then + return "$rc" + fi + + # generate client command line + cmdline="" + cmdline="$cmdline --client --attrname ${ATTRNAME}" + while : + do + # 0 : Normal. + # greater than 0 : monitoring error. + # 255(-1) : communication system error. + # 254(-2) : Not all checks completed for first device in daemon mode. + $STORAGEMON $cmdline + rc=$? + case "$rc" in + 254|255) + # If there is a communication error or the initial check of all devices has not been completed, + # it will loop and try to reconnect. + # When everything ends with a communication error during monitor, a monitor timeout occurs. + ocf_log debug "client monitor error : $rc" + ;; + 0) + status="green" + break + ;; + *) + status="red" + break + ;; + esac + done + + storage-mon_update_attribute $status + return "$?" + fi +} + +storage-mon_start() { + if ! 
ocf_is_true "$OCF_RESKEY_daemonize"; then + storage-mon_monitor + if [ $? -eq $OCF_SUCCESS ]; then + return $OCF_SUCCESS + fi + touch "${OCF_RESKEY_state_file}" + else + storage-mon_init + # generate command line + cmdline="" + for DRIVE in ${OCF_RESKEY_drives}; do + cmdline="$cmdline --device $DRIVE --score 1" + done + cmdline="$cmdline --daemonize --timeout ${OCF_RESKEY_io_timeout} --interval ${OCF_RESKEY_check_interval} --pidfile ${PIDFILE} --attrname ${ATTRNAME}" + if [ -n "${OCF_RESKEY_inject_errors}" ]; then + cmdline="$cmdline --inject-errors-percent ${OCF_RESKEY_inject_errors}" + fi + $STORAGEMON $cmdline + if [ "$?" -ne 0 ]; then + return $OCF_ERR_GENERIC + fi + fi +} + +storage-mon_stop() { + storage-mon_monitor + rc=$? + + if ! ocf_is_true "$OCF_RESKEY_daemonize"; then + if [ $rc -eq $OCF_SUCCESS ]; then + rm "${OCF_RESKEY_state_file}" + fi + else + case "$rc" in + $OCF_SUCCESS) + ;; + $OCF_NOT_RUNNING) + return "$OCF_SUCCESS";; + *) + return "$rc";; + esac + + kill -TERM $(cat "${PIDFILE}") + if [ "$?" -ne 0 ]; then + return $OCF_ERR_GENERIC + fi + + while true; do + storage-mon_monitor pid_check_only + rc="$?" + case "$rc" in + $OCF_SUCCESS) + ;; + $OCF_NOT_RUNNING) + return "$OCF_SUCCESS";; + *) + return "$rc";; + esac + sleep 1 + done + fi + return $OCF_SUCCESS +} + +storage-mon_validate() { + storage-mon_init + + if ! ocf_is_true "$OCF_RESKEY_daemonize"; then + # Is the state directory writable? + state_dir=$(dirname "${OCF_RESKEY_state_file}") + touch "$state_dir/$$" + if [ $? -ne 0 ]; then + return $OCF_ERR_CONFIGURED + fi + rm "$state_dir/$$" + fi + + return $OCF_SUCCESS +} + +case "$__OCF_ACTION" in + start) storage-mon_start;; + stop) storage-mon_stop;; + monitor) storage-mon_monitor;; + validate-all) storage-mon_validate;; + meta-data) meta_data;; + usage|help) storage-mon_usage $OCF_SUCCESS;; + *) storage-mon_usage $OCF_ERR_UNIMPLEMENTED;; +esac +rc=$? 
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc +# vim: set filetype=sh: -- cgit v1.2.3