#!@BASH_PATH@ # # ocf:pacemaker:HealthSMART resource agent # # Copyright 2009-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # # Checks the S.M.A.R.T. status of all given drives and writes the #health-smart # status into the CIB # ####################################################################### ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_CRM_meta_interval:=0} : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} : ${OCF_RESKEY_temp_warning:=""} : ${OCF_RESKEY_temp_lower_limit:=""} : ${OCF_RESKEY_temp_upper_limit:=""} : ${OCF_RESKEY_drives:="/dev/sda"} : ${OCF_RESKEY_devices:=""} : ${OCF_RESKEY_state:=""} : ${OCF_RESKEY_smartctl:="/usr/sbin/smartctl"} : ${OCF_RESKEY_dampen:="5s"} # Turn these into arrays so we can iterate them later. DRIVES=(${OCF_RESKEY_drives}) DEVICES=(${OCF_RESKEY_devices}) ####################################################################### meta_data() { cat < 1.1 System health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status Location to store the resource state in. State file The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". Drives to check The device type(s) to assume for the drive(s) being tested as a SPACE separated list. Device types Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. Lower limit for the red smart attribute Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. Upper limit for red smart attribute Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. Deg C below/above the upper limits for yellow smart attribute The path to the smartctl program, used for querying device health. The path to the smartctl program The time to wait (dampening) for further changes to occur Dampening interval END } ####################################################################### check_temperature() { if [ $1 -lt ${lower_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" attrd_updater -n "#health-smart" -U "red" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -gt ${upper_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" attrd_updater -n "#health-smart" -U "red" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -lt ${lower_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" attrd_updater -n "#health-smart" -U "yellow" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -gt ${upper_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" attrd_updater -n "#health-smart" -U "yellow" -d "${OCF_RESKEY_dampen}" return 1 fi } common_checks() { # Each item in $OCF_RESKEY_drives must have a corresponding item in # $OCF_RESKEY_devices with the device type. Alternately, # $OCF_RESKEY_devices can be empty. drives_len=${#DRIVES[@]} devices_len=${#DEVICES[@]} if [ "${drives_len}" -ne "${devices_len}" ] && [ "${devices_len}" -gt 0 ]; then ocf_log err "OCF_RESKEY_devices must be empty or the same length as OCF_RESKEY_drives." exit $OCF_ERR_ARGS fi # Each item in $OCF_RESKEY_drives must look like a device node. for d in "${DRIVES[@]}"; do if [[ "$d" != /dev/* ]]; then ocf_log err "Device in OCF_RESKEY_devices does not look like a device node: $d" exit $OCF_ERR_ARGS fi done } init_smart() { #Set temperature defaults if [ -z "${OCF_RESKEY_temp_warning}" ]; then yellow_threshold=5 else yellow_threshold=${OCF_RESKEY_temp_warning} fi if [ -z "${OCF_RESKEY_temp_lower_limit}" ] ; then lower_red_limit=0 else lower_red_limit=${OCF_RESKEY_temp_lower_limit} fi lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) if [ -z "${OCF_RESKEY_temp_upper_limit}" ] ; then upper_red_limit=60 else upper_red_limit=${OCF_RESKEY_temp_upper_limit} fi upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) for ndx in ${!DRIVES[*]}; do DRIVE=${DRIVES[$ndx]} if [ -n "${OCF_RESKEY_devices}" ]; then DEVICE=${DEVICES[$ndx]} "${OCF_RESKEY_smartctl}" -d "${DEVICE}" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi else "${OCF_RESKEY_smartctl}" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi fi done } HealthSMART_usage() { cat <