summaryrefslogtreecommitdiffstats
path: root/heartbeat/ManageRAID.in
diff options
context:
space:
mode:
Diffstat (limited to 'heartbeat/ManageRAID.in')
-rw-r--r--heartbeat/ManageRAID.in391
1 files changed, 391 insertions, 0 deletions
diff --git a/heartbeat/ManageRAID.in b/heartbeat/ManageRAID.in
new file mode 100644
index 0000000..bf5c745
--- /dev/null
+++ b/heartbeat/ManageRAID.in
@@ -0,0 +1,391 @@
+#!@BASH_SHELL@
+#
+# Name ManageRAID
+# Author Matthias Dahl, m.dahl@designassembly.de
+# License GPL version 2
+#
+# (c) 2006 The Design Assembly GmbH.
+#
+#
+# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+#
+# This resource agent is most likely feature-complete but not error-free. Please
+# consider it BETA quality for the moment, until it has proven itself stable...
+#
+# USE AT YOUR OWN RISK.
+#
+# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+#
+#
+# partly based on/inspired by original Heartbeat2 OCF resource agents
+#
+# Description
+#
+# Manages starting, mounting, unmounting, stopping and monitoring of RAID devices
+# which are preconfigured in /etc/conf.d/HB-ManageRAID.
+#
+#
+# Created 11. Sep 2006
+# Updated 18. Sep 2006
+#
+# rev. 1.00.2
+#
+# Changelog
+#
+# 18/Sep/06 1.00.1 more cleanup
+# 12/Sep/06 1.00.1 add more functionality
+# add sanity check for config parameters
+# general cleanup all over the place
+# 11/Sep/06 1.00.0 it's alive... muahaha... ALIVE... :-)
+#
+#
+# TODO
+#
+# - check if at least one disk out of PREFIX_LOCALDISKS is still active
+# in RAID otherwise consider RAID broken and stop it.
+#
+# The reason behind this: consider a RAID-1 which contains iSCSI devices
+# shared over Ethernet which get dynamically added/removed to/from the RAID.
+# Once all local disks have failed and only those iSCSI disks remain, the RAID
+# should really stop to prevent bad performance and possible data loss.
+#
+
+###
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Parameter defaults
+
+OCF_RESKEY_raidname_default=""
+
+: ${OCF_RESKEY_raidname=${OCF_RESKEY_raidname_default}}
+
+###
+
+# required utilities
+
+# required files/devices
+RAID_MDSTAT=/proc/mdstat
+
+#
+# check_file()
+#
+#   Abort the agent unless the given path exists.
+#
+#   $1 - path to test (the check is -e, so any file type counts)
+#
+#   Returns 0 when the path exists. Otherwise logs an error through
+#   ocf_log and exits the script with $OCF_ERR_GENERIC.
+#
+check_file ()
+{
+    local path=$1
+
+    # Nothing to report when the path is there.
+    [[ -e $path ]] && return 0
+
+    ocf_log err "setup problem: file $path does not exist."
+    exit $OCF_ERR_GENERIC
+}
+
+#
+# usage()
+#
+#   Print the one-line synopsis of the supported actions to stdout.
+#   Takes no arguments.
+#
+usage()
+{
+    # A plain echo is used instead of a "<<-" here-document: "<<-" strips
+    # leading TABs only, so space-indented text would leave the terminator
+    # unrecognised and break the function.
+    echo "usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}"
+}
+
+#
+# meta_data()
+#
+#   Print the agent's XML description (parameters and supported actions
+#   with their timeouts/intervals) to stdout. Takes no arguments.
+#
+meta_data()
+{
+ # The here-document delimiter is unquoted, so
+ # ${OCF_RESKEY_raidname_default} is expanded when the text is printed.
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ManageRAID" version="1.00.2">
+ <version>1.0</version>
+
+ <longdesc lang="en">
+ Manages starting, stopping and monitoring of RAID devices which
+ are preconfigured in /etc/conf.d/HB-ManageRAID.
+ </longdesc>
+
+ <shortdesc lang="en">Manages RAID devices</shortdesc>
+
+ <parameters>
+ <parameter name="raidname" unique="0" required="1">
+ <longdesc lang="en">
+ Name (case sensitive) of RAID to manage. (preconfigured in /etc/conf.d/HB-ManageRAID)
+ </longdesc>
+ <shortdesc lang="en">RAID name</shortdesc>
+ <content type="string" default="${OCF_RESKEY_raidname_default}" />
+ </parameter>
+ </parameters>
+
+ <actions>
+ <action name="start" timeout="75s" />
+ <action name="stop" timeout="75s" />
+ <action name="status" depth="0" timeout="10s" interval="10s" />
+ <action name="monitor" depth="0" timeout="10s" interval="10s" />
+ <action name="validate-all" timeout="5s" />
+ <action name="meta-data" timeout="5s" />
+ </actions>
+</resource-agent>
+END
+}
+
+#
+# start_raid()
+#
+#   Assemble the configured RAID from its local disks and mount it.
+#
+#   Reads the globals prepared by the main script: RAID_DEVPATH,
+#   RAID_LOCALDISKS (array) and the indirect references RAID_UUID,
+#   RAID_DEV, RAID_MOUNTPOINT and RAID_MOUNTOPTIONS (each holds the NAME
+#   of a config variable; ${!RAID_X} yields its value).
+#
+#   Returns:
+#     $OCF_SUCCESS      status_raid already reports success, or the RAID
+#                       was assembled and mounted
+#     $OCF_ERR_ARGS     a configured local disk is not a block device
+#     $OCF_ERR_GENERIC  assembling or mounting failed
+#     otherwise         whatever status_raid returned, if that was neither
+#                       success nor "not running"
+#
+start_raid()
+{
+    declare -i retcode
+    local ldev
+
+    status_raid
+    retcode=$?
+    if [[ $retcode == $OCF_SUCCESS ]]; then
+        return $OCF_SUCCESS
+    elif [[ $retcode != $OCF_NOT_RUNNING ]]; then
+        return $retcode
+    fi
+
+    # Refuse to assemble unless every configured member is a block device.
+    for ldev in "${RAID_LOCALDISKS[@]}"; do
+        if [[ ! -b $ldev ]]; then
+            ocf_log err "$ldev is not a (local) block device."
+            return $OCF_ERR_ARGS
+        fi
+    done
+
+    $MDADM -A "$RAID_DEVPATH" -a yes -u "${!RAID_UUID}" "${RAID_LOCALDISKS[@]}" &> /dev/null
+    if [[ $? != 0 ]]; then
+        ocf_log err "starting ${!RAID_DEV} with ${RAID_LOCALDISKS[*]} failed."
+        return $OCF_ERR_GENERIC
+    fi
+
+    # The mount point is quoted: the parameter check accepts spaces in it,
+    # and an unquoted expansion would be split into several arguments.
+    $MOUNT -o "${!RAID_MOUNTOPTIONS}" "$RAID_DEVPATH" "${!RAID_MOUNTPOINT}" &> /dev/null
+    if [[ $? != 0 ]]; then
+        # Roll back: do not leave an assembled but unmounted array behind.
+        $MDADM -S "$RAID_DEVPATH" &> /dev/null
+
+        if [[ $? != 0 ]]; then
+            ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed as well as stopping the RAID itself."
+        else
+            ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed. RAID stopped again."
+        fi
+
+        return $OCF_ERR_GENERIC
+    fi
+
+    return $OCF_SUCCESS
+}
+
+#
+# stop_raid()
+#
+#   Unmount the configured RAID and stop the md device.
+#
+#   Reads the globals RAID_DEVPATH and the indirect references RAID_DEV
+#   and RAID_MOUNTPOINT (${!RAID_X} yields the configured value).
+#
+#   Returns:
+#     $OCF_SUCCESS      status_raid reports "not running", or the RAID was
+#                       unmounted and stopped
+#     $OCF_ERR_GENERIC  unmounting failed (the RAID is then left running)
+#                       or stopping the md device failed
+#
+stop_raid()
+{
+    status_raid
+    if [[ $? == $OCF_NOT_RUNNING ]]; then
+        return $OCF_SUCCESS
+    fi
+
+    # The mount point is quoted: the parameter check accepts spaces in it,
+    # and an unquoted expansion would be split into several arguments.
+    $UMOUNT "${!RAID_MOUNTPOINT}" &> /dev/null
+    if [[ $? != 0 ]]; then
+        ocf_log err "unmounting ${!RAID_MOUNTPOINT} failed. not stopping ${!RAID_DEV}!"
+        return $OCF_ERR_GENERIC
+    fi
+
+    $MDADM -S "$RAID_DEVPATH" &> /dev/null
+    if [[ $? != 0 ]]; then
+        ocf_log err "stopping RAID ${!RAID_DEV} failed."
+        return $OCF_ERR_GENERIC
+    fi
+
+    return $OCF_SUCCESS
+}
+
+#
+# status_raid()
+#
+#   Report whether the configured RAID is active, intact and mounted.
+#
+#   Checks, in order: the device is listed as active in $RAID_MDSTAT, the
+#   device node exists, "mdadm --detail -t" returns a status below 3, the
+#   UUID reported by mdadm matches the configured one, and the device
+#   shows up in the mount table at the configured mount point.
+#
+#   Returns:
+#     $OCF_SUCCESS      all checks passed
+#     $OCF_NOT_RUNNING  the device is not listed as active in $RAID_MDSTAT
+#     $OCF_ERR_GENERIC  any later check failed
+#
+status_raid()
+{
+    declare -i retcode_raidcheck
+    declare -i retcode_uuidcheck
+
+    # Not listed as active in mdstat: treated as "not running".
+    $CAT $RAID_MDSTAT | $GREP -e "${!RAID_DEV}[\ ]*:[\ ]*active" &> /dev/null
+    if [ $? -ne 0 ]; then
+        return $OCF_NOT_RUNNING
+    fi
+
+    if [ ! -e $RAID_DEVPATH ]; then
+        return $OCF_ERR_GENERIC
+    fi
+
+    # First call: only mdadm's exit status is of interest.
+    $MDADM --detail -t $RAID_DEVPATH &> /dev/null
+    retcode_raidcheck=$?
+    # Second call: $? is grep's status, i.e. whether the UUID line matched.
+    $MDADM --detail -t $RAID_DEVPATH | $GREP -qEe "^[\ ]*UUID[\ ]*:[\ ]*${!RAID_UUID}" &> /dev/null
+    retcode_uuidcheck=$?
+
+    # NOTE(review): mdadm(8) appears to document exit status 2 for an
+    # unusable array and 4 for an error; the thresholds below accept 0-2
+    # and single out 3 -- confirm against the mdadm version in use.
+    if [ $retcode_raidcheck -gt 3 ]; then
+        ocf_log err "mdadm returned error code $retcode_raidcheck while checking ${!RAID_DEV}."
+        return $OCF_ERR_GENERIC
+    elif [ $retcode_raidcheck -eq 3 ]; then
+        ocf_log err "${!RAID_DEV} has failed."
+        return $OCF_ERR_GENERIC
+    elif [ $retcode_raidcheck -lt 3 ] && [ $retcode_uuidcheck != 0 ]; then
+        # ${!RAID_UUID} prints the configured UUID value; RAID_UUID itself
+        # only holds the name of the config variable.
+        ocf_log err "active RAID ${!RAID_DEV} and configured UUID (${!RAID_UUID}) do not match."
+        return $OCF_ERR_GENERIC
+    fi
+
+    $MOUNT | $GREP -e "$RAID_DEVPATH on ${!RAID_MOUNTPOINT}" &> /dev/null
+    if [[ $? != 0 ]]; then
+        ocf_log err "${!RAID_DEV} seems to be no longer mounted at ${!RAID_MOUNTPOINT}"
+        return $OCF_ERR_GENERIC
+    fi
+
+    return $OCF_SUCCESS
+}
+
+#
+# validate_all_raid()
+#
+#   Validate the resource. The configuration parameters are checked by
+#   the main script on every invocation, so this only runs status_raid
+#   and treats both "running" and "not running" as a valid state.
+#
+#   Returns:
+#     $OCF_SUCCESS  status_raid returned $OCF_SUCCESS or $OCF_NOT_RUNNING
+#     otherwise     the error code returned by status_raid
+#
+validate_all_raid()
+{
+    #
+    # since all parameters are checked every time ManageRAID is
+    # invoked, there is not much more to check...
+    #
+    # status_raid should cover the rest.
+    #
+    declare -i retcode
+
+    # Was "status_ve", a name this file never defines; status_raid is the
+    # function the comment above refers to.
+    status_raid
+    retcode=$?
+
+    if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then
+        return $retcode
+    fi
+
+    return $OCF_SUCCESS
+}
+
+if [ $# -ne 1 ]; then
+ usage
+ exit $OCF_ERR_ARGS
+fi
+
+case "$1" in
+ meta-data)
+ meta_data
+ exit $OCF_SUCCESS
+ ;;
+ usage)
+ usage
+ exit $OCF_SUCCESS
+ ;;
+ *)
+ ;;
+esac
+
+## required configuration
+#
+[ -f /etc/conf.d/HB-ManageRAID ] || {
+ ocf_log err "/etc/conf.d/HB-ManageRAID missing"
+ exit $OCF_ERR_INSTALLED
+}
+. /etc/conf.d/HB-ManageRAID
+#
+##
+
+#
+# check relevant environment variables for sanity and security
+#
+
+declare -i retcode_test
+declare -i retcode_grep
+
+$TEST -z "$OCF_RESKEY_raidname"
+retcode_test=$?
+echo "$OCF_RESKEY_raidname" | $GREP -qEe "^[[:alnum:]\_]+$"
+retcode_grep=$?
+
+if [[ $retcode_test != 1 || $retcode_grep != 0 ]]; then
+ ocf_log err "OCF_RESKEY_raidname not set or invalid."
+ exit $OCF_ERR_ARGS
+fi
+
+RAID_UUID=${OCF_RESKEY_raidname}_UUID
+
+echo ${!RAID_UUID} | $GREP -qEe "^[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}$"
+if [[ $? != 0 ]]; then
+ ocf_log err "${OCF_RESKEY_raidname}_UUID is invalid."
+ exit $OCF_ERR_ARGS
+fi
+
+RAID_DEV=${OCF_RESKEY_raidname}_DEV
+
+echo ${!RAID_DEV} | $GREP -qEe "^md[0-9]+$"
+if [[ $? != 0 ]]; then
+ ocf_log err "${OCF_RESKEY_raidname}_DEV is invalid."
+ exit $OCF_ERR_ARGS
+fi
+
+RAID_DEVPATH=/dev/${!RAID_DEV/md/md\/}
+RAID_MOUNTPOINT=${OCF_RESKEY_raidname}_MOUNTPOINT
+
+echo ${!RAID_MOUNTPOINT} | $GREP -qEe "^[[:alnum:]\/\_\"\ ]+$"
+if [[ $? != 0 ]]; then
+ ocf_log err "${OCF_RESKEY_raidname}_MOUNTPOINT is invalid."
+ exit $OCF_ERR_ARGS
+fi
+
+RAID_MOUNTOPTIONS=${OCF_RESKEY_raidname}_MOUNTOPTIONS
+
+echo ${!RAID_MOUNTOPTIONS} | $GREP -qEe "^[[:alpha:]\,]+$"
+if [[ $? != 0 ]]; then
+ ocf_log err "${OCF_RESKEY_raidname}_MOUNTOPTIONS is invalid."
+ exit $OCF_ERR_ARGS
+fi
+
+RAID_LOCALDISKS=${OCF_RESKEY_raidname}_LOCALDISKS[@]
+RAID_LOCALDISKS=( "${!RAID_LOCALDISKS}" )
+
+if [ ${#RAID_LOCALDISKS[@]} -lt 1 ]; then
+ ocf_log err "you have to specify at least one local disk."
+ exit $OCF_ERR_ARGS
+fi
+
+#
+# check that all relevant utilities are available
+#
+check_binary $MDADM
+check_binary $MOUNT
+check_binary $UMOUNT
+check_binary $GREP
+check_binary $CAT
+check_binary $TEST
+check_binary echo
+
+
+#
+# check that all relevant devices are available
+#
+check_file $RAID_MDSTAT
+
+#
+# finally... let's see what we are ordered to do :-)
+#
+# Each known action maps to one handler; anything else prints the usage
+# text and exits with $OCF_ERR_UNIMPLEMENTED.
+#
+case "$1" in
+    start)           start_raid ;;
+    stop)            stop_raid ;;
+    status|monitor)  status_raid ;;
+    validate-all)    validate_all_raid ;;
+    *)
+        usage
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
+esac
+
+# The handler's return code becomes the agent's exit status.
+exit $?
+