Diffstat (limited to 'heartbeat/mdraid')
-rwxr-xr-x	heartbeat/mdraid	584
1 file changed, 584 insertions, 0 deletions
diff --git a/heartbeat/mdraid b/heartbeat/mdraid
new file mode 100755
index 0000000..1e6a5d0
--- /dev/null
+++ b/heartbeat/mdraid
@@ -0,0 +1,584 @@
+#!/bin/sh
+#
+# License: GNU General Public License (GPL)
+# Support: users@clusterlabs.org
+#
+# mdraid (inspired by the Raid1 upstream resource agent)
+#
+# Description: Manages a Linux software RAID device on a (shared) storage medium.
+# Author: Heinz Mauelshagen (heinzm@redhat.com)
+# Release: Mar 2020
+#
+# usage: $0 {start|stop|monitor|validate-all|usage|meta-data}
+#
+# EXAMPLE config file /etc/mdadm.conf (for more info: mdadm.conf(5))
+#
+# AUTO -all
+# ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
+#
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Defaults for tools and timeouts used below; the mdadm binary may be
+# overridden via the environment, and PROC_CLEANUP_TIME is the number of
+# seconds to wait between TERM and KILL when force-stopping array users.
+MDADM=${MDADM:-mdadm}
+PROC_CLEANUP_TIME=3
+
+# Parameter defaults
+
+OCF_RESKEY_mdadm_conf_default=""
+OCF_RESKEY_md_dev_default=""
+OCF_RESKEY_force_stop_default="false"
+OCF_RESKEY_wait_for_udev_default="true"
+OCF_RESKEY_force_clones_default="false"
+
+: ${OCF_RESKEY_mdadm_conf=${OCF_RESKEY_mdadm_conf_default}}
+: ${OCF_RESKEY_md_dev=${OCF_RESKEY_md_dev_default}}
+: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
+: ${OCF_RESKEY_wait_for_udev=${OCF_RESKEY_wait_for_udev_default}}
+: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
+
+#######################################################################
+
+usage() {
+	cat <<-EOT
+	usage: $0 {start|stop|monitor|validate-all|usage|meta-data}
+	EOT
+}
+
+#
+# Action: provide meta-data (parameter specifications and descriptive text)
+#
+meta_data() {
+	cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="mdraid" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+This resource agent manages Linux software RAID (MD) devices on
+a shared storage medium, ensuring that non-clustered MD arrays
+are prohibited from starting cloned, which would cause data corruption
+(e.g., on raid6 arrays), unless forced (see the force_clones parameter).
+Clustered MD RAID layouts (see below) will be discovered and are
+allowed to be cloned by default; there is no need to set force_clones.
+
+It uses mdadm(8) to start, stop, and monitor the MD devices.
+
+Supported clustered (i.e., clonable active-active) arrays are linear,
+raid0, and clustered raid1/raid10 (i.e., created with mdadm(8) using
+--bitmap=clustered).
+
+Option: OCF_CHECK_LEVEL
+
+When OCF_CHECK_LEVEL is set to any number greater than 0, the standard
+monitor operation (including probe) will check the array and attempt a
+recovery sequence to re-add devices if any failed device exists. By
+default, OCF_CHECK_LEVEL is unset, and this is disabled.
+
+</longdesc>
+<shortdesc lang="en">Manages Linux software RAID (MD) devices on shared
+storage</shortdesc>
+
+<parameters>
+<parameter name="mdadm_conf" unique="0" required="1">
+<longdesc lang="en">
+The MD RAID configuration file (e.g., /etc/mdadm.conf).
+</longdesc>
+<shortdesc lang="en">MD config file</shortdesc>
+<content type="string" default="${OCF_RESKEY_mdadm_conf_default}" />
+</parameter>
+
+<parameter name="md_dev" unique="0" required="1">
+<longdesc lang="en">
+MD array block device to use (e.g., /dev/md0 or /dev/md/3).
+With shared access to the array's storage, this should
+preferably be a clustered raid1 or raid10 array created
+with --bitmap=clustered, assuming its resource will
+be cloned (i.e., active-active access).
+
+Be sure to disable auto-assembly for the resource-managed arrays!
+</longdesc>
+<shortdesc lang="en">MD block device</shortdesc>
+<content type="string" default="${OCF_RESKEY_md_dev_default}" />
+</parameter>
+
+<parameter name="force_stop" unique="0" required="0">
+<longdesc lang="en">
+If processes or kernel threads are using the array, it cannot be
+stopped. We will try to stop processes, first by sending TERM and
+then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
+The lsof(8) program is required to get the list of array users.
+Of course, the kernel threads cannot be stopped this way.
+If the processes are critical for data integrity, then set this
+parameter to false. Note that in that case the stop operation
+will fail and the node will be fenced.
+</longdesc>
+<shortdesc lang="en">force stop processes using the array</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
+</parameter>
+
+<parameter name="wait_for_udev" unique="0" required="0">
+<longdesc lang="en">
+Wait until udevd creates the device node in the start operation. On a
+normally loaded host this should happen quickly, but you may be
+unlucky. If you are not using udev, set this to "false".
+</longdesc>
+<shortdesc lang="en">wait_for_udev</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_wait_for_udev_default}" />
+</parameter>
+
+<parameter name="force_clones" unique="0" required="0">
+<longdesc lang="en">
+Activating the same non-clustered MD RAID array (i.e., single-host
+raid1/4/5/6/10) on multiple nodes at the same time will result in
+data corruption and is thus forbidden by default.
+
+A safe example could be an (exotic) array that is only named identically
+across all nodes, but is in fact based on distinct (non-shared) storage.
+
+Only set this to "true" if you know what you are doing!
+</longdesc>
+<shortdesc lang="en">force ability to run as a clone</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_force_clones_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20s" />
+<action name="stop" timeout="20s" />
+<action name="monitor" depth="0" timeout="20s" interval="10s" />
+<action name="validate-all" timeout="5s" />
+<action name="meta-data" timeout="5s" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+# ocf-shellfunc ocf_is_clone() fails with meta attribute clone-max < 2.
+# Checking for a defined meta_clone_max reskey is sufficient until fixed.
+resource_is_cloned() {
+	[ -z "$OCF_RESKEY_CRM_meta_clone_max" ] && return 1 || return 0
+}
+
+raid_validate_all() {
+	if [ -z "$mdadm_conf" ] ; then
+		ocf_exit_reason "Please set OCF_RESKEY_mdadm_conf"
+		return $OCF_ERR_CONFIGURED
+	fi
+	if [ ! -r "$mdadm_conf" ] ; then
+		ocf_exit_reason "Configuration file [$mdadm_conf] does not exist, or cannot be opened"
+		return $OCF_ERR_ARGS
+	fi
+	if [ -z "$md_dev" ] ; then
+		ocf_exit_reason "Please set OCF_RESKEY_md_dev to the MD RAID array block device you want to control"
+		return $OCF_ERR_CONFIGURED
+	fi
+	case "$md_dev" in
+	/dev/*) ;;
+	*)	ocf_exit_reason "Bogus MD RAID array block device name (\"$md_dev\")"
+		return $OCF_ERR_ARGS;;
+	esac
+	if ocf_is_true $wait_for_udev && ! have_binary udevadm && [ "$__OCF_ACTION" = "start" ]; then
+		ocf_exit_reason "Either install udevadm or set wait_for_udev to false"
+		return $OCF_ERR_INSTALLED
+	fi
+	if ocf_is_true $force_stop && ! have_binary lsof; then
+		ocf_exit_reason "Please install lsof(8) or set force_stop to false."
+		return $OCF_ERR_INSTALLED
+	fi
+	if ! have_binary $MDADM; then
+		ocf_exit_reason "Please install mdadm(8)!"
+		return $OCF_ERR_INSTALLED
+	fi
+	if ! have_binary blkid; then
+		ocf_exit_reason "Please install blkid(8). We need it to list MD array UUIDs!"
+		return $OCF_ERR_INSTALLED
+	fi
+	if [ `echo $md_dev | wc -w` -gt 1 ]; then
+		ocf_exit_reason "Only one MD array supported"
+		return $OCF_ERR_CONFIGURED
+	fi
+
+	return $OCF_SUCCESS
+}
+
+# Remove ':' or '-' from uuid string to be able to compare between MD and blkid format.
+uuid_flat() {
+	echo $1 | sed 's/[-:]//g'
+}
+
+# Global variable for devices by MD uuid.
+devs=""
+
+# Get array uuid from mdadm_conf based on $md_dev.
+get_array_uuid_by_mddev() {
+	local array_uuid
+
+	array_uuid="`grep $md_dev $mdadm_conf`"
+	if [ -z "$array_uuid" ]
+	then
+		ocf_exit_reason "Entry for $md_dev does not exist in $mdadm_conf!"
+		return $OCF_ERR_CONFIGURED
+	fi
+
+	array_uuid=$(echo $array_uuid | sed 's/^.*UUID=//;s/ .*$//')
+	if [ -z "$array_uuid" ]
+	then
+		ocf_exit_reason "Bogus entry for $md_dev in $mdadm_conf!"
+		return $OCF_ERR_CONFIGURED
+	fi
+
+	echo `uuid_flat $array_uuid`
+
+	return $OCF_SUCCESS
+}
+
+# Use blkid to get to the subset of raid members by array uuid.
+list_devices_for_mddev() {
+	local array_uuid blkid_array_uuid dev line rc
+
+	array_uuid=`get_array_uuid_by_mddev`
+	rc=$?
+	if [ $rc -ne $OCF_SUCCESS ]; then
+		ocf_exit_reason "Failed to get UUID of $md_dev from $mdadm_conf"
+		return $rc
+	fi
+
+	blkid 2>/dev/null | grep linux_raid_member | while read line
+	do
+		dev=`echo $line | sed 's/: .*//'`
+		blkid_array_uuid=$(echo $line | sed 's/^.* UUID="//;s/" .*$//')
+		[ "`uuid_flat $blkid_array_uuid`" = "$array_uuid" ] && echo $dev
+	done
+}
+
+# Check for linear or raid0 array; presumes the global devs variable is set.
+array_is_linear_or_raid0() {
+	local c=0 d
+
+	for d in $devs
+	do
+		$MDADM -E $d 2>&1 | $EGREP -i "raid level : (raid0|linear)" >/dev/null 2>&1
+		[ $? -eq 0 ] && c=$((c+1))
+	done
+
+	[ $c -eq `echo $devs | wc -w` ] && return 0 || return 1
+}
+
+# Return true for clustered RAID, relying on all component devices reporting
+# clustered type; presumes the global devs variable holds the component
+# devices of the array.
+array_is_clustered_raid() {
+	local c=0 d dev_count=`echo $devs | wc -w` s
+	# Check based on the specific "intent bitmap : clustered" output once
+	# available in mdadm output, or fall back to a defined "Cluster Name",
+	# presuming the bitmap is clustered if so.
+	local strs="clustered cluster.name"
+
+	for d in $devs
+	do
+		for s in $strs
+		do
+			$MDADM -E $d 2>&1 | grep -i "$s" >/dev/null 2>&1
+			if [ $? -eq 0 ]; then
+				c=$((c+1))
+				break
+			fi
+		done
+	done
+
+	[ $c -eq $dev_count ] && return 0 || return 1
+}
+
+# Check for all clustered types (linear, raid0, and clustered raid1/raid10).
+is_clustered_raid() {
+	array_is_clustered_raid || array_is_linear_or_raid0
+}
+
+md_assemble() {
+	local rc
+
+	$MDADM --assemble $md_dev --config="$mdadm_conf"
+	rc=$?
+	[ $rc -eq 0 ] && ocf_is_true $wait_for_udev && udevadm settle --exit-if-exists=$md_dev
+
+	return $rc
+}
+
+# Try setting an MD array to readonly.
+mark_readonly() {
+	local rc
+
+	$MDADM --readonly $md_dev --config="$mdadm_conf"
+	rc=$?
+	[ $rc -ne 0 ] && ocf_exit_reason "Failed to set $md_dev readonly (rc=$rc)"
+
+	return $rc
+}
+
+# Try stopping an MD array in case its block device is nonexistent for some reason.
+mknod_raid_stop() {
+	local rc n tmp_block_file
+
+	# first create a block device file, then try to stop the array
+	n=`echo $1 | sed 's/[^0-9]*//'`
+	if ! ocf_is_decimal "$n"; then
+		ocf_log warn "could not get the minor device number from $1"
+		return 1
+	fi
+	tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-`basename $1`"
+	rm -f $tmp_block_file
+	ocf_log info "block device file $1 missing, creating one in order to stop the array"
+	mknod $tmp_block_file b 9 $n
+	$MDADM --stop $tmp_block_file --config="$mdadm_conf"
+	rc=$?
+	rm -f $tmp_block_file
+	return $rc
+}
+
+# Stop an MD array.
+raid_stop_one() {
+	if [ -b "$1" ]; then
+		$MDADM --stop $1 --config="$mdadm_conf" && return
+	else
+		# newer mdadm releases can stop arrays when given the
+		# basename; try that first
+		$MDADM --stop `basename $1` --config="$mdadm_conf" && return
+		# otherwise create a block device file
+		mknod_raid_stop $1
+	fi
+}
+
+# Functions to show/stop any processes holding the resource.
+get_users_pids() {
+	ocf_log debug "running lsof to list $md_dev users..."
+	lsof $md_dev | tail -n +2 | $AWK '{print $2}' | sort -u
+}
+
+stop_raid_users() {
+	local pids=`get_users_pids $md_dev`
+
+	if [ -z "$pids" ]; then
+		ocf_log warn "lsof reported no users holding arrays"
+		return 2
+	else
+		ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids
+	fi
+}
+
+showusers() {
+	local disk=`basename $md_dev`
+
+	ocf_log info "running lsof to list $disk users..."
+	ocf_run -warn lsof $md_dev
+
+	if [ -d /sys/block/$disk/holders ]; then
+		ocf_log info "ls -l /sys/block/$disk/holders"
+		ocf_run -warn ls -l /sys/block/$disk/holders
+	fi
+}
+
+#######################################################################
+
+#
+# Action: START up the MD RAID array.
+#
+raid_start() {
+	local rc
+
+	if resource_is_cloned && ! is_clustered_raid; then
+		if ocf_is_true "$OCF_RESKEY_force_clones"; then
+			ocf_log warn "Forced cloned start of non-clustered $md_dev, which may lead to data corruption!"
+		else
+			ocf_exit_reason "Rejecting start: non-clustered MD RAID array $md_dev is NOT safe to run cloned"
+			exit $OCF_ERR_CONFIGURED
+		fi
+	fi
+
+	raid_monitor
+	rc=$?
+	# md array already online, nothing to do.
+	[ $rc -eq $OCF_SUCCESS ] && return $rc
+
+	if [ $rc -ne $OCF_NOT_RUNNING ]
+	then
+		# If the array is in a broken state, this agent doesn't know how to repair that.
+		ocf_exit_reason "MD RAID array $md_dev in a broken state; cannot start (rc=$rc)"
+		return $OCF_ERR_GENERIC
+	fi
+
+	md_assemble
+	rc=$?
+	if [ $rc -ne 0 ]; then
+		ocf_exit_reason "Failed to assemble MD RAID array $md_dev (rc=$rc; is $mdadm_conf up-to-date?)"
+		return $OCF_ERR_GENERIC
+	fi
+
+	raid_monitor
+	[ $? -eq $OCF_SUCCESS ] && return $OCF_SUCCESS
+
+	ocf_exit_reason "Couldn't start MD RAID array $md_dev (rc=$rc)"
+
+	return $OCF_ERR_GENERIC
+}
+
+#
+# Action: STOP the MD RAID array
+#
+raid_stop() {
+	local rc
+
+	# See if the MD device is already cleanly stopped:
+	raid_monitor
+	[ $? -eq $OCF_NOT_RUNNING ] && return $OCF_SUCCESS
+
+	# Turn off raid
+	if ! raid_stop_one $md_dev; then
+		if ocf_is_true $force_stop; then
+			stop_raid_users
+			case $? in
+			2) false;;
+			*) raid_stop_one $md_dev;;
+			esac
+		else
+			false
+		fi
+	fi
+	rc=$?
+
+	if [ $rc -ne 0 ]; then
+		ocf_log warn "Couldn't stop MD RAID array $md_dev (rc=$rc)"
+		showusers $md_dev
+		mark_readonly $md_dev
+		return $OCF_ERR_GENERIC
+	fi
+
+	raid_monitor
+	rc=$?
+	[ $rc -eq $OCF_NOT_RUNNING ] && return $OCF_SUCCESS
+
+	ocf_exit_reason "MD RAID array $md_dev still active after stop command (rc=$rc)"
+	return $OCF_ERR_GENERIC
+}
+
+#
+# Action: monitor the MD RAID array.
+#
+raid_monitor() {
+	local TRY_READD=0 md rc pbsize
+
+	# Check if the md device exists first, but not if we are in the
+	# stop operation; device existence is important only for running
+	# arrays.
+	if [ "$__OCF_ACTION" != "stop" ]; then
+		if [ -h "$md_dev" ]; then
+			md=$(ls $md_dev -l | $AWK -F'/' '{print $NF}')
+		elif [ -b "$md_dev" ]; then
+			md=${md_dev#/dev/}
+		else
+			ocf_log info "$md_dev is not a block device"
+			return $OCF_NOT_RUNNING
+		fi
+	fi
+
+	if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then
+		ocf_log info "$md not found in /proc/mdstat"
+		return $OCF_NOT_RUNNING
+	fi
+
+	$MDADM --detail --test $md_dev >/dev/null 2>&1
+	rc=$?
+	case $rc in
+	0)	;;
+	1)	ocf_log warn "$md_dev has at least one failed device."
+		TRY_READD=1;;
+	2)	ocf_exit_reason "$md_dev has failed."
+		return $OCF_ERR_GENERIC;;
+	4)	if [ "$__OCF_ACTION" = "stop" ] ; then
+			# There may be a transient invalid device after
+			# we stop MD due to uevent processing, the
+			# original device is stopped though.
+			return $OCF_NOT_RUNNING
+		else
+			ocf_exit_reason "mdadm failed on $md_dev."
+			return $OCF_ERR_GENERIC
+		fi;;
+	*)	ocf_exit_reason "mdadm returned an unknown result ($rc)."
+		return $OCF_ERR_GENERIC;;
+	esac
+
+	if ! array_is_linear_or_raid0; then
+		if [ "$__OCF_ACTION" = "monitor" ] && [ "$OCF_RESKEY_CRM_meta_interval" != 0 ] &&
+		   [ $TRY_READD -eq 1 ] && [ $OCF_CHECK_LEVEL -gt 0 ]; then
+			ocf_log info "Attempting recovery sequence to re-add devices on MD RAID array $md_dev:"
+			$MDADM $md_dev --fail detached
+			$MDADM $md_dev --remove failed
+			$MDADM $md_dev --re-add missing
+			# TODO: At this stage, there's nothing to actually do
+			# here. Either this worked or it did not.
+		fi
+	fi
+
+	pbsize=`(blockdev --getpbsz $md_dev || stat -c "%o" $md_dev) 2>/dev/null`
+	if [ -z "$pbsize" ]; then
+		ocf_log warn "both blockdev and stat were unable to get the block size (will use 4k)"
+		pbsize=4096 # try with 4k
+	fi
+	if ! dd if=$md_dev count=1 bs=$pbsize of=/dev/null iflag=direct >/dev/null 2>&1; then
+		ocf_exit_reason "$md_dev: I/O error on read"
+		return $OCF_ERR_GENERIC
+	fi
+
+	[ "$__OCF_ACTION" = "monitor" ] && ocf_log info "monitoring...($md_dev)"
+
+	return $OCF_SUCCESS
+}
+
+if [ $# -ne 1 ]; then
+	usage
+	exit $OCF_ERR_ARGS
+fi
+
+# Process actions which are independent of validation.
+case "$1" in
+meta-data)	meta_data
+		exit $OCF_SUCCESS;;
+usage)		usage
+		exit $OCF_SUCCESS;;
+*)		;;
+esac
+
+# Define global variables used in the functions above.
+mdadm_conf="${OCF_RESKEY_mdadm_conf}"
+md_dev="${OCF_RESKEY_md_dev}"
+force_stop="${OCF_RESKEY_force_stop}"
+wait_for_udev="${OCF_RESKEY_wait_for_udev}"
+
+# Validate all parameters and check that the mandatory binaries are present.
+raid_validate_all
+rc=$?
+[ $rc -ne $OCF_SUCCESS ] && exit $rc
+# raid_validate_all already processed and result checked.
+[ "$1" = "validate-all" ] && exit $OCF_SUCCESS
+
+# Required by start|stop|monitor processed below.
+devs="`list_devices_for_mddev`"
+if [ `echo $devs | wc -w` -eq 0 ]; then
+	ocf_exit_reason "No component device(s) found for MD RAID array $md_dev"
+	exit $OCF_ERR_GENERIC
+fi
+
+case "$1" in
+start)		raid_start;;
+stop)		raid_stop;;
+monitor)	raid_monitor;;
+*)		usage
+		exit $OCF_ERR_UNIMPLEMENTED;;
+esac
+rc=$?
+
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
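
For reference, the following sketch shows how such an agent could be wired into a Pacemaker cluster. It is a hypothetical example, not part of the commit: the resource name md0, the component devices /dev/sda and /dev/sdb, and the use of pcs(8) are assumptions; the mdadm_conf/md_dev parameters and the --bitmap=clustered requirement come from the agent's meta-data above.

	# Create a clustered raid1 array on the shared disks (assumed names);
	# --bitmap=clustered is what the agent's is_clustered_raid check detects.
	mdadm --create /dev/md0 --level=1 --raid-devices=2 \
	      --bitmap=clustered /dev/sda /dev/sdb

	# Record the array in the config file the agent will be pointed at,
	# and keep auto-assembly disabled for resource-managed arrays.
	mdadm --detail --brief /dev/md0 >> /etc/mdadm.conf

	# Clone the resource for active-active access across the nodes
	# (hypothetical resource name "md0").
	pcs resource create md0 ocf:heartbeat:mdraid \
	    mdadm_conf=/etc/mdadm.conf md_dev=/dev/md0 \
	    op monitor interval=10s clone

Cloning a raid1/raid10 array created without --bitmap=clustered would be rejected at start unless force_clones=true, per raid_start() above; linear and raid0 arrays are always admitted for cloning.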