Diffstat (limited to 'heartbeat/Raid1')
-rwxr-xr-x  heartbeat/Raid1  586
1 file changed, 586 insertions, 0 deletions
diff --git a/heartbeat/Raid1 b/heartbeat/Raid1
new file mode 100755
index 0000000..924d94c
--- /dev/null
+++ b/heartbeat/Raid1
@@ -0,0 +1,586 @@
+#!/bin/sh
+#
+#
+# License: GNU General Public License (GPL)
+# Support: users@clusterlabs.org
+#
+# Raid1
+# Description: Manages a Linux software RAID device on a shared storage medium.
+# Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
+# Original Release: 25 Oct 2000
+# RAID patches: http://people.redhat.com/mingo/raid-patches/
+# Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3
+# Sympathetic Ear: mailto:linux-raid@vger.kernel.org
+#
+# usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
+#
+#
+# EXAMPLE config file /etc/raidtab.md0
+# This file must exist on both machines!
+#
+# raiddev /dev/md0
+# raid-level 1
+# nr-raid-disks 2
+# chunk-size 64k
+# persistent-superblock 1
+# #nr-spare-disks 0
+# device /dev/sda1
+# raid-disk 0
+# device /dev/sdb1
+# raid-disk 1
+#
+# EXAMPLE config file /etc/mdadm.conf (for more info: man mdadm.conf)
+#
+# DEVICE /dev/sdb1 /dev/sdc1
+# ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
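+#
+# EXAMPLE cluster configuration (crm shell syntax; a minimal sketch only,
+# the resource id and parameter values are placeholders to adapt):
+#
+# primitive p_raid1 ocf:heartbeat:Raid1 \
+#     params raidconf="/etc/mdadm.conf" raiddev="/dev/md0" \
+#     op monitor interval=10s timeout=20s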
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Parameter defaults
+
+OCF_RESKEY_raidconf_default=""
+OCF_RESKEY_raiddev_default=""
+OCF_RESKEY_homehost_default=""
+OCF_RESKEY_force_stop_default="true"
+OCF_RESKEY_udev_default="true"
+OCF_RESKEY_force_clones_default="false"
+
+: ${OCF_RESKEY_raidconf=${OCF_RESKEY_raidconf_default}}
+: ${OCF_RESKEY_raiddev=${OCF_RESKEY_raiddev_default}}
+: ${OCF_RESKEY_homehost=${OCF_RESKEY_homehost_default}}
+: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
+: ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}}
+: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
+
+# Tools used by this agent; mdadm is preferred, raidtools (raidstart,
+# raidstop) are a deprecated fallback.
+MDADM=${MDADM:-mdadm}
+RAIDSTART=${RAIDSTART:-raidstart}
+RAIDSTOP=${RAIDSTOP:-raidstop}
+
+#######################################################################
+
+usage() {
+ cat <<-EOT
+ usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
+ EOT
+}
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="Raid1" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+This resource agent manages Linux software RAID (MD) devices on
+a shared storage medium. It uses mdadm(8) to start, stop, and
+monitor the MD devices. Raidtools are supported, but deprecated.
+See https://raid.wiki.kernel.org/index.php/Linux_Raid for more
+information.
+</longdesc>
+<shortdesc lang="en">Manages Linux software RAID (MD) devices on shared storage</shortdesc>
+
+<parameters>
+<parameter name="raidconf" unique="0" required="1">
+<longdesc lang="en">
+The RAID configuration file, e.g. /etc/mdadm.conf.
+</longdesc>
+<shortdesc lang="en">RAID config file</shortdesc>
+<content type="string" default="${OCF_RESKEY_raidconf_default}" />
+</parameter>
+
+<parameter name="raiddev" unique="0" required="1">
+<longdesc lang="en">
+One or more block devices to use, space separated. Alternatively,
+set to "auto" to manage all devices specified in raidconf.
+</longdesc>
+<shortdesc lang="en">block device</shortdesc>
+<content type="string" default="${OCF_RESKEY_raiddev_default}" />
+</parameter>
+
+<parameter name="homehost" unique="0" required="0">
+<longdesc lang="en">
+The value for the homehost directive; this is an mdadm feature to
+protect RAIDs against being activated by accident. It is recommended to
+create RAIDs managed by the cluster with "homehost" set to a special
+value, so they are not accidentally auto-assembled by nodes not
+supposed to own them.
+</longdesc>
+<shortdesc lang="en">Homehost for mdadm</shortdesc>
+<content type="string" default="${OCF_RESKEY_homehost_default}" />
+</parameter>
+
+<parameter name="force_stop" unique="0" required="0">
+<longdesc lang="en">
+If processes or kernel threads are using the array, it cannot be
+stopped. We will try to stop processes, first by sending TERM and
+then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
+The lsof(8) program is required to get the list of array users.
+Of course, the kernel threads cannot be stopped this way.
+If the processes are critical for data integrity, then set this
+parameter to false. Note that in that case the stop operation
+will fail and the node will be fenced.
+</longdesc>
+<shortdesc lang="en">force stop processes using the array</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
+</parameter>
+
+<parameter name="udev" unique="0" required="0">
+<longdesc lang="en">
+Wait until udevd creates the device in the start operation. On a
+normally loaded host this should happen quickly, but you may be
+unlucky. If you are not using udev, set this to "false".
+</longdesc>
+<shortdesc lang="en">udev</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_udev_default}" />
+</parameter>
+
+<parameter name="force_clones">
+<longdesc lang="en">
+Activating the same md RAID array on multiple nodes at the same time
+will result in data corruption and thus is forbidden by default.
+
+The only safe use is an array that merely has the same name on all
+nodes, but is in fact backed by distinct (non-shared) storage on
+each node.
+
+Only set this to "true" if you know what you are doing!
+</longdesc>
+<shortdesc lang="en">force ability to run as a clone</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_force_clones_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20s" />
+<action name="stop" timeout="20s" />
+<action name="status" depth="0" timeout="20s" interval="10s" />
+<action name="monitor" depth="0" timeout="20s" interval="10s" />
+<action name="validate-all" timeout="5s" />
+<action name="meta-data" timeout="5s" />
+</actions>
+</resource-agent>
+END
+}
+
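+# Wait until udevd has processed its event queue (a no-op when udev is
+# not in use); extra arguments are passed through to udevadm settle.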
+udev_settle() {
+ if ocf_is_true $WAIT_FOR_UDEV; then
+ udevadm settle $*
+ fi
+}
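+# Print the device name (second field) of every ARRAY line in
+# $RAIDCONF; bail out if the config file has disappeared.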
+list_conf_arrays() {
+ test -f $RAIDCONF || {
+ ocf_exit_reason "$RAIDCONF gone missing!"
+ exit $OCF_ERR_GENERIC
+ }
+ grep ^ARRAY $RAIDCONF | awk '{print $2}'
+}
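+# Run $1 once per device in $RAIDDEVS. With "all" as $2, every device
+# is visited and the exit codes are OR-ed together; otherwise return
+# at the first non-zero result.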
+forall() {
+ local func=$1
+ local checkall=$2
+ local mddev rc=0
+ for mddev in $RAIDDEVS; do
+ $func $mddev
+ rc=$(($rc | $?))
+ [ "$checkall" = all ] && continue
+ [ $rc -ne 0 ] && return $rc
+ done
+ return $rc
+}
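+# Succeed only if raid1_monitor_one reports OCF_NOT_RUNNING for every array.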
+are_arrays_stopped() {
+ local rc mddev
+ for mddev in $RAIDDEVS; do
+ raid1_monitor_one $mddev
+ rc=$?
+ [ $rc -ne $OCF_NOT_RUNNING ] && break
+ done
+ test $rc -eq $OCF_NOT_RUNNING
+}
+
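+# Assemble one array from $RAIDCONF and wait for udev to create its
+# device node.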
+md_assemble() {
+ local mddev=$1
+ $MDADM --assemble $mddev --config=$RAIDCONF $MDADM_HOMEHOST
+ udev_settle --exit-if-exists=$mddev
+}
+#
+# START: Start up the RAID device
+#
+raid1_start() {
+ local rc
+ raid1_monitor
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS ]; then
+ # md already online, nothing to do.
+ return $OCF_SUCCESS
+ fi
+ if [ $rc -ne $OCF_NOT_RUNNING ]; then
+ # If the array is in a broken state, this agent doesn't
+ # know how to repair that.
+ ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$rc)"
+ return $OCF_ERR_GENERIC
+ fi
+
+ if [ $HAVE_RAIDTOOLS = "true" ]; then
+ # Run raidstart to start up the RAID array
+ $RAIDSTART --configfile $RAIDCONF $MDDEV
+ else
+ forall md_assemble all
+ fi
+
+ raid1_monitor
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ else
+ ocf_exit_reason "Couldn't start RAID for $RAIDDEVS"
+ return $OCF_ERR_GENERIC
+ fi
+}
+
+#
+# STOP: stop the RAID device
+#
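+# If an array cannot be stopped, marking it read-only at least
+# prevents further writes from this node.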
+mark_readonly() {
+ local mddev=$1
+ local rc
+ ocf_log info "Attempting to mark array $mddev readonly"
+ $MDADM --readonly $mddev --config=$RAIDCONF
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_exit_reason "Failed to set $mddev readonly (rc=$rc)"
+ fi
+ return $rc
+}
+mknod_raid1_stop() {
+ # first create a block device file, then try to stop the
+ # array
+ local rc n tmp_block_file
+ n=`echo $1 | sed 's/[^0-9]*//'`
+ if ! ocf_is_decimal "$n"; then
+ ocf_log warn "could not get the minor device number from $1"
+ return 1
+ fi
+ tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-`basename $1`"
+ rm -f $tmp_block_file
+ ocf_log info "block device file $1 missing, creating one in order to stop the array"
+ mknod $tmp_block_file b 9 $n
+ $MDADM --stop $tmp_block_file --config=$RAIDCONF
+ rc=$?
+ rm -f $tmp_block_file
+ return $rc
+}
+raid1_stop_one() {
+ ocf_log info "Stopping array $1"
+ if [ -b "$1" ]; then
+ $MDADM --stop $1 --config=$RAIDCONF &&
+ return
+ else
+ # newer mdadm releases can stop arrays when given the
+ # basename; try that first
+ $MDADM --stop `basename $1` --config=$RAIDCONF &&
+ return
+ # otherwise create a block device file
+ mknod_raid1_stop $1
+ fi
+}
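+# Print the unique PIDs of processes holding $1 open (on stdout) and
+# log every lsof line as a warning.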
+get_users_pids() {
+ local mddev=$1
+ local outp l
+ ocf_log debug "running lsof to list $mddev users..."
+ outp=`lsof $mddev | tail -n +2`
+ echo "$outp" | awk '{print $2}' | sort -u
+ echo "$outp" | while read l; do
+ ocf_log warn "$l"
+ done
+}
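+# Stop the processes holding any array open: TERM first, then KILL
+# after $PROC_CLEANUP_TIME seconds. Returns 2 if lsof found no users.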
+stop_raid_users() {
+ local pids
+ pids=`forall get_users_pids all | sort -u`
+ if [ -z "$pids" ]; then
+ ocf_log warn "lsof reported no users holding arrays"
+ return 2
+ else
+ ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids
+ fi
+}
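+# Stop all managed arrays with whichever tool set is available.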
+stop_arrays() {
+ if [ $HAVE_RAIDTOOLS = "true" ]; then
+ $RAIDSTOP --configfile $RAIDCONF $MDDEV
+ else
+ forall raid1_stop_one all
+ fi
+}
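+# Diagnostics after a failed stop: log the users of each device via
+# lsof and the sysfs holders directory.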
+showusers() {
+ local disk
+ for disk; do
+ if have_binary lsof; then
+ ocf_log info "running lsof to list $disk users..."
+ ocf_run -warn lsof $disk
+ fi
+ if [ -d /sys/block/$disk/holders ]; then
+ ocf_log info "ls -l /sys/block/$disk/holders"
+ ocf_run -warn ls -l /sys/block/$disk/holders
+ fi
+ done
+}
+raid1_stop() {
+ local rc
+ # See if the MD device is already cleanly stopped:
+ if are_arrays_stopped; then
+ return $OCF_SUCCESS
+ fi
+
+ # Turn off raid
+ if ! stop_arrays; then
+ if ocf_is_true $FORCESTOP; then
+ if have_binary lsof; then
+ stop_raid_users
+ case $? in
+ 2) false;;
+ *) stop_arrays;;
+ esac
+ else
+ ocf_log warn "install lsof(8) to list users holding the disk"
+ false
+ fi
+ else
+ false
+ fi
+ fi
+ rc=$?
+
+ if [ $rc -ne 0 ]; then
+ ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)"
+ showusers $RAIDDEVS
+ if [ $HAVE_RAIDTOOLS != "true" ]; then
+ forall mark_readonly all
+ fi
+ return $OCF_ERR_GENERIC
+ fi
+
+ if are_arrays_stopped; then
+ return $OCF_SUCCESS
+ fi
+
+ ocf_exit_reason "RAID $RAIDDEVS still active after stop command!"
+ return $OCF_ERR_GENERIC
+}
+
+#
+# monitor: a less noisy status
+#
+raid1_monitor_one() {
+ local mddev=$1
+ local md=
+ local rc
+ local TRY_READD=0
+ local pbsize
+ # check if the md device exists first
+ # but not if we are in the stop operation
+ # device existence is important only for the running arrays
+ if [ "$__OCF_ACTION" != "stop" ]; then
+ if [ -h "$mddev" ]; then
+ md=$(ls -l $mddev | awk -F'/' '{print $NF}')
+ elif [ -b "$mddev" ]; then
+ md=$(echo $mddev | sed 's,/dev/,,')
+ else
+ ocf_log info "$mddev is not a block device"
+ return $OCF_NOT_RUNNING
+ fi
+ fi
+ if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then
+ ocf_log info "$md not found in /proc/mdstat"
+ return $OCF_NOT_RUNNING
+ fi
+ if [ $HAVE_RAIDTOOLS != "true" ]; then
+ $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$?
+ case $rc in
+ 0) ;;
+ 1) ocf_log warn "$mddev has at least one failed device."
+ TRY_READD=1
+ ;;
+ 2) ocf_exit_reason "$mddev has failed."
+ return $OCF_ERR_GENERIC
+ ;;
+ 4)
+ if [ "$__OCF_ACTION" = "stop" ] ; then
+ # There may be a transient invalid device after
+ # we stop MD due to uevent processing, the
+ # original device is stopped though.
+ return $OCF_NOT_RUNNING
+ else
+ ocf_exit_reason "mdadm failed on $mddev."
+ return $OCF_ERR_GENERIC
+ fi
+ ;;
+ *) ocf_exit_reason "mdadm returned an unknown result ($rc)."
+ return $OCF_ERR_GENERIC
+ ;;
+ esac
+ fi
+ if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \
+ -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then
+ ocf_log info "Attempting recovery sequence to re-add devices on $mddev:"
+ $MDADM $mddev --fail detached
+ $MDADM $mddev --remove failed
+ $MDADM $mddev --re-add missing
+ # There is nothing more we can do at this stage: either
+ # the re-add worked or it did not.
+ fi
+
+ pbsize=`(blockdev --getpbsz $mddev || stat -c "%o" $mddev) 2>/dev/null`
+ if [ -z "$pbsize" ]; then
+ ocf_log warn "both blockdev and stat could not get the block size (will use 4k)"
+ pbsize=4096 # try with 4k
+ fi
+ if ! dd if=$mddev count=1 bs=$pbsize of=/dev/null \
+ iflag=direct >/dev/null 2>&1 ; then
+ ocf_exit_reason "$mddev: I/O error on read"
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+raid1_monitor() {
+ forall raid1_monitor_one
+}
+
+#
+# STATUS: is the raid device online or offline?
+#
+raid1_status() {
+ # See if the MD device is online
+ local rc
+ raid1_monitor
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ echo "stopped"
+ else
+ echo "running"
+ fi
+ return $rc
+}
+
+raid1_validate_all() {
+ return $OCF_SUCCESS
+}
+
+PROC_CLEANUP_TIME=3
+
+if [ $# -ne 1 ]; then
+ usage
+ exit $OCF_ERR_ARGS
+fi
+
+case "$1" in
+ meta-data)
+ meta_data
+ exit $OCF_SUCCESS
+ ;;
+ usage)
+ usage
+ exit $OCF_SUCCESS
+ ;;
+ *)
+ ;;
+esac
+
+RAIDCONF="$OCF_RESKEY_raidconf"
+MDDEV="$OCF_RESKEY_raiddev"
+FORCESTOP="${OCF_RESKEY_force_stop}"
+WAIT_FOR_UDEV="${OCF_RESKEY_udev}"
+
+if [ -z "$RAIDCONF" ] ; then
+ ocf_exit_reason "Please set OCF_RESKEY_raidconf!"
+ exit $OCF_ERR_CONFIGURED
+fi
+
+if [ ! -r "$RAIDCONF" ] ; then
+ ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opened!"
+ exit $OCF_ERR_INSTALLED
+fi
+
+if [ -z "$MDDEV" ] ; then
+ ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
+ exit $OCF_ERR_CONFIGURED
+fi
+
+if ocf_is_clone && ! ocf_is_true "$OCF_RESKEY_force_clones"; then
+ ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!"
+ ocf_log err "Please read the comment on the force_clones parameter."
+ exit $OCF_ERR_CONFIGURED
+fi
+
+if ocf_is_true $WAIT_FOR_UDEV && ! have_binary udevadm; then
+ if [ "$__OCF_ACTION" = "start" ]; then
+ ocf_log warn "either install udevadm or set udev to false"
+ ocf_log info "setting udev to false!"
+ fi
+ WAIT_FOR_UDEV=0
+fi
+
+if ! ocf_is_true $WAIT_FOR_UDEV; then
+ export MDADM_NO_UDEV=1
+fi
+
+if ocf_is_true $FORCESTOP && ! have_binary lsof; then
+ ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..."
+fi
+
+HAVE_RAIDTOOLS=false
+if have_binary $MDADM >/dev/null 2>&1 ; then
+ if [ -n "$OCF_RESKEY_homehost" ]; then
+ MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}"
+ else
+ MDADM_HOMEHOST=""
+ fi
+else
+ check_binary $RAIDSTART
+ HAVE_RAIDTOOLS=true
+fi
+
+if [ $HAVE_RAIDTOOLS = true ]; then
+ if [ "$MDDEV" = "auto" ]; then
+ ocf_exit_reason "autoconf supported only with mdadm!"
+ exit $OCF_ERR_INSTALLED
+ elif [ `echo $MDDEV|wc -w` -gt 1 ]; then
+ ocf_exit_reason "multiple devices supported only with mdadm!"
+ exit $OCF_ERR_INSTALLED
+ fi
+fi
+
+if [ "$MDDEV" = "auto" ]; then
+ RAIDDEVS=`list_conf_arrays`
+else
+ RAIDDEVS="$MDDEV"
+fi
+
+# At this stage,
+# [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM,
+# otherwise we have raidtools (raidstart and raidstop)
+
+# Look for how we are called
+case "$1" in
+ start)
+ raid1_start
+ ;;
+ stop)
+ raid1_stop
+ ;;
+ status)
+ raid1_status
+ ;;
+ monitor)
+ raid1_monitor
+ ;;
+ validate-all)
+ raid1_validate_all
+ ;;
+ *)
+ usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+
+exit $?