From 7de03e4e519705301265c0415b3c0af85263a7ac Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 17 Apr 2024 09:52:36 +0200
Subject: Adding upstream version 1:4.13.0.

Signed-off-by: Daniel Baumann
---
 heartbeat/Raid1 | 586 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 586 insertions(+)
 create mode 100755 heartbeat/Raid1

diff --git a/heartbeat/Raid1 b/heartbeat/Raid1
new file mode 100755
index 0000000..924d94c
--- /dev/null
+++ b/heartbeat/Raid1
@@ -0,0 +1,586 @@
#!/bin/sh
#
#
# License:	GNU General Public License (GPL)
# Support:	users@clusterlabs.org
#
# Raid1
#	Description: Manages a Linux software RAID device on a shared
#		     storage medium.
#	Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
#	Original Release: 25 Oct 2000
#	RAID patches: http://people.redhat.com/mingo/raid-patches/
#	Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3
#	Sympathetic Ear: mailto:linux-raid@vger.kernel.org
#
# usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
#
#
# EXAMPLE config file /etc/raidtab.md0
# This file must exist on both machines!
#
#  raiddev		    /dev/md0
#  raid-level		    1
#  nr-raid-disks	    2
#  chunk-size		    64k
#  persistent-superblock    1
#  #nr-spare-disks	    0
#    device		    /dev/sda1
#    raid-disk		    0
#    device		    /dev/sdb1
#    raid-disk		    1
#
# EXAMPLE config file /etc/mdadm.conf (for more info: man mdadm.conf)
#
#  DEVICE /dev/sdb1 /dev/sdc1
#  ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
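#
# EXAMPLE cluster resource definition (crm shell syntax). This is only a
# sketch: the resource name "p_raid1", the config file, the device and the
# operation timeouts below are placeholders to adapt to the local cluster.
#
#  primitive p_raid1 ocf:heartbeat:Raid1 \
#    params raidconf="/etc/mdadm.conf" raiddev="/dev/md0" \
#    op monitor interval=10s timeout=20s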
#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Parameter defaults

OCF_RESKEY_raidconf_default=""
OCF_RESKEY_raiddev_default=""
OCF_RESKEY_homehost_default=""
OCF_RESKEY_force_stop_default="true"
OCF_RESKEY_udev_default="true"
OCF_RESKEY_force_clones_default="false"

: ${OCF_RESKEY_raidconf=${OCF_RESKEY_raidconf_default}}
: ${OCF_RESKEY_raiddev=${OCF_RESKEY_raiddev_default}}
: ${OCF_RESKEY_homehost=${OCF_RESKEY_homehost_default}}
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}}
: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}

#######################################################################

usage() {
	cat <<-EOT
	usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
	EOT
}

meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Raid1" version="1.0">
<version>1.0</version>

<longdesc lang="en">
This resource agent manages Linux software RAID (MD) devices on
a shared storage medium. It uses mdadm(8) to start, stop, and
monitor the MD devices. Raidtools are supported, but deprecated.
See https://raid.wiki.kernel.org/index.php/Linux_Raid for more
information.
</longdesc>
<shortdesc lang="en">Manages Linux software RAID (MD) devices on shared storage</shortdesc>

<parameters>
<parameter name="raidconf" unique="0" required="1">
<longdesc lang="en">
The RAID configuration file, e.g. /etc/mdadm.conf.
</longdesc>
<shortdesc lang="en">RAID config file</shortdesc>
<content type="string" default="${OCF_RESKEY_raidconf_default}" />
</parameter>

<parameter name="raiddev" unique="0" required="1">
<longdesc lang="en">
One or more block devices to use, space separated. Alternatively,
set to "auto" to manage all devices specified in raidconf.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="${OCF_RESKEY_raiddev_default}" />
</parameter>

<parameter name="homehost" unique="0" required="0">
<longdesc lang="en">
The value for the homehost directive; this is an mdadm feature to
protect RAIDs against being activated by accident. It is recommended to
create RAIDs managed by the cluster with "homehost" set to a special
value, so they are not accidentally auto-assembled by nodes not
supposed to own them.
</longdesc>
<shortdesc lang="en">Homehost for mdadm</shortdesc>
<content type="string" default="${OCF_RESKEY_homehost_default}" />
</parameter>

<parameter name="force_stop" unique="0" required="0">
<longdesc lang="en">
If processes or kernel threads are using the array, it cannot be
stopped. We will try to stop processes, first by sending TERM and
then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
The lsof(8) program is required to get the list of array users.
Of course, the kernel threads cannot be stopped this way.
If the processes are critical for data integrity, then set this
parameter to false. Note that in that case the stop operation
will fail and the node will be fenced.
</longdesc>
<shortdesc lang="en">force stop processes using the array</shortdesc>
<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
</parameter>

<parameter name="udev" unique="0" required="0">
<longdesc lang="en">
Wait until udevd creates a device in the start operation. On a
normally loaded host this should happen quickly, but you may be
unlucky. If you are not using udev set this to "no".
</longdesc>
<shortdesc lang="en">udev</shortdesc>
<content type="boolean" default="${OCF_RESKEY_udev_default}" />
</parameter>

<parameter name="force_clones" unique="0" required="0">
<longdesc lang="en">
Activating the same md RAID array on multiple nodes at the same time
will result in data corruption and thus is forbidden by default.

A safe example could be an array that is only named identically across
all nodes, but is in fact distinct.

Only set this to "true" if you know what you are doing!
</longdesc>
<shortdesc lang="en">force ability to run as a clone</shortdesc>
<content type="boolean" default="${OCF_RESKEY_force_clones_default}" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" depth="0" timeout="20s" interval="10s" />
<action name="monitor" depth="0" timeout="20s" interval="10s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}

udev_settle() {
	if ocf_is_true $WAIT_FOR_UDEV; then
		udevadm settle $*
	fi
}
list_conf_arrays() {
	test -f $RAIDCONF || {
		ocf_exit_reason "$RAIDCONF gone missing!"
		exit $OCF_ERR_GENERIC
	}
	grep ^ARRAY $RAIDCONF | awk '{print $2}'
}
forall() {
	local func=$1
	local checkall=$2
	local mddev rc=0
	for mddev in $RAIDDEVS; do
		$func $mddev
		rc=$(($rc | $?))
		[ "$checkall" = all ] && continue
		[ $rc -ne 0 ] && return $rc
	done
	return $rc
}
are_arrays_stopped() {
	local rc mddev
	for mddev in $RAIDDEVS; do
		raid1_monitor_one $mddev
		rc=$?
		[ $rc -ne $OCF_NOT_RUNNING ] && break
	done
	test $rc -eq $OCF_NOT_RUNNING
}

md_assemble() {
	local mddev=$1
	$MDADM --assemble $mddev --config=$RAIDCONF $MDADM_HOMEHOST
	udev_settle --exit-if-exists=$mddev
}
#
# START: Start up the RAID device
#
raid1_start() {
	local rc
	raid1_monitor
	rc=$?
	if [ $rc -eq $OCF_SUCCESS ]; then
		# md already online, nothing to do.
		return $OCF_SUCCESS
	fi
	if [ $rc -ne $OCF_NOT_RUNNING ]; then
		# If the array is in a broken state, this agent doesn't
		# know how to repair that.
		ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$rc)"
		return $OCF_ERR_GENERIC
	fi

	if [ $HAVE_RAIDTOOLS = "true" ]; then
		# Run raidstart to start up the RAID array
		$RAIDSTART --configfile $RAIDCONF $MDDEV
	else
		forall md_assemble all
	fi

	raid1_monitor
	if [ $? -eq $OCF_SUCCESS ]; then
		return $OCF_SUCCESS
	else
		ocf_exit_reason "Couldn't start RAID for $RAIDDEVS"
		return $OCF_ERR_GENERIC
	fi
}

#
# STOP: stop the RAID device
#
mark_readonly() {
	local mddev=$1
	local rc
	ocf_log info "Attempting to mark array $mddev readonly"
	$MDADM --readonly $mddev --config=$RAIDCONF
	rc=$?
	if [ $rc -ne 0 ]; then
		ocf_exit_reason "Failed to set $mddev readonly (rc=$rc)"
	fi
	return $rc
}
mknod_raid1_stop() {
	# first create a block device file, then try to stop the
	# array
	local rc n tmp_block_file
	n=`echo $1 | sed 's/[^0-9]*//'`
	if ! ocf_is_decimal "$n"; then
		ocf_log warn "could not get the minor device number from $1"
		return 1
	fi
	tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-`basename $1`"
	rm -f $tmp_block_file
	ocf_log info "block device file $1 missing, creating one in order to stop the array"
	mknod $tmp_block_file b 9 $n
	$MDADM --stop $tmp_block_file --config=$RAIDCONF
	rc=$?
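	# keep mdadm's exit status here: the temporary device node created
	# above is removed next, and that status is then returned to the caller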
+ rm -f $tmp_block_file + return $rc +} +raid1_stop_one() { + ocf_log info "Stopping array $1" + if [ -b "$1" ]; then + $MDADM --stop $1 --config=$RAIDCONF && + return + else + # newer mdadm releases can stop arrays when given the + # basename; try that first + $MDADM --stop `basename $1` --config=$RAIDCONF && + return + # otherwise create a block device file + mknod_raid1_stop $1 + fi +} +get_users_pids() { + local mddev=$1 + local outp l + ocf_log debug "running lsof to list $mddev users..." + outp=`lsof $mddev | tail -n +2` + echo "$outp" | awk '{print $2}' | sort -u + echo "$outp" | while read l; do + ocf_log warn "$l" + done +} +stop_raid_users() { + local pids + pids=`forall get_users_pids all | sort -u` + if [ -z "$pids" ]; then + ocf_log warn "lsof reported no users holding arrays" + return 2 + else + ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids + fi +} +stop_arrays() { + if [ $HAVE_RAIDTOOLS = "true" ]; then + $RAIDSTOP --configfile $RAIDCONF $MDDEV + else + forall raid1_stop_one all + fi +} +showusers() { + local disk + for disk; do + if have_binary lsof; then + ocf_log info "running lsof to list $disk users..." + ocf_run -warn lsof $disk + fi + if [ -d /sys/block/$disk/holders ]; then + ocf_log info "ls -l /sys/block/$disk/holders" + ocf_run -warn ls -l /sys/block/$disk/holders + fi + done +} +raid1_stop() { + local rc + # See if the MD device is already cleanly stopped: + if are_arrays_stopped; then + return $OCF_SUCCESS + fi + + # Turn off raid + if ! stop_arrays; then + if ocf_is_true $FORCESTOP; then + if have_binary lsof; then + stop_raid_users + case $? in + 2) false;; + *) stop_arrays;; + esac + else + ocf_log warn "install lsof(8) to list users holding the disk" + false + fi + else + false + fi + fi + rc=$? + + if [ $rc -ne 0 ]; then + ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)" + showusers $RAIDDEVS + if [ $HAVE_RAIDTOOLS != "true" ]; then + forall mark_readonly all + fi + return $OCF_ERR_GENERIC + fi + + if are_arrays_stopped; then + return $OCF_SUCCESS + fi + + ocf_exit_reason "RAID $RAIDDEVS still active after stop command!" + return $OCF_ERR_GENERIC +} + +# +# monitor: a less noisy status +# +raid1_monitor_one() { + local mddev=$1 + local md= + local rc + local TRY_READD=0 + local pbsize + # check if the md device exists first + # but not if we are in the stop operation + # device existence is important only for the running arrays + if [ "$__OCF_ACTION" != "stop" ]; then + if [ -h "$mddev" ]; then + md=$(ls $mddev -l | awk -F'/' '{print $NF}') + elif [ -b "$mddev" ]; then + md=$(echo $mddev | sed 's,/dev/,,') + else + ocf_log info "$mddev is not a block device" + return $OCF_NOT_RUNNING + fi + fi + if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then + ocf_log info "$md not found in /proc/mdstat" + return $OCF_NOT_RUNNING + fi + if [ $HAVE_RAIDTOOLS != "true" ]; then + $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? + case $rc in + 0) ;; + 1) ocf_log warn "$mddev has at least one failed device." + TRY_READD=1 + ;; + 2) ocf_exit_reason "$mddev has failed." + return $OCF_ERR_GENERIC + ;; + 4) + if [ "$__OCF_ACTION" = "stop" ] ; then + # There may be a transient invalid device after + # we stop MD due to uevent processing, the + # original device is stopped though. + return $OCF_NOT_RUNNING + else + ocf_exit_reason "mdadm failed on $mddev." + return $OCF_ERR_GENERIC + fi + ;; + *) ocf_exit_reason "mdadm returned an unknown result ($rc)." 
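			# mdadm --detail --test is documented to return 0, 1, 2 or 4,
			# all of which are handled above, so any other status is unexpected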
+ return $OCF_ERR_GENERIC + ;; + esac + fi + if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ + -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then + ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" + $MDADM $mddev --fail detached + $MDADM $mddev --remove failed + $MDADM $mddev --re-add missing + # TODO: At this stage, there's nothing to actually do + # here. Either this worked or it did not. + fi + + pbsize=`(blockdev --getpbsz $mddev || stat -c "%o" $mddev) 2>/dev/null` + if [ -z "$pbsize" ]; then + ocf_log warn "both blockdev and stat could not get the block size (will use 4k)" + pbsize=4096 # try with 4k + fi + if ! dd if=$mddev count=1 bs=$pbsize of=/dev/null \ + iflag=direct >/dev/null 2>&1 ; then + ocf_exit_reason "$mddev: I/O error on read" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +raid1_monitor() { + forall raid1_monitor_one +} + +# +# STATUS: is the raid device online or offline? +# +raid1_status() { + # See if the MD device is online + local rc + raid1_monitor + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + echo "stopped" + else + echo "running" + fi + return $rc +} + +raid1_validate_all() { + return $OCF_SUCCESS +} + +PROC_CLEANUP_TIME=3 + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +case "$1" in + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + usage) + usage + exit $OCF_SUCCESS + ;; + *) + ;; +esac + +RAIDCONF="$OCF_RESKEY_raidconf" +MDDEV="$OCF_RESKEY_raiddev" +FORCESTOP="${OCF_RESKEY_force_stop}" +WAIT_FOR_UDEV="${OCF_RESKEY_udev}" + +if [ -z "$RAIDCONF" ] ; then + ocf_exit_reason "Please set OCF_RESKEY_raidconf!" + exit $OCF_ERR_CONFIGURED +fi + +if [ ! -r "$RAIDCONF" ] ; then + ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opened!" + exit $OCF_ERR_INSTALLED +fi + +if [ -z "$MDDEV" ] ; then + ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" + exit $OCF_ERR_CONFIGURED +fi + +if ocf_is_clone && ! ocf_is_true "$OCF_RESKEY_force_clones"; then + ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!" + ocf_log err "Please read the comment on the force_clones parameter." + exit $OCF_ERR_CONFIGURED +fi + +if ocf_is_true $WAIT_FOR_UDEV && ! have_binary udevadm; then + if [ "$__OCF_ACTION" = "start" ]; then + ocf_log warn "either install udevadm or set udev to false" + ocf_log info "setting udev to false!" + fi + WAIT_FOR_UDEV=0 +fi + +if ! ocf_is_true $WAIT_FOR_UDEV; then + export MDADM_NO_UDEV=1 +fi + +if ocf_is_true $FORCESTOP && ! have_binary lsof; then + ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..." +fi + +HAVE_RAIDTOOLS=false +if have_binary $MDADM >/dev/null 2>&1 ; then + if [ -n "$OCF_RESKEY_homehost" ]; then + MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" + else + MDADM_HOMEHOST="" + fi +else + check_binary $RAIDSTART + HAVE_RAIDTOOLS=true +fi + +if [ $HAVE_RAIDTOOLS = true ]; then + if [ "$MDDEV" = "auto" ]; then + ocf_exit_reason "autoconf supported only with mdadm!" + exit $OCF_ERR_INSTALLED + elif [ `echo $MDDEV|wc -w` -gt 1 ]; then + ocf_exit_reason "multiple devices supported only with mdadm!" 
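		# the raidtools code path hands the single configured $MDDEV straight
		# to raidstart/raidstop, so a space-separated device list cannot work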
+ exit $OCF_ERR_INSTALLED + fi +fi + +if [ "$MDDEV" = "auto" ]; then + RAIDDEVS=`list_conf_arrays` +else + RAIDDEVS="$MDDEV" +fi + +# At this stage, +# [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, +# otherwise we have raidtools (raidstart and raidstop) + +# Look for how we are called +case "$1" in + start) + raid1_start + ;; + stop) + raid1_stop + ;; + status) + raid1_status + ;; + monitor) + raid1_monitor + ;; + validate-all) + raid1_validate_all + ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $? -- cgit v1.2.3