Diffstat (limited to 'heartbeat/mdraid')
-rwxr-xr-x  heartbeat/mdraid    584
1 file changed, 584 insertions, 0 deletions
diff --git a/heartbeat/mdraid b/heartbeat/mdraid
new file mode 100755
index 0000000..1e6a5d0
--- /dev/null
+++ b/heartbeat/mdraid
@@ -0,0 +1,584 @@
+#!/bin/sh
+#
+# License: GNU General Public License (GPL)
+# Support: users@clusterlabs.org
+#
+# mdraid (inspired by the Raid1 upstream resource agent)
+#
+# Description: Manages a Linux software RAID device on a (shared) storage medium.
+# Author: Heinz Mauelshagen (heinzm@redhat.com)
+# Release: Mar 2020
+#
+# usage: $0 {start|stop|monitor|validate-all|usage|meta-data}
+#
+# EXAMPLE config file /etc/mdadm.conf (for more info: mdadm.conf(5))
+#
+# AUTO -all
+# ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
+#
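+# EXAMPLE cluster setup (illustrative sketch only; device names, the resource
+# name "md0", and the use of pcs(8) are assumptions, not requirements):
+#
+#	mdadm --create /dev/md0 --level=1 --raid-devices=2 --bitmap=clustered /dev/sda /dev/sdb
+#	pcs resource create md0 ocf:heartbeat:mdraid \
+#		mdadm_conf=/etc/mdadm.conf md_dev=/dev/md0 clone
+#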
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Parameter defaults
+
+OCF_RESKEY_mdadm_conf_default=""
+OCF_RESKEY_md_dev_default=""
+OCF_RESKEY_force_stop_default="false"
+OCF_RESKEY_wait_for_udev_default="true"
+OCF_RESKEY_force_clones_default="false"
+
+: ${OCF_RESKEY_mdadm_conf=${OCF_RESKEY_mdadm_conf_default}}
+: ${OCF_RESKEY_md_dev=${OCF_RESKEY_md_dev_default}}
+: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
+: ${OCF_RESKEY_wait_for_udev=${OCF_RESKEY_wait_for_udev_default}}
+: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
+
+# Commands and settings used below. The ${VAR:-default} form preserves any
+# values already provided by the environment or ocf-binaries (assumed defaults).
+MDADM=${MDADM:-mdadm}
+AWK=${AWK:-awk}
+EGREP=${EGREP:-"grep -E"}
+# seconds to wait between TERM and KILL when force_stop terminates array users
+PROC_CLEANUP_TIME=${PROC_CLEANUP_TIME:-3}
+
+#######################################################################
+
+usage() {
+ cat <<-EOT
+ usage: $0 {start|stop|monitor|validate-all|usage|meta-data}
+ EOT
+}
+
+#
+# Action: provide meta-data (parameter specifications and descriptive text)
+#
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="mdraid" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+This resource agent manages Linux software RAID (MD) devices on
+a shared storage medium, ensuring that non-clustered MD arrays are
+prohibited from starting cloned, which would cause data corruption
+(e.g., on raid6 arrays), unless forced (see the force_clones parameter).
+Clustered MD RAID layouts (see below) are detected and allowed to be
+cloned by default; there is no need to set force_clones for them.
+
+It uses mdadm(8) to start, stop, and monitor the MD devices.
+
+Supported clustered (i.e., clonable, active-active) arrays are linear,
+raid0, and clustered raid1/raid10 (i.e., created with mdadm(8)
+--bitmap=clustered).
+
+Option: OCF_CHECK_LEVEL
+
+When OCF_CHECK_LEVEL is set to any number greater than 0, the standard
+monitor operation (including probe) will check the array and attempt a
+recovery sequence to re-add devices if any failed device exists. By
+default, OCF_CHECK_LEVEL is unset and this check is disabled.
+
+</longdesc>
+<shortdesc lang="en">Manages Linux software RAID (MD) devices on shared
+storage</shortdesc>
+
+<parameters>
+<parameter name="mdadm_conf" unique="0" required="1">
+<longdesc lang="en">
+The MD RAID configuration file (e.g., /etc/mdadm.conf).
+</longdesc>
+<shortdesc lang="en">MD config file</shortdesc>
+<content type="string" default="${OCF_RESKEY_mdadm_conf_default}" />
+</parameter>
+
+<parameter name="md_dev" unique="0" required="1">
+<longdesc lang="en">
+MD array block device to use (e.g., /dev/md0 or /dev/md/3).
+With shared access to the array's storage, this should
+preferably be a clustered raid1 or raid10 array created
+with --bitmap=clustered, assuming its resource will
+be cloned (i.e., active-active access).
+
+Be sure to disable auto-assembly for the resource-managed arrays!
+</longdesc>
+<shortdesc lang="en">MD block device</shortdesc>
+<content type="string" default="${OCF_RESKEY_md_dev_default}" />
+</parameter>
+
+<parameter name="force_stop" unique="0" required="0">
+<longdesc lang="en">
+If processes or kernel threads are using the array, it cannot be
+stopped. We will try to stop processes, first by sending TERM and
+then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
+The lsof(8) program is required to get the list of array users.
+Of course, the kernel threads cannot be stopped this way.
+If the processes are critical for data integrity, then set this
+parameter to false. Note that in that case the stop operation
+will fail and the node will be fenced.
+</longdesc>
+<shortdesc lang="en">force stop processes using the array</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
+</parameter>
+
+<parameter name="wait_for_udev" unique="0" required="0">
+<longdesc lang="en">
+Wait until udevd has created the device during the start operation. On a
+normally loaded host this should happen quickly, but you may be
+unlucky. If you are not using udev, set this to "false".
+</longdesc>
+<shortdesc lang="en">wait_for_udev</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_wait_for_udev_default}" />
+</parameter>
+
+<parameter name="force_clones" unique="0" required="0">
+<longdesc lang="en">
+Activating the same non-clustered MD RAID array (i.e., single-host
+raid1/4/5/6/10) on multiple nodes at the same time will result in
+data corruption and thus is forbidden by default.
+
+A safe example could be an (exotic) array that is only named identically
+across all nodes, but is in fact based on distinct (non-shared) storage.
+
+Only set this to "true" if you know what you are doing!
+</longdesc>
+<shortdesc lang="en">force ability to run as a clone</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_force_clones_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20s" />
+<action name="stop" timeout="20s" />
+<action name="monitor" depth="0" timeout="20s" interval="10s" />
+<action name="validate-all" timeout="5s" />
+<action name="meta-data" timeout="5s" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+# ocf-shellfuncs' ocf_is_clone() fails when the clone-max meta attribute is < 2.
+# Until that is fixed, checking that the CRM_meta_clone_max reskey is defined is sufficient.
+resource_is_cloned() {
+ [ -z "$OCF_RESKEY_CRM_meta_clone_max" ] && return 1 || return 0;
+}
+
+raid_validate_all() {
+ if [ -z "$mdadm_conf" ] ; then
+ ocf_exit_reason "Please set OCF_RESKEY_mdadm_conf"
+ return $OCF_ERR_CONFIGURED
+ fi
+ if [ ! -r "$mdadm_conf" ] ; then
+ ocf_exit_reason "Configuration file [$mdadm_conf] does not exist, or cannot be opened"
+ return $OCF_ERR_ARGS
+ fi
+ if [ -z "$md_dev" ] ; then
+ ocf_exit_reason "Please set OCF_RESKEY_md_dev to the MD RAID array block device you want to control"
+ return $OCF_ERR_CONFIGURED
+ fi
+ case "$md_dev" in
+ /dev/*) ;;
+ *) ocf_exit_reason "Bogus MD RAID array block device name (\"$md_dev\")"
+ return $OCF_ERR_ARGS;;
+ esac
+ if ocf_is_true $wait_for_udev && ! have_binary udevadm && [ "$__OCF_ACTION" = "start" ]; then
+		ocf_exit_reason "either install udevadm or set wait_for_udev to false"
+ return $OCF_ERR_INSTALLED
+ fi
+ if ocf_is_true $force_stop && ! have_binary lsof; then
+ ocf_exit_reason "Please install lsof(8) or set force_stop to false."
+ return $OCF_ERR_INSTALLED
+ fi
+ if ! have_binary $MDADM; then
+ ocf_exit_reason "Please install mdadm(8)!"
+ return $OCF_ERR_INSTALLED
+ fi
+ if ! have_binary blkid; then
+ ocf_exit_reason "Please install blkid(8). We need it to list MD array UUIDs!"
+ return $OCF_ERR_INSTALLED
+ fi
+ if [ `echo $md_dev|wc -w` -gt 1 ]; then
+ ocf_exit_reason "Only one MD array supported"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# Remove ':' or '-' from uuid string to be able to compare between MD and blkid format.
+uuid_flat() {
+ echo $1|sed 's/[-:]//g'
+}
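+# e.g., uuid_flat 4a865b55:ba27ef8d:29cd5701:6fb42799 -> 4a865b55ba27ef8d29cd57016fb42799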
+
+# Global variable for devices by MD uuid.
+devs=""
+
+# Get array uuid from mdadm_conf based on $md_dev.
+get_array_uuid_by_mddev() {
+ local array_uuid
+
+ array_uuid="`grep $md_dev $mdadm_conf`"
+ if [ -z "$array_uuid" ]
+ then
+		ocf_exit_reason "Entry for $md_dev does not exist in $mdadm_conf!"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ array_uuid=$(echo $array_uuid | sed 's/^.*UUID=//;s/ .*$//')
+ if [ -z "$array_uuid" ]
+ then
+		ocf_exit_reason "Bogus entry for $md_dev in $mdadm_conf!"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ echo `uuid_flat $array_uuid`
+
+ return $OCF_SUCCESS
+}
+
+# Use blkid to get to the subset of raid members by array uuid.
+list_devices_for_mddev() {
+ local array_uuid blkid_array_uuid dev line rc
+
+ array_uuid=`get_array_uuid_by_mddev`
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_exit_reason "Failed to get UUID of $md_dev from $mdadm_conf"
+ return $rc
+ fi
+
+ blkid | grep linux_raid_member 2>/dev/null | while read line
+ do
+ dev=`echo $line | sed 's/: .*//'`
+ blkid_array_uuid=$(echo $line | sed 's/^.* UUID="//;s/" .*$//')
+ [ "`uuid_flat $blkid_array_uuid`" = "$array_uuid" ] && echo $dev
+ done
+}
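+# EXAMPLE blkid line matched above (simplified, illustrative):
+#   /dev/sdb1: UUID="4a865b55-ba27-ef8d-29cd-57016fb42799" TYPE="linux_raid_member"
+# which yields dev=/dev/sdb1 and, after uuid_flat, the same string as the
+# mdadm.conf UUID in the header example.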
+
+# Check for linear or raid0 array; presumes the global devs variable is defined.
+array_is_linear_or_raid0() {
+ local c=0 d
+
+ for d in $devs
+ do
+ $MDADM -E $d 2>&1 | $EGREP -i "raid level : (raid0|linear)" >/dev/null 2>&1
+ [ $? -eq 0 ] && c=$((c+1))
+ done
+
+ [ $c -eq `echo $devs|wc -w` ] && return 0 || return 1
+}
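+# EXAMPLE mdadm -E output line matched above (illustrative):
+#          Raid Level : raid0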
+
+# Return true for clustered RAID relying on all component devices reporting clustered type;
+# presumes defined global devs variable with the component devices of the array.
+array_is_clustered_raid() {
+ local c=0 d dev_count=`echo $devs|wc -w` s
+	# Check for the specific "intent bitmap : clustered" output once it is
+	# available in mdadm; otherwise fall back to a defined "Cluster Name",
+	# which implies the bitmap is clustered.
+ local strs="clustered cluster.name"
+
+ for d in $devs
+ do
+ for s in $strs
+ do
+ $MDADM -E $d 2>&1 | grep -i "$s" >/dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ c=$((c+1))
+ break
+ fi
+ done
+ done
+
+ [ $c -eq $dev_count ] && return 0 || return 1
+}
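+# EXAMPLE mdadm -E output line matched above via the "cluster.name" pattern
+# (illustrative; the cluster name is a placeholder):
+#        Cluster Name : mycluster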
+
+# Check for all clustered types (linear, raid0, and clustered raid1/raid10).
+is_clustered_raid() {
+ array_is_clustered_raid || array_is_linear_or_raid0
+}
+
+md_assemble() {
+ local rc
+
+ $MDADM --assemble $md_dev --config="$mdadm_conf"
+ rc=$?
+ [ $rc -eq 0 ] && ocf_is_true $wait_for_udev && udevadm settle --exit-if-exists=$md_dev
+
+ return $rc
+}
+
+# Try setting an MD array to readonly.
+mark_readonly() {
+ local rc
+
+ $MDADM --readonly $md_dev --config="$mdadm_conf"
+ rc=$?
+ [ $rc -ne 0 ] && ocf_exit_reason "Failed to set $md_dev readonly (rc=$rc)"
+
+ return $rc
+}
+
+# Try stopping an MD array in case its block device is nonexistent for some reason.
+mknod_raid_stop() {
+ local rc n tmp_block_file
+
+ # first create a block device file, then try to stop the array
+ n=`echo $1 | sed 's/[^0-9]*//'`
+ if ! ocf_is_decimal "$n"; then
+ ocf_log warn "could not get the minor device number from $1"
+ return 1
+ fi
+ tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-`basename $1`"
+ rm -f $tmp_block_file
+ ocf_log info "block device file $1 missing, creating one in order to stop the array"
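+	# block major number 9 is the Linux MD driver; $n is the array's minor number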
+ mknod $tmp_block_file b 9 $n
+ $MDADM --stop $tmp_block_file --config="$mdadm_conf"
+ rc=$?
+ rm -f $tmp_block_file
+ return $rc
+}
+
+# Stop an MD array.
+raid_stop_one() {
+ if [ -b "$1" ]; then
+ $MDADM --stop $1 --config="$mdadm_conf" && return
+ else
+ # newer mdadm releases can stop arrays when given the
+ # basename; try that first
+ $MDADM --stop `basename $1` --config="$mdadm_conf" && return
+ # otherwise create a block device file
+ mknod_raid_stop $1
+ fi
+}
+
+# Functions to show and stop any processes holding the array.
+get_users_pids() {
+	ocf_log debug "running lsof to list $md_dev users..."
+	lsof $md_dev 2>/dev/null | tail -n +2 | $AWK '{print $2}' | sort -u
+}
+
+stop_raid_users() {
+ local pids=`get_users_pids $md_dev`
+
+ if [ -z "$pids" ]; then
+ ocf_log warn "lsof reported no users holding arrays"
+ return 2
+ else
+ ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids
+ fi
+}
+
+showusers() {
+ local disk=`basename $md_dev`
+
+ ocf_log info "running lsof to list $disk users..."
+ ocf_run -warn lsof $md_dev
+
+ if [ -d /sys/block/$disk/holders ]; then
+ ocf_log info "ls -l /sys/block/$disk/holders"
+ ocf_run -warn ls -l /sys/block/$disk/holders
+ fi
+}
+
+#######################################################################
+
+#
+# Action: START up the MD RAID array.
+#
+raid_start() {
+ local rc
+
+ if resource_is_cloned && ! is_clustered_raid; then
+ if ocf_is_true "$OCF_RESKEY_force_clones"; then
+			ocf_log warn "Forced start of non-clustered $md_dev as a clone; this may lead to data corruption!"
+ else
+ ocf_exit_reason "Rejecting start: non-clustered MD RAID array $md_dev is NOT safe to run cloned"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+
+ raid_monitor
+ rc=$?
+ # md array already online, nothing to do.
+ [ $rc -eq $OCF_SUCCESS ] && return $rc
+
+ if [ $rc -ne $OCF_NOT_RUNNING ]
+ then
+ # If the array is in a broken state, this agent doesn't know how to repair that.
+ ocf_exit_reason "MD RAID array $md_dev in a broken state; cannot start (rc=$rc)"
+ return $OCF_ERR_GENERIC
+ fi
+
+ md_assemble
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_exit_reason "Failed to assemble MD RAID array $md_dev (rc=$rc, is $mdadm_conf up-to-date?)"
+ return $OCF_ERR_GENERIC
+ fi
+
+ raid_monitor
+ [ $? -eq $OCF_SUCCESS ] && return $OCF_SUCCESS
+
+ ocf_exit_reason "Couldn't start MD RAID array $md_dev (rc=$rc)"
+
+ return $OCF_ERR_GENERIC
+}
+
+#
+# Action: STOP the MD RAID array
+#
+raid_stop() {
+ local rc
+
+ # See if the MD device is already cleanly stopped:
+ raid_monitor
+ [ $? -eq $OCF_NOT_RUNNING ] && return $OCF_SUCCESS
+
+ # Turn off raid
+ if ! raid_stop_one $md_dev; then
+ if ocf_is_true $force_stop; then
+ stop_raid_users
+ case $? in
+ 2) false;;
+ *) raid_stop_one $md_dev;;
+ esac
+ else
+ false
+ fi
+ fi
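+	# $? reflects the last command run above: 0 if raid_stop_one succeeded,
+	# non-zero if stopping the array (or its users) failed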
+ rc=$?
+
+ if [ $rc -ne 0 ]; then
+ ocf_log warn "Couldn't stop MD RAID array $md_dev (rc=$rc)"
+ showusers $md_dev
+ mark_readonly $md_dev
+ return $OCF_ERR_GENERIC
+ fi
+
+ raid_monitor
+ rc=$?
+ [ $rc -eq $OCF_NOT_RUNNING ] && return $OCF_SUCCESS
+
+ ocf_exit_reason "MD RAID array $md_dev still active after stop command (rc=$rc)"
+ return $OCF_ERR_GENERIC
+}
+
+#
+# Action: monitor the MD RAID array.
+#
+raid_monitor() {
+ local TRY_READD=0 md rc pbsize
+
+ # check if the md device exists first
+ # but not if we are in the stop operation
+ # device existence is important only for the running arrays
+ if [ "$__OCF_ACTION" != "stop" ]; then
+ if [ -h "$md_dev" ]; then
+ md=$(ls $md_dev -l | $AWK -F'/' '{print $NF}')
+ elif [ -b "$md_dev" ]; then
+ md=${md_dev#/dev/}
+ else
+ ocf_log info "$md_dev is not a block device"
+ return $OCF_NOT_RUNNING
+ fi
+ fi
+
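+	# EXAMPLE /proc/mdstat line matched below (illustrative):
+	#   md0 : active raid1 sdb[1] sda[0]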
+ if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then
+ ocf_log info "$md not found in /proc/mdstat"
+ return $OCF_NOT_RUNNING
+ fi
+
+ $MDADM --detail --test $md_dev >/dev/null 2>&1
+ rc=$?
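+	# mdadm --detail --test exit codes (see mdadm(8)): 0 = array ok,
+	# 1 = at least one failed device, 2 = unusable, 4 = error getting device info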
+ case $rc in
+ 0) ;;
+ 1) ocf_log warn "$md_dev has at least one failed device."
+ TRY_READD=1;;
+ 2) ocf_exit_reason "$md_dev has failed."
+ return $OCF_ERR_GENERIC;;
+ 4)
+ if [ "$__OCF_ACTION" = "stop" ] ; then
+ # There may be a transient invalid device after
+ # we stop MD due to uevent processing, the
+ # original device is stopped though.
+ return $OCF_NOT_RUNNING
+ else
+ ocf_exit_reason "mdadm failed on $md_dev."
+ return $OCF_ERR_GENERIC
+ fi;;
+ *) ocf_exit_reason "mdadm returned an unknown result ($rc)."
+ return $OCF_ERR_GENERIC;;
+ esac
+
+ if ! array_is_linear_or_raid0; then
+		if [ "$__OCF_ACTION" = "monitor" ] && [ "$OCF_RESKEY_CRM_meta_interval" != 0 ] &&
+		   [ $TRY_READD -eq 1 ] && [ "${OCF_CHECK_LEVEL:-0}" -gt 0 ]; then
+ ocf_log info "Attempting recovery sequence to re-add devices on MD RAID array $md_dev:"
+ $MDADM $md_dev --fail detached
+ $MDADM $md_dev --remove failed
+ $MDADM $md_dev --re-add missing
+ # TODO: At this stage, there's nothing to actually do
+ # here. Either this worked or it did not.
+ fi
+ fi
+
+ pbsize=`(blockdev --getpbsz $md_dev || stat -c "%o" $md_dev) 2>/dev/null`
+ if [ -z "$pbsize" ]; then
+ ocf_log warn "both blockdev and stat were unable to get the block size (will use 4k)"
+ pbsize=4096 # try with 4k
+ fi
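+	# verify the array is actually readable: a single direct-I/O read of one
+	# physical block catches arrays that are assembled but not serving I/O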
+ if ! dd if=$md_dev count=1 bs=$pbsize of=/dev/null iflag=direct >/dev/null 2>&1; then
+ ocf_exit_reason "$md_dev: I/O error on read"
+ return $OCF_ERR_GENERIC
+ fi
+
+ [ "$__OCF_ACTION" = "monitor" ] && ocf_log info "monitoring...($md_dev)"
+
+ return $OCF_SUCCESS
+}
+
+if [ $# -ne 1 ]; then
+ usage
+ exit $OCF_ERR_ARGS
+fi
+
+# Process actions that are independent of validation.
+case "$1" in
+meta-data) meta_data
+ exit $OCF_SUCCESS;;
+usage) usage
+ exit $OCF_SUCCESS;;
+*) ;;
+esac
+
+# Define global variables used in the functions above.
+mdadm_conf="${OCF_RESKEY_mdadm_conf}"
+md_dev="${OCF_RESKEY_md_dev}"
+force_stop="${OCF_RESKEY_force_stop}"
+wait_for_udev="${OCF_RESKEY_wait_for_udev}"
+
+# Validate all parameters and check that mandatory binaries are present.
+raid_validate_all
+rc=$?
+[ $rc -ne $OCF_SUCCESS ] && exit $rc
+# raid_validate_all has already run and its result was checked above.
+[ "$1" = "validate-all" ] && exit $OCF_SUCCESS
+
+# Component devices required by the start|stop|monitor actions processed below.
+devs="`list_devices_for_mddev`"
+if [ -z "$devs" ]; then
+ ocf_exit_reason "No component device(s) found for MD RAID array $md_dev"
+ exit $OCF_ERR_GENERIC
+fi
+
+case "$1" in
+start) raid_start;;
+stop) raid_stop;;
+monitor) raid_monitor;;
+*) usage
+ exit $OCF_ERR_UNIMPLEMENTED;;
+esac
+rc=$?
+
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc