diff options
Diffstat (limited to '')
-rw-r--r-- | agents/ocf/controld.in | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/agents/ocf/controld.in b/agents/ocf/controld.in new file mode 100644 index 0000000..ace4933 --- /dev/null +++ b/agents/ocf/controld.in @@ -0,0 +1,303 @@ +#!/bin/sh +# +# ocf:pacemaker:controld resource agent +# +# Copyright 2008-2023 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# (GPLv2) WITHOUT ANY WARRANTY. + +# +# Manages the DLM controld process +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} +. "${OCF_FUNCTIONS}" +: ${__OCF_ACTION:="$1"} + +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_CRM_meta_globally_unique:="false"} +: ${OCF_RESKEY_allow_stonith_disabled:="false"} +: ${OCF_RESKEY_sctp:="false"} +: ${OCF_RESOURCE_INSTANCE:=""} + +case "$OCF_RESOURCE_INSTANCE" in + *[gG][fF][sS]*) + : ${OCF_RESKEY_args=-g 0} + : ${OCF_RESKEY_daemon:=gfs_controld} + ;; + *[dD][lL][mM]*) + : ${OCF_RESKEY_args=-s 0} + : ${OCF_RESKEY_daemon:=dlm_controld} + ;; + *) + : ${OCF_RESKEY_args=-s 0} + : ${OCF_RESKEY_daemon:=dlm_controld} +esac + + +####################################################################### + +if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then + ocf_log info "Using heartbeat controld agent" + "$OCF_ROOT/resource.d/heartbeat/controld" "$1" + exit $? +fi + +meta_data() { + cat <<END +<?xml version="1.0"?> +<resource-agent name="controld" version="@VERSION@"> +<version>1.1</version> + +<longdesc lang="en"> +This Resource Agent can control the dlm_controld services needed by cluster-aware file systems. +It assumes that dlm_controld is in your default PATH. +In most cases, it should be run as an anonymous clone. +</longdesc> +<shortdesc lang="en">DLM Agent for cluster file systems</shortdesc> + +<parameters> + +<parameter name="args"> +<longdesc lang="en"> +Any additional options to start the dlm_controld service with +</longdesc> +<shortdesc lang="en">DLM Options</shortdesc> +<content type="string" default="-s 0" /> +</parameter> + +<parameter name="daemon" unique-group="daemon"> +<longdesc lang="en"> +The daemon to start - supports gfs_controld and dlm_controld +</longdesc> +<shortdesc lang="en">The daemon to start</shortdesc> +<content type="string" default="dlm_controld" /> +</parameter> + +<parameter name="allow_stonith_disabled"> +<longdesc lang="en"> +Allow DLM start-up even if STONITH/fencing is disabled in the cluster. + +Setting this option to true will cause cluster malfunction and hangs on +fail-over for DLM clients that require fencing (such as GFS2, OCFS2, and +cLVM2). + +This option is advanced use only. +</longdesc> +<shortdesc lang="en">Allow start-up even without STONITH/fencing</shortdesc> +<content type="string" default="false" /> +</parameter> + +</parameters> + +<actions> +<action name="start" timeout="90s" /> +<action name="stop" timeout="100s" /> +<action name="monitor" timeout="20s" interval="10s" depth="0" start-delay="0s" /> +<action name="meta-data" timeout="5s" /> +<action name="validate-all" timeout="30s" depth="0" /> +</actions> +</resource-agent> +END +} + +####################################################################### + +CONFIGFS_DIR="/sys/kernel/config" +DLM_CONFIGFS_DIR="${CONFIGFS_DIR}/dlm" +DLM_SYSFS_DIR="/sys/kernel/dlm" + +controld_usage() { + cat <<END +usage: $0 {start|stop|monitor|validate-all|meta-data} + +Expects to have a fully populated OCF RA-compliant environment set. +END +} + +check_uncontrolled_locks() +{ + CUL_TMP=$(ls "$DLM_SYSFS_DIR" 2>&1) + if [ $? -eq 0 ]; then + if [ -n "$CUL_TMP" ]; then + + ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing" + stonith_admin --reboot="$(crm_node -n)" --tag controld + + exit $OCF_ERR_GENERIC + fi + fi +} + +controld_start() { + controld_monitor; rc=$? + + case $rc in + "$OCF_SUCCESS") return $OCF_SUCCESS;; + "$OCF_NOT_RUNNING") ;; + *) return $OCF_ERR_GENERIC;; + esac + + # Ensure @runstatedir@/cluster exists + [ -d "@runstatedir@/cluster" ] || mkdir "@runstatedir@/cluster" + + # Ensure configfs is mounted + if [ ! -e "$CONFIGFS_DIR" ]; then + modprobe configfs + if [ ! -e "$CONFIGFS_DIR" ]; then + ocf_log err "$CONFIGFS_DIR not available" + return $OCF_ERR_INSTALLED + fi + fi + mount -t configfs | grep " $CONFIGFS_DIR " >/dev/null 2>/dev/null + if [ $? -ne 0 ]; then + mount -t configfs none "$CONFIGFS_DIR" + fi + + # Ensure DLM is available + if [ ! -e "$DLM_CONFIGFS_DIR" ]; then + modprobe dlm + if [ ! -e "$DLM_CONFIGFS_DIR" ]; then + ocf_log err "$DLM_CONFIGFS_DIR not available" + return $OCF_ERR_INSTALLED + fi + fi + + if ! ocf_is_true "$OCF_RESKEY_allow_stonith_disabled" && \ + ! ocf_is_true "$(crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true)"; then + ocf_log err "The cluster property stonith-enabled may not be deactivated to use the DLM" + return $OCF_ERR_CONFIGURED + fi + + # If no-quorum-policy not set, or not set as freeze, give a warning + crm_attribute --type=crm_config --name=no-quorum-policy --query|grep value=freeze >/dev/null 2>/dev/null + if [ $? -ne 0 ]; then + ocf_log warn "The DLM cluster best practice suggests to set the cluster property \"no-quorum-policy=freeze\"" + fi + + "${OCF_RESKEY_daemon}" $OCF_RESKEY_args + + while true + do + sleep 1 + + controld_monitor; rc=$? + case $rc in + "$OCF_SUCCESS") + CS_ADDR_LIST="$(cat "${DLM_CONFIGFS_DIR}"/cluster/comms/*/addr_list 2>/dev/null)" + if [ $? -eq 0 ] && [ -n "$CS_ADDR_LIST" ]; then + return $OCF_SUCCESS + fi + ;; + "$OCF_NOT_RUNNING") + return $OCF_NOT_RUNNING + ;; + *) + return $OCF_ERR_GENERIC + ;; + esac + + ocf_log debug "Waiting for ${OCF_RESKEY_daemon} to be ready" + done +} + +controld_stop() { + controld_monitor; rc=$? + + if [ $rc -eq $OCF_NOT_RUNNING ]; then + return $OCF_SUCCESS + fi + + killall -TERM "${OCF_RESKEY_daemon}"; rc=$? + + if [ $rc -ne 0 ]; then + return $OCF_ERR_GENERIC + fi + + rc=$OCF_SUCCESS + while [ $rc -eq $OCF_SUCCESS ]; do + controld_monitor; rc=$? + sleep 1 + done + + if [ $rc -eq $OCF_NOT_RUNNING ]; then + rc=$OCF_SUCCESS + fi + + return $rc +} + +controld_monitor() { + killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; CM_RC=$? + + case $CM_RC in + 0) smw=$(dlm_tool status -v | grep "stateful_merge_wait=" | cut -d= -f2) + if [ -n "$smw" ] && [ $smw -eq 1 ]; then + ocf_log err "DLM status is: stateful_merge_wait" + CM_RC=$OCF_ERR_GENERIC + elif [ -z "$smw" ] && dlm_tool ls | grep -q "wait fencing" && \ + ! stonith_admin -H '*' --output-as xml | grep -q "extended-status=\"pending\""; then + ocf_log err "DLM status is: wait fencing" + CM_RC=$OCF_ERR_GENERIC + else + CM_RC=$OCF_SUCCESS + fi + ;; + 1) CM_RC=$OCF_NOT_RUNNING;; + *) CM_RC=$OCF_ERR_GENERIC;; + esac + + # if the dlm is not successfully running, but + # dlm lockspace bits are left over, we self must fence. + if [ $CM_RC -ne $OCF_SUCCESS ]; then + check_uncontrolled_locks + fi + + return $CM_RC +} + +controld_validate() { + case "${OCF_RESKEY_CRM_meta_globally_unique}" in + [Tt][Rr][Uu][Ee] | [Oo][Nn] | [Yy][Ee][Ss] | [Yy] | 1) + msg="The globally-unique meta attribute must not be enabled for" + msg="$msg $OCF_RESOURCE_INSTANCE" + ocf_log err "$msg" + exit $OCF_ERR_CONFIGURED + ;; + esac + + # Host-specific checks + if [ "$OCF_CHECK_LEVEL" = "10" ]; then + check_binary killall + check_binary "${OCF_RESKEY_daemon}" + fi + + return $OCF_SUCCESS +} + +case "$__OCF_ACTION" in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +start) controld_validate; controld_start;; +stop) controld_stop;; +monitor) controld_validate; controld_monitor;; +validate-all) controld_validate;; +usage|help) controld_usage + exit $OCF_SUCCESS + ;; +*) controld_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? + +exit $rc + +# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: |