diff options
Diffstat (limited to 'rgmanager/src')
66 files changed, 19606 insertions, 0 deletions
diff --git a/rgmanager/src/Makefile.am b/rgmanager/src/Makefile.am new file mode 100644 index 0000000..083c628 --- /dev/null +++ b/rgmanager/src/Makefile.am @@ -0,0 +1,21 @@ +# +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +MAINTAINERCLEANFILES = Makefile.in + +SUBDIRS = resources diff --git a/rgmanager/src/resources/ASEHAagent.sh.in b/rgmanager/src/resources/ASEHAagent.sh.in new file mode 100644 index 0000000..5fe807b --- /dev/null +++ b/rgmanager/src/resources/ASEHAagent.sh.in @@ -0,0 +1,900 @@ +#!@BASH_SHELL@ + +# +# Sybase Availability Agent for Red Hat Cluster v15.0.2 +# Copyright (C) - 2007 +# Sybase, Inc. All rights reserved. +# +# Sybase Availability Agent for Red Hat Cluster v15.0.2 is licensed +# under the GNU General Public License Version 2. +# +# Author(s): +# Jian-ping Hui <jphui@sybase.com> +# +# Description: Service script for starting/stopping/monitoring \ +# Sybase Adaptive Server on: \ +# Red Hat Enterprise Linux 5 ES \ +# Red Hat Enterprise Linux 5 AS +# +# NOTES: +# +# (1) Before running this script, we assume that user has installed +# Sybase ASE 15.0.2 or higher version on the machine. Please +# customize your configuration in /etc/cluster/cluster.conf according +# to your actual environment. 
We assume the following files exist before +# you start the service: +# /$sybase_home/SYBASE.sh +# /$sybase_home/$sybase_ase/install/RUN_$server_name +# +# (2) You can customize the interval value in the meta-data section if needed: +# <action name="start" timeout="300" /> +# <action name="stop" timeout="300" /> +# +# <!-- Checks to see if it''s mounted in the right place --> +# <action name="status" interval="30" timeout="100" /> +# <action name="monitor" interval="30" timeout="100" /> +# +# <!--Checks to see if we can read from the mountpoint --> +# <action name="status" depth="10" timeout="100" interval="120" /> +# <action name="monitor" depth="10" timeout="100" interval="120" /> +# +# <action name="meta-data" timeout="5" /> +# <action name="validate-all" timeout="5" /> +# The timeout value is not supported by Redhat in RHCS5.0. +# +# (3) This script should be put under /usr/share/cluster. Its owner should be "root" with +# execution permission. +# + +. /etc/init.d/functions +. $(dirname $0)/ocf-shellfuncs + +PROG=${0} + +export LD_POINTER_GUARD=0 + +####################################################################################### +# Declare some variables we will use in the script. Please don't change their values. # +####################################################################################### +declare login_string="" +declare RUNSERVER_SCRIPT=$OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ase/install/RUN_$OCF_RESKEY_server_name +declare CONSOLE_LOG=$OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ase/install/$OCF_RESKEY_server_name.log + +################################################################################################## +# This function will be called by rgmanager to get the meta data of resource agent "ASEHAagent". # +# NEVER CHANGE ANYTHING IN THIS FUNCTION. 
+################################################################################################## +meta_data() +{ + cat <<EOT +<?xml version="1.0" ?> +<resource-agent name="ASEHAagent" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + Sybase ASE Failover Instance + </longdesc> + <shortdesc lang="en"> + Sybase ASE Failover Instance + </shortdesc> + + <parameters> + <parameter name="name" unique="1" primary="1"> + <longdesc lang="en"> + Instance name of resource agent "ASEHAagent" + </longdesc> + <shortdesc lang="en"> + name + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="sybase_home" required="1"> + <longdesc lang="en"> + The home directory of sybase products + </longdesc> + <shortdesc lang="en"> + SYBASE home directory + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="sybase_ase" required="1"> + <longdesc lang="en"> + The directory name under sybase_home where ASE products are installed + </longdesc> + <shortdesc lang="en"> + SYBASE_ASE directory name + </shortdesc> + <content type="string" default="ASE-15_0" /> + </parameter> + + <parameter name="sybase_ocs" required="1"> + <longdesc lang="en"> + The directory name under sybase_home where OCS products are installed, i.e. 
ASE-15_0 + </longdesc> + <shortdesc lang="en"> + SYBASE_OCS directory name + </shortdesc> + <content type="string" default="OCS-15_0" /> + </parameter> + + <parameter name="server_name" required="1"> + <longdesc lang="en"> + The ASE server name which is configured for the HA service + </longdesc> + <shortdesc lang="en"> + ASE server name + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="login_file" required="1"> + <longdesc lang="en"> + The full path of login file which contains the login/password pair + </longdesc> + <shortdesc lang="en"> + Login file + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="interfaces_file" required="1"> + <longdesc lang="en"> + The full path of interfaces file which is used to start/access the ASE server + </longdesc> + <shortdesc lang="en"> + Interfaces file + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="sybase_user" required="1"> + <longdesc lang="en"> + The user who can run ASE server + </longdesc> + <shortdesc lang="en"> + Sybase user + </shortdesc> + <content type="string" default="sybase" /> + </parameter> + + <parameter name="shutdown_timeout" required="1"> + <longdesc lang="en"> + The maximum seconds to wait for the ASE server to shutdown before killing the process directly + </longdesc> + <shortdesc> + Shutdown timeout value + </shortdesc> + <content type="integer" default="0" /> + </parameter> + + <parameter name="start_timeout" required="1"> + <longdesc lang="en"> + The maximum seconds to wait for an ASE server to complete before determining that the server had failed to start + </longdesc> + <shortdesc lang="en"> + Start timeout value + </shortdesc> + <content type="integer" default="0" /> + </parameter> + + <parameter name="deep_probe_timeout" required="1"> + <longdesc lang="en"> + The maximum seconds to wait for the response of ASE server before determining that the server had no response while running deep probe + </longdesc> + 
<shortdesc lang="en"> + Deep probe timeout value + </shortdesc> + <content type="integer" default="0" /> + </parameter> + </parameters> + <actions> + <action name="start" timeout="300" /> + <action name="stop" timeout="300" /> + + <!-- Checks to see if it''s mounted in the right place --> + <action name="status" interval="30" timeout="100" /> + <action name="monitor" interval="30" timeout="100" /> + + <!--Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="100" interval="120" /> + <action name="monitor" depth="10" timeout="100" interval="120" /> + + <action name="meta-data" timeout="5" /> + <action name="validate-all" timeout="5" /> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> +EOT +} + +################################################################################################## +# Function Name: verify_all # +# Parameter: None # +# Return value: # +# 0 SUCCESS # +# OCF_ERR_ARGS Parameters are invalid # +# Description: Do some validation on the user-configurable stuff at the beginning of the script. # +################################################################################################## +verify_all() +{ + ocf_log debug "ASEHAagent: Start 'verify_all'" + + # Check if the parameter 'sybase_home' is set. + if [[ -z "$OCF_RESKEY_sybase_home" ]] + then + ocf_log err "ASEHAagent: The parameter 'sybase_home' is not set." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'sybase_home' is a valid path. + if [[ ! -d $OCF_RESKEY_sybase_home ]] + then + ocf_log err "ASEHAagent: The sybase_home '$OCF_RESKEY_sybase_home' doesn't exist." + return $OCF_ERR_ARGS + fi + + # Check if the script file SYBASE.sh exists + if [[ ! -f $OCF_RESKEY_sybase_home/SYBASE.sh ]] + then + ocf_log err "ASEHAagent: The file $OCF_RESKEY_sybase_home/SYBASE.sh is required to run this script. Failed to run the script." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'sybase_ase' is set. 
+ if [[ -z "$OCF_RESKEY_sybase_ase" ]] + then + ocf_log err "ASEHAagent: The parameter 'sybase_ase' is not set." + return $OCF_ERR_ARGS + fi + + # Check if the directory /$OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ase exists. + if [[ ! -d $OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ase ]] + then + ocf_log err "ASEHAagent: The directory '$OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ase' doesn't exist." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'sybase_ocs' is set. + if [[ -z "$OCF_RESKEY_sybase_ocs" ]] + then + ocf_log err "ASEHAagent: The parameter 'sybase_ocs' is not set." + return $OCF_ERR_ARGS + fi + + # Check if the directory /$OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ocs exists. + if [[ ! -d $OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ocs ]] + then + ocf_log err "ASEHAagent: The directory '$OCF_RESKEY_sybase_home/$OCF_RESKEY_sybase_ocs' doesn't exist." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'server_name' is set. + if [[ -z "$OCF_RESKEY_server_name" ]] + then + ocf_log err "ASEHAagent: The parameter 'server_name' is not set." + return $OCF_ERR_ARGS + fi + + # Check if the Run_server file exists. + if [[ ! -f $RUNSERVER_SCRIPT ]] + then + ocf_log err "ASEHAagent: The file $RUNSERVER_SCRIPT doesn't exist. The sybase directory may be incorrect." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'login_file' is set. + if [[ -z "$OCF_RESKEY_login_file" ]] + then + ocf_log err "ASEHAagent: The parameter 'login_file' is not set." + return $OCF_ERR_ARGS + fi + + # Check if the login file exist. + if [[ ! -f $OCF_RESKEY_login_file ]] + then + ocf_log err "ASEHAagent: The login file '$OCF_RESKEY_login_file' doesn't exist." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'sybase_user' is set + if [[ -z "$OCF_RESKEY_sybase_user" ]] + then + ocf_log err "ASEHAagent: The parameter 'sybase_user' is not set." + return $OCF_ERR_ARGS + fi + + # Check if the user 'sybase_user' exist + id -u $OCF_RESKEY_sybase_user + if [[ $? 
!= 0 ]] + then + ocf_log err "ASEHAagent: The user '$OCF_RESKEY_sybase_user' doesn't exist in the system." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'interfaces_file' is set + if [[ -z "$OCF_RESKEY_interfaces_file" ]] + then + ocf_log err "ASEHAagent: The parameter 'interfaces_file' is not set." + return $OCF_ERR_ARGS + fi + + # Check if the file 'interfaces_file' exists + if [[ ! -f $OCF_RESKEY_interfaces_file ]] + then + ocf_log err "ASEHAagent: The interfaces file '$OCF_RESKEY_interfaces_file' doesn't exist." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'shutdown_timeout' is a valid value + if [[ $OCF_RESKEY_shutdown_timeout -eq 0 ]] + then + ocf_log err "ASEHAagent: The parameter 'shutdown_timeout' is not set. Its value cannot be zero." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'start_timeout' is a valid value + if [[ $OCF_RESKEY_start_timeout -eq 0 ]] + then + ocf_log err "ASEHAagent: The parameter 'start_timeout' is not set. Its value cannot be zero." + return $OCF_ERR_ARGS + fi + + # Check if the parameter 'deep_probe_timeout' is a valid value + if [[ $OCF_RESKEY_deep_probe_timeout -eq 0 ]] + then + ocf_log err "ASEHAagent: The parameter 'deep_probe_timeout' is not set. Its value cannot be zero." + return $OCF_ERR_ARGS + fi + + ocf_log debug "ASEHAagent: End 'verify_all' successfully." + + return 0 +} + +################################################################################################################ +# Function name: get_login_string # +# Parameter: None # +# Return value: # +# 0 SUCCESS # +# 1 FAIL # +# Description: Analyze the login_file to format the login string. This function will set the global variable # +# "login_string". If the login/password is clear text, the "login_string" will become to "-Ulogin # +# -Ppassword" if there is no error. If there are any errors in this function, the string # +# "login_string" will be still empty. 
In current stage, the encrypted string is not supported # +# because "haisql" is not available on this platform. # +################################################################################################################ +get_login_string() +{ + tmpstring="" + # Reset the global "login_string" so that a failed parse leaves it empty, as the header above promises. + login_string="" + + # Read the first column. The valid value will be "normal" or "encrypted". Any other values are invalid. + login_type=`head -1 $OCF_RESKEY_login_file | awk '{print $1}'` + if [[ $login_type = "normal" ]] + then + # The login/password pair is saved in clear text. + # Abstract the login/password from the line. + tmpstring=`head -1 $OCF_RESKEY_login_file | awk '{print $2}'` + + # Abstract "user" from the string. + user=`echo $tmpstring | awk -F'/' '{print $1}'` + # Check if the "user" string is NULL. If it is NULL, it means this is not a valid user. + if [[ -z $user ]] + then + ocf_log err "ASEHAagent: Login username is not specified in the file '$OCF_RESKEY_login_file'" + return 1 + fi + + # Abstract "password" from the string. + passwd=`echo $tmpstring | awk -F'/' '{print $2}'` + + # Format the "login_string". + login_string="-U$user -P$passwd" + else + # The login_type is invalid value. + ocf_log err "ASEHAagent: Login type specified in the file $OCF_RESKEY_login_file is not 'normal' or 'encrypted' which are only supported values." + return 1 + fi + + # The "login_file" has been analyzed successfully. Now, the value of "login_string" contains the login/password information. + return 0 +} + +############################################################################################## +# Function name: ase_start # +# Parameter: None # +# Return value: # +# 0 SUCCESS # +# 1 FAIL # +# Description: This function is used to start the ASE server in primary or secondary server. # +############################################################################################## +ase_start() +{ + ocf_log debug "ASEHAagent: Start 'ase_start'" + + # Check if the server is running.
If yes, return SUCCESS directly. Otherwise, continue the start work. + ase_is_running + if [[ $? = 0 ]] + then + # The server is running. + ocf_log info "ASEHAagent: Server is running. Start is success." + return 0 + fi + + # The server is not running. We need to start it. + # If the log file existed, delete it. + if [[ -f $CONSOLE_LOG ]] + then + rm -f $CONSOLE_LOG + fi + + ocf_log debug "ASEHAagent: Starting '$OCF_RESKEY_server_name'..." + + # Run runserver script to start the server. Since this script will be run by root and ASE server + # needs to be run by another user, we need to change the user to sybase_user first. Then, run + # the script to start the server. + su $OCF_RESKEY_sybase_user -c ksh << EOF + # set required SYBASE environment by running SYBASE.sh. + . $OCF_RESKEY_sybase_home/SYBASE.sh + # Run the RUNSERVER_SCRIPT to start the server. + . $RUNSERVER_SCRIPT > $CONSOLE_LOG 2>&1 & +EOF + + # Monitor every 1 seconds if the server has + # recovered, until RECOVERY_TIMEOUT. + t=0 + while [[ $t -le $OCF_RESKEY_start_timeout ]] + do + grep -s "Recovery complete." $CONSOLE_LOG > /dev/null 2>&1 + if [[ $? != 0 ]] + then + # The server has not completed the recovery. We need to continue to monitor the recovery + # process. + t=`expr $t + 1` + else + # The server has completed the recovery. + ocf_log info "ASEHAagent: ASE server '$OCF_RESKEY_server_name' started successfully." + break + fi + sleep 1 + done + + # If $t is larger than start_timeout, it means the ASE server cannot start in given time. Otherwise, it + # means the ASE server has started successfully. + if [[ $t -gt $OCF_RESKEY_start_timeout ]] + then + # The server cannot start in specified time. We think the start is failed. + ocf_log err "ASEHAagent: Failed to start ASE server '$OCF_RESKEY_server_name'. Please check the server error log $CONSOLE_LOG for possible problems." + return 1 + fi + + ocf_log debug "ASEHAagent: End 'ase_start' successfully." 
+ + return 0 +} + +############################################################################################# +# Function name: ase_stop # +# Parameter: None # +# Return value: # +# 0 SUCCESS # +# 1 FAIL # +# Description: This function is used to stop the ASE server in primary or secondary server. # +############################################################################################# +ase_stop() +{ + ocf_log debug "ASEHAagent: Start 'ase_stop'" + + # Check if the ASE server is still running. + ase_is_running + if [[ $? != 0 ]] + then + # The ASE server is not running. We need not to shutdown it. + ocf_log info "ASEHAagent: The dataserver $OCF_RESKEY_server_name is not running." + return 0 + fi + + # Call get_login_string() to parse the login/password string + get_login_string + if [[ $? = 1 ]] + then + # The login account cannot be used. So we will kill the process directly. + ocf_log info "ASEHAagent: Cannot parse the login file $OCF_RESKEY_login_file. Kill the processes of ASE directly." + # Kill the OS processes immediately. + kill_ase 0 + return $? + fi + + # Just in case things are hung, start a process that will wait for the + # timeout period, then kill any remaining porcesses. We'll need to + # monitor this process (set -m), so we can terminate it later if it is + # not needed. + set -m + $PROG kill & + KILL_PID=$! # If successful, we will also terminate watchdog process + + # Run "shutdown with nowait" from isql command line to shutdown the server + su $OCF_RESKEY_sybase_user -c ksh << EOF + # set required SYBASE environment by running SYBASE.sh. + . $OCF_RESKEY_sybase_home/SYBASE.sh + # Run "shutdown with nowait" to shutdown the server immediately. 
+ (echo "use master" ; echo go ; echo "shutdown with nowait"; echo go) | \ + \$SYBASE/\$SYBASE_OCS/bin/isql $login_string -S$OCF_RESKEY_server_name -I$OCF_RESKEY_interfaces_file & +EOF + + sleep 5 + + # Check if the server has been shut down successfully + t=0 + while [[ $t -lt $OCF_RESKEY_shutdown_timeout ]] + do + # Search "ueshutdown: exiting" in the server log. If found, it means the server has been shut down. + # Otherwise, we need to wait. + tail $CONSOLE_LOG | grep "ueshutdown: exiting" > /dev/null 2>&1 + if [[ $? != 0 ]] + then + # The shutdown is still in processing. Wait... + sleep 2 + t=`expr $t+2` + else + # The shutdown is success. + ocf_log info "ASEHAagent: ASE server '$OCF_RESKEY_server_name' shutdown with isql successfully." + break + fi + done + + # If $t is larger than shutdown_timeout, it means the ASE server cannot be shut down in given time. We need + # to wait for the background kill process to kill the OS processes directly. + if [[ $t -ge $OCF_RESKEY_shutdown_timeout ]] + then + ocf_log err "ASEHAagent: Shutdown of '$OCF_RESKEY_server_name' from isql failed. Server is either down or unreachable." + fi + + # Here, the ASE server has been shut down by isql command or killed by background process. We need to do + # further check to make sure all processes have gone away before saying shutdown is complete. This stops the + # other node from starting up the package before it has been stopped and the file system has been unmounted. + + # Get all processes ids from log file + declare -a ENGINE_ALL=(`sed -n -e '/engine /s/^.*os pid \([0-9]*\).*online$/\1/p' $CONSOLE_LOG`) + typeset -i num_procs=${#ENGINE_ALL[@]} + + # We cannot find any process id from log file. It may be because the log file is corrupted or be deleted. + # In this case, we determine the shutdown is failed. + if [[ "${ENGINE_ALL[@]}" = "" ]] + then + ocf_log err "ASEHAagent: Unable to find the process id from $CONSOLE_LOG." + ocf_log err "ASEHAagent: Stop ASE server failed." 
+ return 1 + fi + + # Monitor the system processes to make sure all ASE related processes have gone away. + while true + do + # To every engine process, search it in system processes list. If it is not in the + # list, it means this process has gone away. Otherwise, we need to wait for it is + # killed by background process. + for i in ${ENGINE_ALL[@]} + do + # Match the pid exactly (grep -x). A plain substring match would treat pid 123 as alive + # whenever an unrelated pid such as 1234 exists, and this loop would keep waiting. + ps -fu $OCF_RESKEY_sybase_user | awk '{print $2}' | grep -x "$i" > /dev/null 2>&1 + if [[ $? != 0 ]] + then + ocf_log debug "ASEHAagent: $i process has stopped." + c=0 + while (( c < $num_procs )) + do + if [[ ${ENGINE_ALL[$c]} = $i ]] + then + unset ENGINE_ALL[$c] + c=$num_procs + fi + (( c = c + 1 )) + done + fi + done + + # To here, all processes should have gone away. + if [[ ${ENGINE_ALL[@]} = "" ]] + then + # + # Looks like shutdown was successful, so kill the + # script to kill any hung processes, which we started earlier. + # Check to see if the script is still running. If jobs + # returns that the script is done, then we don't need to kill + # it. + # + job=$(jobs | grep -v Done) + if [[ ${job} != "" ]] + then + ocf_log debug "ASEHAagent: Killing the kill_ase script." + + kill -15 $KILL_PID > /dev/null 2>&1 + fi + break + fi + sleep 5 + done + + ocf_log debug "ASEHAagent: End 'ase_stop'." + + return 0 +} + +#################################################################################### +# Function name: ase_is_running # +# Parameter: None # +# Return value: # +# 0 ASE server is running # +# 1 ASE server is not running or there are errors # +# Description: This function is used to check if the ASE server is still running . # +#################################################################################### +ase_is_running() +{ + # If the error log doesn't exist, we can say there is no ASE is running. + if [[ ! -f $CONSOLE_LOG ]] + then + return 1 + fi + + # The error log file exists. Check if the engine 0 is alive.
+ ENGINE_0=(`sed -n -e '/engine 0/s/^.*os pid \([0-9]*\).*online$/\1/p' $CONSOLE_LOG`) + if [[ "$ENGINE_0" = "" ]] + then + # The engine 0 is down. + return 1 + else + kill -s 0 $ENGINE_0 > /dev/null 2>&1 + if [[ $? != 0 ]] + then + # The engine 0 is not running. + return 1 + else + # The engine 0 is running. + return 0 + fi + fi + + return 1 +} + +#################################################################################### +# Function name: kill_ase # +# Parameter: # +# DELAY The seconds to wait before killing the ASE processes. 0 means # +# kill the ASE processes immediately. # +# Return value: # +# 1 No ASE process id could be found in the server log # +# Otherwise the status of the final log call is returned. # +# Description: This function waits DELAY seconds and then sends SIGKILL to every # +# ASE engine process id recorded in the server log. # +#################################################################################### +kill_ase() +{ + ocf_log debug "ASEHAagent: Start 'kill_ase'." + + DELAY=$1 + + # Wait for sometime before sending a kill signal. + t=0 + while [[ $t -lt $DELAY ]] + do + sleep 1 + # Shell arithmetic keeps t a plain integer; "expr $t+1" without spaces returns the string "0+1". + t=$((t + 1)) + done + + # Get the process ids from log file (one array element per pid, as in ase_stop) + declare -a ENGINE_ALL=(`sed -n -e '/engine /s/^.*os pid \([0-9]*\).*online$/\1/p' $CONSOLE_LOG`) + + # If there is no process id found in the log file, we need not to continue. + if [[ "${ENGINE_ALL[@]}" = "" ]] + then + ocf_log err "ASEHAagent: Unable to find the process id from $CONSOLE_LOG." + # Report the failure explicitly; ase_stop returns this status to rgmanager. + return 1 + fi + + # Kill the datasever process(es) + for pid in ${ENGINE_ALL[@]} + do + kill -9 $pid > /dev/null 2>&1 + if [[ $? != 0 ]] + then + ocf_log info "ASEHAagent: kill_ase function did NOT find process $pid running." + else + ocf_log info "ASEHAagent: kill_ase function did find process $pid running. Sent SIGKILL." + fi + done + + ocf_log debug "ASEHAagent: End 'kill_ase'."
+} + + +####################################################################################### +# Function name: terminate # +# Parameter: None # +# Return value: Always be 1 # +# Description: This function is called automatically after this script is terminated. # +####################################################################################### +terminate() +{ + ocf_log debug "ASEHAagent: This monitor script has been signaled to terminate." + exit 1 +} + +##################################################################################### +# Function name: ase_status # +# Parameter: # +# 0 Level 0 probe. In this level, we just check if engine 0 is alive # +# 10 Level 10 probe. In this level, we need to probe if the ASE server # +# still has response. # +# Return value: # +# 0 The server is still alive # +# 1 The server is down # +# Description: This function is used to check if the ASE server is still running. # +##################################################################################### +ase_status() +{ + ocf_log debug "ASEHAagent: Start 'ase_status'." + + # Step 1: Check if the engine 0 is alive + ase_is_running + if [[ $? = 1 ]] + then + # ASE is down. Return fail to rgmanager to trigger the failover process. + ocf_log err "ASEHAagent: ASE server is down." + return 1 + fi + + # ASE process is still alive. + # Step2: If this is level 10 probe, We need to check if the ASE server still has response. + if [[ $1 -gt 0 ]] + then + ocf_log debug "ASEHAagent: Need to run deep probe." + # Run deep probe + deep_probe + if [[ $? = 1 ]] + then + # Deep probe failed. This means the server has been down. + ocf_log err "ASEHAagent: Deep probe found the ASE server is down." + return 1 + fi + fi + + ocf_log debug "ASEHAagent: End 'ase_status'." 
+ + return 0 +} + +#################################################################################### +# Function name: deep_probe # +# Parameter: None # +# Return value: # +# 0 ASE server is alive # +# 1 ASE server is down # +# Description: This function is used to run deep probe to make sure the ASE server # +# still has response. # +#################################################################################### +deep_probe() +{ + declare -i rv + + ocf_log debug "ASEHAagent: Start 'deep_probe'." + + # Declare two temporary files which will be used in this probe. + tmpfile1="$(mktemp /tmp/ASEHAagent.1.XXXXXX)" + tmpfile2="$(mktemp /tmp/ASEHAagent.2.XXXXXX)" + + # Get the login_string by analyzing the login_file. + get_login_string + if [[ $? = 1 ]] + then + # Login string cannot be fetched. Cannot continue the deep probe. + ocf_log err "ASEHAagent: Cannot run the deep probe because of incorrect login file $OCF_RESKEY_login_file. Deep probe failed." + # Remove the temporary files created above so a failed probe does not leave them in /tmp. + rm -f $tmpfile1 + rm -f $tmpfile2 + return 1 + fi + + rm -f $tmpfile1 + rm -f $tmpfile2 + + # The login file is correct. We have gotten the login account and password from it. + # Run isql command in background. + su $OCF_RESKEY_sybase_user -c ksh << EOF + # set required SYBASE environment by running SYBASE.sh. + . $OCF_RESKEY_sybase_home/SYBASE.sh + # Run a very simple SQL statement to make sure the server is still ok. The output will be put to + # tmpfile1. + (echo "select 1"; echo "go") | + \$SYBASE/\$SYBASE_OCS/bin/isql $login_string -S$OCF_RESKEY_server_name -I$OCF_RESKEY_interfaces_file -t $OCF_RESKEY_deep_probe_timeout -e -o$tmpfile1 & + # Record the isql command process id to temporary file. If the isql is hung, we need this process id + # to kill the hung process. + echo \$! > $tmpfile2 +EOF + + declare -i t=0 + + # Monitor the output file tmpfile1.
+ while [[ $t -lt $OCF_RESKEY_deep_probe_timeout ]] + do + # If the SQL statement is executed successfully, we will get the following output: + # 1> select 1 + # + # ----------- + # 1 + # + # (1 row affected) + # So, we determine if the execution is success by searching the keyword "(1 row affected)". + # The output is discarded: the file may not exist yet while isql is still starting, and the + # matched line is of no use to the caller. + grep "(1 row affected)" $tmpfile1 > /dev/null 2>&1 + if [[ $? = 0 ]] + then + ocf_log debug "ASEHAagent: Deep probe sucess." + break + else + sleep 1 + t=$((t + 1)) + fi + done + + # If $t is larger than deep_probe_timeout, it means the isql command line cannot finish in given time. + # This means the deep probe failed. We need to kill the isql process manually. + if [[ $t -ge $OCF_RESKEY_deep_probe_timeout ]] + then + ocf_log err "ASEHAagent: Deep probe fail. The dataserver has no response." + + # Read the process id of isql process from tmpfile2 + pid=`cat $tmpfile2 | awk '{print $1}'` + + rm -f $tmpfile1 + rm -f $tmpfile2 + + # Kill the isql process directly. Skip the kill when no pid was recorded. + if [[ -n $pid ]] + then + kill -9 $pid > /dev/null 2>&1 + fi + return 1 + fi + + rm -f $tmpfile1 + rm -f $tmpfile2 + + ocf_log debug "ASEHAagent: End 'deep_probe'." + + return 0 +} + +trap terminate SIGTERM + +############################# +# Do some real work here... # +############################# +case $1 in + start) + verify_all || exit 1 + ase_start + exit $? + ;; + stop) + verify_all || exit 1 + ase_stop + exit $? + ;; + status | monitor) + verify_all || exit 1 + ase_status $OCF_CHECK_LEVEL + exit $? + ;; + kill) + kill_ase $OCF_RESKEY_shutdown_timeout + ;; + meta-data) + meta_data + exit $? + ;; + validate-all) + verify_all + exit $? + ;; + *) + # PROG holds the script path (set near the top of the script); SCRIPT is never assigned there. + echo "Usage: $PROG {start|stop|monitor|status|validate-all|meta-data}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +exit 0 + diff --git a/rgmanager/src/resources/Makefile.am b/rgmanager/src/resources/Makefile.am new file mode 100644 index 0000000..30b3be9 --- /dev/null +++ b/rgmanager/src/resources/Makefile.am @@ -0,0 +1,119 @@ +# +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +MAINTAINERCLEANFILES = Makefile.in + +SUBDIRS = utils + +TARGET = fs.sh + +RESOURCES = service.sh ip.sh nfsclient.sh nfsexport.sh \ + script.sh netfs.sh clusterfs.sh smb.sh \ + apache.sh openldap.sh samba.sh mysql.sh \ + postgres-8.sh tomcat-5.sh lvm.sh \ + vm.sh SAPInstance SAPDatabase named.sh db2.sh \ + ASEHAagent.sh drbd.sh nfsserver.sh \ + tomcat-6.sh orainstance.sh oralistener.sh oracledb.sh \ + bind-mount.sh oradg.sh + +METADATA = apache.metadata openldap.metadata samba.metadata \ + mysql.metadata postgres-8.metadata \ + tomcat-5.metadata named.metadata lvm.metadata \ + drbd.metadata tomcat-6.metadata \ + orainstance.metadata oralistener.metadata \ + oradg.metadata + +HELPERS = ocf-shellfuncs svclib_nfslock \ + lvm_by_lv.sh lvm_by_vg.sh + +DTD = ra-api-1-modified.dtd + +XSL = ra2man.xsl ra2ref.xsl ra2rng.xsl + +RESRNG = resources.rng.head resources.rng.mid resources.rng.tail + +EXTRA_DIST = $(TARGET:=.in) \ + $(RESOURCES) \ + $(METADATA) \ + $(HELPERS) \ + $(DTD) \ + $(XSL) \ + $(RESRNG) + +rasdir = ${CLUSTERDATA} + +ras_SCRIPTS = $(TARGET) \ + $(RESOURCES) \ + $(HELPERS) + +ras_DATA = $(METADATA) + +rngdir = ${CLUSTERDATA}/relaxng + +rng_DATA = $(DTD) $(XSL) $(RESRNG) + +$(TARGET): + cat $(abs_srcdir)/$@.in | sed \ + -e 's#@''LOGDIR@#${LOGDIR}#g' \ 
+ > $@.out + chmod +x $@.out + mv $@.out $@ + +clean-local: + rm -f $(TARGET) resources.rng + +ras-validation: $(RESOURCES) $(TARGET) $(DTD) + @echo Validating resource agent meta-data + @for f in $(RESOURCES); do \ + echo " $(abs_srcdir)/$$f "; \ + bash $(abs_srcdir)/$$f meta-data | xmllint --dtdvalid \ + $(abs_srcdir)/$(DTD) --noout -; \ + if [ $$? -ne 0 ]; then exit 1; fi \ + done + @for f in $(TARGET); do \ + echo " $(abs_builddir)/$$f "; \ + bash $(abs_builddir)/$$f meta-data | xmllint --dtdvalid \ + $(abs_srcdir)/$(DTD) --noout -; \ + if [ $$? -ne 0 ]; then exit 1; fi \ + done + +# +# Schema maintenance. Run 'make resources.rng' and paste it in to +# config/tools/xml/cluster.rng.in where it says 'autogenerated'. +# +# resources.rng.* should never be distributed by themselves. +# +resources.rng: $(RESOURCES) $(TARGET) utils/config-utils.sh +resources.rng: $(XSL) $(RESRNG) + rm -f resources.rng + cat resources.rng.head >> resources.rng + @echo Generating per-resource RelaxNG information... + @for f in $(RESOURCES) $(TARGET); do \ + echo " ./$$f"; \ + bash ./$$f meta-data | xsltproc ra2rng.xsl - >> resources.rng; \ + done + cat resources.rng.mid >> resources.rng + @echo Generating per-resource RelaxNG reference information... + @for f in $(RESOURCES) $(TARGET); do \ + echo " ./$$f"; \ + bash ./$$f meta-data | xsltproc ra2ref.xsl - >> resources.rng; \ + done + cat resources.rng.tail >> resources.rng + +utils/config-utils.sh: + make -C utils config-utils.sh diff --git a/rgmanager/src/resources/SAPDatabase b/rgmanager/src/resources/SAPDatabase new file mode 100644 index 0000000..92009d1 --- /dev/null +++ b/rgmanager/src/resources/SAPDatabase @@ -0,0 +1,1026 @@ +#!/bin/sh +# +# SAPDatabase +# +# Description: Manages any type of SAP supported database instance +# as a High-Availability OCF compliant resource. 
#
# Author:       Alexander Krauth, October 2006
# Support:      linux@sap.com
# License:      GNU General Public License (GPL)
# Copyright:    (c) 2006, 2007 Alexander Krauth
#
# An example usage:
#      See usage() function below for more details...
#
# OCF instance parameters:
#	OCF_RESKEY_SID
#	OCF_RESKEY_DIR_EXECUTABLE      (optional, well known directories will be searched by default)
#	OCF_RESKEY_DBTYPE
#	OCF_RESKEY_NETSERVICENAME      (optional, non standard name of Oracle Listener)
#	OCF_RESKEY_DBJ2EE_ONLY         (optional, default is false)
#	OCF_RESKEY_JAVA_HOME           (optional, only needed if DBJ2EE_ONLY is true and JAVA_HOME environment variable is not set)
#	OCF_RESKEY_STRICT_MONITORING   (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck)
#	OCF_RESKEY_AUTOMATIC_RECOVER   (optional, automatic startup recovery, default is false)
#	OCF_RESKEY_DIR_BOOTSTRAP       (optional, if non standard J2EE server directory)
#	OCF_RESKEY_DIR_SECSTORE        (optional, if non standard J2EE secure store directory)
#	OCF_RESKEY_DB_JARS             (optional, if maintained in bootstrap.properties, mandatory for WebAS Java 7.10)
#	OCF_RESKEY_PRE_START_USEREXIT  (optional, lists a script which can be executed before the resource is started)
#	OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started)
#	OCF_RESKEY_PRE_STOP_USEREXIT   (optional, lists a script which can be executed before the resource is stopped)
#	OCF_RESKEY_POST_STOP_USEREXIT  (optional, lists a script which can be executed after the resource is stopped)
#
# ToDo:
# Remove all the database dependent stuff from the agent and use
# saphostcontrol daemon as soon as SAP will release it.
#
#######################################################################
# Initialization:

# Load the OCF helper library that sits next to this script.  The hidden
# ".ocf-shellfuncs" is sourced as-is; for the plain "ocf-shellfuncs" the
# locale and PATH are pinned to known values first.
if [ -f $(dirname $0)/.ocf-shellfuncs ]; then
	. $(dirname $0)/.ocf-shellfuncs
elif [ -f $(dirname $0)/ocf-shellfuncs ]; then
	LC_ALL=C
	LANG=C
	PATH=/bin:/sbin:/usr/bin:/usr/sbin
	export LC_ALL LANG PATH
	. $(dirname $0)/ocf-shellfuncs
else
	echo Could not find ocf-shellfuncs!
	exit 1
fi

#######################################################################

SH=/bin/sh

#
# usage: print the command line synopsis.  The list of operations is taken
#        from sapdatabase_methods and joined with '|'.
#
usage() {
	methods=`sapdatabase_methods`
	methods=`echo $methods | tr ' ' '|'`
	cat <<-!
	usage: $0 ($methods)

	$0 manages a SAP database of any type as an HA resource.
	Currently Oracle, MaxDB and DB/2 UDB are supported.
	ABAP databases as well as JAVA only databases are supported.

	The 'start' operation starts the instance.
	The 'stop' operation stops the instance.
	The 'status' operation reports whether the instance is running
	The 'monitor' operation reports whether the instance seems to be working
	The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!)
	The 'validate-all' operation reports whether the parameters are valid
	The 'methods' operation reports on the methods $0 supports

	!
}

#
# meta_data: print the resource agent meta-data (XML) on stdout.
#
# The short descriptions of PRE_STOP_USEREXIT and POST_STOP_USEREXIT used to
# say "pre-start" / "post-start" (copy and paste from the START user exits);
# they now describe the stop user exits they belong to.
#
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd">
<resource-agent name="SAPDatabase">
<version>1.92.1</version>

<longdesc lang="en">
Resource script for SAP databases. It manages a SAP database of any type as an HA resource.
</longdesc>
<shortdesc lang="en">SAP database resource agent</shortdesc>

<parameters>
 <parameter name="SID" unique="1" required="1" primary="1">
  <longdesc lang="en">The unique SAP system identifier. e.g. P01</longdesc>
  <shortdesc lang="en">SAP system ID</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DIR_EXECUTABLE" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find sapstartsrv and sapcontrol.</longdesc>
  <shortdesc lang="en">path of sapstartsrv and sapcontrol</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DBTYPE" unique="0" required="1">
  <longdesc lang="en">The name of the database vendor you use. Set either: ORA,DB6,ADA</longdesc>
  <shortdesc lang="en">database vendor</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="NETSERVICENAME" unique="0" required="0">
  <longdesc lang="en">The Oracle TNS listener name.</longdesc>
  <shortdesc lang="en">listener name</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DBJ2EE_ONLY" unique="0" required="0">
  <longdesc lang="en">If you do not have a ABAP stack installed in the SAP database, set this to TRUE</longdesc>
  <shortdesc lang="en">only JAVA stack installed</shortdesc>
  <content type="boolean" default="false"/>
 </parameter>
 <parameter name="JAVA_HOME" unique="0" required="0">
  <longdesc lang="en">This is only needed if the DBJ2EE_ONLY parameter is set to true. Enter the path to the Java SDK which is used by the SAP WebAS Java</longdesc>
  <shortdesc lang="en">Path to Java SDK</shortdesc>
  <content type="string" default=""/>
 </parameter>
 <parameter name="STRICT_MONITORING" unique="0" required="0">
  <longdesc lang="en">This controls how the resource agent monitors the database. If set to true, it will use SAP tools to test the connect to the database. Do not use with Oracle, because it will result in unwanted failovers in case of an archiver stuck</longdesc>
  <shortdesc lang="en">Activates application level monitoring</shortdesc>
  <content type="boolean" default="false"/>
 </parameter>
 <parameter name="AUTOMATIC_RECOVER" unique="0" required="0">
  <longdesc lang="en">The SAPDatabase resource agent tries to recover a failed start attempt automatically one time. This is done by running a forced abort of the RDBMS and/or executing recovery commands.</longdesc>
  <shortdesc lang="en">Enable or disable automatic startup recovery</shortdesc>
  <content type="boolean" default="false"/>
 </parameter>
 <parameter name="DIR_BOOTSTRAP" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find the J2EE instance bootstrap directory. e.g. /usr/sap/P01/J00/j2ee/cluster/bootstrap</longdesc>
  <shortdesc lang="en">path to j2ee bootstrap directory</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DIR_SECSTORE" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find the J2EE security store directory. e.g. /usr/sap/P01/SYS/global/security/lib/tools</longdesc>
  <shortdesc lang="en">path to j2ee secure store directory</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DB_JARS" unique="0" required="0">
  <longdesc lang="en">The full qualified filename of the jdbc driver for the database connection test. It will be automaticaly read from the bootstrap.properties file in Java engine 6.40 and 7.00. For Java engine 7.10 the parameter is mandatory.</longdesc>
  <shortdesc lang="en">file name of the jdbc driver</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="PRE_START_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets started.</longdesc>
  <shortdesc lang="en">path to a pre-start script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="POST_START_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got started.</longdesc>
  <shortdesc lang="en">path to a post-start script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="PRE_STOP_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets stopped.</longdesc>
  <shortdesc lang="en">path to a pre-stop script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="POST_STOP_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got stopped.</longdesc>
  <shortdesc lang="en">path to a post-stop script</shortdesc>
  <content type="string" default="" />
 </parameter>
</parameters>

<actions>
<action name="start" timeout="1800" />
<action name="stop" timeout="1800" />
<action name="status" depth="0" timeout="60" interval="120" start-delay="180" />
<action name="monitor" depth="0" timeout="60" interval="120" start-delay="180" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}

#
# trap_handler: signal handler; removes the temporary script file and exits
#               with a generic error.
#
trap_handler() {
	rm -f $TEMPFILE
	exit $OCF_ERR_GENERIC
}


#
# do_exit: leave the script with exit code $1, cleaning up $TEMPFILE on the way.
#
do_exit() {
	# If we've got a tempfile variable and the tempfile exists...
	# ... if the return code is 0 *or* the temp file is empty
	# remove it.  (A non-empty tempfile is left behind on failure.)
	if [ -n "$TEMPFILE" ] && [ -e "$TEMPFILE" ]; then
		if [ $1 -eq 0 ] || [ "$(stat -c %s $TEMPFILE)" = "0" ]; then
			rm -f $TEMPFILE
		fi
	fi
	exit $1
}


#
# listener_start: Start the given listener
#
# Runs "lsnrctl start $NETSERVICENAME" in a login shell of the ora<sid> user
# and maps the result to OCF_SUCCESS / OCF_ERR_GENERIC.
#
listener_start() {
	# the Oracle OS user is "ora" followed by the lower-cased SID
	orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`"
	rc=$OCF_SUCCESS
	output=`echo "lsnrctl start $NETSERVICENAME" | su - $orasid 2>&1`
	if [ $? -eq 0 ]
	then
		ocf_log info "Oracle Listener $NETSERVICENAME started: $output"
		rc=$OCF_SUCCESS
	else
		ocf_log err "Oracle Listener $NETSERVICENAME start failed: $output"
		rc=$OCF_ERR_GENERIC
	fi
	return $rc
}

#
# listener_stop: Stop the given listener
#
# Returns OCF_SUCCESS right away when listener_status reports that no
# listener is running; otherwise runs "lsnrctl stop" as the ora<sid> user.
#
listener_stop() {
	orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`"
	rc=$OCF_SUCCESS
	if
		listener_status
	then
		: listener is running, trying to stop it later...
	else
		return $OCF_SUCCESS
	fi
	output=`echo "lsnrctl stop $NETSERVICENAME" | su - $orasid 2>&1`
	if [ $? -eq 0 ]
	then
		ocf_log info "Oracle Listener $NETSERVICENAME stopped: $output"
	else
		ocf_log err "Oracle Listener $NETSERVICENAME stop failed: $output"
		rc=$OCF_ERR_GENERIC
	fi
	return $rc
}

#
# listener_status: is the given listener running?
#
# Counts processes of the ora<sid> user whose command line contains both
# $NETSERVICENAME and "tnslsnr"; exactly one match means running.
#
listener_status() {
	orasid="ora`echo $SID | tr '[:upper:]' '[:lower:]'`"
	# Note: ps cuts off its output at column $COLUMNS, so "ps -ef" can not be used here
	# as the output might be too long.
	cnt=`ps efo args --user $orasid | grep $NETSERVICENAME | grep -c tnslsnr`
	if [ $cnt -eq 1 ]
	then
		rc=$OCF_SUCCESS
	else
		ocf_log info "listener process not running for $NETSERVICENAME for $SID"
		rc=$OCF_ERR_GENERIC
	fi
	return $rc
}

#
# x_server_start: Start the given x_server
#
# Runs "x_server start" in a login shell of $sidadm.
#
x_server_start() {
	rc=$OCF_SUCCESS
	output=`echo "x_server start" | su - $sidadm 2>&1`
	if [ $? -eq 0 ]
	then
		ocf_log info "MaxDB x_server start: $output"
		rc=$OCF_SUCCESS
	else
		ocf_log err "MaxDB x_server start failed: $output"
		rc=$OCF_ERR_GENERIC
	fi
	return $rc
}

#
# x_server_stop: Stop the x_server
#
# Runs "x_server stop" in a login shell of $sidadm.
#
x_server_stop() {
	rc=$OCF_SUCCESS
	output=`echo "x_server stop" | su - $sidadm 2>&1`
	if [ $? -eq 0 ]
	then
		ocf_log info "MaxDB x_server stop: $output"
	else
		ocf_log err "MaxDB x_server stop failed: $output"
		rc=$OCF_ERR_GENERIC
	fi
	return $rc
}

#
# x_server_status: is the x_server running?
#
# The owning user is read from the "SdbOwner=" entry of /etc/opt/sdb; at
# least one "vserver" process of that user means running.
#
x_server_status() {
	sdbuser=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
	# Note: ps cuts off its output at column $COLUMNS, so "ps -ef" can not be used here
	# as the output might be too long.
	cnt=`ps efo args --user $sdbuser | grep -c vserver`
	if [ $cnt -ge 1 ]
	then
		rc=$OCF_SUCCESS
	else
		ocf_log info "x_server process not running"
		rc=$OCF_ERR_GENERIC
	fi
	return $rc
}

#
# oracle_stop: Stop the Oracle database without any condition
#
# Writes a small sqlplus "shutdown immediate" script to $TEMPFILE, runs it
# as $sidadm and, when the script reports success, double-checks with
# sapdatabase_status that no database processes are left.
#
oracle_stop() {
# The script text is single-quoted: every $VAR in it is expanded later by
# the $sidadm login shell, not by this agent.
echo '#!/bin/sh
LOG=$HOME/stopdb.log
date > $LOG

if [ -x "${ORACLE_HOME}/bin/sqlplus" ]
then
  SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus"
else
  echo "Can not find executable sqlplus" >> $LOG
  exit 1
fi

$SRVMGRDBA_EXE /NOLOG >> $LOG << !
connect / as sysdba
shutdown immediate
exit
!
rc=$?
cat $LOG
exit $rc' > $TEMPFILE

	# the script must be owned by and executable for $sidadm
	chmod 700 $TEMPFILE
	chown $sidadm $TEMPFILE

	su - $sidadm -c $TEMPFILE
	retcode=$?
	rm -f $TEMPFILE

	# a zero exit code only counts if the processes are really gone
	if [ $retcode -eq 0 ]; then
		sapdatabase_status
		if [ $? -ne $OCF_NOT_RUNNING ]; then
			retcode=1
		fi
	fi

	return $retcode
}

#
# maxdb_stop: Stop the MaxDB database without any condition
#
# Makes sure the x_server is up, then runs "dbmcli ... db_offline" as
# $sidadm through a generated script and verifies with sapdatabase_status.
#
maxdb_stop() {

	# x_Server must be running to stop database
	x_server_status
	if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi

	# xuser key handed to dbmcli -U: c_J2EE for Java-only databases, c otherwise
	if [ $DBJ2EE_ONLY -eq 1 ]; then
		userkey=c_J2EE
	else
		userkey=c
	fi

# The script text is double-quoted: $userkey is expanded here, the
# backslash-escaped variables are expanded when the script runs.
echo "#!/bin/sh
LOG=\$HOME/stopdb.log
date > \$LOG
echo \"Stop database with xuserkey >$userkey<\" >> \$LOG
dbmcli -U ${userkey} db_offline >> \$LOG 2>&1
exit \$?" > $TEMPFILE

	chmod 700 $TEMPFILE
	chown $sidadm $TEMPFILE

	su - $sidadm -c $TEMPFILE
	retcode=$?
	rm -f $TEMPFILE

	# a zero exit code only counts if the processes are really gone
	if [ $retcode -eq 0 ]; then
		sapdatabase_status
		if [ $? -ne $OCF_NOT_RUNNING ]; then
			retcode=1
		fi
	fi

	return $retcode
}

#
# db6udb_stop: Stop the DB2/UDB database without any condition
#
# Runs "db2 deactivate database" and "db2stop force" as $sidadm through a
# generated script and verifies with sapdatabase_status.
#
db6udb_stop() {
# NOTE(review): both db2 commands are piped into tee, so the "exit $?" of the
# generated script reports the status of tee rather than of db2stop.  The
# sapdatabase_status check below is what actually detects a failed stop.
echo '#!/bin/sh
LOG=$HOME/stopdb.log
date > $LOG
echo "Shut down the database" >> $LOG
$INSTHOME/sqllib/bin/db2 deactivate database $DB2DBDFT |tee -a $LOG 2>&1
$INSTHOME/sqllib/adm/db2stop force |tee -a $LOG 2>&1
exit $?' > $TEMPFILE

	chmod 700 $TEMPFILE
	chown $sidadm $TEMPFILE

	su - $sidadm -c $TEMPFILE
	retcode=$?
	rm -f $TEMPFILE

	if [ $retcode -eq 0 ]; then
		sapdatabase_status
		if [ $? -ne $OCF_NOT_RUNNING ]; then
			retcode=1
		fi
	fi

	return $retcode
}

#
# oracle_recover: try to clean up oracle after a crash
#
# Generated sqlplus script: shutdown abort, startup mount, automatic
# recovery, open.  Returns the exit code of that script.
#
oracle_recover() {
echo '#!/bin/sh
LOG=$HOME/recover.log
date > $LOG
echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG

if [ -x "${ORACLE_HOME}/bin/sqlplus" ]
then
  SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus"
else
  echo "Can not find executable sqlplus" >> $LOG
  exit 1
fi

$SRVMGRDBA_EXE /NOLOG >> $LOG << !
connect / as sysdba
shutdown abort
startup mount
WHENEVER SQLERROR EXIT SQL.SQLCODE
WHENEVER OSERROR EXIT FAILURE
alter database recover automatic database;
alter database open;
exit
!
rc=$?
cat $LOG
exit $rc' > $TEMPFILE

	chmod 700 $TEMPFILE
	chown $sidadm $TEMPFILE

	su - $sidadm -c $TEMPFILE
	retcode=$?
	rm -f $TEMPFILE

	return $retcode
}

#
# maxdb_recover: try to clean up MaxDB after a crash
#
# Generated script runs dbmcli db_stop, db_clear and db_online in turn; its
# exit code is the one of db_online.
#
maxdb_recover() {
	# x_Server must be running to stop database
	x_server_status
	if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi

	# xuser key handed to dbmcli -U: c_J2EE for Java-only databases, c otherwise
	if [ $DBJ2EE_ONLY -eq 1 ]; then
		userkey=c_J2EE
	else
		userkey=c
	fi

echo "#!/bin/sh
LOG=\$HOME/recover.log
date > \$LOG
echo \"Logfile written by heartbeat SAPDatabase resource agent\" >> \$LOG
echo \"Cleanup database with xuserkey >$userkey<\" >> \$LOG
echo \"db_stop\" >> \$LOG 2>&1
dbmcli -U ${userkey} db_stop >> \$LOG 2>&1
echo \"db_clear\" >> \$LOG 2>&1
dbmcli -U ${userkey} db_clear >> \$LOG 2>&1
echo \"db_online\" >> \$LOG 2>&1
dbmcli -U ${userkey} db_online >> \$LOG 2>&1
rc=\$?
cat \$LOG
exit \$rc" > $TEMPFILE

	chmod 700 $TEMPFILE
	chown $sidadm $TEMPFILE

	su - $sidadm -c $TEMPFILE
	retcode=$?
	rm -f $TEMPFILE

	return $retcode
}

#
# db6udb_recover: try to recover DB/2 after a crash
#
# Unlike the other helpers this one runs its generated script as the
# db2<sid> user: db2_kill, db2start, then "db2 activate database".
#
db6udb_recover() {
	db2sid="db2`echo $SID | tr '[:upper:]' '[:lower:]'`"

echo '#!/bin/sh
LOG=$HOME/recover.log
date > $LOG
echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG
$INSTHOME/sqllib/bin/db2_kill >> $LOG 2>&1
$INSTHOME/sqllib/adm/db2start >> $LOG 2>&1
$INSTHOME/sqllib/bin/db2 activate database $DB2DBDFT >> $LOG 2>&1
rc=$?
cat $LOG
exit $rc' > $TEMPFILE

	chmod 700 $TEMPFILE
	chown $db2sid $TEMPFILE

	su - $db2sid -c $TEMPFILE
	retcode=$?
	rm -f $TEMPFILE

	return $retcode
}

#
# methods: What methods/operations do we support?
#
# Prints one operation name per line.
#
sapdatabase_methods() {
	cat <<-!
	start
	stop
	status
	monitor
	recover
	validate-all
	methods
	meta-data
	usage
	!
}


#
# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems.
#               This specialties do not allow a totally generic SAP cluster resource agent.
#               Someone should write a resource agent for each additional process you need, if it
#               is required to monitor that process within the cluster manager. To enable
#               you to extend this resource agent without developing a new one, this user exit
#               was introduced.
#
# Arguments: $1 = name of the user exit (only used in log messages)
#            $2 = path of the customer script; nothing happens when it is empty
# Always returns 0; the return code of the customer script is only logged.
#
sapuserexit() {
	NAME="$1"
	VALUE="$2"

	if [ -n "$VALUE" ]
	then
		if [ -x "$VALUE" ]
		then
			ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
			eval "$VALUE" > /dev/null 2>&1
			# $? below is still the status of the eval above
			ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?"
		else
			ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
		fi
	fi
	return 0
}


#
# sapdatabase_start : Start the SAP database
#
# Sequence: pre-start user exit, vendor helper service (x_server for ADA,
# listener for ORA), SAP start script as $sidadm, optional strict monitor
# check for Java-only databases, optional one-time recovery, post-start
# user exit.  Returns OCF_SUCCESS or OCF_ERR_GENERIC.
#
sapdatabase_start() {
	sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"

	case $DBTYPE in
		ADA) x_server_start
		     ;;
		ORA) listener_start
		     ;;
	esac

	output=`su - $sidadm -c $SAPSTARTDB`
	rc=$?

	# for Java-only databases the result of a strict monitor run replaces
	# the exit code of the start script (sapdatabase_monitor may also
	# overwrite $output)
	if [ $DBJ2EE_ONLY -eq 1 ]
	then
		sapdatabase_monitor 1
		rc=$?
	fi

	if [ $rc -ne 0 -a $OCF_RESKEY_AUTOMATIC_RECOVER -eq 1 ]
	then
		ocf_log warn "SAP database $SID start failed: $output"
		ocf_log warn "Try to recover database $SID"

		output=''
		sapdatabase_recover
		rc=$?
	fi

	if [ $rc -eq 0 ]
	then
		ocf_log info "SAP database $SID started: $output"
		rc=$OCF_SUCCESS
		sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
	else
		ocf_log err "SAP database $SID start failed: $output"
		rc=$OCF_ERR_GENERIC
	fi

	return $rc
}

#
# sapdatabase_stop: Stop the SAP database
#
# Sequence: pre-stop user exit, vendor specific unconditional stop, vendor
# helper service stop (listener for ORA, x_server for ADA), post-stop user
# exit.  Returns OCF_SUCCESS or OCF_ERR_GENERIC.
#
sapdatabase_stop() {

	sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"

	# use of the stopdb kernel script is not possible, because there are too many checks in that
	# script. We want to stop the database regardless of anything.
	#output=`su - $sidadm -c $SAPSTOPDB`

	case $DBTYPE in
		ORA) output=`oracle_stop`
		     ;;
		ADA) output=`maxdb_stop`
		     ;;
		DB6) output=`db6udb_stop`
		     ;;
	esac

	# $? is the status left behind by the case statement above, i.e. the
	# return code of the stop helper that ran; nothing may be inserted
	# between the esac and this test.
	if [ $? -eq 0 ]
	then
		ocf_log info "SAP database $SID stopped: $output"
		rc=$OCF_SUCCESS
	else
		ocf_log err "SAP database $SID stop failed: $output"
		rc=$OCF_ERR_GENERIC
	fi

	case $DBTYPE in
		ORA) listener_stop
		     ;;
		ADA) x_server_stop
		     ;;
	esac

	sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"

	return $rc
}


#
# sapdatabase_monitor: Can the given database instance do anything useful?
#
# Argument: $1 = 0 for a process check only (sapdatabase_status),
#                anything else for an application level connect test
#                ($SAPDBCONNECT for ABAP databases, the JDBC connect tool
#                for Java-only databases).
# Side effects: restarts a missing x_server / listener, sets the global
#               $output that callers print in their log messages.
#
sapdatabase_monitor() {
	strict=$1
	rc=$OCF_SUCCESS

	# the helper service is brought back if it is found not running
	case $DBTYPE in
		ADA) x_server_status
		     if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi
		     ;;
		ORA) listener_status
		     if [ $? -ne $OCF_SUCCESS ]; then listener_start; fi
		     ;;
	esac

	if [ $strict -eq 0 ]
	then
		sapdatabase_status
		rc=$?
	else
		if [ $DBJ2EE_ONLY -eq 0 ]
		then
			output=`echo "$SAPDBCONNECT -d -w /dev/null" | su $sidadm 2>&1`
			# exit codes up to 4 are treated as a working connection
			if [ $? -le 4 ]
			then
				rc=$OCF_SUCCESS
			else
				rc=$OCF_NOT_RUNNING
			fi
		else
			MYCP=""
			EXECMD=""

			# WebAS Java 6.40+7.00
			IAIK_JCE="$SECSTORE"/iaik_jce.jar
			IAIK_JCE_EXPORT="$SECSTORE"/iaik_jce_export.jar
			EXCEPTION="$BOOTSTRAP"/exception.jar
			LOGGING="$BOOTSTRAP"/logging.jar
			OPENSQLSTA="$BOOTSTRAP"/opensqlsta.jar
			TC_SEC_SECSTOREFS="$BOOTSTRAP"/tc_sec_secstorefs.jar
			JDDI="$BOOTSTRAP"/../server0/bin/ext/jdbdictionary/jddi.jar
			ANTLR="$BOOTSTRAP"/../server0/bin/ext/antlr/antlr.jar
			FRAME="$BOOTSTRAP"/../server0/bin/system/frame.jar

			# only start jdbcconnect when all jars available
			if [ -f "$EXCEPTION" -a -f "$LOGGING" -a -f "$OPENSQLSTA" -a -f "$TC_SEC_SECSTOREFS" -a -f "$JDDI" -a -f "$ANTLR" -a -f "$FRAME" -a -f "$SAPDBCONNECT" ]
			then
				MYCP=".:$FRAME:$ANTLR:$JDDI:$IAIK_JCE_EXPORT:$IAIK_JCE:$EXCEPTION:$LOGGING:$OPENSQLSTA:$TC_SEC_SECSTOREFS:$DB_JARS:$SAPDBCONNECT"
				EXECMD="com.sap.inst.jdbc.connect.JdbcCon -sec $SID:$SID"
			else
				# WebAS Java 7.10
				LAUNCHER=${BOOTSTRAP}/sap.com~tc~bl~offline_launcher~impl.jar

				if [ -f "$DB_JARS" -a -f "$SAPDBCONNECT" -a -f "$LAUNCHER" ]
				then
					MYCP="$LAUNCHER"
					EXECMD="com.sap.engine.offline.OfflineToolStart com.sap.inst.jdbc.connect.JdbcCon ${SAPDBCONNECT}:${SECSTORE}:${DB_JARS}:${BOOTSTRAP} -sec $SID:$SID"
				fi
			fi

			# EXECMD stays empty when neither jar layout was found
			if [ -n "$EXECMD" ]
			then
				output=`eval ${JAVA_HOME}/bin/java -cp $MYCP $EXECMD`
				if [ $? -le 0 ]
				then
					rc=$OCF_SUCCESS
				else
					rc=$OCF_NOT_RUNNING
				fi
			else
				output="Cannot find all jar files needed for database monitoring."
				rc=$OCF_ERR_GENERIC
			fi
		fi
	fi

	if [ $rc -ne $OCF_SUCCESS ]
	then
		ocf_log err "The SAP database $SID is not running: $output"
	fi
	return $rc
}


#
# sapdatabase_status: Are there any database processes on this host ?
#
sapdatabase_status() {
	# Vendor specific process signature:
	#   SEARCH - pattern looked for in the process command lines
	#   SUSER  - OS user owning the database processes
	#   SNUM   - minimum number of matching processes for "running"
	case $DBTYPE in
		ADA)
			SEARCH="$SID/db/pgm/kernel"
			SUSER=$(grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}')
			SNUM=2
			;;
		ORA)
			SEARCH="ora_[a-z][a-z][a-z][a-z]_"
			SUSER="ora$(echo $SID | tr '[:upper:]' '[:lower:]')"
			SNUM=4
			;;
		DB6)
			SEARCH="db2[a-z][a-z][a-z][a-z][a-z]"
			SUSER="db2$(echo $SID | tr '[:upper:]' '[:lower:]')"
			SNUM=5
			;;
	esac

	# "ps -ef" truncates its output at column $COLUMNS and could cut the
	# pattern off, hence the explicit "args" output format.
	cnt=$(ps efo args --user $SUSER 2> /dev/null | grep -c "$SEARCH")

	# Assume "not running" unless enough processes were counted.
	# (kept from the original, disabled on purpose:)
	# ocf_log info "Database Instance $SID is not running on `hostname`"
	rc=$OCF_NOT_RUNNING
	if [ $cnt -ge $SNUM ]; then
		rc=$OCF_SUCCESS
	fi
	return $rc
}


#
# sapdatabase_recover: run the vendor specific recovery helper, then judge
#                      the outcome with a strict monitor run.
#
sapdatabase_recover() {

	case $DBTYPE in
		ORA) recoutput=$(oracle_recover) ;;
		ADA) recoutput=$(maxdb_recover) ;;
		DB6) recoutput=$(db6udb_recover) ;;
	esac

	# the exit code of the helper is ignored, only the monitor result counts
	sapdatabase_monitor 1
	retcode=$?

	if [ $retcode -eq $OCF_SUCCESS ]; then
		ocf_log info "Recover of SAP database $SID was successful: $recoutput"
	else
		ocf_log err "Recover of SAP database $SID failed: $recoutput"
	fi

	return $retcode
}


#
# sapdatabase_validate: check the SID and DBTYPE parameters.
#                       Returns OCF_SUCCESS, or OCF_ERR_ARGS if either of
#                       them is not acceptable.
#
sapdatabase_validate() {
	rc=$OCF_SUCCESS

	# SID: one upper-case letter followed by two upper-case letters or digits
	if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ]; then
		ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!"
		rc=$OCF_ERR_ARGS
	fi

	# DBTYPE: one of the three supported vendor tags
	case "$DBTYPE" in
		ORA|ADA|DB6)
			;;
		*)
			ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!"
			rc=$OCF_ERR_ARGS
			;;
	esac

	return $rc
}


#
# 'main' starts here...
#

# Exactly one argument (the operation) is expected.
if [ $# -ne 1 ]
then
	usage
	exit $OCF_ERR_ARGS
fi

# These operations don't require OCF instance parameters to be set
case "$1" in
	meta-data)	meta_data
			exit $OCF_SUCCESS;;

	usage)		usage
			exit $OCF_SUCCESS;;

	methods)	sapdatabase_methods
			exit $?;;

	*);;
esac

# Set a tempfile and make sure to clean it up again
TEMPFILE="$(mktemp /tmp/SAPDatabase.tmp.XXXXXX)"
trap trap_handler INT TERM

# Everything after here must call do_exit to remove temp file


US=`id -u -n`
US=`echo $US`
# Quoted on purpose: with an empty $US the unquoted test was a syntax error
# that let the script continue as if it were running as root.
if [ "$US" != root ]
then
	ocf_log err "$0 must be run as root"
	do_exit $OCF_ERR_PERM
fi

# mandatory parameter check
if [ -z "$OCF_RESKEY_SID" ]; then
	ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!"
	do_exit $OCF_ERR_ARGS
fi
SID=`echo "$OCF_RESKEY_SID"`

if [ -z "$OCF_RESKEY_DBTYPE" ]; then
	ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ORA,ADA,DB6)!"
	do_exit $OCF_ERR_ARGS
fi
DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[a-z]' '[A-Z]'`

# optional OCF parameters, we try to guess which directories are correct
EXESTARTDB="startdb"
EXESTOPDB="stopdb"
EXEDBCONNECT="R3trans"
if [ -z "$OCF_RESKEY_DBJ2EE_ONLY" ]; then
	DBJ2EE_ONLY=0
else
	case "$OCF_RESKEY_DBJ2EE_ONLY" in
		1|true|TRUE|yes|YES)	DBJ2EE_ONLY=1
					EXESTARTDB="startj2eedb"
					EXESTOPDB="stopj2eedb"
					EXEDBCONNECT="jdbcconnect.jar"
					;;
		0|false|FALSE|no|NO)	DBJ2EE_ONLY=0;;
		# report the value the user supplied; DBJ2EE_ONLY itself is
		# not set yet on this path
		*)	ocf_log err "Parsing parameter DBJ2EE_ONLY: '$OCF_RESKEY_DBJ2EE_ONLY' is not a boolean value!"
			do_exit $OCF_ERR_ARGS ;;
	esac
fi

if [ -z "$OCF_RESKEY_NETSERVICENAME" ]; then
	case "$DBTYPE" in
		ORA|ora)	NETSERVICENAME="LISTENER";;
		*)		NETSERVICENAME="";;
	esac
else
	NETSERVICENAME="$OCF_RESKEY_NETSERVICENAME"
fi

if [ -z "$OCF_RESKEY_STRICT_MONITORING" ]; then
	OCF_RESKEY_STRICT_MONITORING=0
else
	case "$OCF_RESKEY_STRICT_MONITORING" in
		1|true|TRUE|yes|YES)	OCF_RESKEY_STRICT_MONITORING=1;;
		0|false|FALSE|no|NO)	OCF_RESKEY_STRICT_MONITORING=0;;
		*)	ocf_log err "Parsing parameter STRICT_MONITORING: '$OCF_RESKEY_STRICT_MONITORING' is not a boolean value!"
			do_exit $OCF_ERR_ARGS ;;
	esac
fi

# Candidate directories for the SAP executables, searched in this order.
# The list is expanded unquoted so that the '*' entry is globbed.
PATHLIST="
$OCF_RESKEY_DIR_EXECUTABLE
/usr/sap/$SID/*/exe
/usr/sap/$SID/SYS/exe/run
/sapmnt/$SID/exe
"
DIR_EXECUTABLE=""
for EXEPATH in $PATHLIST
do
	if [ -x $EXEPATH/$EXESTARTDB -a -x $EXEPATH/$EXESTOPDB -a -x $EXEPATH/$EXEDBCONNECT ]
	then
		DIR_EXECUTABLE=$EXEPATH
		SAPSTARTDB=$EXEPATH/$EXESTARTDB
		SAPSTOPDB=$EXEPATH/$EXESTOPDB
		SAPDBCONNECT=$EXEPATH/$EXEDBCONNECT
		break
	fi
done
if [ -z "$DIR_EXECUTABLE" ]
then
	ocf_log warn "Cannot find $EXESTARTDB,$EXESTOPDB and $EXEDBCONNECT executable, please set DIR_EXECUTABLE parameter!"
	do_exit $OCF_NOT_RUNNING
fi

if [ $DBJ2EE_ONLY -eq 1 ]
then
	if [ -n "$OCF_RESKEY_DIR_BOOTSTRAP" ]
	then
		BOOTSTRAP="$OCF_RESKEY_DIR_BOOTSTRAP"
	else
		# Take the first directory matching the pattern (or the literal
		# pattern when nothing matches).  The former "echo ... | head -1"
		# kept every match, because echo prints them all on one line.
		for BOOTSTRAP in /usr/sap/$SID/*/j2ee/cluster/bootstrap
		do
			break
		done
	fi

	if [ -n "$OCF_RESKEY_DIR_SECSTORE" ]
	then
		SECSTORE="$OCF_RESKEY_DIR_SECSTORE"
	else
		SECSTORE=/usr/sap/$SID/SYS/global/security/lib/tools
	fi

	if [ -n "$OCF_RESKEY_JAVA_HOME" ]
	then
		JAVA_HOME="$OCF_RESKEY_JAVA_HOME"
		PATH=$JAVA_HOME/bin:$PATH
	else
		if [ -n "$JAVA_HOME" ]
		then
			PATH=$JAVA_HOME/bin:$PATH
		else
			ocf_log err "Cannot find JAVA_HOME directory, please set JAVA_HOME parameter!"
			do_exit $OCF_NOT_RUNNING
		fi
	fi

	if [ -n "$OCF_RESKEY_DB_JARS" ]
	then
		DB_JARS=$OCF_RESKEY_DB_JARS
	else
		if [ -f "$BOOTSTRAP"/bootstrap.properties ]; then
			# the sed call turns the escaped "\:" of the properties file into ":"
			DB_JARS=`cat $BOOTSTRAP/bootstrap.properties | grep -i rdbms.driverLocation | sed -e 's/\\\:/:/g' | awk -F= '{print $2}'`
		fi
	fi
fi

if [ -z "$OCF_RESKEY_AUTOMATIC_RECOVER" ]
then
	OCF_RESKEY_AUTOMATIC_RECOVER=0
else
	case "$OCF_RESKEY_AUTOMATIC_RECOVER" in
		1|true|TRUE|yes|YES)	OCF_RESKEY_AUTOMATIC_RECOVER=1;;
		0|false|FALSE|no|NO)	OCF_RESKEY_AUTOMATIC_RECOVER=0;;
		# reject anything else, like the other boolean parameters do;
		# sapdatabase_start compares this value numerically
		*)	ocf_log err "Parsing parameter AUTOMATIC_RECOVER: '$OCF_RESKEY_AUTOMATIC_RECOVER' is not a boolean value!"
			do_exit $OCF_ERR_ARGS ;;
	esac
fi

# as root user we need the library path to the SAP kernel to be able to call executables
if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then
	LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
	export LD_LIBRARY_PATH
fi
# the SAP administration user is the lower-cased SID followed by "adm"
sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm"

# What kind of method was invoked?
case "$1" in

	start)		sapdatabase_start
			do_exit $?;;

	stop)		sapdatabase_stop
			do_exit $?;;

	monitor)
			sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING
			do_exit $?;;

	status)
			sapdatabase_status
			do_exit $?;;

	recover)	sapdatabase_recover
			do_exit $?;;

	validate-all)	sapdatabase_validate
			do_exit $?;;

	*)		sapdatabase_methods
			do_exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/rgmanager/src/resources/SAPInstance b/rgmanager/src/resources/SAPInstance new file mode 100644 index 0000000..c64e021 --- /dev/null +++ b/rgmanager/src/resources/SAPInstance @@ -0,0 +1,630 @@ +#!/bin/sh +# +# SAPInstance +# +# Description: Manages a single SAP Instance as a High-Availability +# resource. One SAP Instance is defined by one +# SAP Instance-Profile. start/stop handels all services +# of the START-Profile, status and monitor care only +# about essential services.
+# +# Author: Alexander Krauth, June 2006 +# Support: linux@sap.com +# License: GNU General Public License (GPL) +# Copyright: (c) 2006, 2007 Alexander Krauth +# +# An example usage: +# See usage() function below for more details... +# +# OCF instance parameters: +# OCF_RESKEY_InstanceName +# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) +# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) +# OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default) +# OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start) +# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false) +# OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) +# OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) +# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) +# OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) +# +####################################################################### +# Initialization: + +if [ -f $(dirname $0)/.ocf-shellfuncs ]; then + . $(dirname $0)/.ocf-shellfuncs +elif [ -f $(dirname $0)/ocf-shellfuncs ]; then + LC_ALL=C + LANG=C + PATH=/bin:/sbin:/usr/bin:/usr/sbin + export LC_ALL LANG PATH + . $(dirname $0)/ocf-shellfuncs +else + echo Could not find ocf-shellfuncs! + exit 1 +fi + + +####################################################################### + +SH=/bin/sh + +usage() { + methods=`sapinstance_methods` + methods=`echo $methods | tr ' ' '|'` + cat <<-! + usage: $0 ($methods) + + $0 manages a SAP Instance as an HA resource. + + The 'start' operation starts the instance. + The 'stop' operation stops the instance. 
+ The 'status' operation reports whether the instance is running + The 'monitor' operation reports whether the instance seems to be working + The 'validate-all' operation reports whether the parameters are valid + The 'methods' operation reports on the methods $0 supports + + ! +} + +meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent name="SAPInstance"> +<version>1.92.1</version> + +<longdesc lang="en"> +Resource script for SAP. It manages a SAP Instance as an HA resource. +</longdesc> +<shortdesc lang="en">SAP instance resource agent</shortdesc> + +<parameters> + <parameter name="InstanceName" unique="1" required="1" primary="1"> + <longdesc lang="en">The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci</longdesc> + <shortdesc lang="en">instance name: SID_INSTANCE_VIR-HOSTNAME</shortdesc> + <content type="string" default="" /> + </parameter> + <parameter name="DIR_EXECUTABLE" unique="0" required="0"> + <longdesc lang="en">The full qualified path where to find sapstartsrv and sapcontrol.</longdesc> + <shortdesc lang="en">path of sapstartsrv and sapcontrol</shortdesc> + <content type="string" default="" /> + </parameter> + <parameter name="DIR_PROFILE" unique="0" required="0"> + <longdesc lang="en">The full qualified path where to find the SAP START profile.</longdesc> + <shortdesc lang="en">path of start profile</shortdesc> + <content type="string" default="" /> + </parameter> + <parameter name="START_PROFILE" unique="0" required="0"> + <longdesc lang="en">The name of the SAP START profile.</longdesc> + <shortdesc lang="en">start profile name</shortdesc> + <content type="string" default="" /> + </parameter> + <parameter name="START_WAITTIME" unique="0" required="0"> + <longdesc lang="en">After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start is handled as SUCCESS. 
This is useful to resolve timing problems with e.g. the J2EE-Addin instance.</longdesc> + <shortdesc lang="en">Check the successful start after that time (do not wait for J2EE-Addin)</shortdesc> + <content type="string" default="3600" /> + </parameter> + <parameter name="AUTOMATIC_RECOVER" unique="0" required="0"> + <longdesc lang="en">The SAPInstance resource agent tries to recover a failed start attempt automatically one time. This is done by killing running instance processes and executing cleanipc.</longdesc> + <shortdesc lang="en">Enable or disable automatic startup recovery</shortdesc> + <content type="boolean" default="false"/> + </parameter> + <parameter name="PRE_START_USEREXIT" unique="0" required="0"> + <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets started.</longdesc> + <shortdesc lang="en">path to a pre-start script</shortdesc> + <content type="string" default="" /> + </parameter> + <parameter name="POST_START_USEREXIT" unique="0" required="0"> + <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got started.</longdesc> + <shortdesc lang="en">path to a post-start script</shortdesc> + <content type="string" default="" /> + </parameter> + <parameter name="PRE_STOP_USEREXIT" unique="0" required="0"> + <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets stopped.</longdesc> + <shortdesc lang="en">path to a pre-start script</shortdesc> + <content type="string" default="" /> + </parameter> + <parameter name="POST_STOP_USEREXIT" unique="0" required="0"> + <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got stopped.</longdesc> + <shortdesc lang="en">path to a post-start script</shortdesc> + <content type="string" default="" /> + </parameter> +</parameters> + 
+<actions> +<action name="start" timeout="180" /> +<action name="stop" timeout="240" /> +<action name="status" timeout="60" depth="0" interval="120" start-delay="240" /> +<action name="monitor" depth="0" timeout="60" interval="120" start-delay="240" /> +<action name="validate-all" timeout="5" /> +<action name="meta-data" timeout="5" /> +<action name="methods" timeout="5" /> +</actions> +</resource-agent> +END +} + + +# +# methods: What methods/operations do we support? +# +sapinstance_methods() { + cat <<-! + start + stop + status + monitor + validate-all + methods + meta-data + usage + ! +} + +# +# setup_limits: If sapstartsrv needs to be started by this resource agent we need to ensure that any resource +# limits configured in /usr/sap/sapservices are applied. +# Since sapstartsrv is started as root and then it downgrades its privileges by calling setuid() and +# setgid() any PAM limits at /etc/security/limits.conf are not applied. +# Should sapstartsrv need to be started, we look for values configured at /usr/sap/sapservices (as +# per SAP note 1437105) and, if found, we apply them before starting sapstartsrv. +# Instance processes are started by sapstartsrv and will inherit resource limits from it. +# The parsed values are quoted in the tests below: with an unquoted empty value the test collapses +# to "[ -n ]", which is always true, so a missing limit would be logged as found and "applied". +# +setup_limits() { + if [ -r $SAPSERVICES ] + then + descriptors=`grep "^limit.descriptors" $SAPSERVICES | sed -e "s/limit.descriptors=//" ` + if [ -n "$descriptors" ] + then + ocf_log info "found valid open file descriptors limit at ${SAPSERVICES}: ${descriptors}, applying..." + eval ulimit -n $descriptors + fi + + stacksize=`grep "^limit.stacksize" $SAPSERVICES | sed -e "s/limit.stacksize=//" ` + if [ -n "$stacksize" ] + then + ocf_log info "found valid stack size limit at ${SAPSERVICES}: ${stacksize}, applying..." + eval ulimit -s $stacksize + fi + + datasize=`grep "^limit.datasize" $SAPSERVICES | sed -e "s/limit.datasize=//" ` + if [ -n "$datasize" ] + then + ocf_log info "found valid process data segment size limit at ${SAPSERVICES}: ${datasize}, applying..." 
+ eval ulimit -d $datasize + fi + fi +} + +# +# check_sapstartsrv : Before using sapcontrol we make sure that the sapstartsrv is running for the correct instance. +# We cannot use sapinit and the /usr/sap/sapservices file in case of an enquerep instance, +# because then we have two instances with the same instance number. +# +check_sapstartsrv() { + restart=0 + runninginst="" + chkrc=$OCF_SUCCESS + + output=`$SAPCONTROL -nr $InstanceNr -function ParameterValue INSTANCE_NAME -format script` + if [ $? -eq 0 ] + then + runninginst=`echo "$output" | grep '^0 : ' | cut -d' ' -f3` + if [ "$runninginst" != "$InstanceName" ] + then + ocf_log warn "sapstartsrv is running for instance $runninginst, that service will be killed" + restart=1 + fi + else + ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now" + restart=1 + fi + + if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi + + if [ $restart -eq 1 ] + then + pkill -9 -f "sapstartsrv.*$runninginst" + + setup_limits + $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm + + # now make sure the daemon has been started and is able to respond + srvrc=1 + while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ] + do + sleep 1 + $SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1 + srvrc=$? + done + + if [ $srvrc -ne 1 ] + then + ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !" + chkrc=$OCF_SUCCESS + else + ocf_log error "sapstartsrv for instance $SID-$InstanceName could not be started!" + chkrc=$OCF_NOT_RUNNING + fi + fi + + return $chkrc +} + + +# +# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. +# This specialties do not allow a totally generic SAP cluster resource agent. +# Someone should write a resource agent for each additional process you need, if it +# is required to monitor that process within the cluster manager. 
To enable +# you to extent this resource agent without developing a new one, this user exit +# was introduced. +# +sapuserexit() { + NAME="$1" + VALUE="$2" + + if [ -n "$VALUE" ] + then + if [ -x "$VALUE" ] + then + ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" + eval "$VALUE" > /dev/null 2>&1 + ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" + else + ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" + fi + fi + return 0 +} + + +# +# cleanup_instance : remove resources (processes and shared memory) from a crashed instance) +# +cleanup_instance() { + pkill -9 -f -U $sidadm $InstanceName + $DIR_EXECUTABLE/cleanipc $InstanceNr remove + return 0 +} + +# +# sapinstance_start : Start the SAP instance +# +sapinstance_start() { + + sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT" + + rc=$OCF_NOT_RUNNING + loopcount=0 + while [ $loopcount -lt 2 ] + do + loopcount=$(($loopcount + 1)) + + check_sapstartsrv + output=`$SAPCONTROL -nr $InstanceNr -function Start` + rc=$? + ocf_log info "Starting SAP Instance $SID-$InstanceName: $output" + + if [ $rc -ne 0 ] + then + ocf_log err "SAP Instance $SID-$InstanceName start failed." + return $OCF_ERR_GENERIC + fi + + startrc=1 + while [ $startrc -gt 0 ] + do + waittime_start=`date +%s` + output=`$SAPCONTROL -nr $InstanceNr -function WaitforStarted $OCF_RESKEY_START_WAITTIME 10` + startrc=$? + waittime_stop=`date +%s` + + if [ $startrc -ne 0 ] + then + if [ $(($waittime_stop - $waittime_start)) -ge $OCF_RESKEY_START_WAITTIME ] + then + sapinstance_monitor NOLOG + if [ $? -eq $OCF_SUCCESS ] + then + output="START_WAITTIME ($OCF_RESKEY_START_WAITTIME) has elapsed, but instance monitor returned SUCCESS. Instance considered running." 
+ startrc=0; loopcount=2 + fi + else + if [ $loopcount -eq 1 -a $OCF_RESKEY_AUTOMATIC_RECOVER -eq 1 ] + then + ocf_log warn "SAP Instance $SID-$InstanceName start failed: $output" + ocf_log warn "Try to recover $SID-$InstanceName" + cleanup_instance + else + loopcount=2 + fi + startrc=-1 + fi + else + loopcount=2 + fi + done + done + + if [ $startrc -eq 0 ] + then + ocf_log info "SAP Instance $SID-$InstanceName started: $output" + rc=$OCF_SUCCESS + sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT" + else + ocf_log err "SAP Instance $SID-$InstanceName start failed: $output" + rc=$OCF_NOT_RUNNING + fi + + return $rc +} + + +# +# sapinstance_recover: Try startup of failed instance by cleaning up resources +# +sapinstance_recover() { + cleanup_instance + sapinstance_start + return $? +} + + +# +# sapinstance_stop: Stop the SAP instance +# +sapinstance_stop() { + sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT" + + check_sapstartsrv + + output=`$SAPCONTROL -nr $InstanceNr -function Stop` + if [ $? -eq 0 ] + then + output=`$SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1` + if [ $? -eq 0 ] + then + ocf_log info "SAP Instance $SID-$InstanceName stopped: $output" + rc=$OCF_SUCCESS + else + ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" + rc=$OCF_ERR_GENERIC + fi + else + ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" + rc=$OCF_ERR_GENERIC + fi + + sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" + + return $rc +} + + +# +# sapinstance_monitor: Can the given SAP instance do anything useful? +# +sapinstance_monitor() { + MONLOG=$1 + check_sapstartsrv + rc=$? 
+ + if [ $rc -eq $OCF_SUCCESS ] + then + count=0 + LOCALHOST=`hostname` + output=`$SAPCONTROL -nr $InstanceNr -host $LOCALHOST -function GetProcessList -format script` + + # we have to parse the output, because the returncode doesn't tell anything about the instance status + # the leading process index may have more than one digit, so match one or more digits + for SERVNO in `echo "$output" | grep '^[0-9][0-9]* ' | cut -d' ' -f1 | sort -u` + do + COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3` + SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3` + STATE=0 + + case $COLOR in + GREEN|YELLOW) STATE=$OCF_SUCCESS;; + *) STATE=$OCF_NOT_RUNNING;; + esac + + case $SERVICE in + disp+work|msg_server|enserver|enrepserver|jcontrol|jstart) + if [ $STATE -eq $OCF_NOT_RUNNING ] + then + if [ "$MONLOG" != "NOLOG" ] + then + ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" + fi + rc=$STATE + fi + count=1;; + *);; + esac + done + + if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] + then + if [ "$MONLOG" != "NOLOG" ] + then + ocf_log err "The SAP instance does not run any services which this RA could monitor!" + fi + rc=$OCF_ERR_ARGS + fi + fi + + return $rc +} + +# +# sapinstance_validate: Check the semantics of the input parameters +# +sapinstance_validate() { + rc=$OCF_SUCCESS + if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] + then + ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!" + rc=$OCF_ERR_ARGS + fi + + if [ `echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$'` -ne 1 ] + then + ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!" + rc=$OCF_ERR_ARGS + fi + + if [ `echo "$InstanceNr" | grep -c '^[0-9][0-9]$'` -ne 1 ] + then + ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!" + rc=$OCF_ERR_ARGS + fi + + if [ `echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$'` -ne 1 ] + then + ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!" 
+ rc=$OCF_ERR_ARGS + fi + + return $rc +} + + +# +# 'main' starts here... +# + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +# These operations don't require OCF instance parameters to be set +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + + usage) usage + exit $OCF_SUCCESS;; + + methods) sapinstance_methods + exit $?;; + + *);; +esac + +US=`id -u -n` +US=`echo $US` +if + [ $US != root ] +then + ocf_log err "$0 must be run as root" + exit $OCF_ERR_PERM +fi + +# parameter check +if [ -z "$OCF_RESKEY_InstanceName" ] +then + ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!" + exit $OCF_ERR_ARGS +fi + +SID=`echo "$OCF_RESKEY_InstanceName" | cut -d_ -f1` +InstanceName=`echo "$OCF_RESKEY_InstanceName" | cut -d_ -f2` +InstanceNr=`echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/'` +SAPVIRHOST=`echo "$OCF_RESKEY_InstanceName" | cut -d_ -f3` + +# optional OCF parameters, we try to guess which directories are correct +if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] +then + if [ -x /usr/sap/$SID/$InstanceName/exe/sapstartsrv -a -x /usr/sap/$SID/$InstanceName/exe/sapcontrol ] + then + DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe" + SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv" + SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol" + elif [ -x /usr/sap/$SID/SYS/exe/run/sapstartsrv -a -x /usr/sap/$SID/SYS/exe/run/sapcontrol ] + then + DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run" + SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv" + SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol" + else + ocf_log warn "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!" 
+ exit $OCF_NOT_RUNNING + fi +else + DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE" + SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" + SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" +fi + +if [ -z "$OCF_RESKEY_DIR_PROFILE" ] +then + if [ -d /usr/sap/$SID/SYS/profile/ ] + then + DIR_PROFILE="/usr/sap/$SID/SYS/profile" + else + ocf_log warn "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!" + exit $OCF_NOT_RUNNING + fi +else + DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" +fi + +if [ -z "$OCF_RESKEY_START_PROFILE" ] +then + SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" + if [ ! -r $SAPSTARTPROFILE ] + then + ocf_log warn "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!" + exit $OCF_NOT_RUNNING + fi +else + SAPSTARTPROFILE="$OCF_RESKEY_START_PROFILE" +fi + +if [ -z "$OCF_RESKEY_START_WAITTIME" ] +then + OCF_RESKEY_START_WAITTIME=3600 +fi + + +if [ -z "$OCF_RESKEY_AUTOMATIC_RECOVER" ] +then + OCF_RESKEY_AUTOMATIC_RECOVER=0 +else + case "$OCF_RESKEY_AUTOMATIC_RECOVER" in + 1|true|TRUE|yes|YES) OCF_RESKEY_AUTOMATIC_RECOVER=1;; + 0|false|FALSE|no|NO) OCF_RESKEY_AUTOMATIC_RECOVER=0;; + esac +fi + +# as root user we need the library path to the SAP kernel to be able to call sapcontrol +if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then + LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH + export LD_LIBRARY_PATH +fi +sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" + +SAPSERVICES=/usr/sap/sapservices + +# What kind of method was invoked? 
+case "$1" in + + start) sapinstance_start + exit $?;; + + stop) sapinstance_stop + exit $?;; + + status|monitor) + sapinstance_monitor + exit $?;; + + validate-all) sapinstance_validate + exit $?;; + + *) sapinstance_methods + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/rgmanager/src/resources/apache.metadata b/rgmanager/src/resources/apache.metadata new file mode 100644 index 0000000..e747d16 --- /dev/null +++ b/rgmanager/src/resources/apache.metadata @@ -0,0 +1,106 @@ +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent version="rgmanager 2.0" name="apache"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an instance of Apache web server + </longdesc> + <shortdesc lang="en"> + Defines an Apache web server + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Define a name for use in <IfDefine name> directive. + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="httpd" > + <longdesc lang="en"> + Define the full path to the httpd binary. 
+ </longdesc> + <shortdesc lang="en"> + httpd binary + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="server_root"> + <longdesc lang="en"> + Define an alternate initial ServerRoot + </longdesc> + <shortdesc lang="en"> + Initial ServerRoot + </shortdesc> + <content type="string" default="/etc/httpd"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define an alternate ServerConfigFile + </longdesc> + <shortdesc lang="en"> + Initial ServerConfigFile + </shortdesc> + <content type="string" default="conf/httpd.conf"/> + </parameter> + + <parameter name="httpd_options"> + <longdesc lang="en"> + Other command-line options for httpd + </longdesc> + <shortdesc lang="en"> + Other command-line options for httpd + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="shutdown_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service shutdown + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service shutdown + </shortdesc> + <content type="integer" /> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. 
+ </shortdesc> + <content type="string"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- Checks to see if it''s mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <action name="meta-data" timeout="1"/> + <action name="validate-all" timeout="1"/> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/apache.sh.in b/rgmanager/src/resources/apache.sh.in new file mode 100644 index 0000000..6c02183 --- /dev/null +++ b/rgmanager/src/resources/apache.sh.in @@ -0,0 +1,298 @@ +#!@BASH_SHELL@ + +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. 
$(dirname $0)/utils/ra-skelet.sh + +APACHE_HTTPD=$OCF_RESKEY_httpd +if [ -z "$APACHE_HTTPD" ]; then + if [ -x /usr/sbin/httpd ]; then + declare APACHE_HTTPD=/usr/sbin/httpd + elif [ -x /usr/sbin/apache2 ]; then + declare APACHE_HTTPD=/usr/sbin/apache2 + fi +fi +declare APACHE_serverConfigFile +declare APACHE_pid_file="`generate_name_for_pid_file`" +declare APACHE_conf_dir="`generate_name_for_conf_dir`" +declare APACHE_genConfig="$APACHE_conf_dir/httpd.conf" + +declare APACHE_parseConfig=$(dirname $0)/utils/httpd-parse-config.pl + +apache_serverConfigFile() +{ + if $(echo $OCF_RESKEY_config_file | grep -q "^/"); then + APACHE_serverConfigFile="$OCF_RESKEY_config_file" + else + APACHE_serverConfigFile="$OCF_RESKEY_server_root/$OCF_RESKEY_config_file" + fi + + return; +} + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + # the path is quoted for the existence test, and the message names the configured binary (OCF_RESKEY_httpd) + if [ -n "$OCF_RESKEY_httpd" ] && ! [ -e "$OCF_RESKEY_httpd" ]; then + clog_service_verify $CLOG_FAILED "Invalid httpd binary, $OCF_RESKEY_httpd does not exist" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_server_root" ]; then + clog_service_verify $CLOG_FAILED "Invalid ServerRoot" + return $OCF_ERR_ARGS + fi + + if [ ! -d "$OCF_RESKEY_server_root" ]; then + clog_service_verify $CLOG_FAILED "ServerRoot Directory Is Missing" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + return $OCF_ERR_ARGS + fi + + if [ ! 
-r "$APACHE_serverConfigFile" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE "$APACHE_serverConfigFile" + return $OCF_ERR_ARGS + fi + + if [ -z "$APACHE_pid_file" ]; then + clog_service_verify $CLOG_FAILED "Invalid name of PID file" + return $OCF_ERR_ARGS + fi + + clog_check_syntax $CLOG_INIT "$APACHE_serverConfigFile" + + "$APACHE_HTTPD" -t \ + -D"$OCF_RESKEY_name" \ + -d "$OCF_RESKEY_server_root" \ + -f "$APACHE_serverConfigFile" \ + $OCF_RESKEY_httpd_options &> /dev/null + + if [ $? -ne 0 ]; then + clog_check_syntax $CLOG_FAILED "$APACHE_serverConfigFile" + return $OCF_ERR_GENERIC + fi + + clog_check_syntax $CLOG_SUCCEED "$APACHE_serverConfigFile" + + return 0 +} + +generate_configFile() +{ + declare originalConfigFile=$1; + declare generatedConfigFile=$2; + declare ip_addresses=$3; + + if [ -f "$generatedConfigFile" ]; then + sha1_verify "$generatedConfigFile" + if [ $? -ne 0 ]; then + clog_check_sha1 $CLOG_FAILED + return 0 + fi + fi + + clog_generate_config $CLOG_INIT "$originalConfigFile" "$generatedConfigFile" + + generate_configTemplate "$generatedConfigFile" "$1" + cat >> "$generatedConfigFile" << EOT +# From a cluster perspective, the key fields are: +# Listen - must be set to service floating IP address. 
+# ServerRoot - path to the ServerRoot (initial value is set in service conf) +# + +EOT + + IFS_old="$IFS" + IFS=$'\n' + for i in `"$APACHE_parseConfig" -D"$OCF_RESKEY_name" < "$originalConfigFile" | grep -P '(^Listen)|(^Port)' `; do + port=`echo $i | sed 's/^Listen \(.*\)/\1/;s/^Port \(.*\)/\1/'`; + testcond=`echo $port|grep :` + if [ $testcond ]; then + port=`echo $port|awk -F : '{print $2}'` + fi + IFS=$' '; + for z in $ip_addresses; do + if [ "${z//:/}" != "$z" ]; then + echo "Listen [$z]:$port" >> "$generatedConfigFile"; + else + echo "Listen $z:$port" >> "$generatedConfigFile"; + fi + done + IFS=$'\n'; + done; + IFS="$IFS_old" + + echo "PidFile \"$APACHE_pid_file\"" >> "$generatedConfigFile"; + echo >> "$generatedConfigFile" + + cat "$originalConfigFile" | sed 's/^Listen/### Listen/;s/^Port/### Port/;s/^PidFile/### PidFile/' | \ + "$APACHE_parseConfig" -D"$OCF_RESKEY_name" >> "$generatedConfigFile" + + sha1_addToFile "$generatedConfigFile" + clog_generate_config $CLOG_SUCCEED "$originalConfigFile" "$generatedConfigFile" +} + +start() +{ + if status; then + ocf_log info "Starting Service $OCF_RESOURCE_INSTANCE > Already running" + return $OCF_SUCCESS + fi + + declare ip_addresses + + clog_service_start $CLOG_INIT + + create_pid_directory + create_conf_directory "$APACHE_conf_dir" + check_pid_file "$APACHE_pid_file" + + if [ $? 
-ne 0 ]; then + clog_check_pid $CLOG_FAILED "$APACHE_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_INIT "IP Addresses" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -z "$ip_addresses" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_SUCCEED "IP Addresses" + + generate_configFile "$APACHE_serverConfigFile" "$APACHE_genConfig" "$ip_addresses" + + "$APACHE_HTTPD" \ + "-D$OCF_RESKEY_name" \ + -d "$OCF_RESKEY_server_root" \ + -f "$APACHE_genConfig" \ + $OCF_RESKEY_httpd_options \ + -k start + + if [ $? -ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + else + clog_service_start $CLOG_SUCCEED + fi + + return 0; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + stop_generic "$APACHE_pid_file" "$OCF_RESKEY_shutdown_wait" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$APACHE_pid_file" + case $? in + $OCF_NOT_RUNNING) + clog_service_status $CLOG_FAILED "$APACHE_pid_file" + return $OCF_NOT_RUNNING + ;; + 0) + clog_service_status $CLOG_SUCCEED + # return instead of exit, so that start() can take its already-running branch + return 0 + ;; + *) + clog_service_status $CLOG_FAILED "$APACHE_pid_file" + return $OCF_ERR_GENERIC + ;; + esac +} + +if [ "$1" != "meta-data" ]; then + apache_serverConfigFile +fi; + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all|verify-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all && stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? 
+ ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/bind-mount.sh.in b/rgmanager/src/resources/bind-mount.sh.in new file mode 100644 index 0000000..db894f8 --- /dev/null +++ b/rgmanager/src/resources/bind-mount.sh.in @@ -0,0 +1,167 @@ +#!@BASH_SHELL@ +# +# Copyright Red Hat Inc., 2014 +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, +# MA 02139, USA. +# + +# +# Bind mount script - mounts parent file system -o bind in another +# location +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/fs-lib.sh + +export IS_BIND_MOUNT=1 +export OCF_RESKEY_use_findmnt=0 +export OCF_RESKEY_options="bind" +export OCF_RESKEY_device="$OCF_RESKEY_source" +rv=0 + +do_metadata() +{ + cat <<EOT +<?xml version="1.0" ?> +<resource-agent name="bind-mount" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + Defines a bind mount. + </longdesc> + + <shortdesc lang="en"> + Defines a bind mount. + </shortdesc> + + <parameters> + + <parameter name="name" primary="1" unique="1"> + <longdesc lang="en"> + Symbolic name for this bind mount. 
+ </longdesc> + <shortdesc lang="en"> + Bind Mount Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="mountpoint" unique="1" required="1"> + <longdesc lang="en"> + Target of this bind mount + </longdesc> + <shortdesc lang="en"> + Target mountpoint + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="source" required="1"> + <longdesc lang="en"> + Source of the bind mount + </longdesc> + <shortdesc lang="en"> + Source of the bind mount + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="force_unmount"> + <longdesc lang="en"> + If set, the cluster will kill all processes using + this file system when the resource group is + stopped. Otherwise, the unmount will fail, and + the resource group will be restarted. + </longdesc> + <shortdesc lang="en"> + Force Unmount + </shortdesc> + <content type="boolean"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="5"/> + <action name="stop" timeout="5"/> + <action name="recover" timeout="5"/> + + <action name="status" timeout="5" interval="1h"/> + <action name="monitor" timeout="5" interval="1h"/> + + <action name="meta-data" timeout="5"/> + <action name="verify-all" timeout="30"/> + </actions> + + <special tag="rgmanager"> + <child type="nfsexport" forbid="1"/> + <child type="nfsclient"/> + </special> + +</resource-agent> +EOT +} + +verify_source() +{ + if [ -z "$OCF_RESKEY_source" ]; then + ocf_log err "No source specified." + return $OCF_ERR_ARGS + fi + + [ -d "$OCF_RESKEY_source" ] && return 0 + + ocf_log err "$OCF_RESKEY_source is not a directory" + + return $OCF_ERR_ARGS +} + +verify_mountpoint() +{ + if [ -z "$OCF_RESKEY_mountpoint" ]; then + ocf_log err "No target path specified." 
+ return $OCF_ERR_ARGS + fi + + [ -d "$OCF_RESKEY_mountpoint" ] && return 0 + + mkdir -p $OCF_RESKEY_mountpoint && return 0 + + ocf_log err "$OCF_RESKEY_mountpoint is not a directory and could not be created" + + return $OCF_ERR_ARGS +} + +do_validate() +{ + declare -i ret=0 + + verify_source || ret=$OCF_ERR_ARGS + verify_mountpoint || ret=$OCF_ERR_ARGS + + return $ret +} + +do_pre_mount() +{ + do_validate || exit $OCF_ERR_ARGS +} + +main $* diff --git a/rgmanager/src/resources/clusterfs.sh.in b/rgmanager/src/resources/clusterfs.sh.in new file mode 100644 index 0000000..8f339d4 --- /dev/null +++ b/rgmanager/src/resources/clusterfs.sh.in @@ -0,0 +1,342 @@ +#!@BASH_SHELL@ + +# +# Cluster File System mount/umount/fsck/etc. agent +# +# Copyright (C) 2000 Mission Critical Linux +# Copyright (C) 2002-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +. $(dirname $0)/utils/fs-lib.sh + +do_metadata() +{ + cat <<EOT +<?xml version="1.0" ?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent name="clusterfs" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + This defines a cluster file system mount (i.e. GFS) + </longdesc> + <shortdesc lang="en"> + Defines a cluster file system mount. 
+ </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Symbolic name for this file system. + </longdesc> + <shortdesc lang="en"> + File System Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="mountpoint" unique="1" required="1"> + <longdesc lang="en"> + Path in file system hierarchy to mount this file system. + </longdesc> + <shortdesc lang="en"> + Mount Point + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="device" unique="1" required="1"> + <longdesc lang="en"> + Block device, file system label, or UUID of file system. + </longdesc> + <shortdesc lang="en"> + Device or Label + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="fstype"> + <longdesc lang="en"> + File system type. If not specified, mount(8) will attempt to + determine the file system type. + </longdesc> + <shortdesc lang="en"> + File system type + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="force_unmount"> + <longdesc lang="en"> + If set, the cluster will kill all processes using + this file system when the resource group is + stopped. Otherwise, the unmount will fail, and + the resource group will be restarted. + </longdesc> + <shortdesc lang="en"> + Force Unmount + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="self_fence"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + immediately reboot. Generally, this is used in conjunction + with force_unmount support, but it is not required. + </longdesc> + <shortdesc lang="en"> + Seppuku Unmount + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="fsid"> + <longdesc lang="en"> + File system ID for NFS exports. This can be overridden + in individual nfsclient entries. 
+ </longdesc> + <shortdesc lang="en"> + NFS File system ID + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="nfslock" inherit="service%nfslock"> + <longdesc lang="en"> + If set, the node will try to kill lockd and issue + reclaims across all remaining network interface cards. + This always happens, regardless of whether unmounting failed. + </longdesc> + <shortdesc lang="en"> + Enable NFS lock workarounds + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="nfsrestart"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + try to restart nfs daemon and nfs lockd to drop all filesystem + references. Use this option as a last resort. + This option requires force_unmount to be set and it is not + compatible with nfsserver resource. + </longdesc> + <shortdesc lang="en"> + Enable NFS daemon and lockd workaround + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="options"> + <longdesc lang="en"> + Options used when the file system is mounted. These + are often file-system specific. See mount(8) and/or + mount.gfs2(8) for supported mount options. + </longdesc> + <shortdesc lang="en"> + Mount Options + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="use_findmnt"> + <longdesc lang="en"> + Use findmnt to determine if and where a filesystem is mounted. + Disabling this uses the fallback method (should be used if autofs + maps are located on network storage, i.e. nfs, iscsi, etc.). + </longdesc> + <shortdesc lang="en"> + Utilize findmnt to detect if and where filesystems are mounted + </shortdesc> + <content type="boolean"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="900"/> + <action name="stop" timeout="30"/> + <!-- Recovery isn't possible; we don't know if resources are using + the file system. 
--> + + <!-- Checks to see if it's mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <!-- Checks to see if we can write to the mountpoint (if !ROFS) --> + <action name="status" depth="20" timeout="30" interval="10m"/> + <action name="monitor" depth="20" timeout="30" interval="10m"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="5"/> + </actions> + + <special tag="rgmanager"> + <child type="fs" start="1" stop="3"/> + <child type="clusterfs" start="1" stop="3"/> + <child type="nfsexport" start="3" stop="1"/> + </special> +</resource-agent> +EOT +} + + +verify_fstype() +{ + # Auto detect? + [ -z "$OCF_RESKEY_fstype" ] && return $OCF_SUCCESS + + case $OCF_RESKEY_fstype in + gfs|gfs2) + return $OCF_SUCCESS + ;; + *) + ocf_log err "File system type $OCF_RESKEY_fstype not supported" + return $OCF_ERR_ARGS + ;; + esac +} + + +verify_options() +{ + declare -i ret=$OCF_SUCCESS + + # + # From mount(8) + # + for o in `echo $OCF_RESKEY_options | sed -e s/,/\ /g`; do + case $o in + async|atime|auto|defaults|dev|exec|_netdev|noatime) + continue + ;; + noauto|nodev|noexec|nosuid|nouser|ro|rw|suid|sync) + continue + ;; + dirsync|user|users) + continue + ;; + esac + + case $OCF_RESKEY_fstype in + gfs) + case $o in + lockproto=*|locktable=*|hostdata=*) + continue; + ;; + localcaching|localflocks|ignore_local_fs) + continue; + ;; + num_glockd|acl|suiddir) + continue + ;; + esac + ;; + gfs2) + # XXX + continue + ;; + esac + + + ocf_log err "Option $o not supported for $OCF_RESKEY_fstype" + ret=$OCF_ERR_ARGS + done + + return $ret +} + + +do_verify() +{ + verify_name || return $OCF_ERR_ARGS + verify_fstype || return $OCF_ERR_ARGS + verify_device || return $OCF_ERR_ARGS + 
verify_mountpoint || return $OCF_ERR_ARGS + verify_options || return $OCF_ERR_ARGS +} + + +do_pre_unmount() { + # + # Check the rgmanager-supplied reference count if one exists. + # If the reference count is <= 1, we can safely proceed + # + if [ -n "$OCF_RESKEY_RGMANAGER_meta_refcnt" ]; then + refs=$OCF_RESKEY_RGMANAGER_meta_refcnt + if [ $refs -gt 0 ]; then + ocf_log debug "Not unmounting $OCF_RESOURCE_INSTANCE - still in use by $refs other service(s)" + return 2 + fi + fi + + if [ -z "$force_umount" ]; then + ocf_log debug "Not umounting $dev (clustered file system)" + return 2 + fi + + # + # Always do this hackery on clustered file systems. + # + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + ocf_log warning "Dropping node-wide NFS locks" + mkdir -p $mp/.clumanager/statd + chown rpcuser.rpcuser $mp/.clumanager/statd + pkill -KILL -x lockd + # Copy out the notify list; our + # IPs are already torn down + if notify_list_store $mp/.clumanager/statd; then + notify_list_broadcast $mp/.clumanager/statd + fi + fi + + # Always invalidate buffers on clusterfs resources + clubufflush -f $dev + + return 0 +} + +do_force_unmount() { + if [ "$OCF_RESKEY_nfsrestart" = "yes" ] || \ + [ "$OCF_RESKEY_nfsrestart" = "1" ]; then + ocf_log warning "Restarting nfsd/nfslock" + nfsexports=$(cat /var/lib/nfs/etab) + service nfslock stop + service nfs stop + service nfs start + service nfslock start + echo "$nfsexports" | { while read line; do + nfsexp=$(echo $line | awk '{print $1}') + nfsopts=$(echo $line | sed -e 's#.*(##g' -e 's#).*##g') + nfsacl=$(echo $line | awk '{print $2}' | sed -e 's#(.*##g') + if [ -n "$nfsopts" ]; then + exportfs -i -o "$nfsopts" "$nfsacl":$nfsexp + else + exportfs -i "$nfsacl":$nfsexp + fi + done; } + fi + return 1 +} + +main $* diff --git a/rgmanager/src/resources/db2.sh.in b/rgmanager/src/resources/db2.sh.in new file mode 100644 index 0000000..66125aa --- /dev/null +++ b/rgmanager/src/resources/db2.sh.in @@ -0,0 +1,133 
@@ +#!@BASH_SHELL@ +# +# Copyright (c) 2011 Holger Teutsch <holger.teutsch@web.de> +# Copyright (c) 2014 David Vossel <davidvossel@gmail.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +# NOTE: +# +# This agent is a wrapper around the heartbeat/db2 agent which limits the heartbeat +# db2 agent to Standard role support. This allows cluster managers such as rgmanager +# which do not have multi-state resource support to manage db2 instances with +# a limited feature set. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin +. $(dirname $0)/ocf-shellfuncs + +meta_data() { +cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="db2.sh"> +<version>1.0</version> +<longdesc lang="en"> +Resource Agent that manages an IBM DB2 LUW databases in Standard role. Multiple partitions are supported. + +When partitions are in use, each partition must be configured as a separate primitive resource. + +</longdesc> +<shortdesc lang="en">Resource Agent that manages an IBM DB2 LUW databases in Standard role with multiple partition support.</shortdesc> + +<parameters> +<parameter name="instance" unique="1" required="1"> +<longdesc lang="en"> +The instance of the database(s). 
+</longdesc> +<shortdesc lang="en">instance</shortdesc> +<content type="string" default="" /> +</parameter> +<parameter name="dblist" unique="0" required="0"> +<longdesc lang="en"> +List of databases to be managed, e.g "db1 db2". +Defaults to all databases in the instance. +</longdesc> +<shortdesc lang="en">List of databases to be managed</shortdesc> +<content type="string"/> +</parameter> +<parameter name="dbpartitionnum" unique="0" required="0"> +<longdesc lang="en"> +The number of the partition (DBPARTITIONNUM) to be managed. +</longdesc> +<shortdesc lang="en">database partition number (DBPARTITIONNUM)</shortdesc> +<content type="string" default="0" /> +</parameter> +</parameters> + +<actions> +<action name="start" timeout="120"/> +<action name="stop" timeout="120"/> +<action name="monitor" depth="0" timeout="60" interval="20"/> +<action name="monitor" depth="0" timeout="60" role="Master" interval="22"/> +<action name="validate-all" timeout="5"/> +<action name="meta-data" timeout="5"/> +</actions> +</resource-agent> +END +} + +heartbeat_db2_wrapper() +{ + # default heartbeat agent ocf root. + export OCF_ROOT=/usr/lib/ocf + heartbeat_db2="${OCF_ROOT}/resource.d/heartbeat/db2" + + if ! [ -a $heartbeat_db2 ]; then + echo "heartbeat db2 agent not found at '${heartbeat_db2}'" + exit $OCF_ERR_INSTALLED + fi + + $heartbeat_db2 $1 +} + +case $1 in + meta-data) + meta_data + exit 0 + ;; + validate-all) + heartbeat_db2_wrapper $1 + exit $? + ;; + start) + heartbeat_db2_wrapper $1 + exit $? + ;; + stop) + heartbeat_db2_wrapper $1 + exit $? + ;; + status|monitor) + heartbeat_db2_wrapper "monitor" + exit $? + ;; + restart) + heartbeat_db2_wrapper "stop" + rc=$? + if [ $rc -ne 0 ]; then + exit $rc + fi + heartbeat_db2_wrapper "start" + exit $? 
+ ;; + *) + echo "Usage: db2.sh {start|stop|monitor|validate-all|meta-data}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/drbd.metadata b/rgmanager/src/resources/drbd.metadata new file mode 100644 index 0000000..30fe122 --- /dev/null +++ b/rgmanager/src/resources/drbd.metadata @@ -0,0 +1,51 @@ +<?xml version="1.0" ?> +<resource-agent version="rgmanager 2.0" name="drbd"> + <version>1.0</version> + + <longdesc lang="en"> + This is a DRBD resource. The resource must be configured + in the configuration file (/etc/drbd.conf), and the + DRBD kernel module must be loaded. + </longdesc> + + <shortdesc lang="en"> + This is a DRBD resource. + </shortdesc> + + <parameters> + <parameter name="name" unique="1" primary="1"> + <longdesc lang="en"> + Symbolic name for this resource. + </longdesc> + <shortdesc lang="en"> + Cluster resource name + </shortdesc> + <content type="string"/> + </parameter> + <parameter name="resource" unique="1" required="1"> + <longdesc lang="en"> + The DRBD resource name, as specified in /etc/drbd.conf. 
+ </longdesc> + <shortdesc lang="en"> + DRBD resource name + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="20"/> + <action name="stop" timeout="20"/> + + <!-- Checks to see if the resource is Primary locally --> + <action name="status" depth="10" interval="60" timeout="20"/> + <action name="monitor" depth="10" interval="60" timeout="20"/> + + <action name="meta-data" timeout="20"/> + <action name="verify-all" timeout="20"/> + </actions> + + <special tag="rgmanager"> + <attributes maxinstances="1"/> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/drbd.sh.in b/rgmanager/src/resources/drbd.sh.in new file mode 100644 index 0000000..febe658 --- /dev/null +++ b/rgmanager/src/resources/drbd.sh.in @@ -0,0 +1,144 @@ +#!@BASH_SHELL@ +# +# Copyright LINBIT, 2008 +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, +# MA 02139, USA. +# + +# +# DRBD resource management using the drbdadm utility. +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +. $(dirname $0)/ocf-shellfuncs + +drbd_verify_all() +{ + # Do we have the drbdadm utility? + if ! which drbdadm >/dev/null 2>&1 ; then + ocf_log error "drbdadm not installed, not found in PATH ($PATH), or not executable." + return $OCF_ERR_INSTALLED + fi + + # Is drbd loaded? + if ! 
grep drbd /proc/modules >/dev/null 2>&1; then + ocf_log error "drbd not found in /proc/modules. Do you need to modprobe?" + return $OCF_ERR_INSTALLED + fi + + # Do we have the "resource" parameter? + if [ -n "$OCF_RESKEY_resource" ]; then + + # Can drbdadm parse the resource name? + if ! drbdadm sh-dev $OCF_RESKEY_resource >/dev/null 2>&1; then + ocf_log error "DRBD resource \"$OCF_RESKEY_resource\" not found." + return $OCF_ERR_CONFIGURED + fi + + # Is the backing device a locally available block device? + backing_dev=$(drbdadm sh-ll-dev $OCF_RESKEY_resource) + if [ ! -b $backing_dev ]; then + ocf_log error "Backing device for DRBD resource \"$OCF_RESKEY_resource\" ($backing_dev) not found or not a block device." + return $OCF_ERR_INSTALLED + fi + + fi + + return 0 +} + +drbd_status() { + role=$(drbdadm role $OCF_RESKEY_resource) + case $role in + Primary/*) + return $OCF_SUCCESS + ;; + Secondary/*) + return $OCF_NOT_RUNNING + ;; + + esac + return $OCF_ERR_GENERIC +} + +drbd_promote() { + drbdadm primary $OCF_RESKEY_resource || return $? +} + +drbd_demote() { + drbdadm secondary $OCF_RESKEY_resource || return $? +} + + +if [ -z "$OCF_CHECK_LEVEL" ]; then + OCF_CHECK_LEVEL=0 +fi + +# This one doesn't need to pass the verify check +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` && exit 0 + exit $OCF_ERR_GENERIC + ;; +esac + +# Everything else does +drbd_verify_all || exit $? +case $1 in + start) + if drbd_status; then + ocf_log debug "DRBD resource ${OCF_RESKEY_resource} already configured" + exit 0 + fi + drbd_promote + if [ $? -ne 0 ]; then + exit $OCF_ERR_GENERIC + fi + + exit $? + ;; + stop) + if drbd_status; then + drbd_demote + if [ $? -ne 0 ]; then + exit $OCF_ERR_GENERIC + fi + else + ocf_log debug "DRBD resource ${OCF_RESKEY_resource} is not configured" + fi + exit 0 + ;; + status|monitor) + drbd_status + exit $? 
+ ;; + restart) + $0 stop || exit $OCF_ERR_GENERIC + $0 start || exit $OCF_ERR_GENERIC + exit 0 + ;; + verify-all) + exit 0 + ;; + *) + echo "usage: $0 {start|stop|status|monitor|restart|meta-data|verify-all}" + exit $OCF_ERR_GENERIC + ;; +esac diff --git a/rgmanager/src/resources/fs.sh.in b/rgmanager/src/resources/fs.sh.in new file mode 100644 index 0000000..fb43dab --- /dev/null +++ b/rgmanager/src/resources/fs.sh.in @@ -0,0 +1,504 @@ +#!@BASH_SHELL@ + +# +# File system (normal) mount/umount/fsck/etc. agent +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +. $(dirname $0)/utils/fs-lib.sh + +do_metadata() +{ + cat <<EOT +<?xml version="1.0" encoding="ISO-8859-1" ?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent name="fs" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + This defines a standard file system mount (= not a clustered + or otherwise shared file system). + </longdesc> + <shortdesc lang="en"> + Defines a file system mount. + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Symbolic name for this file system. 
+ </longdesc> + <shortdesc lang="en"> + File System Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="mountpoint" unique="1" required="1"> + <longdesc lang="en"> + Path in file system heirarchy to mount this file system. + </longdesc> + <shortdesc lang="en"> + Mount Point + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="device" unique="1" required="1"> + <longdesc lang="en"> + Block device, file system label, or UUID of file system. + </longdesc> + <shortdesc lang="en"> + Device or Label + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="fstype"> + <longdesc lang="en"> + File system type. If not specified, mount(8) will attempt to + determine the file system type. + </longdesc> + <shortdesc lang="en"> + File system type + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="force_unmount"> + <longdesc lang="en"> + If set, the cluster will kill all processes using + this file system when the resource group is + stopped. Otherwise, the unmount will fail, and + the resource group will be restarted. + </longdesc> + <shortdesc lang="en"> + Force Unmount + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="quick_status"> + <longdesc lang="en"> + Use quick status checks. When set to 0 (the default), this + agent behaves normally. When set to 1, this agent will not + log errors incurred or perform the file system accessibility + check (e.g. it will not try to read from/write to the file + system). You should only set this to 1 if you have lots of + file systems on your cluster or you are seeing very high load + spikes as a direct result of this agent. + </longdesc> + <shortdesc lang="en"> + Quick/brief status checks. + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="self_fence"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + immediately reboot. 
Generally, this is used in conjunction + with force_unmount support, but it is not required. + </longdesc> + <shortdesc lang="en"> + Seppuku Unmount + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="nfslock" inherit="nfslock"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + try to kill lockd and issue reclaims across all remaining + network interface cards. + </longdesc> + <shortdesc lang="en"> + Enable NFS lock workarounds + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="nfsrestart"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + try to restart nfs daemon and nfs lockd to drop all filesystem + references. Use this option as last resource. + This option requires force_unmount to be set and it is not + compatible with nfsserver resource. + </longdesc> + <shortdesc lang="en"> + Enable NFS daemon and lockd workaround + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="fsid"> + <longdesc lang="en"> + File system ID for NFS exports. This can be overridden + in individual nfsclient entries. + </longdesc> + <shortdesc lang="en"> + NFS File system ID + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="force_fsck"> + <longdesc lang="en"> + If set, the file system will be checked (even if + it is a journalled file system). This option is + ignored for non-journalled file systems such as + ext2. + </longdesc> + <shortdesc lang="en"> + Force fsck support + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="options"> + <longdesc lang="en"> + Options used when the file system is mounted. These + are often file-system specific. See mount(8) for supported + mount options. 
+ </longdesc> + <shortdesc lang="en"> + Mount Options + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="use_findmnt"> + <longdesc lang="en"> + Use findmnt to determine if and where a filesystem is mounted. + Disabling this uses the failback method (should be used if autofs + maps are located on network storage (ie. nfs, iscsi, etc). + </longdesc> + <shortdesc lang="en"> + Utilize findmnt to detect if and where filesystems are mounted + </shortdesc> + <content type="boolean"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="900"/> + <action name="stop" timeout="30"/> + <!-- Recovery isn't possible; we don't know if resources are using + the file system. --> + + <!-- Checks to see if it's mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Note: active monitoring is constant and supplants all + check depths --> + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="30"/> + <action name="monitor" depth="10" timeout="30" interval="30"/> + + <!-- Checks to see if we can write to the mountpoint (if !ROFS) --> + <action name="status" depth="20" timeout="30" interval="1m"/> + <action name="monitor" depth="20" timeout="30" interval="1m"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="5"/> + </actions> + + <special tag="rgmanager"> + <attributes maxinstances="1"/> + <child type="fs" start="1" stop="3"/> + <child type="clusterfs" start="1" stop="3"/> + <child type="nfsexport" start="3" stop="1"/> + </special> +</resource-agent> +EOT +} + + +verify_fstype() +{ + # Auto detect? 
+ [ -z "$OCF_RESKEY_fstype" ] && return 0 + + case $OCF_RESKEY_fstype in + ext2|ext3|ext4|btrfs|jfs|xfs|reiserfs|vfat|vxfs) + return 0 + ;; + *) + echo "File system type $OCF_RESKEY_fstype not supported" + return $OCF_ERR_ARGS + ;; + esac +} + + +verify_options() +{ + declare -i ret=$OCF_SUCCESS + declare o + + # + # From mount(8) + # + for o in `echo $OCF_RESKEY_options | sed -e s/,/\ /g`; do + case $o in + async|atime|auto|defaults|dev|exec|_netdev|noatime) + continue + ;; + noauto|nodev|noexec|nosuid|nouser|ro|rw|suid|sync) + continue + ;; + dirsync|user|users) + continue + ;; + esac + + do_verify_option $OCF_RESKEY_fstype "$o" + + case $OCF_RESKEY_fstype in + ext2|ext3|ext4) + case $o in + bsddf|minixdf|check|check=*|nocheck|debug) + continue + ;; + errors=*|grpid|bsdgroups|nogrpid|sysvgroups) + continue + ;; + resgid=*|resuid=*|sb=*|grpquota|noquota) + continue + ;; + quota|usrquota|nouid32) + continue + ;; + esac + + if [ "$OCF_RESKEY_fstype" = "ext3" ] || + [ "$OCF_RESKEY_fstype" = "ext4" ]; then + case $o in + noload|data=*) + continue + ;; + esac + fi + ;; + vfat) + case $o in + blocksize=512|blocksize=1024|blocksize=2048) + continue + ;; + uid=*|gid=*|umask=*|dmask=*|fmask=*) + continue + ;; + check=r*|check=n*|check=s*|codepage=*) + continue + ;; + conv=b*|conv=t*|conv=a*|cvf_format=*) + continue + ;; + cvf_option=*|debug|fat=12|fat=16|fat=32) + continue + ;; + iocharset=*|quiet) + continue + ;; + esac + ;; + + jfs) + case $o in + conv|hash=rupasov|hash=tea|hash=r5|hash=detect) + continue + ;; + hashed_relocation|no_unhashed_relocation) + continue + ;; + noborder|nolog|notail|resize=*) + continue + ;; + esac + ;; + + xfs) + case $o in + biosize=*|dmapi|xdsm|logbufs=*|logbsize=*) + continue + ;; + logdev=*|rtdev=*|noalign|noatime) + continue + ;; + norecovery|osyncisdsync|quota|userquota) + continue + ;; + uqnoenforce|grpquota|gqnoenforce) + continue + ;; + sunit=*|swidth=*) + continue + ;; + esac + ;; + + btrfs) + # tbd + continue + ;; + esac + + echo 
Option $o not supported for $OCF_RESKEY_fstype + ret=$OCF_ERR_ARGS + done + + return $ret +} + + +do_validate() +{ + verify_name || return $OCF_ERR_ARGS + verify_fstype || return $OCF_ERR_ARGS + verify_device || return $OCF_ERR_ARGS + verify_mountpoint || return $OCF_ERR_ARGS + verify_options || return $OCF_ERR_ARGS +} + + +do_pre_mount() +{ + declare fstype="$OCF_RESKEY_fstype" + + # + # Check to determine if we need to fsck the filesystem. + # + # Note: this code should not indicate in any manner suggested + # file systems to use in the cluster. Known filesystems are + # listed here for correct operation. + # + case "$fstype" in + reiserfs) typeset fsck_needed="" ;; + ext3) typeset fsck_needed="" ;; + ext4) typeset fsck_needed="" ;; + btrfs) typeset fsck_needed="" ;; + jfs) typeset fsck_needed="" ;; + xfs) typeset fsck_needed="" ;; + vxfs) typeset fsck_needed="" ;; + ext2) typeset fsck_needed=yes ;; + minix) typeset fsck_needed=yes ;; + vfat) typeset fsck_needed=yes ;; + msdos) typeset fsck_needed=yes ;; + "") typeset fsck_needed=yes ;; # assume fsck + *) + typeset fsck_needed=yes # assume fsck + ocf_log warn "\ +Unknown file system type '$fstype' for device $dev. Assuming fsck is required." + ;; + esac + + + # + # Fsck the device, if needed. + # + if [ -n "$fsck_needed" ] || [ "${OCF_RESKEY_force_fsck}" = "yes" ] ||\ + [ "${OCF_RESKEY_force_fsck}" = "1" ]; then + typeset fsck_log=@LOGDIR@/$(basename $dev).fsck.log + ocf_log debug "Running fsck on $dev" + fsck -p $dev >> $fsck_log 2>&1 + ret_val=$? 
+ if [ $ret_val -gt 1 ]; then + ocf_log err "\ +'fsck -p $dev' failed, error=$ret_val; check $fsck_log for errors" + ocf_log debug "Invalidating buffers for $dev" + $INVALIDATEBUFFERS -f $dev + return $OCF_ERR_GENERIC + fi + rm -f $fsck_log + fi + + return 0 +} + +do_post_mount() { + # + # Create this for the NFS NLM broadcast bit + # + if [ $NFS_TRICKS -eq 0 ]; then + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + mkdir -p "$mp"/.clumanager/statd + chown rpcuser.rpcuser "$mp"/.clumanager/statd + notify_list_merge "$mp"/.clumanager/statd + fi + fi + + return 0 +} + + +do_force_unmount() { + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + ocf_log warning "Dropping node-wide NFS locks" + pkill -KILL -x lockd + mkdir -p "$mp"/.clumanager/statd + chown rpcuser.rpcuser "$mp"/.clumanager/statd + # Copy out the notify list; our + # IPs are already torn down + notify_list_store "$mp"/.clumanager/statd + + # Save for post-umount phase + export nfslock_reclaim=1 + fi + + if [ "$OCF_RESKEY_nfsrestart" = "yes" ] || \ + [ "$OCF_RESKEY_nfsrestart" = "1" ]; then + ocf_log warning "Restarting nfsd/nfslock" + nfsexports=$(cat /var/lib/nfs/etab) + service nfslock stop + service nfs stop + service nfs start + service nfslock start + echo "$nfsexports" | { while read line; do + nfsexp=$(echo $line | awk '{print $1}') + nfsopts=$(echo $line | sed -e 's#.*(##g' -e 's#).*##g') + nfsacl=$(echo $line | awk '{print $2}' | sed -e 's#(.*##g') + if [ -n "$nfsopts" ]; then + exportfs -i -o "$nfsopts" "$nfsacl":$nfsexp + else + exportfs -i "$nfsacl":$nfsexp + fi + done; } + fi + + # Proceed with fuser -kvm... 
+ return 1 +} + + +do_post_unmount() { + if [ "$nfslock_reclaim" = "1" ]; then + # If we have this flag set, do a full reclaim broadcast + notify_list_broadcast "$mp"/.clumanager/statd + fi + + return 0 +} + +main $* diff --git a/rgmanager/src/resources/ip.sh.in b/rgmanager/src/resources/ip.sh.in new file mode 100644 index 0000000..750708f --- /dev/null +++ b/rgmanager/src/resources/ip.sh.in @@ -0,0 +1,1052 @@ +#!@BASH_SHELL@ + +# +# IPv4/IPv6 address management using iproute2 (formerly: ifcfg, ifconfig). +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +SENDUA=/usr/libexec/heartbeat/send_ua + +# Grab nfs lock tricks if available +export NFS_TRICKS=1 +if [ -f "$(dirname $0)/svclib_nfslock" ]; then + . $(dirname $0)/svclib_nfslock + NFS_TRICKS=0 +fi + +. $(dirname $0)/ocf-shellfuncs + + +meta_data() +{ + cat <<EOT +<?xml version="1.0" ?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent version="rgmanager 2.0" name="ip"> + <version>1.0</version> + + <longdesc lang="en"> + This is an IP address. 
Both IPv4 and IPv6 addresses are supported, + as well as NIC link monitoring for each IP address. + </longdesc> + <shortdesc lang="en"> + This is an IP address. + </shortdesc> + + <parameters> + <parameter name="address" unique="1" primary="1"> + <longdesc lang="en"> + IPv4 or IPv6 address to use as a virtual IP + resource. It may be followed by a slash and a decimal + number that encodes the network prefix length. + </longdesc> + + <shortdesc lang="en"> + IP Address + </shortdesc> + + <content type="string"/> + </parameter> + + <parameter name="family"> + <longdesc lang="en"> + IPv4 or IPv6 address protocol family. + </longdesc> + + <shortdesc lang="en"> + Family + </shortdesc> + + <!-- + <val>auto</val> + <val>inet</val> + <val>inet6</val> + --> + <content type="string"/> + </parameter> + + <parameter name="monitor_link"> + <longdesc lang="en"> + Enabling this causes the status check to fail if + the link on the NIC to which this IP address is + bound is not present. + </longdesc> + <shortdesc lang="en"> + Monitor NIC Link + </shortdesc> + <content type="boolean" default="1"/> + </parameter> + + <parameter name="nfslock" inherit="service%nfslock"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + try to kill lockd and issue reclaims across all remaining + network interface cards. + </longdesc> + <shortdesc lang="en"> + Enable NFS lock workarounds + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="sleeptime"> + <longdesc lang="en"> + Amount of time to sleep after removing an IP address. + Value is specified in seconds. Default value is 10. + </longdesc> + <shortdesc lang="en"> + Amount of time (seconds) to sleep. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="disable_rdisc"> + <longdesc lang="en"> + Disable updating of routing using RDISC protocol and + preserve static routes. 
+ </longdesc> + <shortdesc lang="en"> + Disable updating of routing using RDISC protocol + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="prefer_interface"> + <longdesc lang="en"> + The network interface to which the IP address should be added. The interface must already be configured and active. This parameter should be used only when at least two active interfaces have IP addresses on the same subnet and it is desired to have the IP address added to a particular interface. + </longdesc> + <shortdesc lang="en"> + Network interface + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="20"/> + <action name="stop" timeout="20"/> + <!-- No recover action. If the IP address is not useable, then + resources may or may not depend on it. If it's been + deconfigured, resources using it are in a bad state. --> + + <!-- Checks to see if the IP is up and (optionally) the link is + working --> + <action name="status" interval="20" timeout="10"/> + <action name="monitor" interval="20" timeout="10"/> + + <!-- Checks to see if we can ping the IP address locally --> + <action name="status" depth="10" interval="60" timeout="20"/> + <action name="monitor" depth="10" interval="60" timeout="20"/> + + <action name="meta-data" timeout="20"/> + <action name="validate-all" timeout="20"/> + </actions> + + <special tag="rgmanager"> + <attributes maxinstances="1"/> + <child type="nfsclient" forbid="1"/> + <child type="nfsexport" forbid="1"/> + </special> +</resource-agent> +EOT +} + + +verify_address() +{ + # XXX TBD + return 0 +} + + +verify_all() +{ + # XXX TBD + return 0 +} + + +# +# Expand an IPv6 address. 
#
# Usage: ipv6_expand <addr>[/<maskbits>]
# Prints the address as eight zero-padded, colon-separated quads; a
# "/<maskbits>" suffix is passed through unchanged.  Always returns 0.
#
ipv6_expand()
{
	typeset addr=$1
	typeset maskbits
	typeset -i x len
	# Everything below is local on purpose: "naddr" used to be a global,
	# so a second call in the same shell appended to the previous result.
	typeset tempaddr="" naddr="" zeroes="" quad
	typeset -a quads

	maskbits=${addr/*\//}
	if [ "$maskbits" = "$addr" ]; then
		maskbits=""
	else
		# chop off mask bits
		addr=${addr/\/*/}
	fi

	# grab each hex quad and expand it to 4 digits if it isn't already
	# leave doublecolon in place for expansion out to the proper number of zeros later
	IFS=: read -r -a quads <<< "$addr"
	for (( x = 0; x < 8; x++ )); do
		quad=${quads[x]}
		case ${#quad} in
		0) quad=:: ;;
		1) quad=000$quad ;;
		2) quad=00$quad ;;
		3) quad=0$quad ;;
		esac
		tempaddr=$tempaddr$quad
	done
	addr=$tempaddr

	# use space as placeholder
	addr=${addr/::/\ }

	# get rid of colons
	addr=${addr//:/}

	# add in zeroes where the doublecolon was
	# (length minus the one-character placeholder)
	len=$((${#addr}-1))
	while [ $len -lt 32 ]; do
		zeroes="0$zeroes"
		len=$((len + 1))
	done
	addr=${addr/\ /$zeroes}

	# re-insert a colon after every fourth digit
	for (( x = 0; x < ${#addr}; x++ )); do
		naddr=$naddr${addr:x:1}

		if (( x < (${#addr} - 1) && x%4 == 3 )); then
			naddr=$naddr:
		fi
	done

	if [ -n "$maskbits" ]; then
		echo "$naddr/$maskbits"
		return 0
	fi

	echo "$naddr"
	return 0
}


#
# see if two ipv6 addrs are in the same subnet
#
# Usage: ipv6_same_subnet addr1[/mask] addr2[/mask] [mask]
# The prefix length is the third argument if given, else the /mask of
# addr1.  Returns 0 if both addresses share that prefix, 1 if they do
# not (or no usable prefix length is available, or addr2 carries a
# different mask), 255 on a usage error.
#
ipv6_same_subnet()
{
	declare addrl=$1
	declare addrr=$2
	declare m=$3
	declare hexl hexr
	declare -i full r bits llsb rlsb

	if [ $# -lt 2 ]; then
		ocf_log err "usage: ipv6_same_subnet addr1 addr2 [mask]"
		return 255
	fi

	if [ -z "$m" ]; then
		# No "/mask" on addr1 means no prefix length at all.  The old
		# code took the whole address as the mask and then fed it to
		# arithmetic.
		[ "$addrl" != "${addrl#*/}" ] || return 1
		m=${addrl##*/}
	fi

	case $m in
	''|*[!0-9]*)
		return 1
		;;
	esac
	m=$((10#$m))
	[ "$m" -le 128 ] || return 1

	if [ "${addrr}" != "${addrr##*/}" ] &&
	   [ "$m" != "${addrr##*/}" ]; then
		return 1
	fi

	addrl=${addrl%%/*}
	if [ ${#addrl} -lt 39 ]; then
		addrl=$(ipv6_expand "$addrl")
	fi

	addrr=${addrr%%/*}
	if [ ${#addrr} -lt 39 ]; then
		addrr=$(ipv6_expand "$addrr")
	fi

	# Compare on the bare 32 hex digits.  The previous index arithmetic
	# on the colon-separated form dropped one digit for prefixes such as
	# /56 or /62.
	hexl=$(echo "${addrl//:/}" | tr 'A-F' 'a-f')
	hexr=$(echo "${addrr//:/}" | tr 'A-F' 'a-f')

	# whole hex digits covered by the prefix ...
	full=$((m / 4))
	# ... and the remaining number of bits
	r=$((m % 4))

	# direct (string comparison) to see if they are equal
	if [ "${hexl:0:$full}" != "${hexr:0:$full}" ]; then
		return 1
	fi

	[ $r -eq 0 ] && return 0

	# top r bits of the next hex digit: 1 -> 8, 2 -> 12, 3 -> 14
	bits=$(( (0xF << (4 - r)) & 0xF ))
	llsb=$((16#${hexl:$full:1}))
	rlsb=$((16#${hexr:$full:1}))

	[ $((llsb & bits)) -eq $((rlsb & bits)) ]
	return $?
}


#
# see if two ipv4 addrs are in the same subnet
#
# Usage: ipv4_same_subnet current_addr[/mask] new_addr[/mask] [maskbits]
# Same contract as ipv6_same_subnet, for dotted-quad addresses.
#
ipv4_same_subnet()
{
	declare addrl=$1
	declare addrr=$2
	declare m=$3
	declare quad
	declare -i x bits
	# local arrays: these used to be globals that were never reset
	declare -a ip1 ip2

	if [ $# -lt 2 ]; then
		ocf_log err "usage: ipv4_same_subnet current_addr new_addr [maskbits]"
		return 255
	fi


	#
	# Chop the netmask off of the ipaddr:
	# e.g. 1.2.3.4/22 -> 22
	#
	if [ -z "$m" ]; then
		# no "/mask" present -> nothing to compare against
		[ "$addrl" != "${addrl#*/}" ] || return 1
		m=${addrl##*/}
	fi

	case $m in
	''|*[!0-9]*)
		return 1
		;;
	esac
	m=$((10#$m))
	[ "$m" -le 32 ] || return 1

	#
	# Check to see if there was a subnet mask provided on the
	# new IP address.  If there was one and it does not match
	# our expected subnet mask, we are done.
	#
	if [ "${addrr}" != "${addrr%%/*}" ] &&
	   [ "$m" != "${addrr##*/}" ]; then
		return 1
	fi

	#
	# Chop off subnet bits for good.
	#
	addrl=${addrl%%/*}
	addrr=${addrr%%/*}

	#
	# Remove '.' characters from dotted decimal notation and save
	# in arrays. i.e.
	#
	#	192.168.1.163 -> array[0] = 192
	#	                 array[1] = 168
	#	                 array[2] = 1
	#	                 array[3] = 163
	#
	x=0
	for quad in ${addrl//./\ }; do
		ip1[x]=$quad
		x=$((x + 1))
	done

	x=0
	for quad in ${addrr//./\ }; do
		ip2[x]=$quad
		x=$((x + 1))
	done

	# whole octets first
	x=0
	while [ "$m" -ge 8 ]; do
		m=$((m - 8))
		[ "${ip1[x]}" -eq "${ip2[x]}" ] 2>/dev/null || return 1
		x=$((x + 1))
	done

	[ "$m" -eq 0 ] && return 0

	# top m bits of the next octet: 1 -> 128, 2 -> 192, ... 7 -> 254
	bits=$(( (0xFF << (8 - m)) & 0xFF ))
	[ $((${ip1[x]:-0} & bits)) -eq $((${ip2[x]:-0} & bits)) ]
	return $?
}


#
# Print "<dev> <expanded-ipv6-addr> <maskbits>" for every inet6 address
# reported by "ip -o -f inet6 addr".  A device is skipped unless isSlave
# returns 2 (not a slave).
#
ipv6_list_interfaces()
{
	declare idx dev ifaddr
	declare ifaddr_exp

	while read -r idx dev ifaddr; do

		isSlave "$dev"
		if [ $? -ne 2 ]; then
			continue
		fi

		ifaddr_exp=$(ipv6_expand "$ifaddr")

		echo $dev ${ifaddr_exp/\/*/} ${ifaddr_exp/*\//}

	done < <(/sbin/ip -o -f inet6 addr | awk '{print $1,$2,$4}')

	return 0
}


#
# Usage: isSlave <I/F>
# Returns 0 if the interface's link flags contain SLAVE, 2 if they do
# not, OCF_ERR_ARGS without an argument and OCF_ERR_GENERIC if
# "ip link" fails for the interface.
#
isSlave()
{
	declare intf=$1
	declare line

	if [ -z "$intf" ]; then
		ocf_log err "usage: isSlave <I/F>"
		return $OCF_ERR_ARGS
	fi

	line=$(/sbin/ip link list dev "$intf")
	if [ $? -ne 0 ]; then
		ocf_log err "$intf not found"
		return $OCF_ERR_GENERIC
	fi

	if [ "$line" = "${line/<*SLAVE*>/}" ]; then
		return 2
	fi

	# Yes, it is a slave device.  Ignore.
	return 0
}


#
# Check if interface is in UP state
#
# Returns 0 if "ip -o link show up" lists the interface, 2 if it
# prints nothing, 1 on a usage error.
#
interface_up()
{
	declare intf=$1
	declare line

	if [ -z "$intf" ]; then
		ocf_log err "usage: interface_up <I/F>"
		return 1
	fi

	line=$(/sbin/ip -o link show up dev "$intf" 2> /dev/null)
	[ -z "$line" ] && return 2

	return 0
}


#
# Usage: ethernet_link_up <I/F>
# Returns 0 if ethtool reports "Link detected: yes" or reports no link
# state at all, 1 for any other reported state.
#
ethernet_link_up()
{
	declare linkstate

	linkstate=$(ethtool "$1" | grep "Link detected:" |\
		    awk '{print $3}')

	# no "Link detected:" line in the output: treat the link as up
	[ -n "$linkstate" ] || return 0

	case $linkstate in
	yes)
		return 0
		;;
	*)
		return 1
		;;
	esac
}


#
# Checks the physical link status of an ethernet or bonded interface.
#
# Usage: network_link_up <intf>
#
# Returns 0 if ethernet_link_up reports link on <intf>, non-zero
# otherwise (1 on bad usage).  The outcome is logged either way.
#
network_link_up()
{
	declare intf_arg=$1
	declare link_up=1		# Assume link down

	if [ -z "$intf_arg" ]; then
		ocf_log err "usage: network_link_up <intf>"
		return 1
	fi

	ethernet_link_up "$intf_arg"
	link_up=$?

	if [ $link_up -eq 0 ]; then
		ocf_log debug "Link for $intf_arg: Detected"
	else
		ocf_log warn "Link for $intf_arg: Not detected"
	fi

	return $link_up
}


#
# Print "<dev> <addr> <maskbits>" for every IPv4 address that /sbin/ip
# reports on a device for which isSlave returns 2.
#
ipv4_list_interfaces()
{
	declare idx dev ifaddr

	while read idx dev ifaddr; do

		# isSlave returns 2 only when the SLAVE flag is absent;
		# anything else (slave device, lookup error) is skipped
		isSlave "$dev"
		if [ $? -ne 2 ]; then
			continue
		fi

		echo $dev ${ifaddr/\/*/} ${ifaddr/*\//}

	done < <(/sbin/ip -o -f inet addr | awk '{print $1,$2,$4}')

	return 0
}


#
# Add an IP address to our interface or remove it.
#
# Usage: ipv6 <add|del> <address>[/<maskbits>]
#
# add: the address goes onto the first suitable device - one that
#      matches OCF_RESKEY_prefer_interface (when set), already carries
#      an address in the same subnet, is up and (with monitor_link)
#      has link.
# del: the address is removed from the device that carries it.
#
# Returns 0 on success, 1 if no device qualified or /sbin/ip failed.
#
ipv6()
{
	declare dev ifaddr_exp maskbits
	declare addr=$2
	declare addr_exp=$(ipv6_expand $addr)
	declare pattern ipstatus ua_args file i

	while read dev ifaddr_exp maskbits; do
		if [ -z "$dev" ]; then
			continue
		fi

		if [ "$1" = "add" ]; then
			if [ -n "$OCF_RESKEY_prefer_interface" ] && \
			   [ "$OCF_RESKEY_prefer_interface" != "$dev" ]; then
				continue
			fi
			ipv6_same_subnet $ifaddr_exp/$maskbits $addr_exp || continue
			interface_up $dev || continue
			if [ "$OCF_RESKEY_monitor_link" = "yes" ]; then
				network_link_up $dev || continue
			fi

			# Inherit the prefix length of the matching address
			# when the caller did not give one.
			if [ "${addr/\/*/}" = "${addr}" ]; then
				addr="$addr/$maskbits"
			fi
			ocf_log info "Adding IPv6 address $addr to $dev"
		fi
		if [ "$1" = "del" ]; then
			if [ "${addr_exp/\/*/}" != "$ifaddr_exp" ]; then
				continue
			fi

			# Look the address up the way the kernel prints it.
			# Match it as a fixed string that starts at a field
			# boundary, and only on this device, so that an
			# address which merely contains ours is not picked.
			pattern=" $addr"
			if [ "${addr/\/*/}" = "${addr}" ]; then
				pattern="$pattern/"
			fi
			addr=$(/sbin/ip -f inet6 addr list dev $dev | \
				grep -F -- "$pattern" | head -n 1 | awk '{print $2}')
			ocf_log info "Removing IPv6 address $addr from $dev"
		fi

		/sbin/ip -f inet6 addr $1 dev $dev $addr || return 1

		# Duplicate Address Detection [DAD]
		# Kernel will flag the IP as 'tentative' until it ensured that
		# there is no duplicates.
		# if there is, it will flag it as 'dadfailed'
		if [ "$1" = "add" ]; then
			for i in {1..10}; do
				ipstatus=$(/sbin/ip -o -f inet6 addr show dev $dev to $addr)
				if [[ $ipstatus == *dadfailed* ]]; then
					ocf_log err "IPv6 address collision ${addr%%/*} [DAD]"
					if ! /sbin/ip -f inet6 addr del dev $dev $addr; then
						ocf_log err "Could not delete IPv6 address"
					fi
					return 1
				elif [[ $ipstatus != *tentative* ]]; then
					break
				elif [[ $i -eq 10 ]]; then
					ocf_log warn "IPv6 address : DAD is still in tentative"
				fi
				sleep 0.5
			done
			# Now the address should be useable
			# Try to send Unsolicited Neighbor Advertisements if send_ua is available
			# (quoted: an unset SENDUA must not pass the -x test)
			if [ -x "$SENDUA" ]; then
				ua_args="-i 200 -c 5 ${addr%%/*} $maskbits $dev"
				ocf_log info "$SENDUA $ua_args"
				$SENDUA $ua_args || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements."
			fi
		fi

		#
		# NDP should take care of figuring out our new address.  Plus,
		# we do not have something (like arping) to do this for ipv6
		# anyway.
		#
		# RFC 2461, section 7.2.6 states thusly:
		#
		# Note that because unsolicited Neighbor Advertisements do not
		# reliably update caches in all nodes (the advertisements might
		# not be received by all nodes), they should only be viewed as
		# a performance optimization to quickly update the caches in
		# most neighbors.
		#

		# Not sure if this is necessary for ipv6 either.
		file=$(which rdisc 2>/dev/null)
		if [ -f "$file" ]; then
			if [ "$OCF_RESKEY_disable_rdisc" != "yes" ] && \
			   [ "$OCF_RESKEY_disable_rdisc" != "1" ]; then
				killall -HUP rdisc || rdisc -fs
			fi
		fi

		return 0
	done < <(ipv6_list_interfaces)

	return 1
}


#
# Add an IP address to our interface or remove it.
+# +ipv4() +{ + declare dev ifaddr maskbits + declare addr=$2 + + while read dev ifaddr maskbits; do + if [ -z "$dev" ]; then + continue + fi + + if [ "$1" = "add" ]; then + if [ -n "$OCF_RESKEY_prefer_interface" ] && \ + [ "$OCF_RESKEY_prefer_interface" != $dev ]; then + continue + fi + ipv4_same_subnet $ifaddr/$maskbits $addr + if [ $? -ne 0 ]; then + continue + fi + interface_up $dev + if [ $? -ne 0 ]; then + continue + fi + if [ "$OCF_RESKEY_monitor_link" = "yes" ]; then + network_link_up $dev + if [ $? -ne 0 ]; then + continue + fi + fi + + if [ "${addr/\/*/}" = "${addr}" ]; then + addr="$addr/$maskbits" + fi + ocf_log info "Adding IPv4 address $addr to $dev" + fi + if [ "$1" = "del" ]; then + if [ "${addr/\/*/}" != "$ifaddr" ]; then + continue + fi + addr=`/sbin/ip addr list | grep "$ifaddr/" | head -n 1 | awk '{print $2}'` + ocf_log info "Removing IPv4 address $addr from $dev" + fi + + if [ "$1" = "add" ]; then + ocf_log debug "Pinging addr ${addr%%/*} from dev $dev" + if ping_check inet ${addr%%/*} $dev; then + ocf_log err "IPv4 address collision ${addr%%/*}" + return 1 + fi + fi + /sbin/ip -f inet addr $1 dev $dev $addr + [ $? -ne 0 ] && return 1 + + # + # XXX: Following needed? ifconfig:YES, ifcfg:NO, iproute2:??? 
+ # + if [ "$1" = "add" ]; then + # do that freak arp thing + + hwaddr=$(/sbin/ip -o link show $dev) + hwaddr=${hwaddr/*link\/ether\ /} + hwaddr=${hwaddr/\ \*/} + + addr=${addr/\/*/} + ocf_log debug "Sending gratuitous ARP: $addr $hwaddr" + arping -q -c 2 -U -I $dev $addr + fi + + file=$(which rdisc 2>/dev/null) + if [ -f "$file" ]; then + if [ "$OCF_RESKEY_disable_rdisc" != "yes" ] && \ + [ "$OCF_RESKEY_disable_rdisc" != "1" ]; then + killall -HUP rdisc || rdisc -fs + fi + fi + + return 0 + done < <(ipv4_list_interfaces) + + return 1 +} + + +# +# Usage: +# ping_check <family> <address> [interface] +# +ping_check() +{ + declare ops="-c 1 -w 2" + declare pingcmd="" + + if [ "$1" = "inet6" ]; then + pingcmd="ping6" + else + pingcmd="ping" + fi + + if [ -n "$3" ]; then + ops="$ops -I $3" + fi + + return $($pingcmd $ops $2 &> /dev/null) +} + + +# +# Usage: +# check_interface_up <family> <address> +# +check_interface_up() +{ + declare dev + declare addr=${2/\/*/} + declare currentAddr caExpanded + + if [ "$1" == "inet6" ]; then + addrExpanded=$(ipv6_expand $addr) + for currentAddr in `/sbin/ip -f $1 -o addr|awk '{print $4}'`; do + caExpanded=$(ipv6_expand $currentAddr) + caExpanded=${caExpanded/\/*/} + if [ "$addrExpanded" == "$caExpanded" ]; then + dev=$(/sbin/ip -f $1 -o addr | grep " ${currentAddr/\/*/}" | awk '{print $2}') + break + fi + done + else + dev=$(/sbin/ip -f $1 -o addr | grep " $addr/" | awk '{print $2}') + fi + + if [ -z "$dev" ]; then + return 1 + fi + + interface_up $dev + return $? 
+} + + +# +# Usage: +# address_configured <family> <address> +# +address_configured() +{ + declare line + declare addr + declare currentAddr caExpanded + + # Chop off mask bits + addr=${2/\/*/} + + if [ "$1" == "inet6" ]; then + addrExpanded=$(ipv6_expand $addr) + for currentAddr in `/sbin/ip -f $1 -o addr|awk '{print $4}'`; do + caExpanded=$(ipv6_expand $currentAddr) + caExpanded=${caExpanded/\/*/} + if [ "$addrExpanded" == "$caExpanded" ]; then + line=$(/sbin/ip -f $1 -o addr | grep " ${currentAddr/\/*/}"); + break + fi + done + else + line=$(/sbin/ip -f $1 -o addr | grep " $addr/") + fi + + if [ -z "$line" ]; then + return 1 + fi + return 0 +} + + +# +# Usage: +# ip_op <family> <operation> <address> [quiet] +# +ip_op() +{ + declare dev + declare rtr + declare addr=${3/\/*/} + declare caExpanded currentAddr + + if [ "$2" = "status" ]; then + + ocf_log debug "Checking $3, Level $OCF_CHECK_LEVEL" + + if [ "$1" == "inet6" ]; then + addrExpanded=$(ipv6_expand $addr) + for currentAddr in `/sbin/ip -f $1 -o addr|awk '{print $4}'`; do + caExpanded=$(ipv6_expand $currentAddr) + caExpanded=${caExpanded/\/*/} + if [ "$addrExpanded" == "$caExpanded" ]; then + dev=$(/sbin/ip -f $1 -o addr | grep " ${currentAddr/\/*/}" | awk '{print $2}') + break + fi + done + else + dev=$(/sbin/ip -f $1 -o addr | grep " $addr/" | awk '{print $2}') + fi + + if [ -z "$dev" ]; then + ocf_log warn "$3 is not configured" + return 1 + fi + ocf_log debug "$3 present on $dev" + + if [ "$OCF_RESKEY_monitor_link" = "yes" ]; then + if ! network_link_up $dev; then + ocf_log warn "No link on $dev..." + return 1 + fi + ocf_log debug "Link detected on $dev" + fi + + [ $OCF_CHECK_LEVEL -lt 10 ] && return 0 + if ! ping_check $1 $addr $dev; then + ocf_log warn "Failed to ping $addr" + return 1 + fi + ocf_log debug "Local ping to $addr succeeded" + + return 0 + fi + + case $1 in + inet) + ipv4 $2 $3 + return $? 
+ ;; + inet6) + if [ "$2" = "del" ]; then + addrExpanded=$(ipv6_expand $addr) + for currentAddr in `/sbin/ip -f $1 -o addr|awk '{print $4}'`; do + caExpanded=$(ipv6_expand $currentAddr) + caExpanded=${caExpanded/\/*/} + if [ "$addrExpanded" == "$caExpanded" ]; then + addr6=$(/sbin/ip -f $1 -o addr | grep " ${currentAddr/\/*/}" | awk '{print $4}') + ipv6 $2 $addr6 + return $? + fi + done + fi + + ipv6 $2 $3 + return $? + ;; + esac + return 1 +} + + +case ${OCF_RESKEY_family} in +inet) + ;; +inet6) + ;; +*) + if [ "${OCF_RESKEY_address//:/}" != "${OCF_RESKEY_address}" ]; then + export OCF_RESKEY_family=inet6 + else + export OCF_RESKEY_family=inet + fi + ;; +esac + +# Force ipv6 addresses to lower case +if [ "$OCF_RESKEY_family" = "inet6" ]; then + OCF_RESKEY_address=$(echo $OCF_RESKEY_address | tr '[:upper:]' '[:lower:]') +fi + +if [ -z "$OCF_CHECK_LEVEL" ]; then + OCF_CHECK_LEVEL=0 +fi + +if [ "${OCF_RESKEY_monitor_link}" = "no" ] || + [ "${OCF_RESKEY_monitor_link}" = "0" ]; then + OCF_RESKEY_monitor_link="no" +else + OCF_RESKEY_monitor_link="yes" +fi + +case $1 in +start) + if address_configured ${OCF_RESKEY_family} ${OCF_RESKEY_address}; then + ocf_log debug "${OCF_RESKEY_address} already configured" + exit 0 + fi + ip_op ${OCF_RESKEY_family} add ${OCF_RESKEY_address} + if [ $? -ne 0 ]; then + exit $OCF_ERR_GENERIC + fi + + if [ $NFS_TRICKS -eq 0 ]; then + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + notify_list_broadcast /var/lib/nfs/statd + fi + fi + + exit $? 
+ ;; +stop) + if address_configured ${OCF_RESKEY_family} ${OCF_RESKEY_address}; then + + ip_op ${OCF_RESKEY_family} del ${OCF_RESKEY_address} + + # Make sure it's down + if address_configured ${OCF_RESKEY_family} ${OCF_RESKEY_address}; then + ocf_log err "Failed to remove ${OCF_RESKEY_address}" + exit 1 + fi + + # XXX Let nfsd/lockd clear their queues; we hope to have a + # way to enforce this in the future + if [ -z "$OCF_RESKEY_sleeptime" ]; then + sleep 10 + else + if [ "$OCF_RESKEY_sleeptime" -gt "0" ]; then + sleep $OCF_RESKEY_sleeptime + fi + fi + else + ocf_log debug "${OCF_RESKEY_address} is not configured" + fi + exit 0 + ;; +status|monitor) + ip_op ${OCF_RESKEY_family} status ${OCF_RESKEY_address} + [ $? -ne 0 ] && exit $OCF_NOT_RUNNING + + check_interface_up ${OCF_RESKEY_family} ${OCF_RESKEY_address} + exit $? + ;; +restart) + $0 stop || exit $OCF_ERR_GENERIC + $0 start || exit $OCF_ERR_GENERIC + exit 0 + ;; +meta-data) + meta_data + exit 0 + ;; +validate-all|verify_all) + verify_all + exit $? + ;; +*) + echo "usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + + diff --git a/rgmanager/src/resources/lvm.metadata b/rgmanager/src/resources/lvm.metadata new file mode 100644 index 0000000..3759063 --- /dev/null +++ b/rgmanager/src/resources/lvm.metadata @@ -0,0 +1,86 @@ +<?xml version="1.0" ?> +<resource-agent name="lvm" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + This defines a LVM volume group that is ... 
+ </longdesc> + + <shortdesc lang="en"> + LVM Failover script + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Unique name for this LVM resource + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="vg_name" required="1"> + <longdesc lang="en"> + Name of the volume group being managed + </longdesc> + <shortdesc lang="en"> + Volume group name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="lv_name"> + <longdesc lang="en"> + Name of the logical volume being managed. This + parameter is optional if there is more than one + logical volume in the volume group to be managed. + </longdesc> + <shortdesc lang="en"> + Logical Volume name (optional). + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="self_fence"> + <longdesc lang="en"> + If set and the clean up of the tags fails, the node will + immediately reboot. + </longdesc> + <shortdesc lang="en"> + Fence the node if it is not able to clean up LVM tags + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="nfslock" inherit="service%nfslock"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + try to kill lockd and issue reclaims across all remaining + network interface cards. 
+ </longdesc> + <shortdesc lang="en"> + Enable NFS lock workarounds + </shortdesc> + <content type="boolean"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="5"/> + <action name="stop" timeout="5"/> + + <action name="status" timeout="5" interval="1h"/> + <action name="monitor" timeout="5" interval="1h"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="30"/> + </actions> + + <special tag="rgmanager"> + <attributes maxinstances="1"/> + </special> + +</resource-agent> diff --git a/rgmanager/src/resources/lvm.sh.in b/rgmanager/src/resources/lvm.sh.in new file mode 100644 index 0000000..a3a6c30 --- /dev/null +++ b/rgmanager/src/resources/lvm.sh.in @@ -0,0 +1,180 @@ +#!@BASH_SHELL@ + +# +# LVM Failover Script. +# NOTE: Changes to /etc/lvm/lvm.conf are required for proper operation. +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/member_util.sh +. $(dirname $0)/lvm_by_lv.sh +. 
$(dirname $0)/lvm_by_vg.sh + +rv=0 + +################################################################################ +# ha_lvm_proper_setup_check +# +################################################################################ +function ha_lvm_proper_setup_check +{ + ## + # Does the Volume Group exist? + # 1) User may have forgotten to create it + # 2) User may have misspelled it in the config file + ## + if ! vgs $OCF_RESKEY_vg_name --config 'global{locking_type=0}'>& /dev/null; then + ocf_log err "HA LVM: Unable to get volume group attributes for $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + ## + # Are we using the "tagging" or "CLVM" variant? + # The CLVM variant will have the cluster attribute set + ## + if [[ "$(vgs -o attr --noheadings --config 'global{locking_type=0}' $OCF_RESKEY_vg_name 2>/dev/null)" =~ .....c ]]; then + # Is clvmd running? + if ! ps -C clvmd >& /dev/null; then + ocf_log err "HA LVM: $OCF_RESKEY_vg_name has the cluster attribute set, but 'clvmd' is not running" + return $OCF_ERR_GENERIC + fi + return $OCF_SUCCESS + fi + + ## + # The "tagging" variant is being used if we have gotten this far. + ## + + ## + # The default for lvm.conf:activation/volume_list is empty, + # this must be changed for HA LVM. + ## + if ! lvm dumpconfig activation/volume_list >& /dev/null; then + ocf_log err "HA LVM: Improper setup detected" + ocf_log err "* \"volume_list\" not specified in lvm.conf." + return $OCF_ERR_GENERIC + fi + + ## + # Machine's cluster node name must be present as + # a tag in lvm.conf:activation/volume_list + ## + if ! 
lvm dumpconfig activation/volume_list | grep $(local_node_name); then + ocf_log err "HA LVM: Improper setup detected" + ocf_log err "* @$(local_node_name) missing from \"volume_list\" in lvm.conf" + return $OCF_ERR_GENERIC + fi + + ## + # The volume group to be failed over must NOT be in + # lvm.conf:activation/volume_list; otherwise, machines + # will be able to activate the VG regardless of the tags + ## + if lvm dumpconfig activation/volume_list | grep "\"$OCF_RESKEY_vg_name\""; then + ocf_log err "HA LVM: Improper setup detected" + ocf_log err "* $OCF_RESKEY_vg_name found in \"volume_list\" in lvm.conf" + return $OCF_ERR_GENERIC + fi + + ## + # Next, we need to ensure that their initrd has been updated + # If not, the machine could boot and activate the VG outside + # the control of rgmanager + ## + # Fixme: we might be able to perform a better check... + if [ "$(find /boot -name *.img -newer /etc/lvm/lvm.conf)" == "" ]; then + ocf_log err "HA LVM: Improper setup detected" + ocf_log err "* initrd image needs to be newer than lvm.conf" + + # While dangerous if not done the first time, there are many + # cases where we don't simply want to fail here. Instead, + # keep warning until the user remakes the initrd - or has + # it done for them by upgrading the kernel. + #return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +################################################################################ +# MAIN +################################################################################ + +case $1 in +start) + ha_lvm_proper_setup_check || exit 1 + + if [ -z "$OCF_RESKEY_lv_name" ]; then + vg_start || exit 1 + else + lv_start || exit 1 + fi + ;; + +status|monitor) + ocf_log notice "Getting status" + + if [ -z "$OCF_RESKEY_lv_name" ]; then + vg_status + exit $? + else + lv_status + exit $? 
+ fi + ;; + +stop) + ha_lvm_proper_setup_check + + if [ -z "$OCF_RESKEY_lv_name" ]; then + vg_stop || exit 1 + else + lv_stop || exit 1 + fi + ;; + +recover|restart) + $0 stop || exit $OCF_ERR_GENERIC + $0 start || exit $OCF_ERR_GENERIC + ;; + +meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + ;; + +validate-all|verify-all) + if [ -z "$OCF_RESKEY_lv_name" ]; then + vg_verify || exit 1 + else + lv_verify || exit 1 + fi + ;; +*) + echo "usage: $0 {start|status|monitor|stop|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $rv diff --git a/rgmanager/src/resources/lvm_by_lv.sh.in b/rgmanager/src/resources/lvm_by_lv.sh.in new file mode 100644 index 0000000..78befd3 --- /dev/null +++ b/rgmanager/src/resources/lvm_by_lv.sh.in @@ -0,0 +1,534 @@ +#!@BASH_SHELL@ + +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +# lv_verify +# +# Verify the parameters passed in +# +lv_verify() +{ + # Anything to verify? Perhaps the names? 
+ return $OCF_SUCCESS +} + +# lv_owner +# +# Returns: +# 1 == We are the owner +# 2 == We can claim it +# 0 == Owned by someone else +function lv_owner +{ + local my_name=$1 + local owner=$2 + + if [ -z "$my_name" ]; then + ocf_log err "Unable to determine cluster node name" + return 0 + fi + + if [ -z "$owner" ]; then + # No-one owns this LV yet, so we can claim it + return 2 + fi + + if [ $owner != $my_name ]; then + if is_node_member_clustat $owner ; then + return 0 + fi + return 2 + fi + + return 1 +} + +steal_tag() +{ + local owner=$1 + local lv_path=$2 + + ocf_log notice "Owner of $lv_path is not in the cluster" + ocf_log notice "Stealing $lv_path" + + lvchange --deltag $owner $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Failed to steal $lv_path from $owner" + return $OCF_ERR_GENERIC + fi + + # Warning --deltag doesn't always result in failure + if [ ! -z `lvs -o tags --noheadings $lv_path` ]; then + ocf_log err "Failed to steal $lv_path from $owner." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +restore_transient_failed_pvs() +{ + local a=0 + local -a results + + results=(`pvs -o name,vg_name,attr --noheadings | grep $OCF_RESKEY_vg_name | grep -v 'unknown device'`) + while [ ! -z "${results[$a]}" ] ; do + if [[ ${results[$(($a + 2))]} =~ ..m ]] && + [ $OCF_RESKEY_vg_name == ${results[$(($a + 1))]} ]; then + ocf_log notice "Attempting to restore missing PV, ${results[$a]} in $OCF_RESKEY_vg_name" + vgextend --restoremissing $OCF_RESKEY_vg_name ${results[$a]} + if [ $? -ne 0 ]; then + ocf_log notice "Failed to restore ${results[$a]}" + else + ocf_log notice " ${results[$a]} restored" + fi + fi + a=$(($a + 3)) + done +} + +# lv_exec_resilient +# +# Sometimes, devices can come back. Their metadata will conflict +# with the good devices that remain. 
This function filters out those +# failed devices when executing the given command +# +# Finishing with vgscan resets the cache/filter +lv_exec_resilient() +{ + declare command=$1 + declare all_pvs + + ocf_log notice "Making resilient : $command" + + if [ -z "$command" ]; then + ocf_log err "lv_exec_resilient: Arguments not supplied" + return $OCF_ERR_ARGS + fi + + # pvs will print out only those devices that are valid + # If a device dies and comes back, it will not appear + # in pvs output (but you will get a Warning). + all_pvs=(`pvs --noheadings -o pv_name | grep -v Warning`) + + # Now we use those valid devices in a filter which we set up. + # The device will then be activated because there are no + # metadata conflicts. + command=$command" --config devices{filter=[" + for i in ${all_pvs[*]}; do + command=$command'"a|'$i'|",' + done + command=$command"\"r|.*|\"]}" + + ocf_log notice "Resilient command: $command" + if ! $command ; then + ocf_log err "lv_exec_resilient failed" + vgscan + return $OCF_ERR_GENERIC + else + vgscan + return $OCF_SUCCESS + fi +} + +# lv_activate_resilient +# +# Sometimes, devices can come back. Their metadata will conflict +# with the good devices that remain. We must filter out those +# failed devices when trying to reactivate +lv_activate_resilient() +{ + declare action=$1 + declare lv_path=$2 + declare op="-ay" + + if [ -z "$action" ] || [ -z "$lv_path" ]; then + ocf_log err "lv_activate_resilient: Arguments not supplied" + return $OCF_ERR_ARGS + fi + + if [ $action != "start" ]; then + op="-an" + elif [[ "$(lvs -o attr --noheadings $lv_path)" =~ r.......p ]] || + [[ "$(lvs -o attr --noheadings $lv_path)" =~ R.......p ]]; then + # We can activate partial RAID LVs and run just fine. + ocf_log notice "Attempting activation of partial RAID LV, $lv_path" + op="-ay --partial" + fi + + if ! 
lv_exec_resilient "lvchange $op $lv_path" ; then + ocf_log err "lv_activate_resilient $action failed on $lv_path" + return $OCF_ERR_GENERIC + else + return $OCF_SUCCESS + fi +} + +lv_status_clustered() +{ + declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + + # + # Check if device is active + # + if [[ ! "$(lvs -o attr --noheadings $lv_path)" =~ ....a. ]]; then + return $OCF_NOT_RUNNING + fi + + return $OCF_SUCCESS +} + +# lv_status +# +# Is the LV active? +lv_status_single() +{ + declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + declare dev="/dev/$lv_path" + declare realdev + declare owner + declare my_name + + # + # Check if device is active + # + if [[ ! "$(lvs -o attr --noheadings $lv_path)" =~ ....a. ]]; then + return $OCF_NOT_RUNNING + fi + + if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then + ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." + return $OCF_SUCCESS + fi + + # + # Check if all links/device nodes are present + # + if [ -h "$dev" ]; then + realdev=$(readlink -f $dev) + if [ $? -ne 0 ]; then + ocf_log err "Failed to follow link, $dev" + return $OCF_ERR_ARGS + fi + + if [ ! 
-b $realdev ]; then + ocf_log err "Device node for $lv_path is not present" + return $OCF_ERR_GENERIC + fi + else + ocf_log err "Symbolic link for $lv_path is not present" + return $OCF_ERR_GENERIC + fi + + # + # Verify that we are the correct owner + # + owner=`lvs -o tags --noheadings $lv_path | tr -d ' '` + my_name=$(local_node_name) + if [ -z "$my_name" ]; then + ocf_log err "Unable to determine local machine name" + + # FIXME: I don't really want to fail on 1st offense + return $OCF_SUCCESS + fi + + if [ -z "$owner" ] || [ "$my_name" != "$owner" ]; then + ocf_log err "WARNING: $lv_path should not be active" + ocf_log err "WARNING: $my_name does not own $lv_path" + ocf_log err "WARNING: Attempting shutdown of $lv_path" + + lv_activate_resilient "stop" $lv_path + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +function lv_status +{ + # We pass in the VG name to see of the logical volume is clustered + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + lv_status_clustered + else + lv_status_single + fi +} + +# lv_activate_and_tag +lv_activate_and_tag() +{ + declare action=$1 + declare tag=$2 + declare lv_path=$3 + typeset self_fence="" + + case ${OCF_RESKEY_self_fence} in + "yes") self_fence=1 ;; + 1) self_fence=1 ;; + *) self_fence="" ;; + esac + + if [ -z "$action" ] || [ -z "$tag" ] || [ -z "$lv_path" ]; then + ocf_log err "Supplied args: 1) $action, 2) $tag, 3) $lv_path" + return $OCF_ERR_ARGS + fi + + if [ "$action" == "start" ]; then + ocf_log notice "Activating $lv_path" + lvchange --addtag $tag $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Unable to add tag to $lv_path" + return $OCF_ERR_GENERIC + fi + + if ! lv_activate_resilient $action $lv_path; then + ocf_log err "Unable to activate $lv_path" + return $OCF_ERR_GENERIC + fi + else + ocf_log notice "Deactivating $lv_path" + if ! 
lv_activate_resilient $action $lv_path; then + if [ "$self_fence" ]; then + ocf_log err "Unable to deactivate $lv_path: REBOOTING" + sync + reboot -fn + else + ocf_log err "Unable to deactivate $lv_path" + fi + return $OCF_ERR_GENERIC + fi + + # Only try to remove tag if it is our tag + if [ "`lvs --noheadings -o lv_tags $lv_path | tr -d ' '`" == $tag ]; then + ocf_log notice "Removing ownership tag ($tag) from $lv_path" + lvchange --deltag $tag $lv_path + if [ $? -ne 0 ]; then + ocf_log err "Unable to delete tag from $lv_path" + + # Newer versions of LVM require the missing PVs to + # be removed from the VG via a separate call before + # the tag can be removed. + ocf_log err "Attempting volume group clean-up and retry" + vgreduce --removemissing --mirrorsonly --force $OCF_RESKEY_vg_name + + # Retry tag deletion + lvchange --deltag $tag $lv_path + if [ $? -ne 0 ]; then + if [ "$self_fence" ]; then + ocf_log err "Failed to delete tag from $lv_path: REBOOTING" + sync + reboot -fn + else + ocf_log err "Failed to delete tag from $lv_path" + fi + return $OCF_ERR_GENERIC + fi + fi + fi + fi + + return $OCF_SUCCESS +} + +# lv_activate +# $1: start/stop only +# +# Basically, if we want to [de]activate an LVM volume, +# we must own it. That means that our tag must be on it. +# This requires a change to /etc/lvm/lvm.conf: +# volume_list = [ "root_volume", "@my_hostname" ] +# where "root_volume" is your root volume group and +# "my_hostname" is $(local_node_name) +# +# If there is a node failure, we may wish to "steal" the +# LV. For that, we need to check if the node that owns +# it is still part of the cluster. We use the tag to +# determine who owns the volume then query for their +# liveness. If they are dead, we can steal. +lv_activate() +{ + declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + declare owner=`lvs -o tags --noheadings $lv_path | tr -d ' '` + declare my_name=$(local_node_name) + local owned + + lv_owner $my_name $owner + owned=$? 
+ if [ $owned -eq 0 ]; then + ocf_log info "Someone else owns this logical volume" + return $OCF_ERR_GENERIC + fi + + # If this is a partial VG, attempt to + # restore any transiently failed PVs + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ ...p ]]; then + ocf_log err "Volume group \"$OCF_RESKEY_vg_name\" has PVs marked as missing" + restore_transient_failed_pvs + fi + + if [ ! -z "$owner" ] && [ $owned -eq 2 ]; then + steal_tag $owner $lv_path + fi + + if ! lv_activate_and_tag $1 $my_name $lv_path; then + ocf_log err "Failed to $1 $lv_path" + + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" + + if vgreduce --removemissing --mirrorsonly --force --config \ + "activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \ + $OCF_RESKEY_vg_name; then + ocf_log notice "$OCF_RESKEY_vg_name now consistent" + owner=`lvs -o tags --noheadings $lv_path | tr -d ' '` + + lv_owner $my_name $owner + owned=$? + if [ ! -z "$owner" ] && [ $owned -eq 2 ]; then + steal_tag $owner $lv_path + ret=$? + if [ $ret -ne $OCF_SUCCESS ]; then + return $ret + fi + elif [ $owned -eq 0 ]; then + ocf_log info "Someone else owns this logical volume" + return $OCF_ERR_GENERIC + fi + + if ! lv_activate_and_tag $1 $my_name $lv_path; then + ocf_log err "Failed second attempt to $1 $lv_path" + return $OCF_ERR_GENERIC + else + ocf_log notice "Second attempt to $1 $lv_path successful" + return $OCF_SUCCESS + fi + else + ocf_log err "Failed to $1 $lv_path" + return $OCF_ERR_GENERIC + fi + fi + return $OCF_SUCCESS +} + +function lv_start_clustered +{ + if lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + return $OCF_SUCCESS + fi + + # FAILED exclusive activation: + # This can be caused by an LV being active remotely. + # Before attempting a repair effort, we should attempt + # to deactivate the LV cluster-wide; but only if the LV + # is not open. Otherwise, it is senseless to attempt. + if ! 
[[ "$(lvs -o attr --noheadings $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name)" =~ ....ao ]]; then + # We'll wait a small amount of time for some settling before + # attempting to deactivate. Then the deactivate will be + # immediately followed by another exclusive activation attempt. + sleep 5 + if ! lvchange -an $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + # Someone could have the device open. + # We can't do anything about that. + ocf_log err "Unable to perform required deactivation of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name before starting" + return $OCF_ERR_GENERIC + fi + + if lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + # Second attempt after deactivation was successful, we now + # have the lock exclusively + return $OCF_SUCCESS + fi + fi + + # Failed to activate: + # This could be due to a device failure (or another machine could + # have snuck in between the deactivation/activation). We don't yet + # have a mechanism to check for remote activation, so we will proceed + # with repair action. + ocf_log err "Failed to activate logical volume, $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + + if ! lvconvert --repair --use-policies $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + ocf_log err "Failed to cleanup $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + return $OCF_ERR_GENERIC + fi + + if ! lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + return $OCF_ERR_GENERIC + fi + + ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name successful" + return $OCF_SUCCESS +} + +function lv_start_single +{ + if ! 
lvs $OCF_RESKEY_vg_name >& /dev/null; then + lv_count=0 + else + lv_count=`lvs --noheadings -o name $OCF_RESKEY_vg_name | grep -v _mlog | grep -v _mimage | grep -v nconsistent | wc -l` + fi + if [ $lv_count -gt 1 ]; then + ocf_log err "HA LVM requires Only one logical volume per volume group." + ocf_log err "There are currently $lv_count logical volumes in $OCF_RESKEY_vg_name" + ocf_log err "Failing HA LVM start of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + exit $OCF_ERR_GENERIC + fi + + if ! lv_activate start; then + return 1 + fi + + return 0 +} + +function lv_start +{ + # We pass in the VG name to see of the logical volume is clustered + if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then + lv_start_clustered + else + lv_start_single + fi +} + +function lv_stop_clustered +{ + lvchange -aln $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name +} + +function lv_stop_single +{ + if ! lv_activate stop; then + return 1 + fi + + return 0 +} + +function lv_stop +{ + # We pass in the VG name to see of the logical volume is clustered + if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then + lv_stop_clustered + else + lv_stop_single + fi +} diff --git a/rgmanager/src/resources/lvm_by_vg.sh.in b/rgmanager/src/resources/lvm_by_vg.sh.in new file mode 100644 index 0000000..4f40545 --- /dev/null +++ b/rgmanager/src/resources/lvm_by_vg.sh.in @@ -0,0 +1,529 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +# vg_owner +# +# Returns: +# 1 == We are the owner +# 2 == We can claim it +# 0 == Owned by someone else +function vg_owner +{ + local owner=`vgs -o tags --noheadings $OCF_RESKEY_vg_name | tr -d ' '` + local my_name=$(local_node_name) + + if [ -z "$my_name" ]; then + ocf_log err "Unable to determine cluster node name" + return 0 + fi + + if [ -z "$owner" ]; then + # No-one owns this VG yet, so we can claim it + return 2 + fi + + if [ $owner != $my_name ]; then + if is_node_member_clustat $owner ; then + ocf_log err " $owner owns $OCF_RESKEY_vg_name and is still a cluster member" + return 0 + fi + return 2 + fi + + return 1 +} + +restore_transient_failed_pvs() +{ + local a=0 + local -a results + + results=(`pvs -o name,vg_name,attr --noheadings | grep $OCF_RESKEY_vg_name | grep -v 'unknown device'`) + while [ ! -z "${results[$a]}" ] ; do + if [[ ${results[$(($a + 2))]} =~ ..m ]] && + [ $OCF_RESKEY_vg_name == ${results[$(($a + 1))]} ]; then + ocf_log notice "Attempting to restore missing PV, ${results[$a]} in $OCF_RESKEY_vg_name" + vgextend --restoremissing $OCF_RESKEY_vg_name ${results[$a]} + if [ $? -ne 0 ]; then + ocf_log notice "Failed to restore ${results[$a]}" + else + ocf_log notice " ${results[$a]} restored" + fi + fi + a=$(($a + 3)) + done +} + +function strip_tags +{ + local i + + for i in `vgs --noheadings -o tags $OCF_RESKEY_vg_name | sed s/","/" "/g`; do + ocf_log info "Stripping tag, $i" + + # LVM version 2.02.98 allows changing tags if PARTIAL + vgchange --deltag $i $OCF_RESKEY_vg_name + done + + if [ ! 
-z "`vgs -o tags --noheadings $OCF_RESKEY_vg_name | tr -d ' '`" ]; then + ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +function strip_and_add_tag +{ + if ! strip_tags; then + ocf_log err "Failed to remove tags from volume group, $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + vgchange --addtag $(local_node_name) $OCF_RESKEY_vg_name + if [ $? -ne 0 ]; then + ocf_log err "Failed to add ownership tag to $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + ocf_log info "New tag \"$(local_node_name)\" added to $OCF_RESKEY_vg_name" + + return $OCF_SUCCESS +} + +function vg_status_clustered +{ + return $OCF_SUCCESS +} + +# vg_status_single +# +# Are all the LVs active, are their /dev links and block device nodes present, and does this node own the VG? +function vg_status_single +{ + local i + local dev + local my_name=$(local_node_name) + + # + # Check that all LVs are active + # + for i in `lvs $OCF_RESKEY_vg_name --noheadings -o attr`; do + if [[ ! $i =~ ....a. ]]; then + return $OCF_NOT_RUNNING + fi + done + + # + # Check if all links/device nodes are present + # + for i in `lvs $OCF_RESKEY_vg_name --noheadings -o name`; do + dev="/dev/$OCF_RESKEY_vg_name/$i" + + if [ -h $dev ]; then + realdev=$(readlink -f $dev) + if [ $? -ne 0 ]; then + ocf_log err "Failed to follow link, $dev" + return $OCF_ERR_GENERIC + fi + + if [ ! -b $realdev ]; then + ocf_log err "Device node for $dev is not present" + return $OCF_ERR_GENERIC + fi + else + ocf_log err "Symbolic link for $dev is not present" + return $OCF_ERR_GENERIC + fi + done + + # + # Verify that we are the correct owner + # + vg_owner + if [ $? 
-ne 1 ]; then + ocf_log err "WARNING: $OCF_RESKEY_vg_name should not be active" + ocf_log err "WARNING: $my_name does not own $OCF_RESKEY_vg_name" + ocf_log err "WARNING: Attempting shutdown of $OCF_RESKEY_vg_name" + + # FIXME: may need more force to shut this down + vgchange -an $OCF_RESKEY_vg_name + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +## +# Main status function for volume groups +## +function vg_status +{ + if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then + vg_status_clustered + else + vg_status_single + fi +} + +function vg_verify +{ + # Anything to verify? + return $OCF_SUCCESS +} + +function vg_start_clustered +{ + local a + local results + local all_pvs + local resilience + local try_again=false + + ocf_log info "Starting volume group, $OCF_RESKEY_vg_name" + + if ! vgchange -aey $OCF_RESKEY_vg_name; then + try_again=true + + # Failure to activate: + # This could be caused by a remotely active LV. Before + # attempting any repair of the VG, we will first attempt + # to deactivate the VG cluster-wide. + # We must check for open LVs though, since these cannot + # be deactivated. We have no choice but to go one-by-one. + + # Allow for some settling + sleep 5 + + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) + a=0 + while [ ! -z "${results[$a]}" ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....ao ]]; then + if ! lvchange -an $OCF_RESKEY_vg_name/${results[$a]}; then + ocf_log err "Unable to perform required deactivation of $OCF_RESKEY_vg_name before starting" + return $OCF_ERR_GENERIC + fi + fi + a=$(($a + 2)) + done + fi + + if $try_again && ! vgchange -aey $OCF_RESKEY_vg_name; then + ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name" + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" + + if ! 
vgreduce --removemissing --mirrorsonly --force $OCF_RESKEY_vg_name; then + ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" + return $OCF_ERR_GENERIC + fi + + if ! vgchange -aey $OCF_RESKEY_vg_name; then + ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful" + return $OCF_SUCCESS + else + # The activation commands succeeded, but did they do anything? + # Make sure all the logical volumes are active + results=(`lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name`) + a=0 + while [ ! -z "${results[$a]}" ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then + all_pvs=(`pvs --noheadings -o name 2> /dev/null`) + resilience=" --config devices{filter=[" + for i in ${all_pvs[*]}; do + resilience=$resilience'"a|'$i'|",' + done + resilience=$resilience"\"r|.*|\"]}" + + vgchange -aey $OCF_RESKEY_vg_name $resilience + break + fi + a=$(($a + 2)) + done + + # We need to check the LVs again if we made the command resilient + if [ ! -z "$resilience" ]; then + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then + ocf_log err "Failed to activate $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + a=$(($a + 2)) + done + ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations" + fi + fi + + return $OCF_SUCCESS +} + +function vg_start_single +{ + local a + local results + local all_pvs + local resilience + + ocf_log info "Starting volume group, $OCF_RESKEY_vg_name" + + vg_owner + case $? in + 0) + ocf_log info "Someone else owns this volume group" + return $OCF_ERR_GENERIC + ;; + 1) + ocf_log info "I own this volume group" + ;; + 2) + ocf_log info "I can claim this volume group" + ;; + esac + + if ! 
strip_and_add_tag; then + # Errors printed by sub-function + return $OCF_ERR_GENERIC + fi + + if ! vgchange -ay $OCF_RESKEY_vg_name; then + ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name" + ocf_log err "Attempting activation of logical volumes one-by-one." + + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ${results[$(($a + 1))]} =~ r.......p ]] || + [[ ${results[$(($a + 1))]} =~ R.......p ]]; then + # Attempt "partial" activation of any RAID LVs + ocf_log err "Attempting partial activation of ${OCF_RESKEY_vg_name}/${results[$a]}" + if ! lvchange -ay --partial ${OCF_RESKEY_vg_name}/${results[$a]}; then + ocf_log err "Failed attempt to activate ${OCF_RESKEY_vg_name}/${results[$a]} in partial mode" + return $OCF_ERR_GENERIC + fi + ocf_log notice "Activation of ${OCF_RESKEY_vg_name}/${results[$a]} in partial mode succeeded" + elif [[ ${results[$(($a + 1))]} =~ m.......p ]] || + [[ ${results[$(($a + 1))]} =~ M.......p ]]; then + ocf_log err "Attempting repair and activation of ${OCF_RESKEY_vg_name}/${results[$a]}" + if ! lvconvert --repair --use-policies ${OCF_RESKEY_vg_name}/${results[$a]}; then + ocf_log err "Failed to repair ${OCF_RESKEY_vg_name}/${results[$a]}" + return $OCF_ERR_GENERIC + fi + if ! lvchange -ay ${OCF_RESKEY_vg_name}/${results[$a]}; then + ocf_log err "Failed to activate ${OCF_RESKEY_vg_name}/${results[$a]}" + return $OCF_ERR_GENERIC + fi + ocf_log notice "Repair and activation of ${OCF_RESKEY_vg_name}/${results[$a]} succeeded" + else + ocf_log err "Attempting activation of non-redundant LV ${OCF_RESKEY_vg_name}/${results[$a]}" + if ! 
lvchange -ay ${OCF_RESKEY_vg_name}/${results[$a]}; then + ocf_log err "Failed to activate ${OCF_RESKEY_vg_name}/${results[$a]}" + return $OCF_ERR_GENERIC + fi + ocf_log notice "Successfully activated non-redundant LV ${OCF_RESKEY_vg_name}/${results[$a]}" + fi + a=$(($a + 2)) + done + + return $OCF_SUCCESS + else + # The activation commands succeeded, but did they do anything? + # Make sure all the logical volumes are active + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then + all_pvs=(`pvs --noheadings -o name 2> /dev/null`) + resilience=" --config devices{filter=[" + for i in ${all_pvs[*]}; do + resilience=$resilience'"a|'$i'|",' + done + resilience=$resilience"\"r|.*|\"]}" + + vgchange -ay $OCF_RESKEY_vg_name $resilience + break + fi + a=$(($a + 2)) + done + + # We need to check the LVs again if we made the command resilient + if [ ! -z "$resilience" ]; then + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. 
]]; then + ocf_log err "Failed to activate $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + a=$(($a + 2)) + done + ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations" + fi + fi + + return $OCF_SUCCESS +} + +## +# Main start function for volume groups +## +function vg_start +{ + local a=0 + local results + + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ ...p ]]; then + ocf_log err "Volume group \"$OCF_RESKEY_vg_name\" has PVs marked as missing" + restore_transient_failed_pvs + fi + + if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then + vg_start_clustered + else + vg_start_single + fi +} + +function vg_stop_clustered +{ + local a + local results + typeset self_fence="" + + case ${OCF_RESKEY_self_fence} in + "yes") self_fence=1 ;; + 1) self_fence=1 ;; + *) self_fence="" ;; + esac + + # Shut down the volume group + # Do we need to make this resilient? + a=0 + while ! vgchange -aln $OCF_RESKEY_vg_name; do + a=$(($a + 1)) + if [ $a -gt 10 ]; then + break; + fi + ocf_log err "Unable to deactivate $OCF_RESKEY_vg_name, retrying($a)" + sleep 1 + which udevadm >& /dev/null && udevadm settle + done + + # Make sure all the logical volumes are inactive + active=0 + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then + active=1 + break + fi + a=$(($a + 2)) + done + + # lvs may not show active volumes if all PVs in VG are gone + dmsetup table | grep -q "^${OCF_RESKEY_vg_name//-/--}-[^-]" + if [ $? 
-eq 0 ]; then + active=1 + fi + + if [ $active -ne 0 ]; then + if [ "$self_fence" ]; then + ocf_log err "Unable to deactivate $OCF_RESKEY_vg_name REBOOT" + sync + reboot -fn + else + ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown" + fi + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +function vg_stop_single +{ + local a + local results + typeset self_fence="" + + case ${OCF_RESKEY_self_fence} in + "yes") self_fence=1 ;; + 1) self_fence=1 ;; + *) self_fence="" ;; + esac + + # Shut down the volume group + # Do we need to make this resilient? + vgchange -an $OCF_RESKEY_vg_name + + # Make sure all the logical volumes are inactive + active=0 + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ${results[$(($a + 1))]} =~ ....a. ]]; then + active=1 + break + fi + a=$(($a + 2)) + done + + # lvs may not show active volumes if all PVs in VG are gone, so also grep the dmsetup table (each '-' in the VG name is doubled before matching) + dmsetup table | grep -q "^${OCF_RESKEY_vg_name//-/--}-[^-]" + if [ $? -eq 0 ]; then + active=1 + fi + + if [ $active -ne 0 ]; then + if [ "$self_fence" ]; then + ocf_log err "Unable to deactivate $OCF_RESKEY_vg_name REBOOT" + sync + reboot -fn + else + ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown" + fi + return $OCF_ERR_GENERIC + fi + + # Make sure we are the owner before we strip the tags + vg_owner + if [ $? 
-eq 1 ]; then + strip_tags + fi + + return $OCF_SUCCESS +} + +## +# Main stop function for volume groups +## +function vg_stop +{ + if [[ "$(vgs -o attr --noheadings $OCF_RESKEY_vg_name)" =~ .....c ]]; then + vg_stop_clustered + else + vg_stop_single + fi +} diff --git a/rgmanager/src/resources/mysql.metadata b/rgmanager/src/resources/mysql.metadata new file mode 100644 index 0000000..d3ade15 --- /dev/null +++ b/rgmanager/src/resources/mysql.metadata @@ -0,0 +1,107 @@ +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent version="rgmanager 2.0" name="mysql"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an instance of MySQL database server + </longdesc> + <shortdesc lang="en"> + Defines a MySQL database server + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Define a name + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define configuration file + </longdesc> + <shortdesc lang="en"> + Define configuration file + </shortdesc> + <content type="string" default="/etc/my.cnf"/> + </parameter> + + <parameter name="listen_address"> + <longdesc lang="en"> + Define an IP address for MySQL + </longdesc> + <shortdesc lang="en"> + Define an IP address for MySQL server. If the address + is not given then first IP address from the service is taken. 
+ </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="mysqld_options"> + <longdesc lang="en"> + Other command-line options for mysqld + </longdesc> + <shortdesc lang="en"> + Other command-line options for mysqld + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="startup_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service startup + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service startup + </shortdesc> + <content type="integer" default="30" /> + </parameter> + + <parameter name="shutdown_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service shutdown + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service shutdown + </shortdesc> + <content type="integer" /> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. 
+ </shortdesc> + <content type="string"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- Checks to see if it''s mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/mysql.sh.in b/rgmanager/src/resources/mysql.sh.in new file mode 100644 index 0000000..a69cd80 --- /dev/null +++ b/rgmanager/src/resources/mysql.sh.in @@ -0,0 +1,232 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. 
$(dirname $0)/utils/ra-skelet.sh + +declare MYSQL_MYSQLD=/usr/bin/mysqld_safe +declare MYSQL_ipAddress +declare MYSQL_pid_file="`generate_name_for_pid_file`" + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ ! -r "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE $OCF_RESKEY_config_file + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ -z "$MYSQL_pid_file" ]; then + clog_service_verify $CLOG_FAILED "Invalid name of PID file" + return $OCF_ERR_ARGS + fi + + clog_service_verify $CLOG_SUCCEED + return 0 +} + +start() +{ + declare username="" + + if status; then + ocf_log info "Starting Service $OCF_RESOURCE_INSTANCE > Already running" + return $OCF_SUCCESS + fi + + clog_service_start $CLOG_INIT + + # Pull out the user name from the options argument if it is set. + # We need this to properly set the pidfile permissions + if [ -n "$OCF_RESKEY_mysqld_options" ]; then + username=$(echo "$OCF_RESKEY_mysqld_options" | sed -n -e 's/^.*--user=\(\S*\)[[:space:]]*.*$/\1/p;s/^.*-u[[:space:]]*\(\S*\)[[:space:]]*.*$/\1/p') + fi + + create_pid_directory "$username" + check_pid_file "$MYSQL_pid_file" + + if [ $? 
-ne 0 ]; then + clog_check_pid $CLOG_FAILED "$MYSQL_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + if [ -n "$OCF_RESKEY_listen_address" ]; then + MYSQL_ipAddress="$OCF_RESKEY_listen_address" + else + clog_looking_for $CLOG_INIT "IP Address" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -n "$ip_addresses" ]; then + for i in $ip_addresses; do + MYSQL_ipAddress="$i" + break; + done + else + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Address" + fi + fi + + clog_looking_for $CLOG_SUCCEED "IP Address" + + $MYSQL_MYSQLD --defaults-file="$OCF_RESKEY_config_file" \ + --pid-file="$MYSQL_pid_file" \ + --bind-address="$MYSQL_ipAddress" \ + $OCF_RESKEY_mysqld_options > /dev/null 2>&1 & + + if [ $? -ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_start $CLOG_SUCCEED + + # Sleep 1 sec before checking status so mysqld can start + sleep 1 + + status + + return $?; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + stop_generic "$MYSQL_pid_file" "$OCF_RESKEY_shutdown_wait" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$MYSQL_pid_file" + case $? in + $OCF_NOT_RUNNING) + ps auxww | grep -Pv "grep|$MYSQL_MYSQLD" | grep "$MYSQL_pid_file" &> /dev/null + if [ "$?" 
-eq "0" ];then + declare i=$OCF_RESKEY_startup_wait + while [ "$i" -gt 0 ]; do + if [ -f "$MYSQL_pid_file" ]; then + break; + fi + sleep 1 + let i=$i-1 + done + + if [ "$i" -eq 0 ]; then + clog_service_start $CLOG_FAILED_TIMEOUT + return $OCF_ERR_GENERIC + else + clog_service_status $CLOG_SUCCEED + return 0 # return rather than exit, so a caller such as start() keeps control + fi + fi + + clog_service_status $CLOG_FAILED "$MYSQL_pid_file" + return $OCF_NOT_RUNNING + ;; + 0) + clog_service_status $CLOG_SUCCEED + return 0 + ;; + *) + clog_service_status $CLOG_FAILED "$MYSQL_pid_file" + return $OCF_ERR_GENERIC + ;; + esac + + + if [ $? -ne 0 ]; then + clog_service_status $CLOG_FAILED "$MYSQL_pid_file" + return $OCF_ERR_GENERIC + fi + + clog_service_status $CLOG_SUCCEED + return 0 +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all && stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? 
+ ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/named.metadata b/rgmanager/src/resources/named.metadata new file mode 100644 index 0000000..548d03b --- /dev/null +++ b/rgmanager/src/resources/named.metadata @@ -0,0 +1,116 @@ +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="named"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an instance of DNS named server + </longdesc> + <shortdesc lang="en"> + Defines an instance of named server + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Specifies a service name for logging and other purposes + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define absolute path to configuration file + </longdesc> + <shortdesc lang="en"> + Config File + </shortdesc> + <content type="string" default="/etc/named.conf"/> + </parameter> + + <parameter name="named_sdb"> + <longdesc lang="en"> + Simplified Database Backend + </longdesc> + <shortdesc lang="en"> + Simplified Database Backend + </shortdesc> + <content type="boolean" default="0"/> + </parameter> + + <parameter name="named_working_dir"> + <longdesc lang="en"> + Working directory for named + </longdesc> + <shortdesc lang="en"> + Working directory for named + </shortdesc> + <content type="string" default="/var/named" /> + </parameter> + + <parameter name="named_options"> + <longdesc lang="en"> + Other command-line options for named + </longdesc> + <shortdesc lang="en"> + Other command-line options for named + </shortdesc> + <content type="string" /> + </parameter> + + + <parameter name="update_source"> + <longdesc lang="en"> + Update 'notify-source', 'transfer-source', and 'query-source' to match + the cluster ip associated with this service. 
+ </longdesc> + <shortdesc lang="en"> + Use cluster ip as source. + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="shutdown_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service shutdown + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service shutdown + </shortdesc> + <content type="integer" default="5" /> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- Checks to see if it''s mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/named.sh.in b/rgmanager/src/resources/named.sh.in new file mode 100644 index 0000000..5e1ef35 --- /dev/null +++ b/rgmanager/src/resources/named.sh.in @@ -0,0 +1,224 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. $(dirname $0)/utils/ra-skelet.sh + +declare NAMED_NAMED=/usr/sbin/named +declare NAMED_pid_file="`generate_name_for_pid_file`" +declare NAMED_conf_dir="`generate_name_for_conf_dir`" +declare NAMED_gen_config_file="$NAMED_conf_dir/named.conf" +declare NAMED_url_list +declare NAMED_parse_config=$(dirname $0)/utils/named-parse-config.pl +declare NAMED_update_src="false" + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ ! 
-r "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE $OCF_RESKEY_config_file + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ -n "$OCF_RESKEY_update_source" ]; then + NAMED_update_src=$OCF_RESKEY_update_source + fi + clog_service_verify $CLOG_SUCCEED + + return 0 +} + +generate_config_file() +{ + declare original_file="$1" + declare generated_file="$2" + declare ip_address="$3" + + if [ -f "$generated_file" ]; then + sha1_verify "$generated_file" + if [ $? -ne 0 ]; then + clog_check_sha1 $CLOG_FAILED + return 0 + fi + fi + + clog_generate_config $CLOG_INIT "$original_file" "$generated_file" + + generate_configTemplate "$generated_file" "$1" + cat "$original_file" | grep -v "^[[:space:]]*listen-on" | \ + grep -v "^[[:space:]]*pid-file" | \ + grep -v "^[[:space:]]*directory" >> "$generated_file" + + declare tmp_file=`mktemp -t cluster.XXXXXXXXXX` + mv "$generated_file" "$tmp_file" + + "$NAMED_parse_config" "$OCF_RESKEY_named_working_dir" "$NAMED_pid_file" "$ip_address" "$NAMED_update_src"\ + < "$tmp_file" > "$generated_file" + + rm "$tmp_file" + sha1_addToFile "$generated_file" + clog_generate_config $CLOG_SUCCEED "$original_file" "$generated_file" + + return 0; +} + +start() +{ + declare ip_list; + declare username="" + + clog_service_start $CLOG_INIT + + # Pull out the user name from the options argument if it is set. + # We need this to properly set the pidfile permissions + if [ -n "$OCF_RESKEY_named_options" ]; then + username=$(echo "$OCF_RESKEY_named_options" | sed -n -e 's/^.*-u[[:space:]]*\(\S*\)[[:space:]]*.*$/\1/p') + fi + + create_pid_directory "$username" + create_conf_directory "$NAMED_conf_dir" + check_pid_file "$NAMED_pid_file" + + if [ $? 
-ne 0 ]; then + clog_check_pid $CLOG_FAILED "$NAMED_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + clog_looking_for $CLOG_INIT "IP Addresses" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -z "$ip_addresses" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_SUCCEED "IP Addresses" + + ip_list=`echo $ip_addresses | sed 's/ /;/;s/\([[:digit:]]\)$/\1;/' ` + + if [ -z "$ip_list" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + [ -x /sbin/portrelease ] && /sbin/portrelease named &>/dev/null + + generate_config_file "$OCF_RESKEY_config_file" "$NAMED_gen_config_file" "$ip_list" + + $NAMED_NAMED -c "$NAMED_gen_config_file" $OCF_RESKEY_named_options + + if [ $? -ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_start $CLOG_SUCCEED + + return 0; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + stop_generic "$NAMED_pid_file" "$OCF_RESKEY_shutdown_wait" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$NAMED_pid_file" + if [ $? -ne 0 ]; then + clog_service_status $CLOG_FAILED "$NAMED_pid_file" + return $OCF_ERR_GENERIC + fi + + clog_service_status $CLOG_SUCCEED + return 0 +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all && stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? 
+ ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/netfs.sh.in b/rgmanager/src/resources/netfs.sh.in new file mode 100644 index 0000000..5b75f9d --- /dev/null +++ b/rgmanager/src/resources/netfs.sh.in @@ -0,0 +1,488 @@ +#!@BASH_SHELL@ + +# +# NFS/CIFS file system mount/umount/etc. agent +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +. $(dirname $0)/utils/fs-lib.sh + +do_metadata() +{ + cat <<EOT +<?xml version="1.0" ?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent name="netfs" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an NFS/CIFS mount for use by cluster services. + </longdesc> + <shortdesc lang="en"> + Defines an NFS/CIFS file system mount. + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Symbolic name for this file system. 
+ </longdesc> + <shortdesc lang="en"> + File System Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="mountpoint" unique="1" required="1"> + <longdesc lang="en"> + Path in file system heirarchy to mount this file system. + </longdesc> + <shortdesc lang="en"> + Mount Point + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="host" required="1"> + <longdesc lang="en"> + Server IP address or hostname + </longdesc> + <shortdesc lang="en"> + IP or Host + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="export" required="1"> + <longdesc lang="en"> + NFS Export directory name or CIFS share + </longdesc> + <shortdesc lang="en"> + Export + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="fstype"> + <longdesc lang="en"> + File System type (nfs, nfs4 or cifs) + </longdesc> + <shortdesc lang="en"> + File System Type + </shortdesc> + <content type="string" default="nfs"/> + </parameter> + + <parameter name="no_unmount" required="0"> + <longdesc lang="en"> + Do not unmount the filesystem during a stop or relocation operation + </longdesc> + <shortdesc lang="en"> + Skip unmount opration + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="force_unmount"> + <longdesc lang="en"> + If set, the cluster will kill all processes using + this file system when the resource group is + stopped. Otherwise, the unmount will fail, and + the resource group will be restarted. + </longdesc> + <shortdesc lang="en"> + Force Unmount + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="self_fence"> + <longdesc lang="en"> + If set and unmounting the file system fails, the node will + immediately reboot. Generally, this is used in conjunction + with force_unmount support, but it is not required. 
+ </longdesc> + <shortdesc lang="en"> + Seppuku Unmount + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="options"> + <longdesc lang="en"> + Provides a list of mount options. If none are specified, + the NFS file system is mounted -o sync. + </longdesc> + <shortdesc lang="en"> + Mount Options + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="use_findmnt"> + <longdesc lang="en"> + Use findmnt to determine if and where a filesystem is mounted. + Disabling this uses the failback method (should be used if autofs + maps are located on network storage (ie. nfs, iscsi, etc). + </longdesc> + <shortdesc lang="en"> + Utilize findmnt to detect if and where filesystems are mounted + </shortdesc> + <content type="boolean"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="900"/> + <action name="stop" timeout="30"/> + <!-- Recovery isn't possible; we don't know if resources are using + the file system. --> + + <!-- Checks to see if it's mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <!-- Checks to see if we can write to the mountpoint (if !ROFS) --> + <action name="status" depth="20" timeout="30" interval="10m"/> + <action name="monitor" depth="20" timeout="30" interval="10m"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="5"/> + </actions> + + <special tag="rgmanager"> + <child type="nfsexport" forbid="1"/> + <child type="nfsclient" forbid="1"/> + </special> +</resource-agent> +EOT +} + + +verify_host() +{ + if [ -z "$OCF_RESKEY_host" ]; then + ocf_log err "No server hostname or IP address specified." 
+ return 1 + fi + + host $OCF_RESKEY_host 2>&1 | grep -vq "not found" + if [ $? -eq 0 ]; then + return 0 + fi + + ocf_log err "Hostname or IP address \"$OCF_RESKEY_host\" not valid" + + return $OCF_ERR_ARGS +} + + +verify_fstype() +{ + # Auto detect? + [ -z "$OCF_RESKEY_fstype" ] && return 0 + + case $OCF_RESKEY_fstype in + nfs|nfs4|cifs) + return 0 + ;; + *) + ocf_log err "File system type $OCF_RESKEY_fstype not supported" + return $OCF_ERR_ARGS + ;; + esac +} + + +verify_options() +{ + declare -i ret=0 + + # + # From mount(1) + # + for o in `echo $OCF_RESKEY_options | sed -e s/,/\ /g`; do + case $o in + async|atime|auto|defaults|dev|exec|_netdev|noatime) + continue + ;; + noauto|nodev|noexec|nosuid|nouser|ro|rw|suid|sync) + continue + ;; + dirsync|user|users) + continue + ;; + esac + + case $OCF_RESKEY_fstype in + cifs) + continue + ;; + nfs|nfs4) + case $o in + # + # NFS / NFS4 common + # + rsize=*|wsize=*|timeo=*|retrans=*|acregmin=*) + continue + ;; + acregmax=*|acdirmin=*|acdirmax=*|actimeo=*) + continue + ;; + retry=*|port=*|bg|fg|soft|hard|intr|cto|ac|noac) + continue + ;; + esac + + # + # NFS v2/v3 only + # + if [ "$OCF_RESKEY_fstype" = "nfs" ]; then + case $o in + mountport=*|mounthost=*) + continue + ;; + mountprog=*|mountvers=*|nfsprog=*|nfsvers=*) + continue + ;; + namelen=*) + continue + ;; + tcp|udp|lock|nolock) + continue + ;; + esac + fi + + # + # NFS4 only + # + if [ "$OCF_RESKEY_fstype" = "nfs4" ]; then + case $o in + proto=*|clientaddr=*|sec=*) + continue + ;; + esac + fi + + ;; + esac + + ocf_log err "Option $o not supported for $OCF_RESKEY_fstype" + ret=$OCF_ERR_ARGS + done + + return $ret +} + + +do_validate() +{ + verify_name || return $OCF_ERR_ARGS + verify_fstype|| return $OCF_ERR_ARGS + verify_host || return $OCF_ERR_ARGS + verify_mountpoint || return $OCF_ERR_ARGS + verify_options || return $OCF_ERR_ARGS + # verify_target || return $OCF_ERR_ARGS +} + + +# +# Override real_device to use fs-lib's functions for start/stop_filesystem +# 
real_device() {
	# Export the first argument, unchanged, as REAL_DEVICE.
	export REAL_DEVICE="$1"
}


#
# do_mount - nfs / cifs are mounted differently than blockdevs
#
# Mounts the share described by OCF_RESKEY_host and OCF_RESKEY_export on
# OCF_RESKEY_mountpoint, passing OCF_RESKEY_fstype as "-t" and
# OCF_RESKEY_options as "-o" when they are set.
#
# Returns 0 if mount(8) succeeded.  Returns 1 if mount(8) failed, or if
# OCF_RESKEY_fstype is not one of nfs, nfs4 or cifs (nothing is mounted
# in that case).
#
do_mount() {
	declare opts=""
	declare mount_options=""
	declare ret_val
	declare mp="$OCF_RESKEY_mountpoint"
	declare src=""

	#
	# Get the filesystem type, if specified.
	# fstype and fstype_option are intentionally left undeclared here,
	# exactly as before, so their scope is unchanged for the caller.
	#
	fstype_option=""
	fstype=${OCF_RESKEY_fstype}
	case "$fstype" in
	""|"[ ]*")
		fstype=""
		;;
	*)	# found it
		fstype_option="-t $fstype"
		;;
	esac

	#
	# Get the mount options, if they exist.
	#
	mount_options=""
	opts=${OCF_RESKEY_options}
	case "$opts" in
	""|"[ ]*")
		opts=""
		;;
	*)	# found it
		mount_options="-o $opts"
		;;
	esac

	#
	# Build the source specification for the requested protocol.
	#
	case $OCF_RESKEY_fstype in
	nfs|nfs4)
		src="$OCF_RESKEY_host:$OCF_RESKEY_export"
		;;
	cifs)
		src="//$OCF_RESKEY_host/$OCF_RESKEY_export"
		;;
	*)
		# A case statement that matches nothing exits with status 0.
		# Without this branch an empty or unknown fstype would skip
		# mount(8) entirely and still be reported as a success.
		ocf_log err "Cannot mount $mp: file system type \"$OCF_RESKEY_fstype\" is not one of nfs, nfs4, cifs"
		return 1
		;;
	esac

	# $fstype_option and $mount_options are left unquoted on purpose:
	# each holds a flag and its value ("-t nfs", "-o ro,sync") that must
	# be split into two words, or nothing at all when empty.
	mount $fstype_option $mount_options "$src" "$mp"
	ret_val=$?

	if [ $ret_val -ne 0 ]; then
		# Report the command that was really run (CIFS included).
		ocf_log err "'mount $fstype_option $mount_options $src $mp' failed, error=$ret_val"
		return 1
	fi

	return 0
}

#
# do_nfs_rpc_check - probe the NFS server's RPC services with rpcinfo
#
# Checks, in order: the generic RPC service, the "nfs" program, and (only
# when OCF_RESKEY_fstype is "nfs") the "mountd" program.  The transport
# used for the first two probes is taken from OCF_RESKEY_options
# (proto=, tcp, udp, rdma; default tcp); the mountd probe uses
# mountproto= (default udp).
#
# Returns 0 if every probe answered, 1 on the first probe that failed.
#
do_nfs_rpc_check() {
	# see man nfs TRANSPORT PROTOCOL section for defaults
	local nfsproto=tcp
	local nfsmountproto=udp
	local o

	# follow the same logic as mount.nfs option parser.
	# the rightmost option wins over previous ones, so don't break when
	# we find something.

	for o in ${OCF_RESKEY_options//,/ }; do
		case $o in
		proto=*)
			nfsproto="$(echo $o | cut -d "=" -f 2)"
			;;
		mountproto=*)
			nfsmountproto="$(echo $o | cut -d "=" -f 2)"
			;;
		tcp)	nfsproto=tcp;;
		udp)	nfsproto=udp;;
		rdma)	nfsproto=rdma;;
		esac
	done

	ocf_log debug "Testing generic rpc access on server ${OCF_RESKEY_host} with protocol $nfsproto"
	if ! rpcinfo -T $nfsproto ${OCF_RESKEY_host} > /dev/null 2>&1; then
		ocf_log alert "RPC server on ${OCF_RESKEY_host} with $nfsproto is not responding"
		return 1
	fi

	ocf_log debug "Testing nfs rcp access on server ${OCF_RESKEY_host} with protocol $nfsproto"
	if ! rpcinfo -T $nfsproto ${OCF_RESKEY_host} nfs > /dev/null 2>&1; then
		ocf_log alert "NFS server on ${OCF_RESKEY_host} with $nfsproto is not responding"
		return 1
	fi

	if [ "$OCF_RESKEY_fstype" = nfs ]; then
		ocf_log debug "Testing mountd rpc access on server ${OCF_RESKEY_host} with protocol $nfsmountproto"
		# Silenced like the two probes above so that rpcinfo's report
		# does not end up on the agent's stdout/stderr.
		if ! rpcinfo -T $nfsmountproto ${OCF_RESKEY_host} mountd > /dev/null 2>&1; then
			ocf_log alert "MOUNTD server on ${OCF_RESKEY_host} with $nfsmountproto is not responding"
			return 1
		fi
	fi

	return 0
}

#
# do_pre_unmount - for nfs/nfs4 with self fencing, reboot if the server
# is unreachable before an unmount is attempted
#
# Does nothing for other file system types, when self_fence is not $YES,
# or when is_mounted reports $NO.  Always returns 0.
#
# NOTE(review): self_fence, YES, NO, dev, mp and is_mounted are not
# defined in this script; presumably they come from utils/fs-lib.sh and
# its calling function -- confirm.
#
do_pre_unmount() {
	case $OCF_RESKEY_fstype in
	nfs|nfs4)
		if [ "$self_fence" != $YES ]; then
			ocf_log debug "Skipping pre unmount checks: self_fence is disabled"
			return 0
		fi

		is_mounted "$dev" "$mp"
		case $? in
		$NO)
			ocf_log debug "Skipping pre unmount checks: device is not mounted"
			return 0
			;;
		esac

		ocf_log info "pre unmount: checking if nfs server ${OCF_RESKEY_host} is alive"
		if ! do_nfs_rpc_check; then
			ocf_log alert "NFS server not responding - REBOOTING"
			sleep 2
			reboot -fn
		fi
		;;
	esac

	return 0
}

#
# do_force_unmount - run "umount -f" on the mount point for nfs/nfs4
#
# Returns the exit status of umount for nfs/nfs4.  For every other file
# system type nothing is done and 1 is returned.
#
do_force_unmount() {
	local target="$OCF_RESKEY_mountpoint"

	case $OCF_RESKEY_fstype in
	nfs|nfs4)
		# Log the path that is really handed to umount.
		ocf_log warning "Calling 'umount -f $target'"
		umount -f "$target"
		return $?
		;;
	*)
		;;
	esac

	return 1	# Returning 1 lets stop_filesystem do add'l checks
}


#
# populate_defaults - derive OCF_RESKEY_device and default mount options
#
# For nfs/nfs4 the device is "host:export" and the default options are
# sync,soft,noac; for cifs the device is "//host/export" and the default
# option is guest.  Options already present in OCF_RESKEY_options are
# kept.  Any other fstype leaves the environment untouched.
#
populate_defaults()
{
	case $OCF_RESKEY_fstype in
	nfs|nfs4)
		export OCF_RESKEY_device="$OCF_RESKEY_host:$OCF_RESKEY_export"
		if [ -z "$OCF_RESKEY_options" ]; then
			export OCF_RESKEY_options=sync,soft,noac
		fi
		;;
	cifs)
		export OCF_RESKEY_device="//$OCF_RESKEY_host/$OCF_RESKEY_export"
		if [ -z "$OCF_RESKEY_options" ]; then
			export OCF_RESKEY_options=guest
		fi
		;;
	esac
}


#
# Main...
+# +populate_defaults +main $* diff --git a/rgmanager/src/resources/nfsclient.sh.in b/rgmanager/src/resources/nfsclient.sh.in new file mode 100755 index 0000000..ef68dbe --- /dev/null +++ b/rgmanager/src/resources/nfsclient.sh.in @@ -0,0 +1,479 @@ +#!@BASH_SHELL@ + +# +# NFS Export Client handler agent script +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +. $(dirname $0)/ocf-shellfuncs + +meta_data() +{ + cat <<EOT +<?xml version="1.0" ?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent version="rgmanager 2.0" name="nfsclient"> + <version>1.0</version> + + <longdesc lang="en"> + This defines how a machine or group of machines may access + an NFS export on the cluster. IP addresses, IP wildcards, + hostnames, hostname wildcards, and netgroups are supported. + </longdesc> + <shortdesc lang="en"> + Defines an NFS client. + </shortdesc> + + <parameters> + <parameter name="name" unique="1" primary="1"> + <longdesc lang="en"> + This is a symbolic name of a client used to reference + it in the resource tree. This is NOT the same thing + as the target option. 
+ </longdesc> + <shortdesc lang="en"> + Client Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="target" required="1"> + <longdesc lang="en"> + This is either a hostname, a wildcard (IP address or + hostname based), or a netgroup to which defining a + host or hosts to export to. + </longdesc> + <shortdesc lang="en"> + Target Hostname, Wildcard, or Netgroup + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="path" inherit="path"> + <longdesc lang="en"> + This is the path to export to the target. This + field is generally left blank, as it inherits the + path from the parent export. + </longdesc> + <shortdesc lang="en"> + Path to Export + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="svcname" inherit="service%name"> + <longdesc lang="en"> + </longdesc> + <shortdesc lang="en"> + </shortdesc> + <content type="string"/> + </parameter> + + + <parameter name="fsid" inherit="fsid"> + <longdesc lang="en"> + File system ID inherited from the parent nfsexport/ + clusterfs/fs resource. Putting fsid in the options + field will override this. + </longdesc> + <shortdesc lang="en"> + File system ID + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="options"> + <longdesc lang="en">Defines a list of options for this + particular client. See 'man 5 exports' for a list + of available options. + </longdesc> + <shortdesc lang="en"> + Export Options + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="allow_recover"> + <longdesc lang="en"> + Allows recovery of this NFS client (default = 1) if it + disappears from the export list. If set to 0, the service + will be restarted. This is useful to help preserve export + ordering. 
+ </longdesc> + <shortdesc lang="en"> + Allow recovery + </shortdesc> + <content type="boolean"/> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Service this NFS export belongs to. Used for caching + exports on a per-service basis. + </longdesc> + <shortdesc lang="en"> + Service Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="use_cache" inherit="service%nfs_client_cache"> + <longdesc lang="en"> + On systems with large numbers of exports, a performance + problem in the exportfs command can cause inordinately long + status check times for services with lots of mounted + NFS clients. This occurs because exportfs does DNS queries + on all clients in the export list. + + Setting this option to '1' will enable caching of the export + list returned from the exportfs command on a per-service + basis. The cache will last for 30 seconds before expiring + instead of being generated each time an nfsclient resource + is called. 
+ </longdesc> + <shortdesc lang="en"> + Enable exportfs list caching + </shortdesc> + <content type="integer"/> + </parameter> + + + </parameters> + + <actions> + <action name="start" timeout="90"/> + <action name="stop" timeout="5"/> + <action name="recover" timeout="90"/> + + <!-- Checks to see if the export exists in /var/lib/nfs/etab --> + <action name="status" timeout="5" interval="1m"/> + <action name="monitor" timeout="5" interval="1m"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="30"/> + </actions> + +</resource-agent> +EOT +} + + +verify_options() +{ + declare o + declare -i ret=0 + + [ -z "$OCF_RESKEY_options" ] && return 0 + + # + # From exports(5) + # + for o in `echo $OCF_RESKEY_options | sed -e s/,/\ /g`; do + case $o in + fsid=*) + ocf_log debug "Using designated $o instead of fsid=$OCF_RESKEY_fsid" + unset OCF_RESKEY_fsid + ;; + secure) + ;; + insecure) + ;; + sec=*) + ;; + rw) + ;; + ro) + ;; + async) + ;; + sync) + ;; + wdelay) + ;; + no_wdelay) + ;; + hide) + ;; + nohide) + ;; + subtree_check) + ;; + no_subtree_check) + ;; + secure_locks) + ;; + insecure_locks) + ;; + auth_nlm) + ;; + no_auth_nlm) + ;; + mountpoint=*) + ;; + mp=*) + ;; + root_squash) + ;; + no_root_squash) + ;; + all_squash) + ;; + no_all_squash) + ;; + anonuid=*) + ;; + anongid=*) + ;; + *) + ocf_log err "Export Option $o invalid" + ret=$OCF_ERR_ARGS + ;; + esac + done + + return $ret +} + + +verify_target() +{ + # XXX need to add wildcards, hostname, ip, etc. + [ -n "$OCF_RESKEY_target" ] && return 0 + + return $OCF_ERR_ARGS +} + + +verify_path() +{ + if [ -z "$OCF_RESKEY_path" ]; then + ocf_log err "No export path specified." 
+ return $OCF_ERR_ARGS + fi + + OCF_RESKEY_path="${OCF_RESKEY_path%/}" + + [ -d "$OCF_RESKEY_path" ] && return 0 + + ocf_log err "$OCF_RESKEY_path is not a directory" + + return $OCF_ERR_ARGS +} + + +verify_type() +{ + [ -z "$OCF_RESKEY_type" ] && return 0 + [ "$OCF_RESKEY_type" = "nfs" ] && return 0 + + ocf_log err "Export type $OCF_RESKEY_type not supported yet" + return $OCF_ERR_ARGS +} + + +verify_all() +{ + declare -i ret=0 + + verify_type || ret=$OCF_ERR_ARGS + verify_options || ret=$OCF_ERR_ARGS + verify_target || ret=$OCF_ERR_ARGS + verify_path || ret=$OCF_ERR_ARGS + + return $ret +} + + +case $1 in +start) + declare option_str + + verify_all || exit $OCF_ERR_ARGS + + # + # XXX + # Bad: Side-effect of verify_options: unset OCF_RESKEY_fsid if + # fsid is specified in the options string. + # + if [ -z "$OCF_RESKEY_options" ] && [ -n "$OCF_RESKEY_fsid" ]; then + option_str="fsid=$OCF_RESKEY_fsid" + elif [ -n "$OCF_RESKEY_options" ] && [ -z "$OCF_RESKEY_fsid" ]; then + option_str="$OCF_RESKEY_options" + elif [ -n "$OCF_RESKEY_fsid" ] && [ -n "$OCF_RESKEY_options" ]; then + option_str="fsid=$OCF_RESKEY_fsid,$OCF_RESKEY_options" + fi + + if [ -z "$option_str" ]; then + ocf_log info "Adding export: ${OCF_RESKEY_target}:${OCF_RESKEY_path}" + exportfs -i "${OCF_RESKEY_target}:${OCF_RESKEY_path}" + rv=$? + else + ocf_log info "Adding export: ${OCF_RESKEY_target}:${OCF_RESKEY_path} ($option_str)" + exportfs -i -o $option_str "${OCF_RESKEY_target}:${OCF_RESKEY_path}" + rv=$? + fi + ;; + +stop) + verify_all || exit $OCF_ERR_ARGS + + $0 status + rv=$? + if [ $rv -eq 0 ]; then + ocf_log info "Removing export: ${OCF_RESKEY_target}:${OCF_RESKEY_path}" + exportfs -u "${OCF_RESKEY_target}:${OCF_RESKEY_path}" + rv=$? + else + # If status check fails then there is no matching nfs export for + # this resource. 
+ rv=0 + fi + ;; + +status|monitor) + verify_all || exit $OCF_ERR_ARGS + + if [ "${OCF_RESKEY_target}" = "*" ]; then + export OCF_RESKEY_target="\<world\>" + fi + + # + # Status check fix from Birger Wathne: + # * Exports longer than 14 chars have line breaks inserted, which + # broke the way the status check worked. + # + # Status check fix from Craig Lewis: + # * Exports with RegExp metacharacters need to be escaped. + # These metacharacters are: * ? . + # + export OCF_RESKEY_target_regexp=$(echo $OCF_RESKEY_target | \ + sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g') + + declare tmpfn + declare time_created time_now + declare -i delta=0 + + # + # Don't let anyone read the cache files. + # + umask 066 + + mkdir -p /var/cache/cluster + + if [ -n "$OCF_RESKEY_service_name" ] && [ "$OCF_RESKEY_use_cache" = "1" ]; then + + # + # For large #s of exports, we need to cache the information + # + tmpfn=/var/cache/cluster/nfsclient-status-cache-$OCF_RESKEY_service_name + + if [ -f "$tmpfn" ]; then + time_created=$(stat -c "%Y" $tmpfn) + time_now=$(date +"%s") + delta=$((time_now-time_created)) + fi + #echo "Cache age = $delta seconds" + else + delta=100 + # + # Create a different file if this is a separate instance + # + tmpfn=/var/cache/cluster/nfsclient-status-cache-$$ + fi + + if ! [ -f "$tmpfn" ] || [ $delta -gt 30 ]; then + #echo "Create $tmpfn. Nonexistent / expired / no service name" + exportfs -v > $tmpfn + fi + + cat $tmpfn | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -Piq \ + "^${OCF_RESKEY_path}[\t ]+${OCF_RESKEY_target_regexp}" + rv=$? + + if [ $rv -eq 0 ]; then + [ "$OCF_RESKEY_use_cache" = "1" ] || rm -f $tmpfn + exit 0 + fi + + declare OCF_RESKEY_target_tmp + OCF_RESKEY_target_tmp=$(clufindhostname -i "$OCF_RESKEY_target") + if [ $? -ne 0 ]; then + OCF_RESKEY_target_tmp=$(clufindhostname -n "$OCF_RESKEY_target") + if [ $? -ne 0 ]; then + [ "$OCF_RESKEY_use_cache" = "1" ] || rm -f $tmpfn + ocf_log err "nfsclient:$OCF_RESKEY_name is missing!" 
+ exit 1 + fi + fi + + cat $tmpfn | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -Pq \ + "^${OCF_RESKEY_path}[\t ]+${OCF_RESKEY_target_tmp}" + rv=$? + + [ "$OCF_RESKEY_use_cache" = "1" ] || rm -f $tmpfn + if [ $rv -eq 0 ]; then + exit 0 + fi + + ocf_log err "nfsclient:$OCF_RESKEY_name is missing!" + exit $OCF_NOT_RUNNING + ;; + +recover) + if [ "$OCF_RESKEY_allow_recover" = "0" ] || \ + [ "$OCF_RESKEY_allow_recover" = "no" ] || \ + [ "$OCF_RESKEY_allow_recover" = "false" ]; then + exit 1 + fi + + $0 stop || exit 1 + $0 start || exit 1 + ;; + +restart) + # + # Recover might better be "exportfs -r" - reexport + # + $0 stop || exit 1 + $0 start || exit 1 + ;; + +meta-data) + meta_data + exit 0 + ;; + +validate-all) + verify_all + rv=$? + ;; + +*) + echo "usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + rv=$OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $rv diff --git a/rgmanager/src/resources/nfsexport.sh.in b/rgmanager/src/resources/nfsexport.sh.in new file mode 100644 index 0000000..26084a2 --- /dev/null +++ b/rgmanager/src/resources/nfsexport.sh.in @@ -0,0 +1,256 @@ +#!@BASH_SHELL@ + +# +# NFS Export Script. Handles starting/stopping clurmtabd and doing +# the strange NFS stuff to get it to fail over properly. +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +. $(dirname $0)/ocf-shellfuncs + + +rmtabpid="" +nfsop_arg="" +rv=0 + +meta_data() +{ + cat <<EOT +<?xml version="1.0" ?> +<resource-agent name="nfsexport" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an NFS export path. Generally, these are + defined inline and implicitly; you should not have to + configure one of these. All of the relevant information + is inherited from the parent. + </longdesc> + + <shortdesc lang="en"> + This defines an NFS export. + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Descriptive name for this export. Generally, only + one export is ever defined, and it's called "generic + nfs export". + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="device" inherit="device"> + <longdesc lang="en"> + If you can see this, your GUI is broken. + </longdesc> + <shortdesc lang="en"> + If you can see this, your GUI is broken. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="path" inherit="mountpoint"> + <longdesc lang="en"> + If you can see this, your GUI is broken. + </longdesc> + <shortdesc lang="en"> + If you can see this, your GUI is broken. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="fsid" inherit="fsid"> + <longdesc lang="en"> + If you can see this, your GUI is broken. + </longdesc> + <shortdesc lang="en"> + If you can see this, your GUI is broken. 
+ </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="5"/> + <action name="stop" timeout="5"/> + <action name="recover" timeout="5"/> + + <!-- NFS Exports really don't do anything except provide a path + for nfs clients. So, status and monitor are no-ops --> + <action name="status" timeout="5" interval="1h"/> + <action name="monitor" timeout="5" interval="1h"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="30"/> + </actions> + + <special tag="rgmanager"> + <child type="nfsexport" forbid="1"/> + <child type="nfsclient"/> + </special> + +</resource-agent> +EOT +} + + +verify_device() +{ + if [ -z "$OCF_RESKEY_device" ]; then + ocf_log err "No device or label specified." + return $OCF_ERR_ARGS + fi + + [ -b "$OCF_RESKEY_device" ] && return 0 + [ -b "`findfs $OCF_RESKEY_device`" ] && return 0 + + ocf_log err "Device or label \"$OCF_RESKEY_device\" not valid" + + return $OCF_ERR_ARGS +} + + +verify_path() +{ + if [ -z "$OCF_RESKEY_path" ]; then + ocf_log err "No export path specified." + return $OCF_ERR_ARGS + fi + + [ -d "$OCF_RESKEY_path" ] && return 0 + + ocf_log err "$OCF_RESKEY_path is not a directory" + + return $OCF_ERR_ARGS +} + + +verify_all() +{ + declare -i ret=0 + + verify_device || ret=$OCF_ERR_ARGS + verify_path || ret=$OCF_ERR_ARGS + + return $ret +} + + +# +# Check if the NFS daemons are running. +# +nfs_daemons_running() +{ + declare NFS_DAEMONS="nfsd rpc.mountd rpc.statd" + + for daemon in $NFS_DAEMONS; do + ps -ef | grep "$daemon" | grep -v grep >/dev/null 2>&1 + if [ $? -ne 0 ]; then + ocf_log err \ + "NFS daemon $daemon is not running." + ocf_log err \ + "Verify that the NFS service run level script is enabled." + return 1 + fi + done + + return 0 +} + + +nfs_check() +{ + declare junk + + if nfs_daemons_running; then + return 0 + fi + + # + # Don't restart daemons during status check. 
+ # + if [ "$1" = "status" ]; then + return 1; + fi + + ocf_log err "Restarting NFS daemons" + # Note restart does less than stop/start + junk=$(/sbin/service nfslock stop) + junk=$(/sbin/service nfslock start) + junk=$(/sbin/service nfs stop) + junk=$(/sbin/service nfs start) + sleep 2 + + if ! nfs_daemons_running; then + ocf_log err "Failed restarting NFS daemons" + return 1 + fi + ocf_log notice "Successfully restarted NFS daemons" +} + + +case $1 in +start) + nfs_check start || exit 1 + rv=0 + ;; + +status|monitor) + nfs_check status || exit 1 + rv=0 + ;; + +stop) + nfs_check restart || exit 1 + rv=0 + ;; + +recover|restart) + $0 stop || exit $OCF_ERR_GENERIC + $0 start || exit $OCF_ERR_GENERIC + rv=0 + ;; + +meta-data) + meta_data + rv=0 + ;; + +validate-all) + verify_all + rv=$? + ;; +*) + echo "usage: $0 {start|status|monitor|stop|recover|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $rv diff --git a/rgmanager/src/resources/nfsserver.sh.in b/rgmanager/src/resources/nfsserver.sh.in new file mode 100644 index 0000000..e7130f0 --- /dev/null +++ b/rgmanager/src/resources/nfsserver.sh.in @@ -0,0 +1,611 @@ +#!@BASH_SHELL@ + +# +# NFS Server Script. Handles starting/stopping Servand doing +# the strange NFS stuff to get it to fail over properly. +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin + +V4RECOVERY="/var/lib/nfs/v4recovery" +PROC_V4RECOVERY="/proc/fs/nfsd/nfsv4recoverydir" + +export LC_ALL LANG PATH + +. $(dirname $0)/ocf-shellfuncs + +# SELinux information +which restorecon &> /dev/null && selinuxenabled &> /dev/null +export SELINUX_ENABLED=$? +if [ $SELINUX_ENABLED ]; then + export SELINUX_LABEL="$(ls -ldZ /var/lib/nfs/statd | cut -f4 -d' ')" +fi + +# strip trailing / off so pattern matching will work consistently. +while [ "${OCF_RESKEY_path#${OCF_RESKEY_path%?}}" = "/" ] +do + OCF_RESKEY_path="${OCF_RESKEY_path%/}" +done + +log_do() +{ + ocf_log debug $* + $* &> /dev/null + ret=$? 
+ if [ $ret -ne 0 ]; then + ocf_log debug "Failed: $*" + fi + return $ret +} + + +meta_data() +{ + cat <<EOT +<?xml version="1.0" ?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent name="nfsserver" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an NFS server resource. The NFS server + resource is useful for exporting NFSv4 file systems + to clients. Because of the way NFSv4 works, only + one NFSv4 resource may exist on a server at a + time. Additionally, it is not possible to use + the nfsserver resource when also using local instances + of NFS on each cluster node. + </longdesc> + + <shortdesc lang="en"> + This defines an NFS server resource. + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Descriptive name for this server. Generally, only + one server is ever defined per service. + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="path" inherit="mountpoint"> + <longdesc lang="en"> + This is the path you intend to export. Usually, this is + left blank, and the mountpoint of the parent file system + is used. This path is passed to nfsclient resources as + the export path when exportfs is called. + </longdesc> + <shortdesc lang="en"> + This is the path you intend to export. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="nfspath"> + <longdesc lang="en"> + This is the path containing shared NFS information which + is used for NFS recovery after a failover. This + is relative to the export path, and defaults to + ".clumanager/nfs". + </longdesc> + <shortdesc lang="en"> + This is the path containing shared NFS recovery + information, relative to the path parameter. 
+ </shortdesc> + <content type="string" default=".clumanager/nfs"/> + </parameter> + + <parameter name="statdport"> + <longdesc lang="en"> + Specifies the port number used for RPC listener sockets. If + this option is not specified, rpc.statd chooses a random + ephemeral port for each listener socket. This option can be + used to fix the port value of its listeners when SM_NOTIFY + requests must traverse a firewall between + clients and servers. + </longdesc> + <shortdesc lang="en"> + This is the port where rpc.statd should listen on. + </shortdesc> + <content type="integer" default=""/> + </parameter> + + <parameter name="krbhost"> + <longdesc lang="en"> + This is the Kerberos hostname, which should be set according + to the floating IP. + </longdesc> + <shortdesc lang="en"> + This is the Kerberos hostname. + </shortdesc> + <content type="string"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="5"/> + <action name="stop" timeout="5"/> + <action name="recover" timeout="5"/> + + <action name="status" timeout="5" interval="30"/> + <action name="monitor" timeout="5" interval="30"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="30"/> + </actions> + + <special tag="rgmanager"> + <attributes maxinstances="1"/> + <child type="nfsexport" forbid="1"/> + <child type="nfsserver" forbid="1"/> + <child type="nfsclient" start="1" stop="2"/> + <child type="ip" start="2" stop="1"/> + </special> + +</resource-agent> +EOT +} + + +verify_path() +{ + if [ -z "$OCF_RESKEY_path" ]; then + ocf_log err "No server path specified." + return $OCF_ERR_ARGS + fi + + [ -d "$OCF_RESKEY_path" ] && return 0 + + ocf_log err "$OCF_RESKEY_path is not a directory" + + return $OCF_ERR_ARGS +} + + +verify_nfspath() +{ + if [ -z "$OCF_RESKEY_nfspath" ]; then + echo No NFS data path specified. + return 1 + fi + + [ -d "$OCF_RESKEY_path" ] && return 0 + + # xxx do nothing for now. 
+ return 0 +} + + +verify_statdport() +{ + if [ -z "$OCF_RESKEY_statdport" ]; then + # this is fine, statdport is optional + return 0 + fi + + [ $OCF_RESKEY_statdport -gt 0 && $OCF_RESKEY_statdport -le 65535 ] && return 0 + + ocf_log err "$OCF_RESKEY_statdport is not a valid port number" + + return $OCF_ERR_ARGS +} + + +verify_all() +{ + verify_path || return 1 + verify_nfspath || return 1 + verify_statdport || return 1 + + return 0 +} + + +nfs_daemons() +{ + declare oper + declare val + + case $1 in + start) + ocf_log info "Starting NFS daemons" + if [ -z "$OCF_RESKEY_krbhost" ]; then + /etc/init.d/nfs start + rv=$? + else + unshare -u @BASH_SHELL@ -c "hostname $OCF_RESKEY_krbhost; /etc/init.d/nfs start" + rv=$? + unshare -u @BASH_SHELL@ -c "hostname $OCF_RESKEY_krbhost; /etc/init.d/rpcgssd start" + if [ $rv -ne 0 ]; then + ocf_log err "Failed to start rpcgssd" + return $OCF_ERR_GENERIC + fi + unshare -u @BASH_SHELL@ -c "hostname $OCF_RESKEY_krbhost; /etc/init.d/rpcidmapd start" + if [ $rv -ne 0 ]; then + ocf_log err "Failed to start rpcidmapd" + return $OCF_ERR_GENERIC + fi + fi + + if [ $rv -ne 0 ]; then + ocf_log err "Failed to start NFS daemons" + return $OCF_ERR_GENERIC + fi + + ocf_log debug "NFS daemons are running" + return $OCF_SUCCESS + ;; + stop) + ocf_log info "Stopping NFS daemons" + if [ -n "$OCF_RESKEY_krbhost"]; then + if ! /etc/init.d/rpcidmapd stop; then + ocf_log err "Failed to stop rpcidmapd" + return $OCF_ERR_GENERIC + fi + if ! /etc/init.d/rpcgssd stop; then + ocf_log err "Failed to stop rpcgssd" + return $OCF_ERR_GENERIC + fi + fi + + if ! 
/etc/init.d/nfs stop; then + ocf_log err "Failed to stop NFS daemons" + return $OCF_ERR_GENERIC + fi + + ocf_log debug "NFS daemons are stopped" + + return $OCF_SUCCESS + ;; + status|monitor) + declare recoverydir="$OCF_RESKEY_path/$OCF_RESKEY_nfspath/v4recovery" + val=$(cat $PROC_V4RECOVERY) + + [ "$val" = "$recoverydir" ] || ocf_log warning \ + "NFSv4 recovery directory is $val instead of $recoverydir" + /etc/init.d/nfs status + if [ $? -ne 0 ]; then + ocf_log err "NFS is not running" + return $OCF_NOT_RUNNING + fi + /etc/init.d/rpcgssd status + if [ $? -ne 0 ]; then + ocf_log err "rpcgssd is not running" + return $OCF_NOT_RUNNING + fi + /etc/init.d/rpcidmapd status + if [ $? -ne 0 ]; then + ocf_log err "rpcidmapd is not running" + return $OCF_NOT_RUNNING + fi + + ocf_log debug "NFS daemons are running" + return $OCF_SUCCESS + ;; + esac +} + + +create_tree() +{ + declare fp="$OCF_RESKEY_path/$OCF_RESKEY_nfspath" + + [ -d "$fp" ] || mkdir -p "$fp" + + [ -d "$fp/statd" ] || mkdir -p "$fp/statd" + [ -d "$fp/v4recovery" ] || mkdir -p "$fp/v4recovery" + + # + # Create our own private copy which we use for notifies. + # This way, we can be sure to advertise on possibly multiple + # IP addresses. + # + [ -d "$fp/statd/sm" ] || mkdir -p "$fp/statd/sm" + [ -d "$fp/statd/sm.bak" ] || mkdir -p "$fp/statd/sm.bak" + [ -d "$fp/statd/sm-ha" ] || mkdir -p "$fp/statd/sm-ha" + [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown -R rpcuser.rpcuser "$fp/statd" + + # Create if they don't exist + [ -f "$fp/etab" ] || touch "$fp/etab" + [ -f "$fp/xtab" ] || touch "$fp/xtab" + [ -f "$fp/rmtab" ] || touch "$fp/rmtab" + + [ $SELINUX_ENABLED ] && chcon -R "$SELINUX_LABEL" "$fp" + + # + # Generate a random state file. If this ends up being what a client + # already has in its list, that's bad, but the chances of this + # are small - and relocations should be rare. 
+ # + dd if=/dev/urandom of=$fp/state bs=1 count=4 &> /dev/null + [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/state" +} + +setup_v4recovery() +{ + declare recoverydir="$OCF_RESKEY_path/$OCF_RESKEY_nfspath/v4recovery" + + # mounts /proc/fs/nfsd for us + lsmod | grep -q nfsd + if [ $? -ne 0 ]; then + modprobe nfsd + fi + + val=$(cat "$PROC_V4RECOVERY") + + # Ensure start-after-start works + if [ "$val" = "$recoverydir" ]; then + return 0 + fi + + # + # If the value is not default, there may be another + # cluster service here already which has replaced + # the v4 recovery directory. In that case, + # we must refuse to go any further. + # + if [ "$val" != "$V4RECOVERY" ]; then + ocf_log err "NFSv4 recovery directory has an unexpected value: $val" + return 1 + fi + + # + # Redirect nfs v4 recovery dir to shared storage + # + echo "$recoverydir" > "$PROC_V4RECOVERY" + if [ $? -ne 0 ]; then + echo "Uh oh... echo failed!?" + fi + + val="$(cat $PROC_V4RECOVERY)" + if [ "$val" != "$recoverydir" ]; then + ocf_log err "Failed to change NFSv4 recovery path" + ocf_log err "Wanted: $recoverydir; got $val" + return 1 + fi + + return 0 +} + + +cleanup_v4recovery() +{ + # + # Restore nfsv4 recovery directory to default + # + echo "$V4RECOVERY" > "$PROC_V4RECOVERY" + return $? +} + + +is_bound() +{ + mount | grep -q "$1 on $2 type none (.*bind.*)" + return $? 
+} + + +setup_tree() +{ + declare fp="$OCF_RESKEY_path/$OCF_RESKEY_nfspath" + + if is_bound $fp/statd /var/lib/nfs/statd; then + ocf_log debug "$fp is already bound to /var/lib/nfs/statd" + return 0 + fi + + mount -o bind "$fp/statd" /var/lib/nfs/statd + cp -a "$fp"/*tab /var/lib/nfs + [ $SELINUX_ENABLED ] && restorecon /var/lib/nfs +} + + +cleanup_tree() +{ + declare fp="$OCF_RESKEY_path/$OCF_RESKEY_nfspath" + + if is_bound "$fp/statd" /var/lib/nfs/statd; then + log_do umount /var/lib/nfs/statd || return 1 + else + ocf_log debug "$fp is not bound to /var/lib/nfs/statd" + fi + + cp -a /var/lib/nfs/*tab "$fp" + + return 0 +} + +start_locking() +{ + declare ret + declare statdport="" + [ -x /sbin/rpc.statd ] || return 1 + + # + # Synchronize these before starting statd + # + cp -f /var/lib/nfs/statd/sm-ha/* /var/lib/nfs/statd/sm 2> /dev/null + cp -f /var/lib/nfs/statd/sm/* /var/lib/nfs/statd/sm-ha 2> /dev/null + + if pidof rpc.statd &> /dev/null; then + ocf_log debug "rpc.statd is already running" + return 0 + fi + + if [ -n "$OCF_RESKEY_statdport" ]; then + statdport="-p $OCF_RESKEY_statdport" + fi + + # + # Set this resrouce script as the callout program. We are evil. + # In cases where we want to preserve lock information, this is needed + # because we can't do the "copy" that we do on the down-state... + # + ocf_log info "Starting rpc.statd" + rm -f /var/run/sm-notify.pid + rpc.statd -H $0 -d $statdport + ret=$? 
+ if [ $ret -ne 0 ]; then + ocf_log err "Failed to start rpc.statd" + return $ret + fi + touch /var/lock/subsys/nfslock + return $ret +} + + +terminate() +{ + declare pids + declare i=0 + + while : ; do + pids=$(pidof $1) + [ -z "$pids" ] && return 0 + kill $pids + sleep 1 + ((i++)) + [ $i -gt 3 ] && return 1 + done +} + + +killkill() +{ + declare pids + declare i=0 + + while : ; do + pids=$(pidof $1) + [ -z "$pids" ] && return 0 + kill -9 $pids + sleep 1 + ((i++)) + [ $i -gt 3 ] && return 1 + done +} + +stop_process() +{ + declare process=$1 + + ocf_log info "Stopping $process" + if terminate $process; then + ocf_log debug "$process is stopped" + else + if killkill $process; then + ocf_log debug "$process is stopped" + else + ocf_log debug "Failed to stop $process" + return 1 + fi + fi + return 0 +} + +stop_locking() +{ + ret=0 + + # sm-notify can prevent umount of /var/lib/nfs/statd if + # it is still trying to notify unresponsive clients. + stop_process sm-notify + if [ $? -ne 0 ]; then + ret=1 + fi + + stop_process rpc.statd + if [ $? -ne 0 ]; then + ret=1 + fi + + return $ret +} + + +case $1 in +start) + # Check for and source configuration file + ocf_log info "Starting NFS Server $OCF_RESKEY_name" + create_tree || exit 1 + setup_tree || exit 1 + setup_v4recovery || exit 1 + + start_locking + nfs_daemons start + rv=$? + if [ $rv -eq 0 ]; then + ocf_log info "Started NFS Server $OCF_RESKEY_name" + exit 0 + fi + + ocf_log err "Failed to start NFS Server $OCF_RESKEY_name" + exit $rv + ;; + +status|monitor) + nfs_daemons status + exit $? + ;; + +stop) + if ! nfs_daemons stop; then + ocf_log err "Failed to stop NFS Server $OCF_RESKEY_name" + exit $OCF_ERR_GENERIC + fi + + # Copy the current notify list into our private area + ocf_log debug "Copying sm files for future notification..." 
+ rm -f /var/lib/nfs/statd/sm-ha/* &> /dev/null + cp -f /var/lib/nfs/statd/sm/* /var/lib/nfs/statd/sm-ha &> /dev/null + + stop_locking || exit 1 + cleanup_v4recovery + cleanup_tree || exit 1 + exit 0 + ;; + +add-client) + ocf_log debug "$0 $1 $2 $3" + touch /var/lib/nfs/statd/sm/$2 + touch /var/lib/nfs/statd/sm-ha/$2 + exit 0 + ;; + +del-client) + ocf_log debug "$0 $1 $2 $3" + touch /var/lib/nfs/statd/sm/$2 + rm -f /var/lib/nfs/statd/sm-ha/$2 + exit 0 + ;; + +recover|restart) + $0 stop || exit $OCF_ERR_GENERIC + $0 start || exit $OCF_ERR_GENERIC + exit 0 + ;; + +meta-data) + meta_data + exit 0 + ;; + +validate-all) + verify_all + exit $? + ;; +*) + echo "usage: $0 {start|stop|status|monitor|restart|recover|add-client|del-client|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit 0 diff --git a/rgmanager/src/resources/ocf-shellfuncs.in b/rgmanager/src/resources/ocf-shellfuncs.in new file mode 100644 index 0000000..e895f5d --- /dev/null +++ b/rgmanager/src/resources/ocf-shellfuncs.in @@ -0,0 +1,167 @@ +#!@BASH_SHELL@ +# +# $Id$ +# +# Common helper functions for the OCF Resource Agents supplied by +# heartbeat. +# +# Copyright (C) 2004 SUSE LINUX AG, Lars Marowsky-Bree. All Rights Reserved. +# + +# TODO: Some of this should probably split out into a generic OCF +# library for shell scripts, but for the time being, we'll just use it +# ourselves... +# + +# TODO wish-list: +# - Generic function for evaluating version numbers +# - Generic function(s) to extract stuff from our own meta-data +# - Logging function which automatically adds resource identifier etc +# prefixes +# TODO: Move more common functionality for OCF RAs here. +# +__SCRIPT_NAME=`basename $0` + +# lhh - determine if we're a dumb terminal +consoletype &> /dev/null +if [ $? 
-eq 1 ]; then + __SERIAL="yes" +fi + +__LOG_PID=$PPID +__LOG_NAME=$(basename $(readlink /proc/$PPID/exe)) + +pretty_echo() { + declare pretty + declare n="[0m" + declare __OCF_PRIO="$1" + shift + declare __OCF_MSG="$*" + + if [ -n "$__SERIAL" ]; then + echo "<$__OCF_PRIO> $__OCF_MSG" + return 0 + fi + + case $__OCF_PRIO in + emerg) pretty="[34;1;5m";; + alert) pretty="[34;1m";; + crit|critical) pretty="[34;1m";; + err|error) pretty="[0;34m";; + warn|warning) pretty="[35;1m";; + note|notice) pretty="[37;1m";; + info) pretty="[37;1m";; + debug|dbg) pretty="[0m";; + *) pretty="[37;1m";; + esac + + echo "$n<$pretty$__OCF_PRIO$n> [10G$__OCF_MSG" + return 0 +} + +__ocf_set_defaults() { + __OCF_ACTION="$1" + + # Return to sanity for the agents... + unset LANG + LC_ALL=C + export LC_ALL + + # TODO: Review whether we really should source this. Or rewrite + # to match some emerging helper function syntax...? This imports + # things which no OCF RA should be using... + + OCF_SUCCESS=0 + OCF_ERR_GENERIC=1 + OCF_ERR_ARGS=2 + OCF_ERR_UNIMPLEMENTED=3 + OCF_ERR_PERM=4 + OCF_ERR_INSTALLED=5 + OCF_ERR_CONFIGURED=6 + OCF_NOT_RUNNING=7 + + if [ -z "$OCF_RESOURCE_TYPE" ]; then + : ${OCF_RESOURCE_TYPE:=$__SCRIPT_NAME} + fi + + if [ -z "$OCF_RA_VERSION_MAJOR" ]; then + : We are being invoked as an init script. + : Fill in some things with reasonable values. + : ${OCF_RESOURCE_INSTANCE:="default"} + return 0 + fi + + if [ -z "$OCF_ROOT" ]; then + OCF_ROOT=$(dirname $0) + fi + if [ ! -d "$OCF_ROOT" ]; then + ocf_log err "OCF_ROOT points to non-directory $OCF_ROOT." + exit $OCF_ERR_GENERIC + fi + + # TODO: Anything else we should be setting and thus checking? + # There is nothing in this script which depends on the version + # of the API. TESTING THIS HERE IS A BUG. THIS SHOULD BE + # tested by the script that's invoked us. FIXME!! + if [ "x$OCF_RA_VERSION_MAJOR" != "x1" ]; then + ocf_log err "This script is OCF RA API 1.x compliant only!" 
+ exit $OCF_ERR_UNIMPLEMENTED + fi + # TODO: Should the minor level really be a number and not rather + # a list of flags...? + # AlanR says -- absolutely not -- a list of flags is good for a list + # of implemented features, not a version compiliance + # perhaps some future version might have such a list of + # flags, but that would be _in addition to_ the minor version number + if [ -z "$OCF_RA_VERSION_MINOR" ]; then + ocf_log err "No OCF RA minor version set." + exit $OCF_ERR_UNIMPLEMENTED + fi + + if [ "x$__OCF_ACTION" = "xmeta-data" ]; then + OCF_RESOURCE_INSTANCE="undef" + fi + + if [ -z "$OCF_RESOURCE_INSTANCE" ]; then + ocf_log err "Need to tell us our resource instance name." + exit $OCF_ERR_ARGS + fi +} + + +ocf_log() { + # TODO: Revisit and implement internally. + if + [ $# -lt 2 ] + then + ocf_log err "Not enough arguments [$#] to ocf_log." + fi + + declare __OCF_PRIO="$1" + declare -i __OCF_PRIO_N + + shift + + declare __OCF_MSG="$*" + + case "${__OCF_PRIO}" in + emerg) __OCF_PRIO_N=0;; # Not in original ocf-shellfuncs + alert) __OCF_PRIO_N=1;; # Not in original ocf-shellfuncs + crit|critical) __OCF_PRIO_N=2;; + err|error) __OCF_PRIO_N=3;; + warn|warning) __OCF_PRIO_N=4;; + note|notice) __OCF_PRIO_N=5;; # Not in original ocf-shellfuncs + info) __OCF_PRIO_N=6;; + debug|dbg) __OCF_PRIO_N=7;; + *) __OCF_PRIO_N=5;; # Defaults to INFO + esac + + pretty_echo $__OCF_PRIO "$__OCF_MSG" + + if [ -z "`which clulog 2> /dev/null`" ]; then + return 0 + fi + clulog -m "$OCF_RESOURCE_TYPE" -s $__OCF_PRIO_N "$__OCF_MSG" +} + +__ocf_set_defaults "$@" diff --git a/rgmanager/src/resources/openldap.metadata b/rgmanager/src/resources/openldap.metadata new file mode 100644 index 0000000..9bd7193 --- /dev/null +++ b/rgmanager/src/resources/openldap.metadata @@ -0,0 +1,98 @@ +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="openldap"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an instance of Open LDAP + </longdesc> + <shortdesc 
lang="en"> + Defines an Open LDAP server + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Specifies a service name for logging and other purposes + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define absolute path to configuration file + </longdesc> + <shortdesc lang="en"> + Config File + </shortdesc> + <content type="string" default="/etc/openldap/slapd.conf"/> + </parameter> + + <parameter name="url_list"> + <longdesc lang="en"> + Serve URL list. Default behaviour of URL list is changed and + enhanced. eg. ldap:/// won't bind all IP address on the + computer but to all IP addresses in service. Using + ldap://0:port/ will bind to all IP addresses for service on + given port. + </longdesc> + <shortdesc lang="en"> + URL list + </shortdesc> + <content type="string" default="ldap:///"/> + </parameter> + + <parameter name="slapd_options"> + <longdesc lang="en"> + Other command-line options for slapd + </longdesc> + <shortdesc lang="en"> + Other command-line options for slapd + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="shutdown_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service shutdown + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service shutdown + </shortdesc> + <content type="integer" /> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. 
+ </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- Checks to see if it''s mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/openldap.sh.in b/rgmanager/src/resources/openldap.sh.in new file mode 100644 index 0000000..62377c6 --- /dev/null +++ b/rgmanager/src/resources/openldap.sh.in @@ -0,0 +1,227 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. 
$(dirname $0)/utils/ra-skelet.sh + +declare LDAP_SLAPD=/usr/sbin/slapd +declare LDAP_pid_file="`generate_name_for_pid_file`" +declare LDAP_conf_dir="`generate_name_for_conf_dir`" +declare LDAP_gen_config_file="$LDAP_conf_dir/slapd.conf" +declare LDAP_url_list + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ ! -r "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE $OCF_RESKEY_config_file + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + clog_service_verify $CLOG_SUCCEED + + return 0 +} + +generate_url_list() +{ + declare ldap_url_source=$1 + declare ip_addresses=$2 + declare url_list + declare tmp; + + for u in $ldap_url_source; do + if [[ "$u" =~ ':///' ]]; then + for z in $ip_addresses; do + tmp=`echo $u | sed "s,://,://$z,"` + url_list="$url_list $tmp" + done + elif [[ "$u" =~ '://0:' ]]; then + for z in $ip_addresses; do + tmp=`echo $u | sed "s,://0:,://$z:,"` + url_list="$url_list $tmp" + done + else + url_list="$url_list $u" + fi + done + + echo $url_list +} + +generate_config_file() +{ + declare original_file="$1" + declare generated_file="$2" + + if [ -f "$generated_file" ]; then + sha1_verify "$generated_file" + if [ $? 
-ne 0 ]; then + clog_check_sha1 $CLOG_FAILED + return 0 + fi + fi + + clog_generate_config $CLOG_INIT "$original_file" "$generated_file" + + generate_configTemplate "$generated_file" "$1" + echo "pidfile \"$LDAP_pid_file\"" >> $generated_file + echo >> $generated_file + sed 's/^[[:space:]]*pidfile/### pidfile/i' < "$original_file" >> "$generated_file" + + sha1_addToFile "$generated_file" + clog_generate_config $CLOG_SUCCEED "$original_file" "$generated_file" + + return 0; +} + +start() +{ + clog_service_start $CLOG_INIT + + create_pid_directory + create_conf_directory "$LDAP_conf_dir" + check_pid_file "$LDAP_pid_file" + + if [ $? -ne 0 ]; then + clog_check_pid $CLOG_FAILED "$LDAP_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + clog_looking_for $CLOG_INIT "IP Addresses" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -z "$ip_addresses" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_SUCCEED "IP Addresses" + + LDAP_url_list=`generate_url_list "$OCF_RESKEY_url_list" "$ip_addresses"` + + if [ -z "$LDAP_url_list" ]; then + ocf_log error "Generating URL List for $OCF_RESOURCE_INSTANCE > Failed" + return $OCF_ERR_GENERIC + fi + + generate_config_file "$OCF_RESKEY_config_file" "$LDAP_gen_config_file" + + $LDAP_SLAPD -f "$LDAP_gen_config_file" -n "$OCF_RESOURCE_INSTANCE" \ + -h "$LDAP_url_list" $OCF_RESKEY_slapd_options + + if [ $? -ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_start $CLOG_SUCCEED + + return 0; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + stop_generic "$LDAP_pid_file" "$OCF_RESKEY_shutdown_wait" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$LDAP_pid_file" + if [ $? 
-ne 0 ]; then + clog_service_status $CLOG_FAILED "$LDAP_pid_file" + return $OCF_ERR_GENERIC + fi + + clog_service_status $CLOG_SUCCEED + return 0 +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all && stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? + ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/oracledb.sh.in b/rgmanager/src/resources/oracledb.sh.in new file mode 100644 index 0000000..0ff4d4a --- /dev/null +++ b/rgmanager/src/resources/oracledb.sh.in @@ -0,0 +1,1031 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2013 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# Author(s): +# Hardy Merrill <hmerrill at redhat.com> +# Lon Hohberger <lhh at redhat.com> +# Michael Moon <Michael dot Moon at oracle.com> +# Ryan McCabe <rmccabe at redhat.com> +# +# NOTES: +# +# (1) You can comment out the LOCKFILE declaration below. 
This will prevent +# the need for this script to access anything outside of the ORACLE_HOME +# path. +# +# (2) You MUST customize ORACLE_USER, ORACLE_HOME, ORACLE_SID, and +# ORACLE_HOSTNAME to match your installation if not running from within +# rgmanager. +# +# (3) Do NOT place this script in shared storage; place it in ORACLE_USER's +# home directory in non-clustered environments and /usr/share/cluster +# in rgmanager/Red Hat cluster environments. +# +# Oracle is a registered trademark of Oracle Corporation. +# Oracle9i is a trademark of Oracle Corporation. +# Oracle10g is a trademark of Oracle Corporation. +# Oracle11g is a trademark of Oracle Corporation. +# All other trademarks are property of their respective owners. +# + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. $(dirname $0)/utils/ra-skelet.sh + +. /etc/init.d/functions + +declare SCRIPT="`basename $0`" +declare SCRIPTDIR="`dirname $0`" + +[ -n "$OCF_RESKEY_user" ] && ORACLE_USER=$OCF_RESKEY_user +[ -n "$OCF_RESKEY_home" ] && ORACLE_HOME=$OCF_RESKEY_home +[ -n "$OCF_RESKEY_name" ] && ORACLE_SID=$OCF_RESKEY_name +[ -n "$OCF_RESKEY_listener_name" ] && ORACLE_LISTENER=$OCF_RESKEY_listener_name +[ -n "$OCF_RESKEY_lockfile" ] && LOCKFILE=$OCF_RESKEY_lockfile +[ -n "$OCF_RESKEY_type" ] && ORACLE_TYPE=$OCF_RESKEY_type +[ -n "$OCF_RESKEY_vhost" ] && ORACLE_HOSTNAME=$OCF_RESKEY_vhost +[ -n "$OCF_RESKEY_tns_admin" ] && export TNS_ADMIN=$OCF_RESKEY_tns_admin + +###################################################### +# Customize these to match your Oracle installation. # +###################################################### +# +# 1. Oracle user. Must be the same across all cluster members. In the event +# that this script is run by the super-user, it will automatically switch +# to the Oracle user and restart. Oracle needs to run as the Oracle +# user, not as root. +# +#[ -n "$ORACLE_USER" ] || ORACLE_USER=oracle + +# +# 2. Oracle home. 
This is set up during the installation phase of Oracle. +# From the perspective of the cluster, this is generally the mount point +# you intend to use as the mount point for your Oracle Infrastructure +# service. +# +#[ -n "$ORACLE_HOME" ] || ORACLE_HOME=/mnt/oracle/home + +# +# 3. This is your SID. This is set up during oracle installation as well. +# +#[ -n "$ORACLE_SID" ] || ORACLE_SID=orcl + +# +# 4. The oracle user probably doesn't have the permission to write to +# /var/lock/subsys, so use the user's home directory. +# +#[ -n "$LOCKFILE" ] || LOCKFILE="/home/$ORACLE_USER/.oracle-ias.lock" +[ -n "$LOCKFILE" ] || LOCKFILE="$ORACLE_HOME/.oracle-ias.lock" +#[ -n "$LOCKFILE" ] || LOCKFILE="/var/lock/subsys/oracle-ias" # Watch privileges + +# +# 5. Type of Oracle Database. Currently supported: 10g 10g-iAS(untested!) +# +[ -n "$ORACLE_TYPE" ] || ORACLE_TYPE="base-em" + +# +# 6. Oracle virtual hostname. This is the hostname you gave Oracle during +# installation. +# +#[ -n "$ORACLE_HOSTNAME" ] || ORACLE_HOSTNAME=svc0.foo.test.com + + + +########################################################################### +ORACLE_TYPE=`echo $ORACLE_TYPE | tr A-Z a-z` +export ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE ORACLE_TYPE +export ORACLE_HOSTNAME + + +########################## +# Set up paths we'll use. Not all are used by all the different types of +# Oracle installations +# +export LD_LIBRARY_PATH=$ORACLE_HOME/lib:$ORACLE_HOME/opmn/lib +export PATH=$ORACLE_HOME/bin:$ORACLE_HOME/opmn/bin:$ORACLE_HOME/dcm/bin:$PATH + +declare -i RESTART_RETRIES=0 +declare -r DB_PROCNAMES="pmon" +#declare -r DB_PROCNAMES="pmonXX" # testing +#declare -r DB_PROCNAMES="pmon smon dbw0 lgwr" + +declare -r LSNR_PROCNAME="tnslsnr" +#declare -r LSNR_PROCNAME="tnslsnrXX" # testing + +# clulog will not log messages when run by the oracle user. +# This is a hack to work around that. 
if [ "`id -u`" = "`id -u $ORACLE_USER`" ]; then
	# We are running as the Oracle user: replace ocf_log with a version
	# that writes to syslog through logger(1).
	#   $1   - syslog priority (used as daemon.<prio>)
	#   $2.. - message text
	ocf_log() {
		prio=$1
		shift
		logger -i -p daemon."$prio" -- "$*"
	}
fi

##########################################################
# (Hopefully) No user-serviceable parts below this line. #
##########################################################

#
# Print the resource-agent metadata (XML) for this agent on stdout.
#
meta_data()
{
	cat <<EOT
<?xml version="1.0" ?>
<resource-agent name="oracledb" version="rgmanager 2.0">
    <version>1.0</version>

    <longdesc lang="en">
        Oracle 10g/11g Failover Instance
    </longdesc>
    <shortdesc lang="en">
        Oracle 10g/11g Failover Instance
    </shortdesc>

    <parameters>
        <parameter name="name" primary="1">
            <longdesc lang="en">
                Instance name (SID) of oracle instance
            </longdesc>
            <shortdesc lang="en">
                Oracle SID
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="listener_name" unique="1">
            <longdesc lang="en">
                Oracle Listener Instance Name.  If you have multiple
                instances of Oracle running, it may be necessary to
                have multiple listeners on the same machine with
                different names.
            </longdesc>
            <shortdesc lang="en">
                Oracle Listener Instance Name
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="user" required="1">
            <longdesc lang="en">
                Oracle user name.  This is the user name of the Oracle
                user which the Oracle AS instance runs as.
            </longdesc>
            <shortdesc lang="en">
                Oracle User Name
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="home" required="1">
            <longdesc lang="en">
                This is the Oracle (application, not user) home directory.
                This is configured when you install Oracle.
            </longdesc>
            <shortdesc lang="en">
                Oracle Home Directory
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="type" required="0">
            <longdesc lang="en">
                This is the Oracle installation type:
                base - Database Instance and Listener only
                base-11g - Oracle11g Database Instance and Listener Only
                base-em (or 10g) - Database, Listener, Enterprise Manager,
                                   and iSQL*Plus
                base-em-11g - Database, Listener, Enterprise Manager dbconsole
                ias (or 10g-ias) - Internet Application Server (Infrastructure)
            </longdesc>
            <shortdesc lang="en">
                Oracle Installation Type
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="vhost" required="0" unique="1">
            <longdesc lang="en">
                Virtual Hostname matching the installation hostname of
                Oracle 10g.  Note that during the start/stop of an oracledb
                resource, your hostname will temporarily be changed to
                this hostname.  As such, it is recommended that oracledb
                resources be instanced as part of an exclusive service only.
            </longdesc>
            <shortdesc lang="en">
                Virtual Hostname
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="tns_admin" required="0" unique="1">
            <longdesc lang="en">
                Full path to the directory that contains the Oracle
                listener tnsnames.ora configuration file.  The shell
                variable TNS_ADMIN is set to the value provided.
            </longdesc>
            <shortdesc lang="en">
                Full path to the directory containing tnsnames.ora
            </shortdesc>
            <content type="string"/>
        </parameter>
    </parameters>

    <actions>
        <action name="start" timeout="900"/>
        <action name="stop" timeout="90"/>
        <action name="recover" timeout="990"/>

        <!-- Checks to see if it's mounted in the right place -->
        <action name="status" timeout="10"/>
        <action name="monitor" timeout="10"/>

        <action name="status" depth="10" timeout="30" interval="30"/>
        <action name="monitor" depth="10" timeout="30" interval="30"/>

        <action name="meta-data" timeout="5"/>
        <action name="validate-all" timeout="5"/>
    </actions>

    <special tag="rgmanager">
        <attributes maxinstances="1"/>
    </special>
</resource-agent>
EOT
}

#
# Start Oracle9i/10g/11g (database portion)
#
# Pipes a "startup" script into sqlplus (connected "/ as sysdba") and
# inspects both the exit code and the captured output.
#
# Returns 0 on success, 1 if sqlplus exits non-zero or its output
# contains "ORA-" or "failure".
#
start_db()
{
	declare -i rv
	declare startup_cmd
	declare startup_stdout

	ocf_log info "Starting Oracle DB $ORACLE_SID"

	# Set up our sqlplus script.  Basically, we're trying to
	# capture output in the hopes that it's useful in the case
	# that something doesn't work properly.
	startup_cmd="set heading off;\nstartup;\nquit;\n"
	startup_stdout=$(echo -e "$startup_cmd" | sqlplus -S "/ as sysdba")
	rv=$?

	# Dump output to syslog for debugging
	ocf_log debug "[$ORACLE_SID] [$rv] sent $startup_cmd"
	ocf_log debug "[$ORACLE_SID] [$rv] got $startup_stdout"

	if [ $rv -ne 0 ]; then
		ocf_log error "Starting Oracle DB $ORACLE_SID failed, sqlplus returned $rv"
		return 1
	fi

	# If we see:
	# ORA-.....: failure, we failed
	# Troubleshooting:
	#   ORA-00845 - Try rm -f /dev/shm/ora_*
	#   ORA-01081 - Try echo -e 'shutdown abort;\nquit;'|sqlplus "/ as sysdba"
	if [[ "$startup_stdout" =~ "ORA-" ]] || [[ "$startup_stdout" =~ "failure" ]]; then
		ocf_log error "Starting Oracle DB $ORACLE_SID failed, found errors in stdout"
		return 1
	fi

	ocf_log info "Started Oracle DB $ORACLE_SID successfully"
	return 0
}


#
# Stop Oracle (database portion)
#
#   $1 - shutdown mode handed to sqlplus "shutdown" (defaults to
#        "immediate" when empty)
#
# Returns 0 on success, 1 if sqlplus exits non-zero or its output
# contains "ORA-" or "failure".
#
stop_db()
{
	declare stop_cmd
	declare stop_stdout
	declare -i rv
	declare how_shutdown="$1"

	if [ -z "$1" ]; then
		how_shutdown="immediate"
	fi

	ocf_log info "Stopping Oracle DB $ORACLE_SID $how_shutdown"

	# Setup for Stop ...
	stop_cmd="set heading off;\nshutdown $how_shutdown;\nquit;\n"
	stop_stdout=$(echo -e "$stop_cmd" | sqlplus -S "/ as sysdba")
	rv=$?

	# Log stdout of the stop command
	ocf_log debug "[$ORACLE_SID] sent stop command $stop_cmd"
	ocf_log debug "[$ORACLE_SID] got $stop_stdout"

	# sqlplus returned failure. We'll return failed to rhcs
	if [ $rv -ne 0 ]; then
		ocf_log error "Stopping Oracle DB $ORACLE_SID failed, sqlplus returned $rv"
		return 1
	fi

	# If we see 'ORA-' or 'failure' in stdout, we're done.
	# This must inspect the output of the shutdown command itself;
	# start_db's $startup_stdout is local to that function and is
	# always empty here.
	if [[ "$stop_stdout" =~ "ORA-" ]] || [[ "$stop_stdout" =~ "failure" ]]; then
		ocf_log error "Stopping Oracle DB $ORACLE_SID failed, errors in stdout"
		return 1
	fi

	ocf_log info "Stopped Oracle DB $ORACLE_SID successfully"
	return 0
}


#
# Destroy any remaining ora_*_$ORACLE_SID processes with SIGKILL.
# Each kill is logged; the function itself always returns 0.
#
force_cleanup()
{
	declare pids
	declare pid
	declare -i rv

	# Patch from Shane Bradley to fix 471266
	pids=`ps ax | grep "ora_.*_${ORACLE_SID}" | grep -v grep | awk '{print $1}'`

	ocf_log error "Not all Oracle processes for $ORACLE_SID exited cleanly, killing"

	for pid in $pids; do
		kill -9 $pid
		rv=$?
		if [ $rv -eq 0 ]; then
			ocf_log info "Cleanup $ORACLE_SID Killed PID $pid"
		else
			ocf_log error "Cleanup $ORACLE_SID Kill PID $pid failed: $rv"
		fi
	done

	return 0
}



#
# Wait for oracle processes to exit.  Polls once per second and, after
# 90 polls, gives up and calls force_cleanup.  Always returns 0.
#
exit_idle()
{
	declare -i n=0

	ocf_log debug "Waiting for Oracle processes for $ORACLE_SID to terminate..."
	# grep -q "." keeps this loop going if the previous commands produce any stdout
	while ps ax | grep "ora_.*_${ORACLE_SID}" | grep -v grep | awk '{print $1}' | grep -q "."; do
		if [ $n -ge 90 ]; then
			ocf_log debug "Timed out while waiting for Oracle processes for $ORACLE_SID to terminate"
			force_cleanup
			return 0
		fi
		sleep 1
		((n++))
	done

	ocf_log debug "All Oracle processes for $ORACLE_SID have terminated"
	return 0
}


#
# Get database background process status.  Restart it if it failed and
# we have seen the lock file.
#
#   $1 - 0 if the lock file was seen (service expected to be running),
#        non-zero otherwise
#
# Returns 0 if every process in $DB_PROCNAMES is running (or was
# restarted), 3 if a process is down and no lock was seen, 1 on failure.
#
get_db_status()
{
	declare -i subsys_lock=$1
	declare -i i=0
	declare procname
	declare ora_procname

	for procname in $DB_PROCNAMES ; do

		ora_procname="ora_${procname}_${ORACLE_SID}"

		status $ora_procname
		if [ $? -eq 0 ] ; then
			# This one's okay; go to the next one.
			continue
		fi

		#
		# We're not supposed to be running, and we are,
		# in fact, not running...
		# XXX only works when monitoring one db process; consider
		# extending in future.
		#
		if [ $subsys_lock -ne 0 ]; then
			return 3
		fi

		for (( i=$RESTART_RETRIES ; i; i-- )) ; do
			# this db process is down - stop and
			# (re)start all ora_XXXX_$ORACLE_SID processes
			ocf_log info "Restarting Oracle Database $ORACLE_SID"
			stop_db immediate
			if [ $? -ne 0 ] ; then
				# stop failed - return 1
				ocf_log error "Error stopping Oracle Database $ORACLE_SID"
				return 1
			fi

			start_db
			if [ $? -eq 0 ] ; then
				# ora_XXXX_$ORACLE_SID processes started
				# successfully, so break out of the
				# stop/start # 'for' loop
				ocf_log info "Restarted Oracle Database $ORACLE_SID successfully"
				break
			fi
		done

		# i reaches 0 only when every retry was used up (or when
		# RESTART_RETRIES is 0 and no restart was attempted at all).
		if [ $i -eq 0 ]; then
			# stop/start's failed - return 1 (failure)
			ocf_log error "Failed to restart Oracle Database $ORACLE_SID after $RESTART_RETRIES tries"
			return 1
		fi
	done
	return 0
}


#
# Get the status of the Oracle listener process
#
#   $1 - 0 if the lock file was seen (service expected to be running),
#        non-zero otherwise
#
# Returns 0 if the listener is up (or was restarted), 3 if it is down
# and no lock was seen, 1 if it is down and could not be restarted.
#
get_lsnr_status()
{
	declare -i subsys_lock=$1
	declare -i rv
	declare -i i=0
	declare lsnrctl_stdout

	ocf_log debug "Checking status for listener $ORACLE_LISTENER"
	lsnrctl status $ORACLE_LISTENER >& /dev/null
	rv=$?
	if [ $rv -eq 0 ] ; then
		ocf_log debug "Listener $ORACLE_LISTENER is up"
		return 0 # Listener is running fine
	fi

	# We're not supposed to be running, and we are,
	# in fact, not running.  Return 3
	if [ $subsys_lock -ne 0 ]; then
		ocf_log debug "Listener $ORACLE_LISTENER is stopped as expected"
		return 3
	fi

	# Listener is NOT running (but should be) - try to restart
	for (( i=$RESTART_RETRIES ; i; i-- )) ; do
		ocf_log info "Listener $ORACLE_LISTENER is down, attempting to restart"
		lsnrctl start $ORACLE_LISTENER >& /dev/null
		lsnrctl status $ORACLE_LISTENER >& /dev/null
		if [ $? -eq 0 ] ; then
			ocf_log info "Listener $ORACLE_LISTENER was restarted successfully"
			break # Listener was (re)started and is running fine
		fi
	done

	if [ $i -eq 0 ]; then
		# stop/start's failed - return 1 (failure)
		ocf_log error "Failed to restart listener $ORACLE_LISTENER after $RESTART_RETRIES tries"
		return 1
	fi

	# Re-check once more, this time keeping the output for the log.
	lsnrctl_stdout=$(lsnrctl status $ORACLE_LISTENER)
	rv=$?
	if [ $rv -ne 0 ] ; then
		ocf_log error "Starting listener $ORACLE_LISTENER failed: $rv output $lsnrctl_stdout"
		return 1 # Problem restarting the Listener
	fi

	ocf_log info "Listener $ORACLE_LISTENER started successfully"
	return 0 # Success restarting the Listener
}


#
# usage: get_opmn_proc_status <ias-component> [process-type]
#
# Get the status of a specific OPMN-managed process.  If process-type
# is not specified, assume the process-type is the same as the ias-component.
# If the lock-file exists (or no lock file is specified), try to restart
# the given process-type if it is not running.
#
# Returns 0 if the process is reported "Alive" or "Init", 1 otherwise
# (including when no component name is given).
#
# NOTE(review): the status probe lives inside the retry loop, so with
# RESTART_RETRIES=0 the loop body never runs and this function returns 1
# without ever asking opmnctl - confirm whether that is intended.
#
get_opmn_proc_status()
{
	declare comp=$1
	declare opmntype=$2
	declare type_pretty
	declare _pid _status
	declare -i i=0

	[ -n "$comp" ] || return 1
	if [ -z "$opmntype" ]; then
		opmntype=$comp
	else
		type_pretty=" [$opmntype]"
	fi

	for (( i=$RESTART_RETRIES ; i; i-- )) ; do

		# Reduce the matching "opmnctl status" row to "<pid> <status>"
		_status=`opmnctl status | grep "^$comp " | grep " $opmntype " | cut -d '|' -f3,4 | sed -e 's/ //g' -e 's/|/ /g'`

		_pid=`echo $_status | cut -f1 -d' '`
		_status=`echo $_status | cut -f2 -d' '`
		if [ "${_status}" == "Alive" ] || [ "${_status}" == "Init" ]; then
			if [ $i -lt $RESTART_RETRIES ] ; then
				ocf_log info "$comp$type_pretty restarted"
			fi
			ocf_log info "$comp$type_pretty (pid $_pid) is running..."
			break
		else
			ocf_log info "$comp$type_pretty is stopped"

			#
			# Try to restart it, but don't worry if we fail.  OPMN
			# is supposed to handle restarting these anyway.
			#
			# If it's running and you tell OPMN to "start" it,
			# you will get an error.
			#
			# If it's NOT running and you tell OPMN to "restart"
			# it, you will also get an error.
			#
			opmnctl startproc process-type=$opmntype &> /dev/null
		fi
	done

	if [ $i -eq 0 ]; then
		# restarts failed - return 1 (failure)
		ocf_log error "Failed to restart OPMN process $comp"
		return 1
	fi

	return 0
}


#
# Get the status of the OPMN-managed processes.
#
#   $1 - 0 if the lock file was seen (service expected to be running),
#        non-zero otherwise
#
# Returns 3 if OPMN is down and no lock was seen, 1 if OPMN is down but
# should be up or if any managed process check failed, 0 otherwise.
#
get_opmn_status()
{
	declare -i subsys_lock=$1
	declare -i ct_errors=0

	opmnctl status &> /dev/null
	if [ $? -eq 2 ]; then
		#
		# OPMN not running??
		#
		ocf_log info "OPMN is stopped"

		if [ $subsys_lock -eq 0 ]; then
			#
			# Don't handle full opmn-restart.  XXX
			#
			return 1
		fi

		# That's okay, it's not supposed to be!
		return 3
	fi

	#
	# Print out the PIDs for everyone.
	#
	ocf_log info "OPMN is running..."
	ocf_log info "opmn components:"

	#
	# Check the OPMN-managed processes
	#
	get_opmn_proc_status OID || ((ct_errors++))
	get_opmn_proc_status HTTP_Server || ((ct_errors++))
	get_opmn_proc_status OC4J OC4J_SECURITY || ((ct_errors++))

	#
	# One or more OPMN-managed processes failed and could not be
	# restarted.
	#
	if [ $ct_errors -ne 0 ]; then
		ocf_log error "$ct_errors errors occurred while restarting OPMN-managed processes"
		return 1
	fi
	return 0
}


#
# Helps us keep a running status so we know what our ultimate return
# code will be.  Returns 1 if the $1 and $2 are not equivalent, otherwise
# returns $1.  The return code is meant to be the next $1 when this is
# called, so, for example:
#
# update_status 0      <-- returns 0
# update_status $? 0   <-- returns 0
# update_status $? 3   <-- returns 1 (values different - error condition)
# update_status $? 1   <-- returns 1 (same, but happen to be error state!)
#
# update_status 3
# update_status $? 3   <-- returns 3
#
# (and so forth...)
#
update_status()
{
	# $1 - status accumulated so far
	# $2 - status of the most recent check (optional)
	declare -i old_status=$1
	declare -i new_status=$2

	# First call of a sequence: nothing to compare against yet.
	if [ -z "$2" ]; then
		return $old_status
	fi

	# Components disagree about the service state: that is an error.
	if [ $old_status -ne $new_status ]; then
		return 1
	fi

	return $old_status
}


#
# Print an error message to the user and exit.
#
#   $1 - name of the setting that failed validation
#
oops()
{
	ocf_log error "$ORACLE_SID: Fatal: $1 failed validation checks"
	exit 1
}


#
# Do some validation on the user-configurable stuff at the beginning of the
# script.
#
# Arguments are the script's own command-line arguments; when invoked as
# root they are passed on to a re-execution of $0 under $ORACLE_USER.
# Exits through oops() on any failed check; returns 0 otherwise.
#
validation_checks()
{
	ocf_log debug "Validating configuration for $ORACLE_SID"

	#
	# If the oracle user doesn't exist, we're done.
	#
	[ -n "$ORACLE_USER" ] || oops "ORACLE_USER"
	id -u $ORACLE_USER > /dev/null || oops "ORACLE_USER"
	id -g $ORACLE_USER > /dev/null || oops "ORACLE_USER"

	#
	# If the oracle home is not set, we're done.  (The directory test
	# below is deliberately left disabled.)
	#
	[ -n "$ORACLE_HOME" ] || oops ORACLE_HOME
	#[ -d "$ORACLE_HOME" ] || oops ORACLE_HOME

	#
	# If the oracle SID is NULL, we're done
	#
	[ -n "$ORACLE_SID" ] || oops ORACLE_SID

	#
	# If we don't know the type, we're done.  Aliases are folded into
	# their canonical name.
	#
	if [ "$ORACLE_TYPE" = "base" ]; then
		# Other names for base
		ORACLE_TYPE="base"
	elif [ "$ORACLE_TYPE" = "10g" ] || [ "$ORACLE_TYPE" = "base-em" ]; then
		ORACLE_TYPE="base-em"
	elif [ "$ORACLE_TYPE" = "10g-ias" ] || [ "$ORACLE_TYPE" = "ias" ]; then
		ORACLE_TYPE="ias"
	elif [ "$ORACLE_TYPE" = "11g" ] || [ "$ORACLE_TYPE" = "base-em-11g" ]; then
		ORACLE_TYPE="base-em-11g"
	elif [ "$ORACLE_TYPE" = "base-11g" ]; then
		ORACLE_TYPE="base-11g"
	else
		oops "ORACLE_TYPE $ORACLE_TYPE"
	fi

	#
	# If the hostname is zero-length, fix it
	#
	[ -n "$ORACLE_HOSTNAME" ] || ORACLE_HOSTNAME=`hostname`

	#
	# Super user?  Automatically change UID and exec as oracle user.
	# Oracle needs to be run as the Oracle user, not root!
	#
	if [ "`id -u`" = "0" ]; then
		#echo "Restarting $0 as $ORACLE_USER."
		#
		# Breaks on RHEL5
		# exec sudo -u $ORACLE_USER $0 $*
		#
		su $ORACLE_USER -c "$0 $*"
		exit $?
	fi

	#
	# If we're not root and not the Oracle user, we're done.
	#
	[ "`id -u`" = "`id -u $ORACLE_USER`" ] || oops "not ORACLE_USER after su"
	[ "`id -g`" = "`id -g $ORACLE_USER`" ] || oops "not ORACLE_GROUP after su"

	#
	# Go home.
	#
	cd "$ORACLE_HOME"

	ocf_log debug "Validation checks for $ORACLE_SID succeeded"
	return 0
}


#
# Start Oracle 9i/10g/11g Application Server Infrastructure
#
# Starts the database, then the listener, then whatever extra components
# $ORACLE_TYPE calls for, and finally touches $LOCKFILE (when set).
# Returns 0 on success, 1 as soon as any component fails to start.
#
start_oracle()
{
	declare -i rv
	declare lsnrctl_stdout

	ocf_log info "Starting service $ORACLE_SID"

	start_db
	rv=$?
	if [ $rv -ne 0 ]; then
		ocf_log error "Starting service $ORACLE_SID failed"
		return 1
	fi

	ocf_log info "Starting listener $ORACLE_LISTENER"
	lsnrctl_stdout=$(lsnrctl start $ORACLE_LISTENER)
	rv=$?
	if [ $rv -ne 0 ]; then
		ocf_log debug "[$ORACLE_SID] Listener $ORACLE_LISTENER start returned $rv output $lsnrctl_stdout"
		ocf_log error "Starting service $ORACLE_SID failed"
		return 1
	fi

	if [ "$ORACLE_TYPE" = "base-em" ]; then
		ocf_log info "Starting iSQL*Plus for $ORACLE_SID"
		isqlplusctl start
		if [ $? -ne 0 ]; then
			ocf_log error "iSQL*Plus startup for $ORACLE_SID failed"
			ocf_log error "Starting service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "iSQL*Plus startup for $ORACLE_SID succeeded"
		fi

		ocf_log info "Starting Oracle EM DB Console for $ORACLE_SID"
		emctl start dbconsole
		if [ $? -ne 0 ]; then
			ocf_log error "Oracle EM DB Console startup for $ORACLE_SID failed"
			ocf_log error "Starting service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Oracle EM DB Console startup for $ORACLE_SID succeeded"
		fi
	elif [ "$ORACLE_TYPE" = "ias" ]; then
		ocf_log info "Starting Oracle EM for $ORACLE_SID"
		emctl start em
		if [ $? -ne 0 ]; then
			ocf_log error "Oracle EM startup for $ORACLE_SID failed"
			ocf_log error "Starting service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Oracle EM startup for $ORACLE_SID succeeded"
		fi

		ocf_log info "Starting iAS Infrastructure for $ORACLE_SID"
		opmnctl startall
		if [ $? -ne 0 ]; then
			ocf_log error "iAS Infrastructure startup for $ORACLE_SID failed"
			ocf_log error "Starting service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "iAS Infrastructure startup for $ORACLE_SID succeeded"
		fi
	elif [ "$ORACLE_TYPE" = "base-em-11g" ]; then
		ocf_log info "Starting Oracle EM DB Console for $ORACLE_SID"
		emctl start dbconsole
		if [ $? -ne 0 ]; then
			ocf_log error "Oracle EM DB Console startup for $ORACLE_SID failed"
			ocf_log error "Starting service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Oracle EM DB Console startup for $ORACLE_SID succeeded"
		fi
	fi

	if [ -n "$LOCKFILE" ]; then
		touch "$LOCKFILE"
	fi

	ocf_log info "Starting service $ORACLE_SID completed successfully"
	return 0
}


#
# Stop Oracle 9i/10g/11g Application Server Infrastructure
#
# Stops the type-specific components first, then the database
# ("immediate", falling back to "abort"), then the listener, waits for
# the remaining Oracle processes and removes $LOCKFILE (when set).
# Returns 0 on success - and also when lsnrctl is not present under
# $ORACLE_HOME/bin - and 1 if a component or the database fails to stop.
# A listener stop failure is logged but does not fail the stop.
#
stop_oracle()
{
	declare -i rv
	declare lsnrctl_stdout

	ocf_log info "Stopping service $ORACLE_SID"

	if ! [ -e "$ORACLE_HOME/bin/lsnrctl" ]; then
		ocf_log error "Oracle Listener Control is not available ($ORACLE_HOME not mounted?)"
		return 0
	fi

	if [ "$ORACLE_TYPE" = "base-em" ]; then
		ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID"
		emctl stop dbconsole
		if [ $? -ne 0 ]; then
			ocf_log error "Stopping Oracle EM DB Console for $ORACLE_SID failed"
			ocf_log error "Stopping service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID succeeded"
		fi

		ocf_log info "Stopping iSQL*Plus for $ORACLE_SID"
		isqlplusctl stop
		if [ $? -ne 0 ]; then
			ocf_log error "Stopping iSQL*Plus for $ORACLE_SID failed"
			ocf_log error "Stopping service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Stopping iSQL*Plus for $ORACLE_SID succeeded"
		fi
	elif [ "$ORACLE_TYPE" = "ias" ]; then
		ocf_log info "Stopping iAS Infrastructure for $ORACLE_SID"
		opmnctl stopall
		if [ $? -ne 0 ]; then
			ocf_log error "Stopping iAS Infrastructure for $ORACLE_SID failed"
			ocf_log error "Stopping service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Stopping iAS Infrastructure for $ORACLE_SID succeeded"
		fi

		ocf_log info "Stopping Oracle EM for $ORACLE_SID"
		emctl stop em
		if [ $? -ne 0 ]; then
			ocf_log error "Stopping Oracle EM for $ORACLE_SID failed"
			ocf_log error "Stopping service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Stopping Oracle EM for $ORACLE_SID succeeded"
		fi
	elif [ "$ORACLE_TYPE" = "base-em-11g" ]; then
		ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID"
		emctl stop dbconsole
		if [ $? -ne 0 ]; then
			ocf_log error "Stopping Oracle EM DB Console for $ORACLE_SID failed"
			ocf_log error "Stopping service $ORACLE_SID failed"
			return 1
		else
			ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID succeeded"
		fi
	fi

	stop_db immediate || stop_db abort
	if [ $? -ne 0 ]; then
		ocf_log error "Stopping service $ORACLE_SID failed"
		return 1
	fi

	ocf_log info "Stopping listener $ORACLE_LISTENER for $ORACLE_SID"
	lsnrctl_stdout=$(lsnrctl stop $ORACLE_LISTENER)
	rv=$?
	# Test the saved code: $? at this point would be the exit status of
	# the "rv=$?" assignment itself, which is always 0.
	if [ $rv -ne 0 ]; then
		ocf_log error "Listener $ORACLE_LISTENER stop failed for $ORACLE_SID: $rv output $lsnrctl_stdout"
		# XXX - failure?
	fi

	exit_idle
	if [ $? -ne 0 ]; then
		ocf_log warning "WARNING: Not all Oracle processes exited cleanly for $ORACLE_SID"
	fi

	if [ -n "$LOCKFILE" ]; then
		rm -f "$LOCKFILE"
	fi

	ocf_log info "Stopping service $ORACLE_SID succeeded"
	return 0
}


#
# Find and display the status of iAS infrastructure.
#
# This has three parts:
# (1) Oracle database itself
# (2) Oracle listener process
# (3) OPMN and OPMN-managed processes
#
# - If all are (cleanly) down, we return 3.  In order for this to happen,
#   $LOCKFILE must not exist.  In this case, we try and restart certain parts
#   of the service - as this may be running in a clustered environment.
+# +# - If some but not all are running (and, if $LOCKFILE exists, we could not +# restart the failed portions), we return 1 (ERROR) +# +# - If all are running, return 0. In the "all-running" case, we recreate +# $LOCKFILE if it does not exist. +# +status_oracle() +{ + declare -i subsys_lock=1 + declare -i last + + ocf_log debug "Checking status for $ORACLE_SID depth $depth" + + # + # Check for lock file. Crude and rudimentary, but it works + # + if [ -z "$LOCKFILE" ] || [ -f "$LOCKFILE" ]; then + subsys_lock=0 + fi + + # Check database status + get_db_status $subsys_lock + update_status $? # Start + last=$? + + # Check & report listener status + get_lsnr_status $subsys_lock + update_status $? $last + last=$? + + if [ "$ORACLE_TYPE" = "base-em" ] || [ "$ORACLE_TYPE" = "base-em-11g" ]; then + # XXX Add isqlplus status check?! + emctl status dbconsole >&/dev/null + update_status $? $last + last=$? + elif [ "$ORACLE_TYPE" = "ias" ]; then + # Check & report opmn / opmn-managed process status + get_opmn_status $subsys_lock + update_status $? $last + last=$? + fi + + # + # No lock file, but everything's running. Put the lock + # file back. XXX - this kosher? + # + if [ $last -eq 0 ] && [ $subsys_lock -ne 0 ]; then + touch "$LOCKFILE" + fi + + ocf_log debug "Status returning $last for $ORACLE_SID" + return $last +} + + +######################## +# Do some real work... # +######################## +if [ "$1" = "meta-data" ]; then + meta_data + exit 0 +fi + +validation_checks $* + +case $1 in + start) + start_oracle + exit $? + ;; + stop) + stop_oracle + exit $? + ;; + status|monitor) + status_oracle + exit $? + ;; + restart) + $0 stop || exit $? + $0 start || exit $? 
+ exit 0 + ;; + *) + echo "usage: $SCRIPT {start|stop|status|restart|meta-data}" + exit 1 + ;; +esac +exit 0 diff --git a/rgmanager/src/resources/oradg.metadata b/rgmanager/src/resources/oradg.metadata new file mode 100644 index 0000000..2f8f044 --- /dev/null +++ b/rgmanager/src/resources/oradg.metadata @@ -0,0 +1,107 @@ +<?xml version="1.0" ?> +<!-- $Id: oradg.metadata 58 2009-06-29 05:15:12Z hevirtan $ --> + +<!-- Resource metadata for Oracle DB agent --> +<resource-agent name="oradg" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + Oracle Data Guard Failover Instance + </longdesc> + <shortdesc lang="en"> + Oracle Data Guard Failover Instance + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Instance name (SID) of oracle instance + </longdesc> + <shortdesc lang="en"> + Oracle SID + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="user" required="1"> + <longdesc lang="en"> + Oracle user name. This is the user name of the Oracle + user which the Oracle instance runs as. + </longdesc> + <shortdesc lang="en"> + Oracle User Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="home" required="1"> + <longdesc lang="en"> + This is the Oracle database home directory. + This is configured when you install Oracle. + </longdesc> + <shortdesc lang="en"> + Oracle Home Directory + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="listeners"> + <longdesc lang="en"> + List of Oracle listeners which will be started with + the database instance. Listener names are separated by + whitespace. Defaults to empty which disables listeners. 
+ </longdesc> + <shortdesc lang="en"> + Oracle listeners + </shortdesc> + <content type="string" default=""/> + </parameter> + + <parameter name="vhost"> + <longdesc lang="en"> + Virtual hostname for DB Console startup + </longdesc> + <shortdesc lang="en"> + Virtual hostname for DB Console + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="lockfile"> + <longdesc lang="en"> + Location for lockfile which will be used for checking if + the Oracle should be running or not. Defaults to location + under /tmp. + </longdesc> + <shortdesc lang="en"> + Pathname for lockfile + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="tns_admin" required="0" unique="1"> + <longdesc lang="en"> + Full path to the directory that contains the Oracle + listener tnsnames.ora configuration file. The shell + variable TNS_ADMIN is set to the value provided. + </longdesc> + <shortdesc lang="en"> + Full path to the directory containing tnsnames.ora + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="900"/> + <action name="stop" timeout="90"/> + + <!-- Note: status check will also perform recover + (Oracle DB restart) if the check fails --> + <action name="status" timeout="900" interval="1m"/> + <action name="monitor" timeout="900" interval="1m"/> + + <action name="meta-data" timeout="0"/> + <action name="verify-all" timeout="0"/> + </actions> +</resource-agent> diff --git a/rgmanager/src/resources/oradg.sh.in b/rgmanager/src/resources/oradg.sh.in new file mode 100644 index 0000000..b55233b --- /dev/null +++ b/rgmanager/src/resources/oradg.sh.in @@ -0,0 +1,660 @@ +#!@BASH_SHELL@ +# +# Copyright 2003-2004, 2006-2013 Red Hat, Inc. +# +# Author(s): +# Hardy Merrill <hmerrill at redhat.com> +# Lon Hohberger <lhh at redhat.com> +# Michael Moon <Michael dot Moon at oracle.com> +# Ryan McCabe <rmccabe at redhat.com> +# +# This program is Open Source software. 
You may modify and/or redistribute +# it persuant to the terms of the Open Software License version 2.1, which +# is available from the following URL and is included herein by reference: +# +# http://opensource.org/licenses/osl-2.1.php +# +# NOTES: +# +# (1) You can comment out the LOCKFILE declaration below. This will prevent +# the need for this script to access anything outside of the ORACLE_HOME +# path. +# +# (2) You MUST customize ORACLE_USER, ORACLE_HOME, ORACLE_SID, and +# ORACLE_HOSTNAME to match your installation if not running from within +# rgmanager. +# +# (3) Do NOT place this script in shared storage; place it in ORACLE_USER's +# home directory in non-clustered environments and /usr/share/cluster +# in rgmanager/Red Hat cluster environments. +# +# Oracle is a registered trademark of Oracle Corporation. +# Oracle9i is a trademark of Oracle Corporation. +# Oracle10g is a trademark of Oracle Corporation. +# Oracle11g is a trademark of Oracle Corporation. +# All other trademarks are property of their respective owners. +# +# +# $Id: oradg.sh 127 2009-08-21 09:17:52Z hevirtan $ +# +# Original version is distributed with RHCS. The modifications include +# the following minor changes: +# - Meta-data moved to a dedicated file +# - Support for multiple listeners +# - Disabled EM +# - SysV init support removed. Only usable with rgmanager +# + +# Grab the global RHCS helper functions +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. $(dirname $0)/utils/ra-skelet.sh + +. 
/etc/init.d/functions + +declare SCRIPT="`basename $0`" +declare SCRIPTDIR="`dirname $0`" + +# Required parameters from rgmanager +ORACLE_USER=$OCF_RESKEY_user +ORACLE_HOME=$OCF_RESKEY_home +ORACLE_SID=$OCF_RESKEY_name +[ -n "$OCF_RESKEY_tns_admin" ] && export TNS_ADMIN=$OCF_RESKEY_tns_admin + +# Optional parameters with default values +LISTENERS=$OCF_RESKEY_listeners +LOCKFILE="$ORACLE_HOME/.orainstance-${ORACLE_SID}.lock" +[ -n "$OCF_RESKEY_vhost" ] && ORACLE_HOSTNAME=$OCF_RESKEY_vhost +[ -n "$OCF_RESKEY_lockfile" ] && LOCKFILE=$OCF_RESKEY_lockfile + +export LISTENERS ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE ORACLE_HOSTNAME +export LD_LIBRARY_PATH=$ORACLE_HOME/lib +export PATH=$ORACLE_HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin + +#declare -i RESTART_RETRIES=3 +declare -i RESTART_RETRIES=0 +declare -r DB_PROCNAMES="pmon" +declare -r LSNR_PROCNAME="tnslsnr" + +# clulog will not log messages when run by the oracle user. +# This is a hack to work around that. +if [ "`id -u`" = "`id -u $ORACLE_USER`" ]; then + ocf_log() { + prio=$1 + shift + logger -i -p daemon."$prio" -- "$*" + } +fi + +# +# Start Oracle (database portion) +# +start_db() { + declare -i rv + declare startup_cmd + declare startup_stdout + + ocf_log info "Starting Oracle DB $ORACLE_SID" + + # Set up our sqlplus script. Basically, we're trying to + # capture output in the hopes that it's useful in the case + # that something doesn't work properly. + + startup_stdout=$(sqlplus "/ as sysdba" << EOF +set serveroutput on +startup mount; + +declare + rol varchar(20); +begin + select database_role into rol from v\$database; + + dbms_output.put_line('Database role is ' || rol); + if (rol = 'PHYSICAL STANDBY') then + return; + end if; + + execute immediate 'alter database open'; +end; +/ + +select database_role, open_mode from v\$database; +set heading off; +set serveroutput off; +spool ${HA_RSCTMP}/dgstatus.${ORACLE_SID}; +select open_mode from v\$database; +spool off; +EOF +) + rv=$? 
+ + # Data Guard Modification 2 - Remove deprecated parameter error from startup_stdout + startup_stdout=$(echo $startup_stdout | sed 's/ORA-32004//g') + + # Dump output to syslog for debugging + ocf_log debug "[$ORACLE_SID] [$rv] sent $startup_cmd" + ocf_log debug "[$ORACLE_SID] [$rv] got $startup_stdout" + + if [ $rv -ne 0 ]; then + ocf_log error "Starting Oracle DB $ORACLE_SID failed, sqlplus returned $rv" + return 1 + fi + + # If we see: + # ORA-.....: failure, we failed + # Troubleshooting: + # ORA-00845 - Try rm -f /dev/shm/ora_* + # ORA-01081 - Try echo -e 'shutdown abort;\nquit;'|sqlplus "/ as sysdba" + if [[ "$startup_stdout" =~ "ORA-" ]] || [[ "$startup_stdout" =~ "failure" ]]; then + ocf_log error "Starting Oracle DB $ORACLE_SID failed, found errors in stdout" + return 1 + fi + + ocf_log info "Started Oracle DB $ORACLE_SID successfully" + return 0 +} + + +# +# Stop Oracle (database portion) +# +stop_db() { + declare stop_cmd + declare stop_stdout + declare -i rv + declare how_shutdown="$1" + + if [ -z "$1" ]; then + how_shutdown="immediate" + fi + + ocf_log info "Stopping Oracle DB $ORACLE_SID $how_shutdown" + + ora_procname="ora_${DB_PROCNAMES}_${ORACLE_SID}" + status $ora_procname + if [ $? -ne 0 ]; then + ocf_log debug "no pmon process -- DB $ORACLE_SID already stopped" + # No pmon process found, db already down + return 0 + fi + + # Setup for Stop ... + stop_cmd="set heading off;\nshutdown $how_shutdown;\nquit;\n" + stop_stdout=$(echo -e "$stop_cmd" | sqlplus -S "/ as sysdba") + rv=$? + + # Log stdout of the stop command + ocf_log debug "[$ORACLE_SID] sent stop command $stop_cmd" + ocf_log debug "[$ORACLE_SID] got $stop_stdout" + + # sqlplus returned failure. We'll return failed to rhcs + if [ $rv -ne 0 ]; then + ocf_log error "Stopping Oracle DB $ORACLE_SID failed, sqlplus returned $rv" + return 1 + fi + + # If we see 'ORA-' or 'failure' in stdout, we're done. 
+ if [[ "$stop_stdout" =~ "ORA-" ]] || [[ "$stop_stdout" =~ "failure" ]]; then + ocf_log error "Stopping Oracle DB $ORACLE_SID failed, errors in stdout" + return 1 + fi + + ocf_log info "Stopped Oracle DB $ORACLE_SID successfully" + return 0 +} + + +# +# Destroy any remaining processes with refs to $ORACLE_SID +# +force_cleanup() { + declare pids + declare pid + + ocf_log error "Not all Oracle processes for $ORACLE_SID exited cleanly, killing" + + pids=`ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | awk '{print $1}'` + + for pid in $pids; do + kill -9 $pid + rv=$? + if [ $rv -eq 0 ]; then + ocf_log info "Cleanup $ORACLE_SID Killed PID $pid" + else + ocf_log error "Cleanup $ORACLE_SID Kill PID $pid failed: $rv" + fi + done + + return 0 +} + + +# +# Wait for oracle processes to exit. Time out after 60 seconds +# +exit_idle() { + declare -i n=0 + + ocf_log debug "Waiting for Oracle processes for $ORACLE_SID to terminate..." + while ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | grep -q -v $LSNR_PROCNAME; do + if [ $n -ge 90 ]; then + ocf_log debug "Timed out while waiting for Oracle processes for $ORACLE_SID to terminate" + force_cleanup + return 0 + fi + sleep 1 + ((n++)) + done + + ocf_log debug "All Oracle processes for $ORACLE_SID have terminated" + return 0 +} + + +# +# Get database background process status. Restart it if it failed and +# we have seen the lock file. +# +get_db_status() { + declare -i subsys_lock=$1 + declare -i i=0 + declare -i rv=0 + declare ora_procname + + ocf_log debug "Checking status of DB $ORACLE_SID" + + for procname in $DB_PROCNAMES ; do + ora_procname="ora_${procname}_${ORACLE_SID}" + + status $ora_procname + if [ $? -eq 0 ] ; then + # This one's okay; go to the next one. + continue + fi + + # We're not supposed to be running, and we are, + # in fact, not running... 
+ if [ $subsys_lock -ne 0 ]; then + ocf_log debug "DB $ORACLE_SID is already stopped" + return 3 + fi + + for (( i=$RESTART_RETRIES ; i; i-- )) ; do + # this db process is down - stop and + # (re)start all ora_XXXX_$ORACLE_SID processes + ocf_log info "Restarting Oracle Database $ORACLE_SID" + stop_db + + start_db + if [ $? -eq 0 ] ; then + # ora_XXXX_$ORACLE_SID processes started + # successfully, so break out of the + # stop/start # 'for' loop + ocf_log info "Restarted Oracle DB $ORACLE_SID successfully" + break + fi + done + + if [ $i -eq 0 ]; then + # stop/start's failed - return 1 (failure) + ocf_log error "Failed to restart Oracle DB $ORACLE_SID after $RESTART_RETRIES tries" + return 1 + fi + done + + ocf_log debug "Checking status of DB $ORACLE_SID success" + return 0 +} + + +# +# Get the status of the Oracle listener process +# +get_lsnr_status() { + declare -i subsys_lock=$1 + declare -i rv + declare -r LISTENER=$3 + + ocf_log debug "Checking status for listener $LISTENER" + lsnrctl status "$LISTENER" >& /dev/null + rv=$? + if [ $rv -eq 0 ] ; then + ocf_log debug "Listener $LISTENER is up" + return 0 # Listener is running fine + fi + + # We're not supposed to be running, and we are, + # in fact, not running. Return 3 + if [ $subsys_lock -ne 0 ]; then + ocf_log debug "Listener $LISTENER is stopped as expected" + return 3 + fi + + # Listener is NOT running (but should be) - try to restart + for (( i=$RESTART_RETRIES ; i; i-- )) ; do + ocf_log info "Listener $LISTENER is down, attempting to restart" + lsnrctl start "$LISTENER" >& /dev/null + lsnrctl status "$LISTENER" >& /dev/null + if [ $? -eq 0 ]; then + ocf_log info "Listener $LISTENER was restarted successfully" + break # Listener was (re)started and is running fine + fi + done + + if [ $i -eq 0 ]; then + # stop/start's failed - return 1 (failure) + ocf_log error "Failed to restart listener $LISTENER after $RESTART_RETRIES tries" + return 1 + fi + + lsnrctl_stdout=$(lsnrctl status "$LISTENER") + rv=$? 
+ if [ $rv -ne 0 ] ; then + ocf_log error "Starting listener $LISTENER failed: $rv output $lsnrctl_stdout" + return 1 # Problem restarting the Listener + fi + + ocf_log info "Listener $LISTENER started successfully" + return 0 # Success restarting the Listener +} + + +# +# Helps us keep a running status so we know what our ultimate return +# code will be. Returns 1 if the $1 and $2 are not equivalent, otherwise +# returns $1. The return code is meant to be the next $1 when this is +# called, so, for example: +# +# update_status 0 <-- returns 0 +# update_status $? 0 <-- returns 0 +# update_status $? 3 <-- returns 1 (values different - error condition) +# update_status $? 1 <-- returns 1 (same, but happen to be error state!) +# +# update_status 3 +# update_status $? 3 <-- returns 3 +# +# (and so forth...) +# +update_status() { + declare -i old_status=$1 + declare -i new_status=$2 + + if [ -z "$2" ]; then + return $old_status + fi + + if [ $old_status -ne $new_status ]; then + ocf_log error "Error: $old_status vs $new_status for $ORACLE_SID - returning 1" + return 1 + fi + + return $old_status +} + + +# +# Print an error message to the user and exit. +# +oops() { + ocf_log error "$ORACLE_SID: Fatal: $1 failed validation checks" + exit 1 +} + + +# +# Do some validation on the user-configurable stuff at the beginning of the +# script. +# +validation_checks() { + ocf_log debug "Validating configuration for $ORACLE_SID" + + # If the oracle user doesn't exist, we're done. + [ -n "$ORACLE_USER" ] || oops "ORACLE_USER" + id -u $ORACLE_USER > /dev/null || oops "ORACLE_USER" + id -g $ORACLE_USER > /dev/null || oops "ORACLE_GROUP" + + # If the oracle home isn't a directory, we're done + [ -n "$ORACLE_HOME" ] || oops "ORACLE_HOME" + + # If the oracle SID is NULL, we're done + [ -n "$ORACLE_SID" ] || oops "ORACLE_SID" + + # Super user? Automatically change UID and exec as oracle user. + # Oracle needs to be run as the Oracle user, not root! 
+ if [ "`id -u`" = "0" ]; then + su $ORACLE_USER -c "$0 $*" + exit $? + fi + + # If we're not root and not the Oracle user, we're done. + [ "`id -u`" = "`id -u $ORACLE_USER`" ] || oops "not ORACLE_USER after su" + [ "`id -g`" = "`id -g $ORACLE_USER`" ] || oops "not ORACLE_GROUP after su" + + # Go home. + cd "$ORACLE_HOME" + + ocf_log debug "Validation checks for $ORACLE_SID succeeded" + return 0 +} + + +# +# Start Oracle +# +start_oracle() { + ocf_log info "Starting service $ORACLE_SID" + + start_db + rv=$? + if [ $rv -ne 0 ]; then + ocf_log error "Starting service $ORACLE_SID failed" + return 1 + fi + + for LISTENER in ${LISTENERS}; do + ocf_log info "Starting listener $LISTENER" + lsnrctl_stdout=$(lsnrctl start "$LISTENER") + rv=$? + if [ $rv -ne 0 ]; then + ocf_log debug "[$ORACLE_SID] Listener $LISTENER start returned $rv output $lsnrctl_stdout" + ocf_log error "Starting service $ORACLE_SID failed" + return 1 + fi + done + + if [ -n "$ORACLE_HOSTNAME" -a -s ${HA_RSCTMP}/dgstatus.${ORACLE_SID} ]; then + # Start DB Console if vhost defined and database_role is READ WRITE + if cat ${HA_RSCTMP}/dgstatus.${ORACLE_SID} 2>/dev/null | grep "READ WRITE"; then + ocf_log info "Starting Oracle EM DB Console for $ORACLE_SID" + emctl start dbconsole + if [ $? -ne 0 ]; then + ocf_log error "Oracle EM DB Console startup for $ORACLE_SID failed" + ocf_log error "Starting service $ORACLE_SID failed" + # Force good return status + #return 1 + return 0 + else + ocf_log info "Oracle EM DB Console startup for $ORACLE_SID succeeded" + fi + fi + rm -f ${HA_RSCTMP}/dgstatus.${ORACLE_SID} + fi + + if [ -n "$LOCKFILE" ]; then + touch "$LOCKFILE" + fi + + ocf_log info "Starting service $ORACLE_SID completed successfully" + return 0 +} + + +# +# Stop Oracle +# +stop_oracle() { + ocf_log info "Stopping service $ORACLE_SID" + + if ! [ -e "$ORACLE_HOME/bin/lsnrctl" ]; then + ocf_log error "Oracle Listener Control is not available ($ORACLE_HOME not mounted?)" + # XXX should this return 1? 
+ return 0 + fi + + stop_db || stop_db abort + if [ $? -ne 0 ]; then + ocf_log error "Unable to stop DB for $ORACLE_SID" + return 1 + fi + + for LISTENER in ${LISTENERS}; do + ocf_log info "Stopping listener $LISTENER for $ORACLE_SID" + lsnrctl_stdout=$(lsnrctl stop "$LISTENER") + rv=$? + if [ $rv -ne 0 ]; then + ocf_log error "Listener $LISTENER stop failed for $ORACLE_SID: $rv output $lsnrctl_stdout" + + pid=`ps ax | grep "tnslsnr $LISTENER " | grep -v grep | awk '{print $1}'` + kill -9 $pid + rv=$? + if [ $rv -eq 0 ]; then + ocf_log info "Cleanup $LISTENER Killed PID $pid" + else + ocf_log error "Cleanup $LISTENER Kill PID $pid failed: $rv" + fi + fi + done + + if [ -n "$ORACLE_HOSTNAME" ]; then + # Stop DB Console if vhost defined + ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID" + emctl stop dbconsole + if [ $? -ne 0 ]; then + ocf_log error "Stopping Oracle EM DB Console for $ORACLE_SID failed" + ocf_log error "Stopping service $ORACLE_SID failed" + # Force good return status + #return 1 + return 0 + else + ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID succeeded" + fi + fi + + exit_idle + + if [ $? -ne 0 ]; then + ocf_log error "WARNING: Not all Oracle processes exited cleanly for $ORACLE_SID" + # XXX - failure? + fi + + if [ -n "$LOCKFILE" ]; then + rm -f "$LOCKFILE" + fi + + ocf_log info "Stopping service $ORACLE_SID succeeded" + return 0 +} + + +# +# Find and display the status of iAS infrastructure. +# +# This has three parts: +# (1) Oracle database itself +# (2) Oracle listener process +# (3) OPMN and OPMN-managed processes +# +# - If all are (cleanly) down, we return 3. In order for this to happen, +# $LOCKFILE must not exist. In this case, we try and restart certain parts +# of the service - as this may be running in a clustered environment. +# +# - If some but not all are running (and, if $LOCKFILE exists, we could not +# restart the failed portions), we return 1 (ERROR) +# +# - If all are running, return 0. 
In the "all-running" case, we recreate +# $LOCKFILE if it does not exist. +# +status_oracle() { + declare -i subsys_lock=1 + declare -i last + declare -i depth=$1 + + ocf_log debug "Checking status for $ORACLE_SID depth $depth" + + # Check for lock file. Crude and rudimentary, but it works + if [ -z "$LOCKFILE" ] || [ -f "$LOCKFILE" ]; then + subsys_lock=0 + fi + + # Check database status + get_db_status $subsys_lock $depth + update_status $? # Start + last=$? + + # Check & report listener status + for LISTENER in ${LISTENERS}; do + get_lsnr_status $subsys_lock $depth "$LISTENER" + update_status $? $last + last=$? + done + + # No status for DB Console (ORACLE_HOSTNAME) + + # No lock file, but everything's running. Put the lock + # file back. XXX - this kosher? + if [ $last -eq 0 ] && [ $subsys_lock -ne 0 ]; then + touch "$LOCKFILE" + fi + + ocf_log debug "Status returning $last for $ORACLE_SID" + return $last +} + + +######################## +# Do some real work... # +######################## + +# Data Guard Modification 1 - Debug Logging +case $1 in +stop | start | status | restart | recover | monitor ) +[ $(id -u) = 0 ] && exec > "${HA_RSCTMP}/oradg_${ORACLE_SID}_$1.log" 2>&1 +set -x +date +echo $@ +printenv +esac + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + start) + validation_checks $* + start_oracle + exit $? + ;; + stop) + validation_checks $* + stop_oracle + exit $? + ;; + status|monitor) + validation_checks $* + status_oracle $OCF_CHECK_LEVEL + exit $? + ;; + restart) + $0 stop || exit $? + $0 start || exit $? 
+ exit 0 + ;; + *) + echo "usage: $SCRIPT {start|stop|restart|status|monitor|meta-data}" + exit 1 + ;; +esac + +exit 0 diff --git a/rgmanager/src/resources/orainstance.metadata b/rgmanager/src/resources/orainstance.metadata new file mode 100644 index 0000000..b58617b --- /dev/null +++ b/rgmanager/src/resources/orainstance.metadata @@ -0,0 +1,98 @@ +<?xml version="1.0" ?> +<!-- $Id: orainstance.metadata 58 2009-06-29 05:15:12Z hevirtan $ --> + +<!-- Resource metadata for Oracle DB agent --> +<resource-agent name="orainstance" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + Oracle 10g Failover Instance + </longdesc> + <shortdesc lang="en"> + Oracle 10g Failover Instance + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Instance name (SID) of oracle instance + </longdesc> + <shortdesc lang="en"> + Oracle SID + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="user" required="1"> + <longdesc lang="en"> + Oracle user name. This is the user name of the Oracle + user which the Oracle instance runs as. + </longdesc> + <shortdesc lang="en"> + Oracle User Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="home" required="1"> + <longdesc lang="en"> + This is the Oracle database home directory. + This is configured when you install Oracle. + </longdesc> + <shortdesc lang="en"> + Oracle Home Directory + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="listeners"> + <longdesc lang="en"> + List of Oracle listeners which will be started with + the database instance. Listener names are separated by + whitespace. Defaults to empty which disables listeners. 
+ </longdesc> + <shortdesc lang="en"> + Oracle listeners + </shortdesc> + <content type="string" default=""/> + </parameter> + + <parameter name="lockfile"> + <longdesc lang="en"> + Location for lockfile which will be used for checking whether + Oracle should be running or not. Defaults to + .orainstance-SID.lock under the Oracle home directory, where SID + is the instance name. + </longdesc> + <shortdesc lang="en"> + Pathname for lockfile + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="tns_admin" required="0" unique="1"> + <longdesc lang="en"> + Full path to the directory that contains the Oracle + listener tnsnames.ora configuration file. The shell + variable TNS_ADMIN is set to the value provided. + </longdesc> + <shortdesc lang="en"> + Full path to the directory containing tnsnames.ora + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="900"/> + <action name="stop" timeout="90"/> + + <!-- Note: status check will also perform recover + (Oracle DB restart) if the check fails --> + <action name="status" timeout="900" interval="1m"/> + <action name="monitor" timeout="900" interval="1m"/> + + <action name="meta-data" timeout="0"/> + <action name="verify-all" timeout="0"/> + </actions> + +</resource-agent> diff --git a/rgmanager/src/resources/orainstance.sh.in b/rgmanager/src/resources/orainstance.sh.in new file mode 100644 index 0000000..5873373 --- /dev/null +++ b/rgmanager/src/resources/orainstance.sh.in @@ -0,0 +1,594 @@ +#!@BASH_SHELL@ +# +# Copyright 2003-2004, 2006-2013 Red Hat, Inc. +# +# Author(s): +# Hardy Merrill <hmerrill at redhat.com> +# Lon Hohberger <lhh at redhat.com> +# Michael Moon <Michael dot Moon at oracle.com> +# Ryan McCabe <rmccabe at redhat.com> +# +# This program is Open Source software. 
You may modify and/or redistribute +# it pursuant to the terms of the Open Software License version 2.1, which +# is available from the following URL and is included herein by reference: +# +# http://opensource.org/licenses/osl-2.1.php +# +# NOTES: +# +# (1) You can comment out the LOCKFILE declaration below. This will prevent +# the need for this script to access anything outside of the ORACLE_HOME +# path. +# +# (2) You MUST customize ORACLE_USER, ORACLE_HOME, ORACLE_SID, and +# ORACLE_HOSTNAME to match your installation if not running from within +# rgmanager. +# +# (3) Do NOT place this script in shared storage; place it in ORACLE_USER's +# home directory in non-clustered environments and /usr/share/cluster +# in rgmanager/Red Hat cluster environments. +# +# Oracle is a registered trademark of Oracle Corporation. +# Oracle9i is a trademark of Oracle Corporation. +# Oracle10g is a trademark of Oracle Corporation. +# Oracle11g is a trademark of Oracle Corporation. +# All other trademarks are property of their respective owners. +# +# +# $Id: orainstance.sh 127 2009-08-21 09:17:52Z hevirtan $ +# +# Original version is distributed with RHCS. The modifications include +# the following minor changes: +# - Meta-data moved to a dedicated file +# - Support for multiple listeners +# - Disabled EM +# - SysV init support removed. Only usable with rgmanager +# + +# Grab the global RHCS helper functions +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. $(dirname $0)/utils/ra-skelet.sh + +. 
/etc/init.d/functions + +declare SCRIPT="`basename $0`" +declare SCRIPTDIR="`dirname $0`" + +# Required parameters from rgmanager +ORACLE_USER=$OCF_RESKEY_user +ORACLE_HOME=$OCF_RESKEY_home +ORACLE_SID=$OCF_RESKEY_name +[ -n "$OCF_RESKEY_tns_admin" ] && export TNS_ADMIN=$OCF_RESKEY_tns_admin + +# Optional parameters with default values +LISTENERS=$OCF_RESKEY_listeners +LOCKFILE="$ORACLE_HOME/.orainstance-${ORACLE_SID}.lock" +[ -n "$OCF_RESKEY_lockfile" ] && LOCKFILE=$OCF_RESKEY_lockfile + +export LISTENERS ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE +export LD_LIBRARY_PATH=$ORACLE_HOME/lib +export PATH=$ORACLE_HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin + +declare -i RESTART_RETRIES=3 +declare -r DB_PROCNAMES="pmon" +declare -r LSNR_PROCNAME="tnslsnr" + +# clulog will not log messages when run by the oracle user. +# This is a hack to work around that. +if [ "`id -u`" = "`id -u $ORACLE_USER`" ]; then + ocf_log() { + prio=$1 + shift + logger -i -p daemon."$prio" -- "$*" + } +fi + +# +# Start Oracle (database portion) +# +start_db() { + declare -i rv + declare startup_cmd + declare startup_stdout + + ocf_log info "Starting Oracle DB $ORACLE_SID" + + # Set up our sqlplus script. Basically, we're trying to + # capture output in the hopes that it's useful in the case + # that something doesn't work properly. + startup_cmd="set heading off;\nstartup;\nquit;\n" + startup_stdout=$(echo -e "$startup_cmd" | sqlplus -S "/ as sysdba") + rv=$? 
+ + # Dump output to syslog for debugging + ocf_log debug "[$ORACLE_SID] [$rv] sent $startup_cmd" + ocf_log debug "[$ORACLE_SID] [$rv] got $startup_stdout" + + if [ $rv -ne 0 ]; then + ocf_log error "Starting Oracle DB $ORACLE_SID failed, sqlplus returned $rv" + return 1 + fi + + # If we see: + # ORA-.....: failure, we failed + # Troubleshooting: + # ORA-00845 - Try rm -f /dev/shm/ora_* + # ORA-01081 - Try echo -e 'shutdown abort;\nquit;'|sqlplus "/ as sysdba" + # We need to ignore some non-fatal errors + + ignore_error=(ORA-32004) + + for error in ${ignore_error[*]} + do + startup_stdout=$(echo "$startup_stdout" | sed "s/${error}//g") + done + + if [[ "$startup_stdout" =~ "ORA-" ]] || [[ "$startup_stdout" =~ "failure" ]]; then + ocf_log error "Starting Oracle DB $ORACLE_SID failed, found errors in stdout" + return 1 + fi + + ocf_log info "Started Oracle DB $ORACLE_SID successfully" + return 0 +} + + +# +# Stop Oracle (database portion) +# +stop_db() { + declare stop_cmd + declare stop_stdout + declare -i rv + declare how_shutdown="$1" + + if [ -z "$1" ]; then + how_shutdown="immediate" + fi + + ocf_log info "Stopping Oracle DB $ORACLE_SID $how_shutdown" + + ora_procname="ora_${DB_PROCNAMES}_${ORACLE_SID}" + status $ora_procname + if [ $? -ne 0 ]; then + ocf_log debug "no pmon process -- DB $ORACLE_SID already stopped" + # No pmon process found, db already down + return 0 + fi + + # Setup for Stop ... + stop_cmd="set heading off;\nshutdown $how_shutdown;\nquit;\n" + stop_stdout=$(echo -e "$stop_cmd" | sqlplus -S "/ as sysdba") + rv=$? + + # Log stdout of the stop command + ocf_log debug "[$ORACLE_SID] sent stop command $stop_cmd" + ocf_log debug "[$ORACLE_SID] got $stop_stdout" + + # sqlplus returned failure. We'll return failed to rhcs + if [ $rv -ne 0 ]; then + ocf_log error "Stopping Oracle DB $ORACLE_SID failed, sqlplus returned $rv" + return 1 + fi + + # If we see 'ORA-' or 'failure' in stdout, we're done. 
+ if [[ "$startup_stdout" =~ "ORA-" ]] || [[ "$startup_stdout" =~ "failure" ]]; then + ocf_log error "Stopping Oracle DB $ORACLE_SID failed, errors in stdout" + return 1 + fi + + ocf_log info "Stopped Oracle DB $ORACLE_SID successfully" + return 0 +} + + +# +# Destroy any remaining processes with refs to $ORACLE_SID +# +force_cleanup() { + declare pids + declare pid + + ocf_log error "Not all Oracle processes for $ORACLE_SID exited cleanly, killing" + + pids=`ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | awk '{print $1}'` + + for pid in $pids; do + kill -9 $pid + rv=$? + if [ $rv -eq 0 ]; then + ocf_log info "Cleanup $ORACLE_SID Killed PID $pid" + else + ocf_log error "Cleanup $ORACLE_SID Kill PID $pid failed: $rv" + fi + done + + return 0 +} + + +# +# Wait for oracle processes to exit. Time out after 60 seconds +# +exit_idle() { + declare -i n=0 + + ocf_log debug "Waiting for Oracle processes for $ORACLE_SID to terminate..." + while ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | grep -q -v $LSNR_PROCNAME; do + if [ $n -ge 90 ]; then + ocf_log debug "Timed out while waiting for Oracle processes for $ORACLE_SID to terminate" + force_cleanup + return 0 + fi + sleep 1 + ((n++)) + done + + ocf_log debug "All Oracle processes for $ORACLE_SID have terminated" + return 0 +} + + +# +# Get database background process status. Restart it if it failed and +# we have seen the lock file. +# +get_db_status() { + declare -i subsys_lock=$1 + declare -i i=0 + declare -i rv=0 + declare ora_procname + + ocf_log debug "Checking status of DB $ORACLE_SID" + + for procname in $DB_PROCNAMES ; do + ora_procname="ora_${procname}_${ORACLE_SID}" + + status $ora_procname + if [ $? -eq 0 ] ; then + # This one's okay; go to the next one. + continue + fi + + # We're not supposed to be running, and we are, + # in fact, not running... 
+ if [ $subsys_lock -ne 0 ]; then + ocf_log debug "DB $ORACLE_SID is already stopped" + return 3 + fi + + for (( i=$RESTART_RETRIES ; i; i-- )) ; do + # this db process is down - stop and + # (re)start all ora_XXXX_$ORACLE_SID processes + ocf_log info "Restarting Oracle Database $ORACLE_SID" + stop_db + + start_db + if [ $? -eq 0 ] ; then + # ora_XXXX_$ORACLE_SID processes started + # successfully, so break out of the + # stop/start # 'for' loop + ocf_log info "Restarted Oracle DB $ORACLE_SID successfully" + break + fi + done + + if [ $i -eq 0 ]; then + # stop/start's failed - return 1 (failure) + ocf_log error "Failed to restart Oracle DB $ORACLE_SID after $RESTART_RETRIES tries" + return 1 + fi + done + + ocf_log debug "Checking status of DB $ORACLE_SID success" + return 0 +} + + +# +# Get the status of the Oracle listener process +# +get_lsnr_status() { + declare -i subsys_lock=$1 + declare -i rv + declare -r LISTENER=$3 + + ocf_log debug "Checking status for listener $LISTENER" + lsnrctl status "$LISTENER" >& /dev/null + rv=$? + if [ $rv -eq 0 ] ; then + ocf_log debug "Listener $LISTENER is up" + return 0 # Listener is running fine + fi + + # We're not supposed to be running, and we are, + # in fact, not running. Return 3 + if [ $subsys_lock -ne 0 ]; then + ocf_log debug "Listener $LISTENER is stopped as expected" + return 3 + fi + + # Listener is NOT running (but should be) - try to restart + for (( i=$RESTART_RETRIES ; i; i-- )) ; do + ocf_log info "Listener $LISTENER is down, attempting to restart" + lsnrctl start "$LISTENER" >& /dev/null + lsnrctl status "$LISTENER" >& /dev/null + if [ $? -eq 0 ]; then + ocf_log info "Listener $LISTENER was restarted successfully" + break # Listener was (re)started and is running fine + fi + done + + if [ $i -eq 0 ]; then + # stop/start's failed - return 1 (failure) + ocf_log error "Failed to restart listener $LISTENER after $RESTART_RETRIES tries" + return 1 + fi + + lsnrctl_stdout=$(lsnrctl status "$LISTENER") + rv=$? 
+ if [ $rv -ne 0 ] ; then + ocf_log error "Starting listener $LISTENER failed: $rv output $lsnrctl_stdout" + return 1 # Problem restarting the Listener + fi + + ocf_log info "Listener $LISTENER started successfully" + return 0 # Success restarting the Listener +} + + +# +# Helps us keep a running status so we know what our ultimate return +# code will be. Returns 1 if the $1 and $2 are not equivalent, otherwise +# returns $1. The return code is meant to be the next $1 when this is +# called, so, for example: +# +# update_status 0 <-- returns 0 +# update_status $? 0 <-- returns 0 +# update_status $? 3 <-- returns 1 (values different - error condition) +# update_status $? 1 <-- returns 1 (same, but happen to be error state!) +# +# update_status 3 +# update_status $? 3 <-- returns 3 +# +# (and so forth...) +# +update_status() { + declare -i old_status=$1 + declare -i new_status=$2 + + if [ -z "$2" ]; then + return $old_status + fi + + if [ $old_status -ne $new_status ]; then + ocf_log error "Error: $old_status vs $new_status for $ORACLE_SID - returning 1" + return 1 + fi + + return $old_status +} + + +# +# Print an error message to the user and exit. +# +oops() { + ocf_log error "$ORACLE_SID: Fatal: $1 failed validation checks" + exit 1 +} + + +# +# Do some validation on the user-configurable stuff at the beginning of the +# script. +# +validation_checks() { + ocf_log debug "Validating configuration for $ORACLE_SID" + + # If the oracle user doesn't exist, we're done. + [ -n "$ORACLE_USER" ] || oops "ORACLE_USER" + id -u $ORACLE_USER > /dev/null || oops "ORACLE_USER" + id -g $ORACLE_USER > /dev/null || oops "ORACLE_GROUP" + + # If the oracle home isn't a directory, we're done + [ -n "$ORACLE_HOME" ] || oops "ORACLE_HOME" + + # If the oracle SID is NULL, we're done + [ -n "$ORACLE_SID" ] || oops "ORACLE_SID" + + # Super user? Automatically change UID and exec as oracle user. + # Oracle needs to be run as the Oracle user, not root! 
+ if [ "`id -u`" = "0" ]; then + su $ORACLE_USER -c "$0 $*" + exit $? + fi + + # If we're not root and not the Oracle user, we're done. + [ "`id -u`" = "`id -u $ORACLE_USER`" ] || oops "not ORACLE_USER after su" + [ "`id -g`" = "`id -g $ORACLE_USER`" ] || oops "not ORACLE_GROUP after su" + + # Go home. + cd "$ORACLE_HOME" + + ocf_log debug "Validation checks for $ORACLE_SID succeeded" + return 0 +} + + +# +# Start Oracle +# +start_oracle() { + ocf_log info "Starting service $ORACLE_SID" + + start_db + rv=$? + if [ $rv -ne 0 ]; then + ocf_log error "Starting service $ORACLE_SID failed" + return 1 + fi + + for LISTENER in ${LISTENERS}; do + ocf_log info "Starting listener $LISTENER" + lsnrctl_stdout=$(lsnrctl start "$LISTENER") + rv=$? + if [ $rv -ne 0 ]; then + ocf_log debug "[$ORACLE_SID] Listener $LISTENER start returned $rv output $lsnrctl_stdout" + ocf_log error "Starting service $ORACLE_SID failed" + return 1 + fi + done + + if [ -n "$LOCKFILE" ]; then + touch "$LOCKFILE" + fi + + ocf_log info "Starting service $ORACLE_SID completed successfully" + return 0 +} + + +# +# Stop Oracle +# +stop_oracle() { + ocf_log info "Stopping service $ORACLE_SID" + + if ! [ -e "$ORACLE_HOME/bin/lsnrctl" ]; then + ocf_log error "Oracle Listener Control is not available ($ORACLE_HOME not mounted?)" + # XXX should this return 1? + return 0 + fi + + stop_db || stop_db abort + if [ $? -ne 0 ]; then + ocf_log error "Unable to stop DB for $ORACLE_SID" + return 1 + fi + + for LISTENER in ${LISTENERS}; do + ocf_log info "Stopping listener $LISTENER for $ORACLE_SID" + lsnrctl_stdout=$(lsnrctl stop "$LISTENER") + rv=$? + if [ $rv -ne 0 ]; then + ocf_log error "Listener $LISTENER stop failed for $ORACLE_SID: $rv output $lsnrctl_stdout" + + pid=`ps ax | grep "tnslsnr $LISTENER " | grep -v grep | awk '{print $1}'` + kill -9 $pid + rv=$? 
+ if [ $rv -eq 0 ]; then + ocf_log info "Cleanup $LISTENER Killed PID $pid" + else + ocf_log error "Cleanup $LISTENER Kill PID $pid failed: $rv" + fi + fi + done + + exit_idle + + if [ $? -ne 0 ]; then + ocf_log error "WARNING: Not all Oracle processes exited cleanly for $ORACLE_SID" + # XXX - failure? + fi + + if [ -n "$LOCKFILE" ]; then + rm -f "$LOCKFILE" + fi + + ocf_log info "Stopping service $ORACLE_SID succeeded" + return 0 +} + + +# +# Find and display the status of iAS infrastructure. +# +# This has three parts: +# (1) Oracle database itself +# (2) Oracle listener process +# (3) OPMN and OPMN-managed processes +# +# - If all are (cleanly) down, we return 3. In order for this to happen, +# $LOCKFILE must not exist. In this case, we try and restart certain parts +# of the service - as this may be running in a clustered environment. +# +# - If some but not all are running (and, if $LOCKFILE exists, we could not +# restart the failed portions), we return 1 (ERROR) +# +# - If all are running, return 0. In the "all-running" case, we recreate +# $LOCKFILE if it does not exist. +# +status_oracle() { + declare -i subsys_lock=1 + declare -i last + declare -i depth=$1 + + ocf_log debug "Checking status for $ORACLE_SID depth $depth" + + # Check for lock file. Crude and rudimentary, but it works + if [ -z "$LOCKFILE" ] || [ -f "$LOCKFILE" ]; then + subsys_lock=0 + fi + + # Check database status + get_db_status $subsys_lock $depth + update_status $? # Start + last=$? + + # Check & report listener status + for LISTENER in ${LISTENERS}; do + get_lsnr_status $subsys_lock $depth "$LISTENER" + update_status $? $last + last=$? + done + + # No lock file, but everything's running. Put the lock + # file back. XXX - this kosher? + if [ $last -eq 0 ] && [ $subsys_lock -ne 0 ]; then + touch "$LOCKFILE" + fi + + ocf_log debug "Status returning $last for $ORACLE_SID" + return $last +} + + +######################## +# Do some real work... 
# +######################## + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + start) + validation_checks $* + start_oracle + exit $? + ;; + stop) + validation_checks $* + stop_oracle + exit $? + ;; + status|monitor) + validation_checks $* + status_oracle $OCF_CHECK_LEVEL + exit $? + ;; + restart) + $0 stop || exit $? + $0 start || exit $? + exit 0 + ;; + *) + echo "usage: $SCRIPT {start|stop|restart|status|monitor|meta-data}" + exit 1 + ;; +esac + +exit 0 diff --git a/rgmanager/src/resources/oralistener.metadata b/rgmanager/src/resources/oralistener.metadata new file mode 100644 index 0000000..0ed04a6 --- /dev/null +++ b/rgmanager/src/resources/oralistener.metadata @@ -0,0 +1,73 @@ +<?xml version="1.0" ?> +<!-- $Id: oralistener.metadata 61 2009-06-29 10:01:49Z hevirtan $ --> + +<!-- Resource metadata for Oracle listener agent --> +<resource-agent name="oralistener" version="rgmanager 2.0"> + <version>1.0</version> + + <longdesc lang="en"> + Oracle 10g Listener Instance + </longdesc> + <shortdesc lang="en"> + Oracle 10g Listener Instance + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Listener name + </longdesc> + <shortdesc lang="en"> + Listener name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="user" required="1"> + <longdesc lang="en"> + Oracle user name. This is the user name of the Oracle + user which the Oracle instance runs as. + </longdesc> + <shortdesc lang="en"> + Oracle User Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="home" required="1"> + <longdesc lang="en"> + This is the Oracle database home directory. + This is configured when you install Oracle. 
+ </longdesc> + <shortdesc lang="en"> + Oracle Home Directory + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="tns_admin" required="0" unique="1"> + <longdesc lang="en"> + Full path to the directory that contains the Oracle + listener tnsnames.ora configuration file. The shell + variable TNS_ADMIN is set to the value provided. + </longdesc> + <shortdesc lang="en"> + Full path to the directory containing tnsnames.ora + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="90"/> + <action name="stop" timeout="30"/> + <!-- <action name="recover" timeout="90"/> --> + + <action name="status" timeout="90" interval="5m"/> + <action name="monitor" timeout="90" interval="5m"/> + + <action name="meta-data" timeout="0"/> + <action name="verify-all" timeout="0"/> + </actions> + +</resource-agent> diff --git a/rgmanager/src/resources/oralistener.sh.in b/rgmanager/src/resources/oralistener.sh.in new file mode 100644 index 0000000..215fd0f --- /dev/null +++ b/rgmanager/src/resources/oralistener.sh.in @@ -0,0 +1,197 @@ +#!@BASH_SHELL@ +# +# Red Hat Cluster Suite resource agent for controlling Oracle 10g +# listener instances. This script will start, stop and monitor running +# listeners. +# +# start: Will start given listener instance +# +# stop: Will stop given listener instance +# +# monitor: Will check that the listener is OK by calling lsnrctl status +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2013 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +# Grab the global RHCS helper functions +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. $(dirname $0)/utils/ra-skelet.sh + +declare -i RESTART_RETRIES=3 + +ORACLE_USER=$OCF_RESKEY_user +ORACLE_HOME=$OCF_RESKEY_home +LISTENER=$OCF_RESKEY_name +[ -n "$OCF_RESKEY_tns_admin" ] && export TNS_ADMIN=$OCF_RESKEY_tns_admin + +LC_ALL=C +LANG=C +PATH=$ORACLE_HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH ORACLE_USER ORACLE_HOME + +# clulog will not log messages when run by the oracle user. +# This is a hack to work around that. +if [ "`id -u`" = "`id -u $ORACLE_USER`" ]; then + ocf_log() { + prio=$1 + shift + logger -i -p daemon."$prio" -- "$*" + } +fi + +verify_all() { + ocf_log debug "Validating configuration for $LISTENER" + + if [ -z "$OCF_RESKEY_name" ]; then + ocf_log error "Validation for $LISTENER failed: Invalid name of service (listener name)" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_home" ]; then + ocf_log error "Validation for $LISTENER failed: No Oracle home specified." + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_user" ]; then + ocf_log error "Validation for $LISTENER failed: No Oracle username specified." + return $OCF_ERR_ARGS + fi + + # Super user? Automatically change UID and exec as oracle user. + # Oracle needs to be run as the Oracle user, not root! + if [ "`id -u`" = "0" ]; then + su $OCF_RESKEY_user -c "$0 $*" + exit $? + fi + + # Make sure the lsnrctl binary is in our $PATH + if [ ! 
-x $(which lsnrctl) ]; then + ocf_log error "Validation for $LISTENER failed: Unable to locate lsnrctl command from path! ($PATH)" + return $OCF_ERR_GENERIC + fi + + ocf_log debug "Validation checks for $LISTENER succeeded" + return 0 +} + +start() { + ocf_log info "Starting listener $LISTENER" + lsnrctl_stdout=$(lsnrctl start "$LISTENER") + if [ $? -ne 0 ]; then + ocf_log error "start listener $LISTENER failed $lsnrctl_stdout" + return $OCF_ERR_GENERIC + fi + + ocf_log info "Listener $LISTENER started successfully" + return 0 +} + +stop() { + ocf_log info "Stopping listener $LISTENER" + + monitor $OCF_CHECK_LEVEL + if [ $? -ne 0 ]; then + ocf_log info "Listener $LISTENER already stopped" + return 0 + fi + + lsnrctl_stdout=$(lsnrctl stop "$LISTENER") + if [ $? -ne 0 ]; then + ocf_log debug "stop listener $LISTENER failed $lsnrctl_stdout" + return $OCF_ERR_GENERIC + fi + + ocf_log info "Listener $LISTENER stopped successfully" + return 0 +} + +monitor() { + declare -i depth=$1 + + ocf_log debug "Checking status for listener $LISTENER depth $depth" + lsnrctl status "$LISTENER" >& /dev/null + if [ $? -ne 0 ]; then + ocf_log error "Listener $LISTENER not running" + return $OCF_ERR_GENERIC + fi + + ocf_log debug "Listener $LISTENER is up" + return 0 # Listener is running fine +} + +recover() { + ocf_log debug "Recovering listener $LISTENER" + + for (( i=$RESTART_RETRIES ; i; i-- )); do + start + if [ $? -eq 0 ] ; then + ocf_log debug "Restarted listener $LISTENER successfully" + break + fi + done + + if [ $i -eq 0 ]; then + # stop/start's failed - return 1 (failure) + ocf_log debug "Failed to restart listener $LISTENER after $RESTART_RETRIES tries" + return 1 + fi + + status + if [ $? 
-ne 0 ] ; then + ocf_log debug "Failed to restart listener $LISTENER" + return 1 # Problem restarting the Listener + fi + + ocf_log debug "Restarted listener $LISTENER successfully" + return 0 # Success restarting the Listener +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + verify-all) + verify_all $* + exit $? + ;; + start) + verify_all $* && start + exit $? + ;; + stop) + verify_all $* && stop + exit $? + ;; + recover) + verify_all $* && recover + exit $? + ;; + status|monitor) + verify_all $* + monitor $OCF_CHECK_LEVEL + exit $? + ;; + *) + echo "Usage: $0 {start|stop|recover|monitor|status|meta-data|verify-all}" + exit $OCF_ERR_GENERIC + ;; +esac diff --git a/rgmanager/src/resources/postgres-8.metadata b/rgmanager/src/resources/postgres-8.metadata new file mode 100644 index 0000000..c9f752c --- /dev/null +++ b/rgmanager/src/resources/postgres-8.metadata @@ -0,0 +1,95 @@ +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="postgres-8"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an instance of PostgreSQL server + </longdesc> + <shortdesc lang="en"> + Defines a PostgreSQL server + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Specifies a service name for logging and other purposes + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define absolute path to configuration file + </longdesc> + <shortdesc lang="en"> + Config File + </shortdesc> + <content type="string" default="/var/lib/pgsql/data/postgresql.conf"/> + </parameter> + + <parameter name="postmaster_user"> + <longdesc lang="en"> + User who runs the database server because it can't be + run by root. 
+ </longdesc> + <shortdesc lang="en"> + User who runs the database server + </shortdesc> + <content type="string" default="postgres" /> + </parameter> + + <parameter name="postmaster_options"> + <longdesc lang="en"> + Other command-line options for postmaster + </longdesc> + <shortdesc lang="en"> + Other command-line options for postmaster + </shortdesc> + <content type="string" default="-D /var/lib/pgsql/data"/> + </parameter> + + <parameter name="startup_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service startup + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service startup. + </shortdesc> + <content type="integer" default="10"/> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="30"/> + <action name="stop" timeout="30"/> + + <!-- Basic status check, run every minute --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Status check at depth 10, run every five minutes --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/postgres-8.sh.in b/rgmanager/src/resources/postgres-8.sh.in new file mode 100644 index 0000000..6f597d3 --- /dev/null +++ b/rgmanager/src/resources/postgres-8.sh.in @@ -0,0 +1,241 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. $(dirname $0)/utils/ra-skelet.sh + +declare PSQL_POSTMASTER="/usr/bin/postmaster" +declare PSQL_CTL="/usr/bin/pg_ctl" +declare PSQL_pid_file="`generate_name_for_pid_file`" +declare PSQL_conf_dir="`generate_name_for_conf_dir`" +declare PSQL_gen_config_file="$PSQL_conf_dir/postgresql.conf" +declare PSQL_kill_timeout="5" +declare PSQL_stop_timeout="15" +if [ -z "$OCF_RESKEY_startup_wait" ]; then + OCF_RESKEY_startup_wait=10 +fi + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ ! 
-r "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_postmaster_user" ]; then + clog_service_verify $CLOG_FAILED "Invalid User" + return $OCF_ERR_ARGS + fi + + clog_service_verify $CLOG_SUCCEED + + return 0 +} + +generate_config_file() +{ + declare original_file="$1" + declare generated_file="$2" + declare ip_addressess="$3" + + declare ip_comma=""; + + if [ -f "$generated_file" ]; then + sha1_verify "$generated_file" + if [ $? -ne 0 ]; then + clog_check_sha1 $CLOG_FAILED + return 0 + fi + fi + + clog_generate_config $CLOG_INIT "$original_file" "$generated_file" + + declare x=1 + for i in $ip_addressess; do + i=`echo $i | sed -e 's/\/.*$//'` + if [ $x -eq 1 ]; then + x=0 + ip_comma=$i + else + ip_comma=$ip_comma,$i + fi + done + + generate_configTemplate "$generated_file" "$1" + echo "external_pid_file = '$PSQL_pid_file'" >> "$generated_file" + echo "listen_addresses = '$ip_comma'" >> "$generated_file" + + echo >> "$generated_file" + sed 's/^[[:space:]]*external_pid_file/### external_pid_file/i;s/^[[:space:]]*listen_addresses/### listen_addresses/i' < "$original_file" >> "$generated_file" + + sha1_addToFile "$generated_file" + clog_generate_config $CLOG_SUCCEED "$original_file" "$generated_file" + + return 0; +} + +start() +{ + declare pguser_group + declare count=0 + clog_service_start $CLOG_INIT + + create_pid_directory + create_conf_directory "$PSQL_conf_dir" + check_pid_file "$PSQL_pid_file" + + if [ $? -ne 0 ]; then + clog_check_pid $CLOG_FAILED "$PSQL_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + # + # Create an empty PID file for the postgres user and + # change it to be owned by the postgres user so that + # postmaster doesn't complain.
+ # + pguser_group=`groups $OCF_RESKEY_postmaster_user | cut -f3 -d ' '` + touch $PSQL_pid_file + chown $OCF_RESKEY_postmaster_user.$pguser_group $PSQL_pid_file + + clog_looking_for $CLOG_INIT "IP Addresses" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -z "$ip_addresses" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_SUCCEED "IP Addresses" + + generate_config_file "$OCF_RESKEY_config_file" "$PSQL_gen_config_file" "$ip_addresses" + + su - "$OCF_RESKEY_postmaster_user" -c "$PSQL_POSTMASTER -c config_file=\"$PSQL_gen_config_file\" \ + $OCF_RESKEY_postmaster_options" &> /dev/null & + + # We need to sleep briefly to allow pg_ctl to detect that we've started. + # We need to fetch "-D /path/to/pgsql/data" from $OCF_RESKEY_postmaster_options + until [ "$count" -gt "$OCF_RESKEY_startup_wait" ] || + [ `su - "$OCF_RESKEY_postmaster_user" -c \ + "$PSQL_CTL status $OCF_RESKEY_postmaster_options" &> /dev/null; echo $?` = '0' ] + do + sleep 1 + let count=$count+1 + done + + if [ "$count" -gt "$OCF_RESKEY_startup_wait" ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_start $CLOG_SUCCEED + return 0; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + ## Send -INT to close connections and stop. -QUIT is used if -INT signal does not stop process. + stop_generic_sigkill "$PSQL_pid_file" "$PSQL_stop_timeout" "$PSQL_kill_timeout" "-INT" + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$PSQL_pid_file" + if [ $? 
-ne 0 ]; then + clog_service_status $CLOG_FAILED "$PSQL_pid_file" + return $OCF_ERR_GENERIC + fi + + clog_service_status $CLOG_SUCCEED + return 0 +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all && stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? + ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/ra-api-1-modified.dtd b/rgmanager/src/resources/ra-api-1-modified.dtd new file mode 100644 index 0000000..67eb94d --- /dev/null +++ b/rgmanager/src/resources/ra-api-1-modified.dtd @@ -0,0 +1,68 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> + +<!-- This is based on the RA-API-1.0 DTD from: + http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/ra-api-1.dtd + + There are additions for rgmanager. These additions should be + ignored by other RMs. + --> + +<!ELEMENT resource-agent (version,longdesc,shortdesc,parameters,actions,special*) > +<!ATTLIST resource-agent + name CDATA #REQUIRED + version CDATA #IMPLIED> + +<!ELEMENT version (#PCDATA)> + +<!ELEMENT parameters (parameter*)> + +<!ELEMENT actions (action*)> + +<!-- Primary and required are for rgmanager use. 
--> +<!ELEMENT parameter (longdesc+,shortdesc+,content)> +<!ATTLIST parameter + name CDATA #REQUIRED + primary (1|0) "0" + required (1|0) "0" + inherit CDATA "" + unique (1|0) "0" + reconfig (1|0) "0"> + +<!ELEMENT longdesc ANY> +<!ATTLIST longdesc + lang NMTOKEN #IMPLIED> + +<!ELEMENT shortdesc ANY> +<!ATTLIST shortdesc + lang NMTOKEN #IMPLIED> + +<!ELEMENT content EMPTY> +<!ATTLIST content + type (string|integer|boolean) #REQUIRED + default CDATA #IMPLIED> + +<!ELEMENT action EMPTY> +<!ATTLIST action + name (start|stop|recover|status|reconfig|monitor|reload|meta-data|validate-all|verify-all|migrate|methods) #REQUIRED + timeout CDATA #REQUIRED + interval CDATA #IMPLIED + start-delay CDATA #IMPLIED + depth CDATA #IMPLIED> + +<!-- Special tag list for rgmanager --> +<!ELEMENT special (attributes*, child*)> +<!ATTLIST special + tag CDATA #REQUIRED> + +<!ELEMENT attributes EMPTY> +<!ATTLIST attributes + maxinstances CDATA "0" + init_on_add CDATA "0" + destroy_on_delete CDATA "0"> + +<!ELEMENT child EMPTY> +<!ATTLIST child + type CDATA #REQUIRED + forbid (1|0) "0" + start CDATA "100" + stop CDATA "0"> diff --git a/rgmanager/src/resources/ra2man.xsl b/rgmanager/src/resources/ra2man.xsl new file mode 100644 index 0000000..095a7b3 --- /dev/null +++ b/rgmanager/src/resources/ra2man.xsl @@ -0,0 +1,158 @@ +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method="text" indent="no"/> + +<xsl:template match="parameter"> +.TP +.B <xsl:value-of select="@name"/><xsl:text> +</xsl:text> +<xsl:value-of select="normalize-space(longdesc)"/><xsl:text> +</xsl:text> +<xsl:if test="@primary = 1">This is the defining attribute for the <b><xsl:value-of select="/resource-agent/@name"/></b> resource type, and will be shown in logs. +</xsl:if> +<xsl:if test="@unique = 1 or @primary = 1">No other instances of the +.B <xsl:value-of select="/resource-agent/@name"/> +resource may have the same value for the +.B <xsl:value-of select="@name"/> +parameter. 
+</xsl:if> +<xsl:if test="@required = 1 or @primary = 1">This parameter is required; the resource manager will ignore specification of a resource without this parameter.</xsl:if> +<xsl:if test="@reconfig = 1">You may safely change this attribute on the fly; doing so will not cause a restart of the resource or its children. +</xsl:if> +Content: <xsl:value-of select="content/@type"/><xsl:text> +</xsl:text> +<xsl:if test="content/@default">Default Value: <xsl:value-of select="content/@default"/> +</xsl:if> +</xsl:template> + +<xsl:template match="action"> +.TP +\fB<xsl:value-of select="@name"/>\fP<xsl:if test="@timeout"> (timeout: <xsl:value-of select="@timeout"/>) </xsl:if> +<xsl:choose> +<xsl:when test="@name = 'start'"> +This starts the resource. +</xsl:when> +<xsl:when test="@name = 'stop'"> +This stops the resource. +</xsl:when> +<xsl:when test="@name = 'monitor'"> +<xsl:if test="@depth"> +Depth <xsl:value-of select="@depth"/>. +</xsl:if> +This checks the status of the resource. This is specified in the OCF Resource Agent API, but not LSB compliant. This is synonymous with +.B status +on some resource managers. +</xsl:when> +<xsl:when test="@name = 'validate-all'"> +Given (minimally) all required parameters to start or check the status of the resource, validate that those parameters are correct as much as possible. +</xsl:when> +<xsl:when test="@name = 'meta-data'"> +Display the XML metadata describing this resource. +</xsl:when> +<xsl:when test="@name = 'reload'">Reconfigure the resource in-place with the new given parameters. +</xsl:when> +<xsl:when test="@name = 'recover'"> +Attempt to recover the resource in-place without affecting dependencies. If this fails, the resource manager will try more forceful recovery (such as stop-start). +</xsl:when> +<!-- known non-OCF stuff --> +<xsl:when test="@name = 'status'"> +<xsl:if test="@depth">Depth <xsl:value-of select="@depth"/>. +</xsl:if> +This checks the status of the resource.
This is LSB compliant, but not specified by the OCF Resource Agent API. This is synonymous with +.B monitor +on some resource managers. +</xsl:when> +<xsl:when test="@name = 'reconfig'"> +Reconfigure the resource in-place with the new given parameters. +</xsl:when> +<xsl:when test="@name = 'verify-all'"> +Given (minimally) all required parameters to start or check the status of the resource, validate that those parameters are correct as much as possible. This is a misinterpretation of the +.B validate-all +action, and should be fixed. Please report a bug. +</xsl:when> +<xsl:when test="@name = 'promote'"> +If this resource was the slave instance of the +resource, promote it to master status. +</xsl:when> +<xsl:when test="@name = 'demote'"> +If this resource was the master instance of the +resource, demote it to slave status. +</xsl:when> +<xsl:when test="@name = 'migrate'"> +Migrate this resource to another node in the cluster. +</xsl:when> +<!-- Ehhh --> +<xsl:otherwise> +The operational behavior of this is not known. +</xsl:otherwise> +</xsl:choose> +</xsl:template> + +<xsl:template match="child"> +.PP +<xsl:value-of select="@type"/> - +Started at level <xsl:value-of select="@start"/>. +Stopped at level <xsl:value-of select="@stop"/>. +</xsl:template> +<xsl:template match="/resource-agent">.TH RESOURCE_AGENT 8 2009-01-20 "<xsl:value-of select="@name"/> (Resource Agent)" +.SH +<xsl:value-of select="@name"/> +Cluster Resource Agent + +.SH DESCRIPTION +<xsl:value-of select="normalize-space(longdesc)"/> + +.SH PARAMETERS +<xsl:apply-templates select="parameters"/> + +.SH RGMANAGER INTERNAL PARAMETERS +.TP +.B __enforce_timeouts +If set to 1, an operation exceeding the defined timeout will be considered +a failure of that operation. Note that fail-to-stop is critical, and causes +a service to enter the failed state. 
+ +.TP +.B __independent_subtree +If set to 1, failure of a status operation of this resource or any of its +children will be considered non-fatal unless a restart of this resource and +all of its children also fails. + +.SH ACTIONS +<xsl:apply-templates select="actions"/> + +.SH RGMANAGER NOTES +<xsl:if test="special/@tag = 'rgmanager'"> +<xsl:if test="special/attributes/@maxinstances"> +.PP +An instance of this resource defined in the +.B <resources> +section of +.B cluster.conf +can be referenced in the resource +tree at most <xsl:value-of select="special/attributes/@maxinstances"/> +time(s). All subsequent references to this resource will be ignored. +</xsl:if> +<xsl:if test="special/attributes/@root"> +.PP +This is the root resource class. Other resource +types must be attached as children to this resource +class. +</xsl:if> +<xsl:if test="special/child/@type"> +.PP +Known Child Types: +<xsl:apply-templates select="special"/> +</xsl:if> +</xsl:if> +.SH REFERENCES +.PP +http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD - The Open Cluster Framework Resource Agent Application Programming Interface draft version 1.0 + +.PP +http://www.linux-foundation.org/spec/refspecs/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html - Linux Standards Base v3.1.0 - Init Script Actions + +.PP +http://sources.redhat.com/cluster/wiki/RGManager - Linux-cluster Resource Group Manager information +</xsl:template> + +</xsl:stylesheet> diff --git a/rgmanager/src/resources/ra2ref.xsl b/rgmanager/src/resources/ra2ref.xsl new file mode 100644 index 0000000..6e61073 --- /dev/null +++ b/rgmanager/src/resources/ra2ref.xsl @@ -0,0 +1,10 @@ +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method="text" indent="yes"/> +<xsl:template name="capitalize"> + <xsl:param name="value"/> + <xsl:variable name="normalized" select="translate($value, '_abcdefghijklmnopqrstuvwxyz', '-ABCDEFGHIJKLMNOPQRSTUVWXYZ')"/> +
<xsl:value-of select="$normalized"/> +</xsl:template> +<xsl:template match="/resource-agent"> + <ref name="<xsl:call-template name="capitalize"><xsl:with-param name="value" select="@name"/></xsl:call-template>"/></xsl:template> +</xsl:stylesheet> diff --git a/rgmanager/src/resources/ra2rng.xsl b/rgmanager/src/resources/ra2rng.xsl new file mode 100644 index 0000000..28aba68 --- /dev/null +++ b/rgmanager/src/resources/ra2rng.xsl @@ -0,0 +1,340 @@ +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:int="__internal__" + exclude-result-prefixes="int"> + <xsl:output method="text" indent="no"/> + +<xsl:param name="init-indent" select="' '"/> +<xsl:param name="indent" select="' '"/> + + +<!-- + helpers + --> + +<int:common-optional-parameters> + <int:parameter name="__independent_subtree"> + <int:shortdesc> + Treat this and all children as an independent subtree. + </int:shortdesc> + </int:parameter> + <int:parameter name="__enforce_timeouts"> + <int:shortdesc> + Consider a timeout for operations as fatal. + </int:shortdesc> + </int:parameter> + <int:parameter name="__max_failures"> + <int:shortdesc> + Maximum number of failures before returning a failure to + a status check. + </int:shortdesc> + </int:parameter> + <int:parameter name="__failure_expire_time"> + <int:shortdesc> + Amount of time before a failure is forgotten. + </int:shortdesc> + </int:parameter> + <int:parameter name="__max_restarts"> + <int:shortdesc> + Maximum number restarts for an independent subtree before + giving up. + </int:shortdesc> + </int:parameter> + <int:parameter name="__restart_expire_time"> + <int:shortdesc> + Amount of time before a failure is forgotten for + an independent subtree. + </int:shortdesc> + </int:parameter> +</int:common-optional-parameters> + +<xsl:variable name="SP" select="' '"/> +<xsl:variable name="NL" select="'
'"/> +<xsl:variable name="NLNL" select="'

'"/> +<xsl:variable name="Q" select="'"'"/> +<xsl:variable name="TS" select="'<'"/> +<xsl:variable name="TSc" select="'</'"/> +<xsl:variable name="TE" select="'>'"/> +<xsl:variable name="TEc" select="'/>'"/> + +<xsl:template name="comment"> + <xsl:param name="text" select="''"/> + <xsl:param name="indent" select="''"/> + <xsl:if test="$indent != 'none'"> + <xsl:value-of select="concat($init-indent, $indent)"/> + </xsl:if> + <xsl:value-of select="concat($TS, '!-- ', $text, ' --',$TE)"/> +</xsl:template> + +<xsl:template name="tag-start"> + <xsl:param name="name"/> + <xsl:param name="attrs" select="''"/> + <xsl:param name="indent" select="''"/> + <xsl:if test="$indent != 'none'"> + <xsl:value-of select="concat($init-indent, $indent)"/> + </xsl:if> + <xsl:value-of select="concat($TS, $name)"/> + <xsl:if test="$attrs != ''"> + <xsl:value-of select="concat($SP, $attrs)"/> + </xsl:if> + <xsl:value-of select="$TE"/> +</xsl:template> + +<xsl:template name="tag-end"> + <xsl:param name="name"/> + <xsl:param name="attrs" select="''"/> + <xsl:param name="indent" select="''"/> + <xsl:if test="$indent != 'none'"> + <xsl:value-of select="concat($init-indent, $indent)"/> + </xsl:if> + <xsl:value-of select="concat($TSc, $name)"/> + <xsl:if test="$attrs != ''"> + <xsl:value-of select="concat($SP, $attrs)"/> + </xsl:if> + <xsl:value-of select="$TE"/> +</xsl:template> + +<xsl:template name="tag-self"> + <xsl:param name="name"/> + <xsl:param name="attrs" select="''"/> + <xsl:param name="indent" select="''"/> + <xsl:if test="$indent != 'none'"> + <xsl:value-of select="concat($init-indent, $indent)"/> + </xsl:if> + <xsl:value-of select="concat($TS, $name)"/> + <xsl:if test="$attrs != ''"> + <xsl:value-of select="concat($SP, $attrs)"/> + </xsl:if> + <xsl:value-of select="$TEc"/> +</xsl:template> + +<xsl:template name="capitalize"> + <xsl:param name="value"/> + <xsl:value-of select="translate($value, + '_abcdefghijklmnopqrstuvwrxyz', + '-ABCDEFGHIJKLMNOPQRSTUVWRXYZ')"/> +</xsl:template> + 
+ +<!-- + proceed + --> + +<xsl:template match="/resource-agent"> + <xsl:value-of select="$NL"/> + + <!-- define name=... (start) --> + <xsl:variable name="capitalized"> + <xsl:call-template name="capitalize"> + <xsl:with-param name="value" select="@name"/> + </xsl:call-template> + </xsl:variable> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'define'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, $capitalized, $Q)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- element name=... rha:description=... (start) --> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'element'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, @name, $Q, $SP, + 'rha:description=', $Q, normalize-space(shortdesc), $Q)"/> + <xsl:with-param name="indent" select="$indent"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- choice (start) --> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'choice'"/> + <xsl:with-param name="indent" select="concat($indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- group (start) --> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'group'"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- (comment) --> + <xsl:call-template name="comment"> + <xsl:with-param name="text"> + <xsl:text>rgmanager specific stuff</xsl:text> + </xsl:with-param> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- attribute name="ref" --> + <xsl:call-template name="tag-self"> + <xsl:with-param name="name" select="'attribute'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, 'ref', $Q, $SP, + 'rha:description=', $Q, 'Reference to existing ', + @name, ' resource in ', + 'the 
resources section.', $Q)"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- group (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'group'"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- group (start) --> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'group'"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <xsl:for-each select="parameters/parameter"> + <xsl:choose> + <xsl:when test="@required = '1' or @primary = '1'"> + <!-- attribute name=... rha:description=... --> + <xsl:call-template name="tag-self"> + <xsl:with-param name="name" select="'attribute'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, @name, $Q, $SP, + 'rha:description=', $Q, normalize-space(shortdesc), $Q)"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + </xsl:when> + <xsl:otherwise> + <!-- optional (start) --> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'optional'"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- attribute name=... rha:description=... 
--> + <xsl:call-template name="tag-self"> + <xsl:with-param name="name" select="'attribute'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, @name, $Q, $SP, + 'rha:description=', $Q, normalize-space(shortdesc), $Q)"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- optional (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'optional'"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + </xsl:otherwise> + </xsl:choose> + </xsl:for-each> + + <!-- group (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'group'"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- choice (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'choice'"/> + <xsl:with-param name="indent" select="concat($indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <xsl:for-each select="document('')/*/int:common-optional-parameters/int:parameter"> + <!-- optional (start) --> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'optional'"/> + <xsl:with-param name="indent" select="concat($indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- attribute name=... rha:description=... 
--> + <xsl:call-template name="tag-self"> + <xsl:with-param name="name" select="'attribute'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, @name, $Q, $SP, + 'rha:description=', $Q, normalize-space(int:shortdesc), $Q)"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- optional (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'optional'"/> + <xsl:with-param name="indent" select="concat($indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + </xsl:for-each> + + <!-- interleave (start) --> + <xsl:call-template name="tag-start"> + <xsl:with-param name="name" select="'interleave'"/> + <xsl:with-param name="indent" select="concat($indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- ref name="RESOURCEACTION" --> + <xsl:call-template name="tag-self"> + <xsl:with-param name="name" select="'ref'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, 'RESOURCEACTION', $Q)"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- ref name="CHILDREN" --> + <xsl:call-template name="tag-self"> + <xsl:with-param name="name" select="'ref'"/> + <xsl:with-param name="attrs" select="concat( + 'name=', $Q, 'CHILDREN', $Q)"/> + <xsl:with-param name="indent" select="concat($indent, $indent, + $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- interleave (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'interleave'"/> + <xsl:with-param name="indent" select="concat($indent, $indent)"/> + </xsl:call-template> + <xsl:value-of select="$NL"/> + + <!-- element (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'element'"/> + <xsl:with-param name="indent" select="$indent"/> + </xsl:call-template> + <xsl:value-of 
select="$NL"/> + + <!-- define (end) --> + <xsl:call-template name="tag-end"> + <xsl:with-param name="name" select="'define'"/> + </xsl:call-template> + <xsl:value-of select="$NLNL"/> + +</xsl:template> + +</xsl:stylesheet> diff --git a/rgmanager/src/resources/resources.rng.head b/rgmanager/src/resources/resources.rng.head new file mode 100644 index 0000000..49db15b --- /dev/null +++ b/rgmanager/src/resources/resources.rng.head @@ -0,0 +1 @@ +<!-- Autogenerated resources definitions --> diff --git a/rgmanager/src/resources/resources.rng.mid b/rgmanager/src/resources/resources.rng.mid new file mode 100644 index 0000000..63ee46d --- /dev/null +++ b/rgmanager/src/resources/resources.rng.mid @@ -0,0 +1,20 @@ + <define name="RESOURCEACTION"> + <zeroOrMore> + <element name="action" rha:description="Overrides resource action timings for a resource instance."> + <attribute name="name" rha:description="Name of resource action (start, stop, status, etc.)."/> + <optional> + <attribute name="depth" rha:description="Status check depth (resource agent dependent; * = all depths)."/> + </optional> + <optional> + <attribute name="interval" rha:description="Status check interval."/> + </optional> + <optional> + <attribute name="timeout" rha:description="Action timeout. 
Meaningless unless __enforce_timeouts is set for this resource."/> + </optional> + </element> + </zeroOrMore> + </define> + + <define name="CHILDREN"> + <zeroOrMore> + <choice> diff --git a/rgmanager/src/resources/resources.rng.tail b/rgmanager/src/resources/resources.rng.tail new file mode 100644 index 0000000..d726579 --- /dev/null +++ b/rgmanager/src/resources/resources.rng.tail @@ -0,0 +1,6 @@ + + </choice> + </zeroOrMore> + </define> + +<!-- End autogenerated resources definitions --> diff --git a/rgmanager/src/resources/samba.metadata b/rgmanager/src/resources/samba.metadata new file mode 100644 index 0000000..1e03fcd --- /dev/null +++ b/rgmanager/src/resources/samba.metadata @@ -0,0 +1,89 @@ +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="samba"> + <version>1.0</version> + + <longdesc lang="en"> + Dynamic smbd/nmbd resource agent + </longdesc> + <shortdesc lang="en"> + Dynamic smbd/nmbd resource agent + </shortdesc> + + <parameters> + <parameter name="name" unique="1" primary="1"> + <longdesc lang="en"> + Samba Symbolic Name. 
+ </longdesc> + <shortdesc lang="en"> + Samba Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define absolute path to configuration file + </longdesc> + <shortdesc lang="en"> + Config File + </shortdesc> + <content type="string" default="/etc/samba/smb.conf"/> + </parameter> + + <parameter name="smbd_options"> + <longdesc lang="en"> + Other command-line options for smbd + </longdesc> + <shortdesc lang="en"> + Other command-line options for smbd + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="nmbd_options"> + <longdesc lang="en"> + Other command-line options for nmbd + </longdesc> + <shortdesc lang="en"> + Other command-line options for nmbd + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="shutdown_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service shutdown + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service shutdown + </shortdesc> + <content type="integer" /> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this smb service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. 
+ </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- This is just a wrapper for LSB init scripts, so monitor + and status can't have a timeout, nor do they do any extra + work regardless of the depth --> + <action name="status" interval="30s" timeout="0"/> + <action name="monitor" interval="30s" timeout="0"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> +</resource-agent> diff --git a/rgmanager/src/resources/samba.sh.in b/rgmanager/src/resources/samba.sh.in new file mode 100644 index 0000000..9b7216d --- /dev/null +++ b/rgmanager/src/resources/samba.sh.in @@ -0,0 +1,241 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. 
$(dirname $0)/utils/ra-skelet.sh + +declare SAMBA_SMBD=/usr/sbin/smbd +declare SAMBA_NMBD=/usr/sbin/nmbd +declare SAMBA_pid_dir="`generate_name_for_pid_dir`" +declare SAMBA_conf_dir="`generate_name_for_conf_dir`" +declare SAMBA_smbd_pid_file="$SAMBA_pid_dir/smbd-smb.conf.pid" +declare SAMBA_nmbd_pid_file="$SAMBA_pid_dir/nmbd-smb.conf.pid" +declare SAMBA_gen_config_file="$SAMBA_conf_dir/smb.conf" + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ ! -r "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE $OCF_RESKEY_config_file + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + clog_service_verify $CLOG_SUCCEED + + return 0 +} + +generate_config_file() +{ + declare original_file="$1" + declare generated_file="$2" + declare ip_addresses="$3" + + if [ -f "$generated_file" ]; then + sha1_verify "$generated_file" + if [ $? 
-ne 0 ]; then + clog_check_sha1 $CLOG_FAILED + return 0 + fi + fi + + clog_generate_config $CLOG_INIT "$original_file" "$generated_file" + + generate_configTemplate "$generated_file" "$1" + + echo "pid directory = \"$SAMBA_pid_dir\"" >> "$generated_file" + echo "interfaces = $ip_addresses" >> "$generated_file" + echo "bind interfaces only = Yes" >> "$generated_file" + echo "netbios name = ${OCF_RESKEY_name/ /_}" >> "$generated_file" + echo >> "$generated_file" + sed 's/^[[:space:]]*pid directory/### pid directory/i;s/^[[:space:]]*interfaces/### interfaces/i;s/^[[:space:]]*bind interfaces only/### bind interfaces only/i;s/^[[:space:]]*netbios name/### netbios name/i' \ + < "$original_file" >> "$generated_file" + + sha1_addToFile "$generated_file" + clog_generate_config $CLOG_SUCCEED "$original_file" "$generated_file" + + return 0; +} + +start() +{ + clog_service_start $CLOG_INIT + + create_pid_directory + mkdir -p "$SAMBA_pid_dir" + create_conf_directory "$SAMBA_conf_dir" + check_pid_file "$SAMBA_smbd_pid_file" + + if [ $? -ne 0 ]; then + clog_check_pid $CLOG_FAILED "$SAMBA_smbd_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + check_pid_file "$SAMBA_nmbd_pid_file" + + if [ $? -ne 0 ]; then + clog_check_pid $CLOG_FAILED "$SAMBA_nmbd_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_INIT "IP Addresses" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -z "$ip_addresses" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_SUCCEED "IP Addresses" + + generate_config_file "$OCF_RESKEY_config_file" "$SAMBA_gen_config_file" "$ip_addresses" + + $SAMBA_SMBD -D -s "$SAMBA_gen_config_file" $OCF_RESKEY_smbd_options + + if [ $? -ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + $SAMBA_NMBD -D -s "$SAMBA_gen_config_file" $OCF_RESKEY_nmbd_options + + if [ $? 
-ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_start $CLOG_SUCCEED + + return 0; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + stop_generic "$SAMBA_smbd_pid_file" "$OCF_RESKEY_shutdown_wait" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + stop_generic "$SAMBA_nmbd_pid_file" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + if [ -e "$SAMBA_smbd_pid_file" ]; then + rm -f "$SAMBA_smbd_pid_file" + fi + + if [ -e "$SAMBA_nmbd_pid_file" ]; then + rm -f "$SAMBA_nmbd_pid_file" + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$SAMBA_smbd_pid_file" + + if [ $? -ne 0 ]; then + clog_service_status $CLOG_FAILED "$SAMBA_smbd_pid_file" + return $OCF_ERR_GENERIC + fi + + status_check_pid "$SAMBA_nmbd_pid_file" + + if [ $? -ne 0 ]; then + clog_service_status $CLOG_FAILED "$SAMBA_nmbd_pid_file" + return $OCF_ERR_GENERIC + fi + + clog_service_status $CLOG_SUCCEED + return 0 +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all && stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? + ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/script.sh.in b/rgmanager/src/resources/script.sh.in new file mode 100644 index 0000000..88e8315 --- /dev/null +++ b/rgmanager/src/resources/script.sh.in @@ -0,0 +1,171 @@ +#!@BASH_SHELL@ + +# +# Script to handle a non-OCF script (e.g. a normal init-script) +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +. $(dirname $0)/ocf-shellfuncs + +meta_data() +{ + cat <<EOT +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="script"> + <version>1.0</version> + + <longdesc lang="en"> + The script resource allows a standard LSB-compliant init script + to be used to start a clustered service. + </longdesc> + <shortdesc lang="en"> + LSB-compliant init script as a clustered resource. + </shortdesc> + + <parameters> + <parameter name="name" unique="1" primary="1"> + <longdesc lang="en"> + Name + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="file" unique="1" required="1"> + <longdesc lang="en"> + Path to script + </longdesc> + <shortdesc lang="en"> + Path to script + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name, in case the + script wants to know this information. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. 
+ </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- This is just a wrapper for LSB init scripts, so monitor + and status can't have a timeout, nor do they do any extra + work regardless of the depth --> + <action name="status" interval="30s" timeout="0"/> + <action name="monitor" interval="30s" timeout="0"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> +</resource-agent> +EOT +} + +validate_all() +{ + if [ -z "${OCF_RESKEY_file}" ]; then + ocf_log err "No file provided" + return $OCF_ERR_ARGS # Invalid Argument + fi + + if ! [ -e "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} does not exist" + return $OCF_ERR_INSTALLED # Program not installed + fi + + if [ -b "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} is a block device" + return $OCF_ERR_ARGS # Invalid Argument + fi + + if [ -d "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} is a directory" + return $OCF_ERR_ARGS # Invalid Argument + fi + + if [ -c "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} is a character device" + return $OCF_ERR_ARGS # Invalid Argument + fi + + if [ -p "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} is a named pipe" + return $OCF_ERR_ARGS # Invalid Argument + fi + + if [ -S "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} is a socket" + return $OCF_ERR_ARGS # Invalid Argument + fi + + if ! [ -s "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} is empty" + return $OCF_ERR_GENERIC # ??? + fi + + if ! [ -x "${OCF_RESKEY_file}" ]; then + ocf_log err "${OCF_RESKEY_file} is not executable" + return $OCF_ERR_PERM + fi + + return 0 +} + +case $1 in + meta-data) + meta_data + exit 0 + ;; + validate-all) + validate_all + exit $? + ;; + *) + ;; +esac + + +validate_all || exit $? 
+ +# Execute the script +ocf_log info "Executing ${OCF_RESKEY_file} $1" +${OCF_RESKEY_file} $1 + +declare -i rv=$? +if [ $rv -ne 0 ]; then + ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)" + exit $OCF_ERR_GENERIC +fi diff --git a/rgmanager/src/resources/service.sh.in b/rgmanager/src/resources/service.sh.in new file mode 100644 index 0000000..5fd9b5b --- /dev/null +++ b/rgmanager/src/resources/service.sh.in @@ -0,0 +1,300 @@ +#!@BASH_SHELL@ + +# +# Dummy OCF script for resource group +# +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +# Grab nfs lock tricks if available +export NFS_TRICKS=1 +if [ -f "$(dirname $0)/svclib_nfslock" ]; then + . $(dirname $0)/svclib_nfslock + NFS_TRICKS=0 +fi + +meta_data() +{ + cat <<EOT +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="service"> + <version>1.0</version> + + <longdesc lang="en"> + This defines a collection of resources, known as a resource + group or cluster service. + </longdesc> + <shortdesc lang="en"> + Defines a service (resource group). 
+ </shortdesc> + + <parameters> + <parameter name="name" unique="1" required="1" primary="1"> + <longdesc lang="en"> + This is the name of the resource group. + </longdesc> + <shortdesc lang="en"> + Name. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="domain" reconfig="1"> + <longdesc lang="en"> + Failover domains define lists of cluster members + to try in the event that a resource group fails. + </longdesc> + <shortdesc lang="en"> + Failover domain. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="autostart" reconfig="1"> + <longdesc lang="en"> + If set to yes, this resource group will automatically be started + after the cluster forms a quorum. If set to no, this resource + group will start in the 'disabled' state after the cluster forms + a quorum. + </longdesc> + <shortdesc lang="en"> + Automatic start after quorum formation + </shortdesc> + <content type="boolean" default="1"/> + </parameter> + + <parameter name="exclusive" reconfig="1"> + <longdesc lang="en"> + If set, this resource group will only relocate to + nodes which have no other resource groups running in the + event of a failure. If no empty nodes are available, + this resource group will not be restarted after a failure. + Additionally, resource groups will not automatically + relocate to the node running this resource group. This + option can be overridden by manual start and/or relocate + operations. + </longdesc> + <shortdesc lang="en"> + Exclusive service. + </shortdesc> + <content type="boolean" default="0"/> + </parameter> + + <parameter name="nfslock"> + <longdesc lang="en"> + Enable NFS lock workarounds. When used with a compatible + HA-callout program like clunfslock, this could be used + to provide NFS lock failover, but at significant cost to + other services on the machine. 
This requires a compatible + version of nfs-utils and manual configuration of rpc.statd; + see 'man rpc.statd' to see if your version supports + the -H parameter. + </longdesc> + <shortdesc lang="en"> + Enable NFS lock workarounds. + </shortdesc> + <content type="boolean" default="0"/> + </parameter> + + <parameter name="nfs_client_cache"> + <longdesc lang="en"> + On systems with large numbers of exports, a performance + problem in the exportfs command can cause inordinately long + status check times for services with lots of mounted + NFS clients. This occurs because exportfs does DNS queries + on all clients in the export list. + + Setting this option to '1' will enable caching of the export + list returned from the exportfs command on a per-service + basis. The cache will last for 30 seconds before expiring + instead of being generated each time an nfsclient resource + is called. + </longdesc> + <shortdesc lang="en"> + Enable exportfs list caching (performance). + </shortdesc> + <content type="integer" default="0"/> + </parameter> + + + <parameter name="recovery" reconfig="1"> + <longdesc lang="en"> + This currently has three possible options: "restart" tries + to restart failed parts of this resource group locally before + attempting to relocate (default); "relocate" does not bother + trying to restart the service locally; "disable" disables + the resource group if any component fails. Note that + any resource with a valid "recover" operation which can be + recovered without a restart will be. + </longdesc> + <shortdesc lang="en"> + Failure recovery policy (restart, relocate, or disable). + </shortdesc> + <content type="string" default="restart"/> + </parameter> + + <parameter name="depend"> + <longdesc lang="en"> + Service dependency; will not start without the specified + service running. + </longdesc> + <shortdesc lang="en"> + Top-level service this depends on, in service:name format. 
+ </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="depend_mode"> + <longdesc lang="en"> + Service dependency mode. + hard - This service is stopped/started if its dependency + is stopped/started + soft - This service only depends on the other service for + initial startip. If the other service stops, this + service is not stopped. + </longdesc> + <shortdesc lang="en"> + Service dependency mode (soft or hard). + </shortdesc> + <content type="string" default="hard"/> + </parameter> + + <parameter name="max_restarts"> + <longdesc lang="en"> + Maximum restarts for this service. + </longdesc> + <shortdesc lang="en"> + Maximum restarts for this service. + </shortdesc> + <content type="string" default="0"/> + </parameter> + + <parameter name="restart_expire_time"> + <longdesc lang="en"> + Restart expiration time. A restart is forgotten + after this time. When combined with the max_restarts + option, this lets administrators specify a threshold + for when to fail over services. If max_restarts + is exceeded in this given expiration time, the service + is relocated instead of restarted again. + </longdesc> + <shortdesc lang="en"> + Restart expiration time; amount of time before a restart + is forgotten. + </shortdesc> + <content type="string" default="0"/> + </parameter> + + <parameter name="priority"> + <longdesc lang="en"> + Priority for the service. In a failover scenario, this + indicates the ordering of the service (1 is processed + first, 2 is processed second, etc.). This overrides the + order presented in cluster.conf. This option only has + an effect if central processing within rgmanager is turned + on. + </longdesc> + <shortdesc lang="en"> + Service priority. + </shortdesc> + <content type="integer" default="0"/> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="5"/> + <action name="stop" timeout="5"/> + + <!-- No-ops. Groups are abstract resource types. 
+ <action name="status" timeout="5" interval="1h"/> + <action name="monitor" timeout="5" interval="1h"/> + --> + + <action name="reconfig" timeout="5"/> + <action name="recover" timeout="5"/> + <action name="reload" timeout="5"/> + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="5"/> + </actions> + + <special tag="rgmanager"> + <attributes maxinstances="1"/> + <child type="lvm" start="1" stop="9"/> + <child type="fs" start="2" stop="8"/> + <child type="clusterfs" start="3" stop="7"/> + <child type="netfs" start="4" stop="6"/> + <child type="nfsexport" start="5" stop="5"/> + + <child type="nfsclient" start="6" stop="4"/> + + <child type="ip" start="7" stop="2"/> + <child type="smb" start="8" stop="3"/> + <child type="script" start="9" stop="1"/> + </special> +</resource-agent> +EOT +} + + +# +# A Resource group is abstract, but the OCF RA API doesn't allow for abstract +# resources, so here it is. +# +case $1 in + start) + # + # XXX If this is set, we kill lockd. If there is no + # child IP address, then clients will NOT get the reclaim + # notification. + # + if [ $NFS_TRICKS -eq 0 ]; then + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + pkill -KILL -x lockd + fi + fi + exit 0 + ;; + stop) + exit 0 + ;; + recover|restart) + exit 0 + ;; + status|monitor) + exit 0 + ;; + reload) + exit 0 + ;; + meta-data) + meta_data + exit 0 + ;; + validate-all) + exit 0 + ;; + reconfig) + exit 0 + ;; + *) + exit 0 + ;; +esac diff --git a/rgmanager/src/resources/smb.sh.in b/rgmanager/src/resources/smb.sh.in new file mode 100644 index 0000000..f2d566f --- /dev/null +++ b/rgmanager/src/resources/smb.sh.in @@ -0,0 +1,698 @@ +#!@BASH_SHELL@ + +# +# Script to manage a Samba file-sharing service component. +# Unlike NFS, this should be placed at the top level of a service +# because it will try to gather information necessary to run the +# smbd/nmbd daemons at run-time from the service structure. 
+# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# Author(s): +# Lon Hohberger (lhh at redhat.com) +# Tim Burke (tburke at redhat.com) +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +# +# Definitions! +# +declare SAMBA_CONFIG_DIR=/etc/samba +declare SMBD_COMMAND=/usr/sbin/smbd +declare NMBD_COMMAND=/usr/sbin/nmbd +declare KILLALL_COMMAND=/usr/bin/killall +declare SAMBA_PID_DIR=/var/run/samba +declare SAMBA_LOCK_DIR=/var/cache/samba + +# +# gross globals +# +declare -a ipkeys +declare -a fskeys + +# Don't change please :) +_FAIL=255 + +. $(dirname $0)/ocf-shellfuncs + +meta_data() +{ + cat <<EOT +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="smb"> + <version>1.0</version> + + <longdesc lang="en"> + Dynamic smbd/nmbd resource agent + </longdesc> + <shortdesc lang="en"> + Dynamic smbd/nmbd resource agent + </shortdesc> + + <parameters> + <parameter name="name" unique="1" primary="1"> + <longdesc lang="en"> + Samba Symbolic Name. 
This name will + correspond to /etc/samba/smb.conf.NAME + </longdesc> + <shortdesc lang="en"> + Samba Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="workgroup"> + <longdesc lang="en"> + Workgroup name + </longdesc> + <shortdesc lang="en"> + Workgroup name + </shortdesc> + <content type="string" default="LINUXCLUSTER"/> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this smb service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- This is just a wrapper for LSB init scripts, so monitor + and status can't have a timeout, nor do they do any extra + work regardless of the depth --> + <action name="status" interval="30s" timeout="0"/> + <action name="monitor" interval="30s" timeout="0"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> +</resource-agent> +EOT +} + + +# +# Usage: ccs_get key +# +# Prints the value "ccs_tool query" returns for key. A failed query whose +# output contains "No data available" or "Operation not permitted" is treated +# as "no data": nothing is printed and 0 is returned. Any other failure is +# logged and returns $_FAIL, as does calling ccs_get without a key. +# +ccs_get() +{ + declare outp + declare key + + [ -n "$1" ] || return $_FAIL + + key="$*" + + outp=$(ccs_tool query "$key" 2>&1) + if [ $? -ne 0 ]; then + # ${outp/pattern/} equals $outp only when pattern is absent, so this is + # a real error only if NEITHER benign message appears (hence &&, not ||). + if [ "$outp" = "${outp/No data available/}" ] && [ "$outp" = "${outp/Operation not permitted/}" ]; then + ocf_log err "$outp ($key)" + return $_FAIL + fi + + # no real error, just no data available + return 0 + fi + + echo $outp + + return 0 +} + + +# +# Build a list of service IP keys; traverse refs if necessary +# +get_service_ip_keys() +{ + declare svc=$1 + declare -i x y=0 + declare outp + declare key + + # + # Find service-local IP keys + # + x=1 + while : ; do + key="/cluster/rm/service[@name=\"$svc\"]/ip[$x]" + + # + # Try direct method + # + outp=$(ccs_get "$key/@address") + if [ $? 
-ne 0 ]; then + return 1 + fi + + # + # Try by reference + # + if [ -z "$outp" ]; then + outp=$(ccs_get "$key/@ref") + if [ $? -ne 0 ]; then + return 1 + fi + key="/cluster/rm/resources/ip[@address=\"$outp\"]" + fi + + if [ -z "$outp" ]; then + break + fi + + #ocf_log debug "IP $outp found @ $key" + + ipkeys[$y]="$key" + + ((y++)) + ((x++)) + done + + ocf_log debug "$y IP addresses found for $svc/$OCF_RESKEY_name" + + return 0 +} + + +# +# Build a list of service fs keys, traverse refs if necessary +# +get_service_fs_keys() +{ + declare svc=$1 + declare -i x y=0 + declare outp + declare key + + # + # Find service-local fs keys + # + x=1 + while : ; do + key="/cluster/rm/service[@name=\"$svc\"]/fs[$x]" + + # + # Try direct method + # + outp=$(ccs_get "$key/@name") + if [ $? -ne 0 ]; then + return 1 + fi + + # + # Try by reference + # + if [ -z "$outp" ]; then + outp=$(ccs_get "$key/@ref") + if [ $? -ne 0 ]; then + return 1 + fi + key="/cluster/rm/resources/fs[@name=\"$outp\"]" + fi + + if [ -z "$outp" ]; then + break + fi + + #ocf_log debug "filesystem $outp found @ $key" + + fskeys[$y]="$key" + + ((y++)) + ((x++)) + done + + ocf_log debug "$y filesystems found for $svc/$OCF_RESKEY_name" + + return 0 +} + + +build_ip_list() +{ + declare ipaddrs ipaddr + declare -i x=0 + + while [ -n "${ipkeys[$x]}" ]; do + ipaddr=$(ccs_get "${ipkeys[$x]}/@address") + if [ -z "$ipaddr" ]; then + break + fi + + ipaddrs="$ipaddrs $ipaddr" + + ((x++)) + done + + echo $ipaddrs +} + + +add_sha1() +{ + declare sha1line="# rgmanager-sha1 $(sha1sum "$1")" + echo $sha1line >> "$1" +} + + +# +# Usage: verify_sha1 file +# +# Returns 0 if the "# rgmanager-sha1" line recorded in the file matches the +# SHA1 of the rest of the file, 1 otherwise (no such line, mismatch, or the +# scratch file could not be created). +# +verify_sha1() +{ + declare tmpfile + declare current exp + + exp=$(grep "^# rgmanager-sha1.*$1" "$1" | head -1) + if [ -z "$exp" ]; then + # No sha1 line. We're done. 
+ ocf_log debug "No SHA1 info in $1" + return 1 + fi + + # + # Find expected sha1 and expected file name + # + exp=${exp/*sha1 /} + exp=${exp/ */} + + # + # Create the scratch file only now that it is needed, so the early + # return above cannot leave a stray file behind in /tmp. The template + # is quoted because the resource name may contain spaces. + # + tmpfile="$(mktemp "/tmp/smb-$OCF_RESKEY_name.tmp.XXXXXX")" + if [ -z "$tmpfile" ]; then + ocf_log err "verify_sha1: Can't create tmp file" + return 1 + fi + + grep -v "^# rgmanager-sha1" "$1" > "$tmpfile" + current=$(sha1sum "$tmpfile") + current=${current/ */} + + rm -f "$tmpfile" + + if [ "$current" = "$exp" ]; then + ocf_log debug "SHA1 sum matches for $1" + return 0 + fi + ocf_log debug "SHA1 sum does not match for $1" + return 1 +} + + +add_fs_entries() +{ + declare conf="$1" + declare sharename + declare sharepath key + + declare -i x=0 + + while [ -n "${fskeys[$x]}" ]; do + key="${fskeys[$x]}/@name" + + sharename=$(ccs_get "$key") + if [ -z "$sharename" ]; then + break + fi + + key="${fskeys[$x]}/@mountpoint" + sharepath=$(ccs_get "$key") + if [ -z "$sharepath" ]; then + break + fi + + cat >> "$conf" <<EODEV +[$sharename] + comment = Auto-generated $sharename share + # Hide the secret cluster files + veto files = /.clumanager/.rgmanager/ + browsable = yes + writable = no + public = yes + path = $sharepath + +EODEV + + ((x++)) + done +} + + +# +# Generate the samba configuration if needed for this service. +# +gen_smb_conf() +{ + declare conf="$1" + declare lvl="debug" + + if [ -f "$conf" ]; then + verify_sha1 "$conf" + if [ $? -ne 0 ]; then + ocf_log debug "Config file changed; skipping" + return 0 + fi + else + lvl="info" + fi + + ocf_log $lvl "Creating $conf" + + get_service_ip_keys "$OCF_RESKEY_service_name" + get_service_fs_keys "$OCF_RESKEY_service_name" + + cat > "$conf" <<EOT +# +# "$conf" +# +# This template configuration wass automatically generated, and will +# be automatically regenerated if removed. Please modify this file to +# speficy subdirectories and/or client access permissions. +# +# Once this file has been altered, automatic re-generation will stop. +# Remember to copy this file to all other cluster members after making +# changes, or your SMB service will not operate correctly. 
+# +# From a cluster perspective, the key fields are: +# lock directory - must be unique per samba service. +# bind interfaces only - must be present set to yes. +# interfaces - must be set to service floating IP address. +# path - must be the service mountpoint or subdirectory thereof. +# + +[global] + workgroup = $OCF_RESKEY_workgroup + pid directory = /var/run/samba/$OCF_RESKEY_name + lock directory = /var/cache/samba/$OCF_RESKEY_name + log file = /var/log/samba/%m.log + #private dir = /var/ + encrypt passwords = yes + bind interfaces only = yes + netbios name = ${OCF_RESKEY_name/ /_} + + # + # Interfaces are based on ip resources at the top level of + # "$OCF_RESKEY_service_name"; IPv6 addresses may or may not + # work correctly. + # + interfaces = $(build_ip_list) + +# +# Shares based on fs resources at the top level of "$OCF_RESKEY_service_name" +# +EOT + add_fs_entries "$conf" + add_sha1 "$conf" + + return 0 +} + + +# +# Kill off the specified PID +# (from clumanager 1.0.x/1.2.x) +# +# Killing off the samba daemons was miserable to implement, merely +# because killall doesn't distinguish by program commandline. +# Consequently I had to implement these routines to selectively pick 'em off. +# +# Kills off either the {smbd|nmbd} which is running and was started with +# the specified argument. Can't use `killall` to do this because it +# doesn't allow you to distinguish which process to kill based on any +# of the program arguments. +# +# This routine is also called on "status" checks. In this case it doesn't +# actually kill anything. +# +# Parameters: +# daemonName - daemon name, can be either smbd or nmbd +# command - [stop|start|status] +# arg - argument passed to daemon. In this case it's not the +# full set of program args, rather it's really just the +# samba config file. 
+# +# Returns: 0 - success (or the daemon isn't currently running) +# 1 - failure +# +kill_daemon_by_arg() +{ + declare daemonName=$1 + declare action=$2 + declare arg=$3 + # Create a unique temporary file to stash off intermediate results + declare tmpfile_str=/tmp/sambapids.XXXXXX + declare tmpfile + declare ret + + tmpfile=$(mktemp $tmpfile_str); ret_val=$? + + if [ -z "$tmpfile" ]; then + ocf_log err "kill_daemon_by_arg: Can't create tmp file" + return $_FAIL + fi + + # Mumble, need to strip off the /etc/samba portion, otherwise the + # grep pattern matching will fail. + declare confFile="$(basename $arg)" + + # First generate a list of candidate pids. + pidof $daemonName > $tmpfile + if [ $? -ne 0 ]; then + ocf_log debug "kill_daemon_by_arg: no pids for $daemonName" + rm -f $tmpfile + case "$action" in + 'stop') + return 0 + ;; + 'status') + return $_FAIL + ;; + esac + return 0 + fi + + # If you don't find any matching daemons for a "stop" operation, thats + # considered success; whereas for "status" inquiries its a failure. + case "$action" in + 'stop') + ret=0 + ;; + 'status') + ret=$_FAIL + ;; + esac + # + # At this point tmpfile contains a set of pids for the corresponding + # {smbd|nmbd}. Now look though this candidate set of pids and compare + # the program arguments (samba config file name). This distinguishes + # which ones should be killed off. + # + declare daemonPid="" + for daemonPid in $(cat $tmpfile); do + declare commandLine=$(cat /proc/$daemonPid/cmdline) + declare confBase="$(basename $commandLine)" + if [ "$confBase" = "$confFile" ]; then + case "$action" in + 'status') + rm -f $tmpfile + return 0 + ;; + esac + kill_daemon_pid $daemonPid + if [ $? 
-ne 0 ]; then + ret=$_FAIL + ocf_log err \ + "kill_daemon_by_arg: kill_daemon_pid $daemonPid failed" + else + ocf_log debug \ + "kill_daemon_by_arg: kill_daemon_pid $daemonPid success" + fi + fi + done + rm -f $tmpfile + return $ret +} + + +# +# Kill off the specified PID +# (from clumanager 1.0.x/1.2.x) +# +kill_daemon_pid() +{ + declare pid=$1 + declare retval=0 + + + kill -TERM $pid + if [ $? -eq 0 ]; then + ocf_log debug "Samba: successfully killed $pid" + else + ocf_log debug "Samba: failed to kill $pid" + retval=$_FAIL + fi + return $retval +} + + +share_start_stop() +{ + declare command=$1 + declare conf="$SAMBA_CONFIG_DIR/smb.conf.$OCF_RESKEY_name" + declare smbd_command + declare nmbd_command + declare netbios_name + + # + # Specify daemon options + # -D = spawn off as separate daemon + # -s = the following arg specifies the config file + # + declare smbd_options="-D -s" + declare nmbd_options="-D -s" + + if [ "$command" = "start" ]; then + gen_smb_conf "$conf" + else + if ! [ -f "$conf" ]; then + ocf_log warn "\"$conf\" missing during $command" + fi + fi + + # + # On clusters with multiple samba shares, we need to ensure (as much + # as possible) that each service is advertised as a separate netbios + # name. + # + # Generally, the admin sets this in smb.conf.NAME - but since + # it is not required, we need another option. Consequently, we use + # smb instance name (which must be unique) + # + if [ -f "$conf" ]; then + grep -qe "^\([[:space:]]\+n\|n\)etbios[[:space:]]\+name[[:space:]]*=[[:space:]]*[[:alnum:]]\+" "$conf" + if [ $? 
-ne 0 ]; then + + netbios_name=$OCF_RESKEY_name + + ocf_log notice "Using $netbios_name as NetBIOS name (service $OCF_RESKEY_service_name)" + nmbd_options=" -n $netbios_name $nmbd_options" + fi + fi + + case $command in + start) + ocf_log info "Starting Samba instance \"$OCF_RESKEY_name\"" + mkdir -p "$SAMBA_PID_DIR/$OCF_RESKEY_name" + mkdir -p "$SAMBA_LOCK_DIR/$OCF_RESKEY_name" + + [ -f "$SMBD_COMMAND" ] || exit $OCF_ERR_INSTALLED + [ -f "$NMBD_COMMAND" ] || exit $OCF_ERR_INSTALLED + + # Kick off the per-service smbd + $SMBD_COMMAND $smbd_options "$conf" + ret_val=$? + if [ $ret_val -ne 0 ]; then + ocf_log err "Samba service failed: $SMBD_COMMAND $smbd_options \"$conf\"" + return $_FAIL + fi + ocf_log debug "Samba service succeeded: $SMBD_COMMAND $smbd_options \"$conf\"" + + # Kick off the per-service nmbd + $NMBD_COMMAND $nmbd_options "$conf" + ret_val=$? + if [ $ret_val -ne 0 ]; then + ocf_log err "Samba service failed: $NMBD_COMMAND $nmbd_options \"$conf\"" + return $_FAIL + fi + ocf_log debug "Samba service succeeded: $NMBD_COMMAND $nmbd_options \"$conf\"" + ;; + stop) + ocf_log info "Stopping Samba instance \"$OCF_RESKEY_name\"" + + kill_daemon_by_arg "nmbd" $command "$conf" + kill_daemon_by_arg "smbd" $command "$conf" + if [ "$SAMBA_PID_DIR/$OCF_RESKEY_name" != "/" ]; then + pushd "$SAMBA_PID_DIR" &> /dev/null + rm -rf "$OCF_RESKEY_name" + popd &> /dev/null + fi + if [ "$SAMBA_LOCK_DIR/$OCF_RESKEY_name" != "/" ]; then + pushd "$SAMBA_LOCK_DIR" &> /dev/null + rm -rf "$OCF_RESKEY_name" + popd &> /dev/null + fi + ;; + status) + ocf_log debug "Checking Samba instance \"$OCF_RESKEY_name\"" + kill_daemon_by_arg "nmbd" $command "$conf" + if [ $? -ne 0 ]; then + ocf_log err \ + "share_start_stop: nmbd for service $svc_name died!" + return $_FAIL + fi + kill_daemon_by_arg "smbd" $command "$conf" + if [ $? -ne 0 ]; then + ocf_log err \ + "share_start_stop: nmbd for service $svc_name died!" 
+ return $_FAIL + fi + ;; + esac +} + + +verify_all() +{ + [ -z "$OCF_RESKEY_workgroup" ] && export OCF_RESKEY_workgroup="LINUXCLUSTER" + [ -n "${OCF_RESKEY_name}" ] || exit $OCF_ERR_ARGS # Invalid Argument + if [ -z "${OCF_RESKEY_service_name}" ]; then + ocf_log ERR "Samba service ${OCF_RESKEY_name} is not the child of a service" + exit $OCF_ERR_ARGS + fi +} + +case $1 in + meta-data) + meta_data + exit 0 + ;; + start|stop) + verify_all + share_start_stop $1 + exit $? + ;; + status|monitor) + verify_all + share_start_stop status + exit $? + ;; + validate-all) + verify_all + echo "Yer radio's workin', driver!" + exit 0 + ;; + *) + echo "usage: $0 {start|stop|status|monitor|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + diff --git a/rgmanager/src/resources/svclib_nfslock.in b/rgmanager/src/resources/svclib_nfslock.in new file mode 100644 index 0000000..86efd07 --- /dev/null +++ b/rgmanager/src/resources/svclib_nfslock.in @@ -0,0 +1,281 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +# Do reclaim-broadcasts when we kill lockd during shutdown/startup +# of a cluster service. 
+#
+# Exported functions:
+#
+# notify_list_store
+# notify_list_merge
+# notify_list_broadcast
+#
+
+#
+# Usage:
+# statd_notify <directory> <hostname|ip>
+#
+# Copy out a list from <directory>, merge them with the system nfs lock
+# list, and send them out as <hostname|ip> after generating a random
+# state (needed so clients will reclaim their locks)
+#
+# Parameters:
+#   $1 - directory holding the saved monitor list (in its sm/ subdirectory)
+#   $2 - host name or IP address the notifications are sent as
+#
+# Reads $lockd_pid from the calling scope (notify_list_broadcast declares
+# it); when it is empty nothing is done.
+#
+# Returns: 0 - notifications were sent, or there was nothing to send
+#          1 - the temporary state directory could not be created
+#
+nfslock_statd_notify()
+{
+	declare tmpdir
+	declare nl_dir=$1
+	declare nl_ip=$2
+	declare command # Work around bugs in rpc.statd
+	declare pid_xxx # Work around bugs in rpc.statd
+	declare owner
+
+	[ -z "$lockd_pid" ] && return 0
+	if ! [ -d "$nl_dir" ]; then
+		return 0
+	fi
+
+	if [ -z "`ls "$nl_dir"/sm/* 2> /dev/null`" ]; then
+		ocf_log debug "No hosts to notify"
+		return 0
+	fi
+
+	# Without a usable temporary directory every path below would be
+	# relative to "/", so bail out instead of continuing.
+	tmpdir=$(mktemp -d "/tmp/statd-$nl_ip.XXXXXX")
+	if [ -z "$tmpdir" ]; then
+		ocf_log err "nfslock_statd_notify: cannot create temporary directory"
+		return 1
+	fi
+
+	# Ok, copy the HA directory to something we can use.
+	mkdir -p "$tmpdir/sm"
+
+	# Copy in our specified entries
+	cp -f "$nl_dir"/sm/* "$tmpdir/sm"
+
+	# Copy in our global entries
+	# XXX This might be what we just copied.
+	#
+	# The owner is taken from whichever system directory actually
+	# exists, so the copy handed to rpc.statd has matching ownership.
+	if [ -d "/var/lib/nfs/statd/sm" ]; then
+		owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+		cp -f /var/lib/nfs/statd/sm/* "$tmpdir/sm"
+	elif [ -d "/var/lib/nfs/sm" ]; then
+		owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}')
+		cp -f /var/lib/nfs/sm/* "$tmpdir/sm"
+	fi
+
+	#
+	# Generate a random state file. If this ends up being what a client
+	# already has in its list, that's bad, but the chances of this
+	# are small - and relocations should be rare.
+	#
+	dd if=/dev/urandom of="$tmpdir/state" bs=1 count=4 &> /dev/null
+
+	#
+	# Make sure we set permissions, or statd will not like it.
+	# Skipped when neither system directory exists (no owner known).
+	#
+	if [ -n "$owner" ]; then
+		chown -R "$owner" "$tmpdir"
+	fi
+
+	#
+	# Tell rpc.statd to notify clients. Don't go into background,
+	# because statd is buggy and won't exit like it's supposed to after
+	# sending the notifications out.
+	#
+	ocf_log info "Sending reclaim notifications via $nl_ip"
+	command="rpc.statd -NFP $tmpdir -n $nl_ip"
+	eval $command 2>&1 &
+	sleep 3 # XXX - the instance of rpc.statd we just spawned is supposed
+		# to exit after it finishes notifying clients.
+		# rpc.statd spawned which is still running handles the actual
+		# new SM_MON requests... we hope 3 seconds is enough time
+		# to get all the SM_NOTIFY messages out. rpc.statd = bugged
+	#
+	# clean up
+	#
+	# The notifier is looked up by its full command line; if it already
+	# exited on its own there is nothing left to kill.
+	pid_xxx=`ps auwwx | grep "$command" | grep -v grep | awk '{print $2}'`
+	if [ -n "$pid_xxx" ]; then
+		kill $pid_xxx
+	fi
+	rm -rf "$tmpdir"
+
+	return 0
+}
+
+
+#
+# Copy of isSlave from svclib_ip and/or ip.sh
+#
+# Parameters:
+#   $1 - network interface name
+#
+# Returns: 0 - the interface carries a SLAVE flag
+#          1 - usage error, or the interface could not be queried
+#          2 - the interface is not a slave
+#
+nfslock_isSlave()
+{
+	declare intf=$1
+	declare line
+
+	if [ -z "$intf" ]; then
+		ocf_log err "usage: isSlave <I/F>"
+		return 1
+	fi
+
+	line=$(/sbin/ip link list dev $intf)
+	if [ $? -ne 0 ]; then
+		ocf_log err "$intf not found"
+		return 1
+	fi
+
+	# Removing a <...SLAVE...> flag group leaves the text unchanged
+	# only when no such group was present.
+	if [ "$line" = "${line/<*SLAVE*>/}" ]; then
+		return 2
+	fi
+
+	# Yes, it is a slave device. Ignore.
+	return 0
+}
+
+
+#
+# Get all the IPs on the system except loopback IPs
+#
+# Prints one line per address: <device> <family> <address> <maskbits>.
+# Addresses on "lo" and on interfaces for which nfslock_isSlave does not
+# return 2 (slaves, or lookup errors) are left out.
+#
+nfslock_ip_address_list()
+{
+	declare idx dev family ifaddr
+
+	while read idx dev family ifaddr; do
+
+		if [ "$family" != "inet" ] && [ "$family" != "inet6" ]; then
+			continue
+		fi
+
+		if [ "$dev" = "lo" ]; then
+			# Ignore loopback
+			continue
+		fi
+
+		nfslock_isSlave $dev
+		if [ $? -ne 2 ]; then
+			continue
+		fi
+
+		idx=${idx/:/}
+
+		# Split "address/maskbits" into its two halves.
+		echo $dev $family ${ifaddr/\/*/} ${ifaddr/*\//}
+
+	done < <(/sbin/ip -o addr list | awk '{print $1,$2,$3,$4}')
+
+	return 0
+}
+
+
+#
+# Usage: broadcast_notify <state_directory>
+#
+# Send the contents of <state_directory> out via all IPs on the system.
+# Only "inet" addresses reported by nfslock_ip_address_list are used.
+#
+# Returns 1 without notifying anyone when lockd is not running.
+#
+notify_list_broadcast()
+{
+	declare dev family addr maskbits ip_name
+	# Also read by nfslock_statd_notify, which is called below.
+	declare lockd_pid=$(pidof lockd)
+	declare nl_dir=$1
+
+	# First of all, send lockd a SIGKILL. We hope nfsd is running.
+	# If it is, this will cause lockd to reset the grace period for
+	# lock reclaiming.
+	if [ -n "$lockd_pid" ]; then
+		ocf_log info "Asking lockd to drop locks (pid $lockd_pid)"
+		kill -9 $lockd_pid
+	else
+		ocf_log warning "lockd not running; cannot notify clients"
+		return 1
+	fi
+
+	while read dev family addr maskbits; do
+		if [ "$family" != "inet" ]; then
+			continue
+		fi
+
+		# Notify under the host name when one is found for the
+		# address, otherwise under the address itself.
+		ip_name=$(clufindhostname -i $addr)
+		if [ -z "$ip_name" ]; then
+			nfslock_statd_notify $nl_dir $addr
+		else
+			nfslock_statd_notify $nl_dir $ip_name
+		fi
+
+	done < <(nfslock_ip_address_list)
+}
+
+
+#
+# Store the lock monitor list from rpc.statd - do this during a teardown
+# after the IP addresses of a service have been taken offline. Note that
+# this should be done by HA-callout programs, but this feature is not in
+# RHEL3.
+#
+# Parameters:
+#   $1 - directory to store the list in (entries go to its sm/ subdirectory)
+#
+# Returns: 0 - entries were copied
+#          1 - no system monitor directory found, or it holds no entries
+#
+notify_list_store()
+{
+	declare nl_dir=$1
+	declare owner
+
+	mkdir -p $nl_dir/sm
+
+	if [ -d "/var/lib/nfs/statd/sm" ]; then
+		if [ -z "`ls /var/lib/nfs/statd/sm/* 2> /dev/null`" ]; then
+			return 1
+			# nothing to do!
+		fi
+
+		owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+		cp -Rdpf /var/lib/nfs/statd/sm/* $nl_dir/sm
+		chown -R $owner $nl_dir
+		return 0
+	elif [ -d "/var/lib/nfs/sm" ]; then
+		if [ -z "`ls /var/lib/nfs/sm/* 2> /dev/null`" ]; then
+			return 1
+			# nothing to do!
+		fi
+
+		owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}')
+		cp -Rdpf /var/lib/nfs/sm/* $nl_dir/sm
+		chown -R $owner $nl_dir
+		return 0
+	fi
+
+	return 1
+}
+
+
+#
+# Merge the contents of <nl_dir>/sm with the system-wide list
+# Make sure ownership is right, or statd will hiccup. This should not
+# actually ever be needed because statd will, upon getting a SM_MON
+# request, create all the entries in this list. It's mostly for
+# housekeeping for next time we relocate the service.
+# +notify_list_merge() +{ + declare nl_dir=$1 + declare owner + + if [ -z "`ls $nl_dir/* 2> /dev/null`" ]; then + return 1 + fi + + if [ -d "/var/lib/nfs/statd/sm" ]; then + owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}') + cp -Rdpf $nl_dir/sm/* /var/lib/nfs/statd/sm + chown -R $owner $nl_dir + return 0 + elif [ -d "/var/lib/nfs/sm" ]; then + owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}') + cp -Rdpf $nl_dir/sm/* /var/lib/nfs/sm + chown -R $owner $nl_dir + return 0 + fi + + return 1 +} + diff --git a/rgmanager/src/resources/tomcat-5.metadata b/rgmanager/src/resources/tomcat-5.metadata new file mode 100644 index 0000000..021bef7 --- /dev/null +++ b/rgmanager/src/resources/tomcat-5.metadata @@ -0,0 +1,104 @@ +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="tomcat-5"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an instance of Tomcat server + </longdesc> + <shortdesc lang="en"> + Defines a Tomcat server + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Specifies a service name for logging and other purposes + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define absolute path to configuration file + </longdesc> + <shortdesc lang="en"> + Config File + </shortdesc> + <content type="string" default="/etc/tomcat5/tomcat5.conf"/> + </parameter> + + <parameter name="tomcat_user"> + <longdesc lang="en"> + User who runs the Tomcat server + </longdesc> + <shortdesc lang="en"> + User who runs the Tomcat server + </shortdesc> + <content type="string" default="tomcat" /> + </parameter> + + <parameter name="catalina_options"> + <longdesc lang="en"> + Other command-line options for Catalina + </longdesc> + <shortdesc lang="en"> + Other command-line options for Catalina + </shortdesc> + <content type="string" /> + </parameter> + + <parameter name="catalina_base"> + 
<longdesc lang="en"> + Cataliny base directory + </longdesc> + <shortdesc lang="en"> + Catalina base directory (differs for each service) + </shortdesc> + <content type="string" default="/usr/share/tomcat5" /> + </parameter> + + <parameter name="shutdown_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service shutdown + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service shutdown + </shortdesc> + <content type="integer" default="30" /> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. + </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- Checks to see if it''s mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/tomcat-5.sh.in b/rgmanager/src/resources/tomcat-5.sh.in new file mode 100644 index 0000000..c414343 --- /dev/null +++ b/rgmanager/src/resources/tomcat-5.sh.in @@ -0,0 +1,274 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. $(dirname $0)/utils/ra-skelet.sh + +declare TOMCAT_TOMCAT=/usr/bin/dtomcat5 +declare TOMCAT_RELINK=/usr/share/tomcat5/bin/relink +declare TOMCAT_pid_file="`generate_name_for_pid_file`" +declare TOMCAT_conf_dir="`generate_name_for_conf_dir`/conf" +declare TOMCAT_gen_config_file="$TOMCAT_conf_dir/server.xml" +declare TOMCAT_gen_catalina_base="`generate_name_for_conf_dir`" + +declare JAVA_HOME +declare CATALINA_HOME +declare CATALINA_BASE +declare CATALINA_TMPDIR +declare CLASSPATH +declare TOMCAT_USER +## + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ ! 
-r "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE $OCF_RESKEY_config_file + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + . "$OCF_RESKEY_config_file" + + if [ $? -ne 0 ]; then + clog_service_verify $CLOG_FAILED "Error In The File \"$OCF_RESKEY_config_file\"" + return $OCF_ERR_ARGS + fi + + if [ -z "$JAVA_HOME" ]; then + clog_service_verify $CLOG_FAILED "JAVA_HOME Not Specified In ${OCF_RESKEY_config_file}" + return $OCF_ERR_ARGS; + fi + + if [ ! -d "$JAVA_HOME" ]; then + clog_service_verify $CLOG_FAILED "JAVA_HOME Does Not Exist" + return $OCF_ERR_ARGS; + fi + + if [ -z "$JAVA_ENDORSED_DIRS" ]; then + clog_service_verify $CLOG_FAILED "JAVA_ENDORSED_DIRS Not Specified In ${OCF_RESKEY_config_file}" + return $OCF_ERR_ARGS; + fi + + if [ ! -d "$JAVA_ENDORSED_DIRS" ]; then + clog_service_verify $CLOG_FAILED "JAVA_ENDORSED_DIRS Does Not Exist" + return $OCF_ERR_ARGS; + fi + + if [ -z "$CATALINA_HOME" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_HOME Not Specified In ${OCF_RESKEY_config_file}" + return $OCF_ERR_ARGS; + fi + + if [ ! -d "$CATALINA_HOME" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_HOME Does Not Exist" + return $OCF_ERR_ARGS; + fi + + if [ -z "$CATALINA_TMPDIR" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_TMPDIR Not Specified In ${OCF_RESKEY_config_file}" + return $OCF_ERR_ARGS; + fi + + if [ ! -d "$CATALINA_TMPDIR" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_TMPDIR Does Not Exist" + return $OCF_ERR_ARGS; + fi + + if [ -z "$TOMCAT_USER" ]; then + clog_service_verify $CLOG_FAILED "TOMCAT_USER Does Not Exist" + return $OCF_ERR_ARGS; + fi + + clog_service_verify $CLOG_SUCCEED + + return 0 +} + +generate_config_file() +{ + declare original_file="$1" + declare generated_file="$2" + declare ip_addresses="$3" + + if [ -f "$generated_file" ]; then + sha1_verify "$generated_file" + if [ $? 
-ne 0 ]; then + clog_check_sha1 $CLOG_FAILED + return 0 + fi + fi + + clog_generate_config $CLOG_INIT "$original_file" "$generated_file" + + generate_configTemplateXML "$generated_file" "$original_file" + $(dirname $0)/utils/tomcat-parse-config.pl $ip_addresses < "$original_file" >> "$generated_file" + + sha1_addToFileXML "$generated_file" + clog_generate_config $CLOG_SUCCEED "$original_file" "$generated_file" + + return 0; +} + +start() +{ + clog_service_start $CLOG_INIT + + create_pid_directory + create_conf_directory "$TOMCAT_conf_dir" + check_pid_file "$TOMCAT_pid_file" + + if [ $? -ne 0 ]; then + clog_check_pid $CLOG_FAILED "$TOMCAT_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + clog_looking_for $CLOG_INIT "IP Addresses" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -z "$ip_addresses" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_SUCCEED "IP Addresses" + generate_config_file "$OCF_RESKEY_catalina_base/conf/server.xml" "$TOMCAT_gen_config_file" "$ip_addresses" + ln -s "$OCF_RESKEY_catalina_base"/* "$TOMCAT_gen_catalina_base" &> /dev/null + ln -s "$OCF_RESKEY_catalina_base"/conf/* "$TOMCAT_gen_catalina_base"/conf &> /dev/null + + CLASSPATH="$JAVA_HOME"/lib/tools.jar:"$CATALINA_HOME"/bin/bootstrap.jar:"$CATALINA_HOME"/bin/commons-logging-api.jar:`/usr/bin/build-classpath mx4j/mx4j-impl`:`/usr/bin/build-classpath mx4j/mx4j-jmx` + + su "$TOMCAT_USER" -c " \"$JAVA_HOME/bin/java\" $JAVA_OPTS $OCF_RESKEY_catalina_options \ + -Djava.endorsed.dirs=\"$JAVA_ENDORSED_DIRS\" -classpath \"$CLASSPATH\" \ + -Dcatalina.base=\"$TOMCAT_gen_catalina_base\" \ + -Dcatalina.home=\"$CATALINA_HOME\" \ + -Djava.io.tmpdir=\"$CATALINA_TMPDIR\" \ + org.apache.catalina.startup.Bootstrap \"$@\" start " \ + >> "$TOMCAT_gen_catalina_base"/logs/catalina.out 2>&1 & + + + if [ $? 
-ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + if [ -z "$!" ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + echo $! > "$TOMCAT_pid_file" + + clog_service_start $CLOG_SUCCEED + + return 0; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + stop_generic "$TOMCAT_pid_file" "$OCF_RESKEY_shutdown_wait" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + if [ -e "$TOMCAT_pid_file" ]; then + rm -f "$TOMCAT_pid_file" + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$TOMCAT_pid_file" + if [ $? -ne 0 ]; then + clog_service_status $CLOG_FAILED "$TOMCAT_pid_file" + return $OCF_ERR_GENERIC + fi + + clog_service_status $CLOG_SUCCEED + return 0 +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all && stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? 
+ ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/tomcat-6.metadata b/rgmanager/src/resources/tomcat-6.metadata new file mode 100644 index 0000000..493e0d5 --- /dev/null +++ b/rgmanager/src/resources/tomcat-6.metadata @@ -0,0 +1,74 @@ +<?xml version="1.0"?> +<resource-agent version="rgmanager 2.0" name="tomcat-6"> + <version>1.0</version> + + <longdesc lang="en"> + This defines an instance of Tomcat server + </longdesc> + <shortdesc lang="en"> + Defines a Tomcat server + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + Specifies a service name for logging and other purposes + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="config_file"> + <longdesc lang="en"> + Define absolute path to configuration file + </longdesc> + <shortdesc lang="en"> + Config File + </shortdesc> + <content type="string" default="/etc/tomcat6/tomcat6.conf"/> + </parameter> + + <parameter name="shutdown_wait"> + <longdesc lang="en"> + Wait X seconds for correct end of service shutdown + </longdesc> + <shortdesc lang="en"> + Wait X seconds for correct end of service shutdown + </shortdesc> + <content type="integer" default="30" /> + </parameter> + + <parameter name="service_name" inherit="service%name"> + <longdesc lang="en"> + Inherit the service name. We need to know + the service name in order to determine file + systems and IPs for this service. + </longdesc> + <shortdesc lang="en"> + Inherit the service name. 
+ </shortdesc> + <content type="string"/> + </parameter> + </parameters> + + <actions> + <action name="start" timeout="0"/> + <action name="stop" timeout="0"/> + + <!-- Checks to see if it''s mounted in the right place --> + <action name="status" interval="1m" timeout="10"/> + <action name="monitor" interval="1m" timeout="10"/> + + <!-- Checks to see if we can read from the mountpoint --> + <action name="status" depth="10" timeout="30" interval="5m"/> + <action name="monitor" depth="10" timeout="30" interval="5m"/> + + <action name="meta-data" timeout="0"/> + <action name="validate-all" timeout="0"/> + </actions> + + <special tag="rgmanager"> + </special> +</resource-agent> diff --git a/rgmanager/src/resources/tomcat-6.sh.in b/rgmanager/src/resources/tomcat-6.sh.in new file mode 100644 index 0000000..749a86d --- /dev/null +++ b/rgmanager/src/resources/tomcat-6.sh.in @@ -0,0 +1,250 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +export LC_ALL=C +export LANG=C +export PATH=/bin:/sbin:/usr/bin:/usr/sbin + +. $(dirname $0)/ocf-shellfuncs +. $(dirname $0)/utils/config-utils.sh +. $(dirname $0)/utils/messages.sh +. 
$(dirname $0)/utils/ra-skelet.sh + +declare TOMCAT_pid_file="`generate_name_for_pid_file`" +declare TOMCAT_conf_dir="`generate_name_for_conf_dir`/conf" +declare TOMCAT_gen_config_file="$TOMCAT_conf_dir/server.xml" +declare TOMCAT_gen_catalina_base="`generate_name_for_conf_dir`" + +declare CATALINA_HOME +declare CATALINA_BASE +declare CATALINA_TMPDIR +declare CLASSPATH +declare TOMCAT_USER +## + +verify_all() +{ + clog_service_verify $CLOG_INIT + + if [ -z "$OCF_RESKEY_name" ]; then + clog_service_verify $CLOG_FAILED "Invalid Name Of Service" + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_service_name" ]; then + clog_service_verify $CLOG_FAILED_NOT_CHILD + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_config_file" + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + if [ ! -r "$OCF_RESKEY_config_file" ]; then + clog_check_file_exist $CLOG_FAILED_NOT_READABLE $OCF_RESKEY_config_file + clog_service_verify $CLOG_FAILED + return $OCF_ERR_ARGS + fi + + . "$OCF_RESKEY_config_file" + + if [ $? -ne 0 ]; then + clog_service_verify $CLOG_FAILED "Error In The File \"$OCF_RESKEY_config_file\"" + return $OCF_ERR_ARGS + fi + + if [ -z "$CATALINA_HOME" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_HOME Not Specified In ${OCF_RESKEY_config_file}" + return $OCF_ERR_ARGS; + fi + + if [ ! -d "$CATALINA_HOME" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_HOME Does Not Exist" + return $OCF_ERR_ARGS; + fi + + if [ -z "$CATALINA_TMPDIR" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_TMPDIR Not Specified In ${OCF_RESKEY_config_file}" + return $OCF_ERR_ARGS; + fi + + if [ ! 
-d "$CATALINA_TMPDIR" ]; then + clog_service_verify $CLOG_FAILED "CATALINA_TMPDIR Does Not Exist" + return $OCF_ERR_ARGS; + fi + + if [ -z "$TOMCAT_USER" ]; then + clog_service_verify $CLOG_FAILED "TOMCAT_USER Does Not Exist" + return $OCF_ERR_ARGS; + fi + + clog_service_verify $CLOG_SUCCEED + + return 0 +} + +generate_config_file() +{ + declare original_file="$1" + declare generated_file="$2" + declare ip_addresses="$3" + + if [ -f "$generated_file" ]; then + sha1_verify "$generated_file" + if [ $? -ne 0 ]; then + clog_check_sha1 $CLOG_FAILED + return 0 + fi + fi + + clog_generate_config $CLOG_INIT "$original_file" "$generated_file" + + $(dirname $0)/utils/tomcat-parse-config.pl $ip_addresses < "$original_file" > "$generated_file" + + sha1_addToFileXML "$generated_file" + clog_generate_config $CLOG_SUCCEED "$original_file" "$generated_file" + + return 0; +} + +start() +{ + clog_service_start $CLOG_INIT + + create_conf_directory "$TOMCAT_conf_dir" + check_pid_file "$TOMCAT_pid_file" + + if [ $? -ne 0 ]; then + clog_check_pid $CLOG_FAILED "$TOMCAT_pid_file" + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + clog_looking_for $CLOG_INIT "IP Addresses" + + get_service_ip_keys "$OCF_RESKEY_service_name" + ip_addresses=`build_ip_list` + + if [ -z "$ip_addresses" ]; then + clog_looking_for $CLOG_FAILED_NOT_FOUND "IP Addresses" + return $OCF_ERR_GENERIC + fi + + clog_looking_for $CLOG_SUCCEED "IP Addresses" + + . 
"$OCF_RESKEY_config_file" + + create_pid_directory "$TOMCAT_USER" + + generate_config_file "$CATALINA_BASE/conf/server.xml" "$TOMCAT_gen_config_file" "$ip_addresses" + rm -f "$TOMCAT_gen_catalina_base/conf/tomcat6.conf" + ( cat $OCF_RESKEY_config_file | grep -v 'CATALINA_PID=' | grep -v 'CATALINA_BASE='; echo CATALINA_BASE="$TOMCAT_gen_catalina_base"; echo CATALINA_PID="$TOMCAT_pid_file") > "$TOMCAT_gen_catalina_base/conf/tomcat6.conf" + ln -s "$CATALINA_BASE"/* "$TOMCAT_gen_catalina_base" &> /dev/null + ln -s "$CATALINA_BASE"/conf/* "$TOMCAT_gen_catalina_base"/conf &> /dev/null + + export TOMCAT_CFG="$TOMCAT_gen_catalina_base/conf/tomcat6.conf" + + tomcat6_options="$tomcat6_options $( + awk '!/^#/ && !/^$/ { ORS=" "; print "export ", $0, ";" }' \ + $TOMCAT_CFG + )" + + eval "$tomcat6_options" + + /bin/su -s /bin/sh $TOMCAT_USER -c "/usr/sbin/tomcat6 start" + + if [ $? -ne 0 ]; then + clog_service_start $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + clog_service_start $CLOG_SUCCEED + + return 0; +} + +stop() +{ + clog_service_stop $CLOG_INIT + + stop_generic "$TOMCAT_pid_file" "$OCF_RESKEY_shutdown_wait" + + if [ $? -ne 0 ]; then + clog_service_stop $CLOG_FAILED + return $OCF_ERR_GENERIC + fi + + if [ -e "$TOMCAT_pid_file" ]; then + rm -f "$TOMCAT_pid_file" + fi + + clog_service_stop $CLOG_SUCCEED + return 0; +} + +status() +{ + clog_service_status $CLOG_INIT + + status_check_pid "$TOMCAT_pid_file" + if [ $? -ne 0 ]; then + clog_service_status $CLOG_FAILED "$TOMCAT_pid_file" + return $OCF_ERR_GENERIC + fi + + clog_service_status $CLOG_SUCCEED + return 0 +} + +case $1 in + meta-data) + cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` + exit 0 + ;; + validate-all) + verify_all + exit $? + ;; + start) + verify_all && start + exit $? + ;; + stop) + verify_all + stop + exit $? + ;; + status|monitor) + verify_all + status + exit $? + ;; + restart) + verify_all + stop + start + exit $? 
+ ;; + *) + echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/rgmanager/src/resources/utils/Makefile.am b/rgmanager/src/resources/utils/Makefile.am new file mode 100644 index 0000000..48b0b6a --- /dev/null +++ b/rgmanager/src/resources/utils/Makefile.am @@ -0,0 +1,37 @@ +# +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +MAINTAINERCLEANFILES = Makefile.in + +commonscripts = fs-lib.sh \ + httpd-parse-config.pl \ + member_util.sh \ + messages.sh \ + named-parse-config.pl \ + ra-skelet.sh \ + tomcat-parse-config.pl \ + config-utils.sh + +EXTRA_DIST = $(commonscripts) \ + rhev-check.sh + +sbin_SCRIPTS = rhev-check.sh + +rasutilsdir = ${CLUSTERDATA}/utils + +rasutils_SCRIPTS = $(commonscripts) diff --git a/rgmanager/src/resources/utils/config-utils.sh.in b/rgmanager/src/resources/utils/config-utils.sh.in new file mode 100644 index 0000000..b59bcc7 --- /dev/null +++ b/rgmanager/src/resources/utils/config-utils.sh.in @@ -0,0 +1,309 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +declare RA_COMMON_pid_dir=/var/run/cluster +declare RA_COMMON_conf_dir=/etc/cluster + +declare -i FAIL=255 +declare -a ip_keys + +generate_configTemplate() +{ + cat > "$1" << EOT +# +# "$1" was created from the "$2" +# +# This template configuration was automatically generated, and will be +# automatically regenerated if removed. Once this file has been altered, +# automatic re-generation will stop. Remember to copy this file to all +# other cluster members after making changes, or your service will not +# operate correctly. +# +EOT +} + +generate_configTemplateXML() +{ + cat > "$1" << EOT +<!-- + "$1" was created from the "$2" + + This template configuration was automatically generated, and will be + automatically regenerated if removed. Once this file has been altered, + automatic re-generation will stop. Remember to copy this file to all + other cluster members after making changes, or your service will not + operate correctly. 
+--> +EOT +} + +sha1_addToFile() +{ + declare sha1line="# rgmanager-sha1 $(sha1sum "$1")" + echo $sha1line >> "$1" +} + +sha1_addToFileXML() +{ + declare sha1line="<!--# rgmanager-sha1 $(sha1sum "$1")-->" + echo $sha1line >> "$1" +} + +sha1_verify() +{ + declare sha1_new sha1_old + declare oldFile=$1 + + ocf_log debug "Checking: SHA1 checksum of config file $oldFile" + + sha1_new=`cat "$oldFile" | grep -v "# rgmanager-sha1" | sha1sum | sed 's/^\([a-z0-9]\+\) .*$/\1/'` + sha1_old=`tail -n 1 "$oldFile" | sed 's/^\(<!--\)\?# rgmanager-sha1 \(.*\)$/\2/' | sed 's/^\([a-z0-9]\+\) .*$/\1/'` + + if [ "$sha1_new" = "$sha1_old" ]; then + ocf_log debug "Checking: SHA1 checksum > succeed" + return 0; + else + ocf_log debug "Checking: SHA1 checksum > failed - file changed" + return 1; + fi +} + +# +# Usage: ccs_get key +# +ccs_get() +{ + declare outp + declare key + + [ -n "$1" ] || return $FAIL + + key="$*" + + outp=$(ccs_tool query "$key" 2>&1) + if [ $? -ne 0 ]; then + if [[ "$outp" =~ "Query failed: Invalid argument" ]]; then + # This usually means that element does not exist + # e.g. when checking for IP address + return 0; + fi + + if [ "$outp" = "${outp/No data available/}" ] || [ "$outp" = "${outp/Operation not permitted/}" ]; then + ocf_log err "$outp ($key)" + return $FAIL + fi + + # no real error, just no data available + return 0 + fi + + echo $outp + + return 0 +} + +# +# Build a list of service IP keys; traverse refs if necessary +# Usage: get_service_ip_keys desc serviceName +# +get_service_ip_keys() +{ + declare svc=$1 + declare -i x y=0 + declare outp + declare key + + # + # Find service-local IP keys + # + x=1 + while : ; do + key="/cluster/rm/service[@name=\"$svc\"]/ip[$x]" + + # + # Try direct method + # + outp=$(ccs_get "$key/@address") + if [ $? -ne 0 ]; then + return 1 + fi + + # + # Try by reference + # + if [ -z "$outp" ]; then + outp=$(ccs_get "$key/@ref") + if [ $? 
-ne 0 ]; then + return 1 + fi + key="/cluster/rm/resources/ip[@address=\"$outp\"]" + fi + + if [ -z "$outp" ]; then + break + fi + + #ocf_log debug "IP $outp found @ $key" + + ip_keys[$y]="$key" + + ((y++)) + ((x++)) + done + + ocf_log debug "$y IP addresses found for $svc/$OCF_RESKEY_name" + + return 0 +} + +build_ip_list() +{ + declare ipaddrs ipaddr + declare -i x=0 + + while [ -n "${ip_keys[$x]}" ]; do + ipaddr=$(ccs_get "${ip_keys[$x]}/@address") + if [ -z "$ipaddr" ]; then + break + fi + + # remove netmask + iponly=`echo $ipaddr | sed 's/\/.*//'` + ipaddrs="$ipaddrs $iponly" + ((x++)) + done + + echo $ipaddrs +} + +generate_name_for_pid_file() +{ + declare filename=$(basename $0) + + echo "$RA_COMMON_pid_dir/$(basename $0 | sed 's/^\(.*\)\..*/\1/')/$OCF_RESOURCE_INSTANCE.pid" + + return 0; +} + +generate_name_for_pid_dir() +{ + declare filename=$(basename $0) + + echo "$RA_COMMON_pid_dir/$(basename $0 | sed 's/^\(.*\)\..*/\1/')/$OCF_RESOURCE_INSTANCE" + + return 0; +} + +generate_name_for_conf_dir() +{ + declare filename=$(basename $0) + + echo "$RA_COMMON_conf_dir/$(basename $0 | sed 's/^\(.*\)\..*/\1/')/$OCF_RESOURCE_INSTANCE" + + return 0; +} + +set_pid_directory_permissions() +{ + declare program_name="$1" + declare dirname="$2" + declare username="$3" + + if [ "$program_name" = "mysql" ]; then + if [ -n "$username" ]; then + chown "${username}.root" "$dirname" + else + chown mysql.root "$dirname" + fi + elif [ "$program_name" = "tomcat-5" -o "$program_name" = "tomcat-6" ]; then + if [ -n "$username" ]; then + chown "${username}.root" "$dirname" + else + chown tomcat.root "$dirname" + fi + elif [ "$program_name" = "named" ]; then + if [ -n "$username" ]; then + chown "${username}.root" "$dirname" + fi + fi +} + +# +# Usage: create_pid_directory [username] +# +create_pid_directory() +{ + declare program_name="$(basename $0 | sed 's/^\(.*\)\..*/\1/')" + declare dirname="$RA_COMMON_pid_dir/$program_name" + declare username="$1" + + if [ -d "$dirname" ]; then 
+ # make sure the permissions are correct even if directory exists + set_pid_directory_permissions "$program_name" "$dirname" "$username" + return 0; + fi + + chmod 711 "$RA_COMMON_pid_dir" + mkdir -p "$dirname" + + set_pid_directory_permissions "$program_name" "$dirname" "$username" + return 0; +} + +create_conf_directory() +{ + declare dirname="$1" + + if [ -d "$dirname" ]; then + return 0; + fi + + mkdir -p "$dirname" + + return 0; +} + +check_pid_file() { + declare pid_file="$1" + + if [ -z "$pid_file" ]; then + return 1; + fi + + if [ ! -e "$pid_file" ]; then + return 0; + fi + + ## if PID file is empty then it should be safe to remove it + read pid < "$pid_file" + if [ -z "$pid" ]; then + rm $pid_file + ocf_log debug "PID File \"$pid_file\" Was Removed - Zero length"; + return 0; + fi + + if [ ! -d /proc/`cat "$pid_file"` ]; then + rm "$pid_file" + ocf_log debug "PID File \"$pid_file\" Was Removed - PID Does Not Exist"; + return 0; + fi + + return 1; +} diff --git a/rgmanager/src/resources/utils/fs-lib.sh.in b/rgmanager/src/resources/utils/fs-lib.sh.in new file mode 100644 index 0000000..8b0ef5e --- /dev/null +++ b/rgmanager/src/resources/utils/fs-lib.sh.in @@ -0,0 +1,1230 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +# +# File system common functions +# + +LC_ALL=C +LANG=C +PATH=/bin:/sbin:/usr/bin:/usr/sbin +export LC_ALL LANG PATH + +# Define this value to 0 by default, bind-mount.sh or any other agent +# that uses this value will alter it after sourcing fs-lib.sh +export IS_BIND_MOUNT=0 + +# Private return codes +FAIL=2 +NO=1 +YES=0 +YES_STR="yes" + +[ -z "$OCF_RESOURCE_INSTANCE" ] && export OCF_RESOURCE_INSTANCE="filesystem:$OCF_RESKEY_name" + +# +# Using a global to contain the return value saves +# clone() operations. This is important to reduce +# resource consumption during status checks. +# +# There is no way to return a string from a function +# in bash without cloning the process, which is exactly +# what we are trying to avoid. So, we have to resort +# to using a dedicated global variables. +declare REAL_DEVICE +declare STRIP_SLASHES="" +declare FINDMNT_OUTPUT="" + +# +# Stub ocf_log function for when we are using +# quick_status, since ocf_log generally forks (and +# sourcing ocf-shellfuncs forks -a lot-). +# +ocf_log() +{ + echo $* +} + +# +# Assume NFS_TRICKS are not available until we are +# proved otherwise. +# +export NFS_TRICKS=1 + +# +# Quick status doesn't fork() or clone() when using +# device files directly. (i.e. not symlinks, LABEL= or +# UUID= +# +if [ "$1" = "status" -o "$1" = "monitor" ] && + [ "$OCF_RESKEY_quick_status" = "1" ]; then + echo Using Quick Status + + # XXX maybe we can make ocf-shellfuncs have a 'quick' mode too? + export OCF_SUCCESS=0 + export OCF_ERR_GENERIC=1 +else + # + # Grab nfs lock tricks if available + # + if [ -f "$(dirname $0)/svclib_nfslock" ]; then + . $(dirname $0)/svclib_nfslock + NFS_TRICKS=0 + fi + + . 
$(dirname $0)/ocf-shellfuncs +fi + + +verify_name() +{ + if [ -z "$OCF_RESKEY_name" ]; then + ocf_log err "No file system name specified." + return $OCF_ERR_ARGS + fi + return $OCF_SUCCESS +} + + +verify_mountpoint() +{ + if [ -z "$OCF_RESKEY_mountpoint" ]; then + ocf_log err "No mount point specified." + return $OCF_ERR_ARGS + fi + + if ! [ -e "$OCF_RESKEY_mountpoint" ]; then + ocf_log info "Mount point $OCF_RESKEY_mountpoint will be "\ + "created at mount time." + return $OCF_SUCCESS + fi + + [ -d "$OCF_RESKEY_mountpoint" ] && return $OCF_SUCCESS + + ocf_log err "$OCF_RESKEY_mountpoint exists but is not a directory." + + return $OCF_ERR_ARGS +} + + +# +# This used to be called using $(...), but doing this causes bash +# to set up a pipe and clone(). So, the output of this function is +# stored in the global variable REAL_DEVICE, declared previously. +# +real_device() +{ + declare dev="$1" + declare realdev + + if [ $IS_BIND_MOUNT -eq 1 ]; then + REAL_DEVICE="$dev" + return $OCF_SUCCESS + fi + REAL_DEVICE="" + + [ -z "$dev" ] && return $OCF_ERR_ARGS + + # Oops, we have a link. Sorry, this is going to fork. + if [ -h "$dev" ]; then + realdev=$(readlink -f $dev) + if [ $? -ne 0 ]; then + return $OCF_ERR_ARGS + fi + REAL_DEVICE="$realdev" + return $OCF_SUCCESS + fi + + # If our provided blockdev is a device, we are done + if [ -b "$dev" ]; then + REAL_DEVICE="$dev" + return $OCF_SUCCESS + fi + + # It's not a link, it's not a block device. If it also + # does not match UUID= or LABEL=, then findfs is not + # going to find anything useful, so we should quit now. + if [ "${dev/UUID=/}" = "$dev" ] && + [ "${dev/LABEL=/}" = "$dev" ]; then + return $OCF_ERR_GENERIC + fi + + # When using LABEL= or UUID=, we can't save a fork. 
+ realdev=$(findfs "$dev" 2> /dev/null) + if [ -n "$realdev" ] && [ -b "$realdev" ]; then + REAL_DEVICE="$realdev" + return $OCF_SUCCESS + fi + + return $OCF_ERR_GENERIC +} + + +verify_device() +{ + declare realdev + + if [ -z "$OCF_RESKEY_device" ]; then + ocf_log err "No device or label specified." + return $OCF_ERR_ARGS + fi + + real_device "$OCF_RESKEY_device" + realdev="$REAL_DEVICE" + if [ -n "$realdev" ]; then + if [ "$realdev" != "$OCF_RESKEY_device" ]; then + ocf_log info "Specified $OCF_RESKEY_device maps to $realdev" + fi + return $OCF_SUCCESS + fi + + ocf_log err "Device or label \"$OCF_RESKEY_device\" not valid" + + return $OCF_ERR_ARGS +} + +list_mounts() +{ + if [ $IS_BIND_MOUNT -eq 1 ]; then + cat /etc/mtab + else + cat /proc/mounts + fi +} + +## +# Tries to use findmnt util to return list +# of mountpoints for a device +# +# Global variables are used to reduce forking when capturing stdout. +# +# Return values +# 0 - device mount points found, mountpoint list returned to FINDMNT_OUTPUT global variable +# 1 - device mount not found +# 2 - findmnt tool isn't found or can not be used +# +## +try_findmnt() +{ + FINDMNT_OUTPUT="" + + case $OCF_RESKEY_use_findmnt in + 0|false|no|off) + return 2 ;; + *) + : ;; + esac + + which findmnt > /dev/null 2>&1 + if [ $? -eq 0 ]; then + FINDMNT_OUTPUT="$(findmnt -o TARGET --noheadings $1)" + if [ $? -ne 0 ]; then + # workaround mount helpers inconsistency that still + # add / on the device entry in /proc/mounts + FINDMNT_OUTPUT="$(findmnt -o TARGET --noheadings $1/)" + if [ $? -ne 0 ]; then + return 1 + else + return 0 + fi + else + return 0 + fi + fi + + return 2 +} + +## +# Returns result in global variable to reduce forking +## +strip_trailing_slashes() +{ + local tmp=$1 + while [ "${tmp#${tmp%?}}" = "/" ] + do + tmp="${tmp%/}" + done + + STRIP_SLASHES="$tmp" +} + +# +# kill_procs_using_mount mount_point [signal] +# +# Kill any processes using the specified mountpoint, using the optional +# specified signal. 
This is used in place of fuser to avoid it becoming +# blocked while following symlinks to an unresponsive file system. +# Defaults to SIGKILL if no signal specified. +# +kill_procs_using_mount () { + declare mp + declare procs + declare mmap_procs + + if [ $# -lt 1 -o -z "$1" ]; then + ocf_log err "Usage: kill_procs_using_mount mount_point [signal]" + return $FAIL + fi + + strip_trailing_slashes "$1" + mp="$STRIP_SLASHES" + + if [ -z "$mp" ]; then + ocf_log err "Usage: kill_procs_using_mount mount_point [signal]" + return $FAIL + fi + + # anything held open in mount point after the slash + procs=$(find /proc/[0-9]*/ -type l -lname "${mp}/*" -or -lname "${mp}" 2>/dev/null | awk -F/ '{print $3}' | uniq) + + # anything with memory mapping to something in the mountpoint + mmap_procs=$(grep " ${mp}" /proc/[0-9]*/maps | awk -F/ '{print $3}' | uniq) + procs=$(echo -e "${procs}\n${mmap_procs}" | sort | uniq) + + for pid in $procs; do + if [ -n "$2" ]; then + kill -s $2 $pid + else + kill -s KILL $pid + fi + done + + return $SUCCESS +} + +# +# mount_in_use device mount_point +# +# Check to see if either the device or mount point are in use anywhere on +# the system. It is not required that the device be mounted on the named +# moint point, just if either are in use. +# +mount_in_use () { + declare mp tmp_mp + declare tmp_type + declare dev tmp_dev + declare junkb junkc junkd + declare res=$FAIL + declare findmnt_res=2 + + if [ $# -ne 2 ]; then + ocf_log err "Usage: mount_in_use device mount_point". + return $FAIL + fi + + dev="$1" + mp="$2" + + # First try and find out if the device has a mount point by + # attempting to use the findmnt tool. It is much faster than + # iterating through /proc/mounts + try_findmnt $dev + findmnt_res=$? 
+ if [ $findmnt_res -eq 0 ]; then + case $OCF_RESKEY_fstype in + cifs|nfs|nfs4) + # -r means to include '/' character and not treat it as escape character + while read -r tmp_mp + do + if [ "$tmp_mp" = "$mp" ]; then + return $YES + fi + done < <(echo "$FINDMNT_OUTPUT") + ;; + *) + return $YES + ;; + esac + fi + + while read -r tmp_dev tmp_mp tmp_type junkb junkc junkd; do + + if [ "$tmp_type" = "autofs" ]; then + continue + fi + + # Does the device match? We might have already tried findmnt + # which is why this could get skipped + if [ $findmnt_res -eq 2 ]; then + if [ "${tmp_dev:0:1}" != "-" ]; then + # XXX fork/clone warning XXX + tmp_dev="$(printf "$tmp_dev")" + fi + + strip_trailing_slashes "$tmp_dev" + tmp_dev="$STRIP_SLASHES" + if [ -n "$tmp_dev" -a "$tmp_dev" = "$dev" ]; then + case $OCF_RESKEY_fstype in + cifs|nfs|nfs4) + ;; + *) + return $YES + ;; + esac + fi + fi + + # Mountpoint from /proc/mounts containing spaces will + # have spaces represented in octal. printf takes care + # of this for us. + tmp_mp="$(printf "$tmp_mp")" + + if [ -n "$tmp_mp" -a "$tmp_mp" = "$mp" ]; then + return $YES + fi + done < <(list_mounts) + + return $NO +} + +## +# Returns whether or not the device is mounted. +# If the mountpoint does not match the one provided, the +# mount point found is printed to stdout. +## +real_mountpoint() +{ + declare dev=$1 + declare mp=$2 + declare ret=$NO + declare tmp_mp + declare tmp_dev + declare tmp_type + declare found=1 + declare poss_mp="" + + try_findmnt $dev + case $? in + 0) #findmnt found mount points, loop through them to find a match + + # -r means to include '/' character and not treat it as escape character + while read -r tmp_mp + do + ret=$YES + if [ "$tmp_mp" != "$mp" ]; then + poss_mp=$tmp_mp + else + found=0 + break + fi + done < <(echo "$FINDMNT_OUTPUT") + ;; + 1) + # findmnt found no mount points for the device + return $NO + ;; + 2) # findmnt tool could not be used. + # Using slow method reading /proc/mounts dir. 
+ while read -r tmp_dev tmp_mp tmp_type junk_b junk_c junk_d + do + if [ "$tmp_type" = "autofs" ]; then + continue + fi + + if [ "${tmp_dev:0:1}" != "-" ]; then + # XXX fork/clone warning XXX + tmp_dev="$(printf "$tmp_dev")" + fi + + # CIFS mounts can sometimes have trailing slashes + # in their first field in /proc/mounts, so strip them. + strip_trailing_slashes "$tmp_dev" + tmp_dev="$STRIP_SLASHES" + real_device "$tmp_dev" + tmp_dev="$REAL_DEVICE" + + # XXX fork/clone warning XXX + # Mountpoint from /proc/mounts containing spaces will + # have spaces represented in octal. printf takes care + # of this for us. + tmp_mp="$(printf "$tmp_mp")" + + if [ -n "$tmp_dev" -a "$tmp_dev" = "$dev" ]; then + ret=$YES + # + # Check to see if its mounted in the right + # place + # + if [ -n "$tmp_mp" ]; then + if [ "$tmp_mp" != "$mp" ]; then + poss_mp=$tmp_mp + else + found=0 + break + fi + fi + fi + done < <(list_mounts) + esac + + if [ $found -ne 0 ]; then + echo "$poss_mp" + fi + return $ret +} + +# +# is_mounted device mount_point +# +# Check to see if the device is mounted. Print a warning if its not +# mounted on the directory we expect it to be mounted on. +# +is_mounted () { + + declare mp + declare dev + declare ret=$FAIL + declare poss_mp + + if [ $# -ne 2 ]; then + ocf_log err "Usage: is_mounted device mount_point" + return $FAIL + fi + + real_device "$1" + dev="$REAL_DEVICE" + if [ -z "$dev" ]; then + ocf_log err "$OCF_RESOURCE_INSTANCE: is_mounted: Could not match $1 with a real device" + return $OCF_ERR_ARGS + fi + + if [ -h "$2" ]; then + mp="$(readlink -f $2)" + else + mp="$2" + fi + + # This bash glyph simply removes a trailing slash + # if one exists. /a/b/ -> /a/b; /a/b -> /a/b. + mp="${mp%/}" + + poss_mp=$(real_mountpoint "$dev" "$mp") + ret=$? 
+ + if [ $ret -eq $YES ] && [ -n "$poss_mp" ]; then + # if we made it here, then the device is mounted, but not where + # we expected it to be + case $OCF_RESKEY_fstype in + cifs|nfs|nfs4) + ret=$NO + ;; + *) + ocf_log warn "Device $dev is mounted on $poss_mp instead of $mp" + ;; + esac + fi + + + return $ret +} + + +# +# is_alive mount_point +# +# Check to see if mount_point is alive (testing read/write) +# +is_alive() +{ + declare errcode + declare mount_point="$1" + declare file + declare rw + + if [ $# -ne 1 ]; then + ocf_log err "Usage: is_alive mount_point" + return $FAIL + fi + + [ -z "$OCF_CHECK_LEVEL" ] && export OCF_CHECK_LEVEL=0 + + test -d "$mount_point" + if [ $? -ne 0 ]; then + ocf_log err "${OCF_RESOURCE_INSTANCE}: is_alive: $mount_point is not a directory" + return $FAIL + fi + + [ $OCF_CHECK_LEVEL -lt 10 ] && return $YES + + # depth 10 test (read test) + ls "$mount_point" > /dev/null 2> /dev/null + errcode=$? + if [ $errcode -ne 0 ]; then + ocf_log err "${OCF_RESOURCE_INSTANCE}: is_alive: failed read test on [$mount_point]. Return code: $errcode" + return $NO + fi + + [ $OCF_CHECK_LEVEL -lt 20 ] && return $YES + + # depth 20 check (write test) + rw=$YES + for o in `echo $OCF_RESKEY_options | sed -e s/,/\ /g`; do + if [ "$o" = "ro" ]; then + rw=$NO + fi + done + if [ $rw -eq $YES ]; then + file=$(mktemp "$mount_point/.check_writable.$(hostname).XXXXXX") + if [ ! -e "$file" ]; then + ocf_log err "${OCF_RESOURCE_INSTANCE}: is_alive: failed write test on [$mount_point]. 
Return code: $errcode" + return $NO + fi + rm -f $file > /dev/null 2> /dev/null + fi + + return $YES +} + + +# +# Decide which quota options are enabled and return a string +# which we can pass to quotaon +# +quota_opts() +{ + declare quotaopts="" + declare opts="$1" + declare mopt + + for mopt in `echo $opts | sed -e s/,/\ /g`; do + case $mopt in + quota) + quotaopts="gu" + break + ;; + usrquota) + quotaopts="u$quotaopts" + continue + ;; + grpquota) + quotaopts="g$quotaopts" + continue + ;; + noquota) + quotaopts="" + return 0 + ;; + esac + done + + echo $quotaopts + return 0 +} + + + +# +# Enable quotas on the mount point if the user requested them +# +enable_fs_quotas() +{ + declare -i need_check=0 + declare -i rv + declare quotaopts="" + declare mopt + declare opts="$1" + declare mp="$2" + + if ! type quotaon &> /dev/null; then + ocf_log err "quotaon not found in $PATH" + return $OCF_ERR_GENERIC + fi + + quotaopts=$(quota_opts $opts) + [ -z "$quotaopts" ] && return 0 + + ocf_log debug "quotaopts = $quotaopts" + + # Ok, create quota files if they don't exist + for f in quota.user aquota.user quota.group aquota.group; do + if ! [ -f "$mp/$f" ]; then + ocf_log info "$mp/$f was missing - creating" + touch "$mp/$f" + chmod 600 "$mp/$f" + need_check=1 + fi + done + + if [ $need_check -eq 1 ]; then + ocf_log info "Checking quota info in $mp" + quotacheck -$quotaopts "$mp" + fi + + ocf_log info "Enabling Quotas on $mp" + ocf_log debug "quotaon -$quotaopts \"$mp\"" + quotaon -$quotaopts "$mp" + rv=$? + if [ $rv -ne 0 ]; then + # Just a warning + ocf_log warn "Unable to turn on quotas for $mp; return = $rv" + fi + + return $rv +} + + +# Agent-specific actions to take before mounting +# (if required). Typically things like fsck. 
+do_pre_mount() { + return 0 +} + +# Default mount handler - for block devices +# +do_mount() { + declare dev="$1" + declare mp="$2" + declare mount_options="" + declare fstype_option="" + declare fstype + + # + # Get the filesystem type, if specified. + # + fstype_option="" + fstype=${OCF_RESKEY_fstype} + case "$fstype" in + ""|"[ ]*") + fstype="" + ;; + *) # found it + fstype_option="-t $fstype" + ;; + esac + + # + # Get the mount options, if they exist. + # + mount_options="" + opts=${OCF_RESKEY_options} + case "$opts" in + ""|"[ ]*") + opts="" + ;; + *) # found it + mount_options="-o $opts" + ;; + esac + + # + # Mount the device + # + ocf_log info "mounting $dev on $mp" + ocf_log err "mount $fstype_option $mount_options $dev $mp" + mount $fstype_option $mount_options "$dev" "$mp" + ret_val=$? + if [ $ret_val -ne 0 ]; then + ocf_log err "\ +'mount $fstype_option $mount_options $dev $mp' failed, error=$ret_val" + return 1 + fi + + return 0 +} + + +# Agent-specific actions to take after mounting +# (if required). +do_post_mount() { + return 0 +} + + +# Agent-specific actions to take before unmounting +# (if required) +do_pre_unmount() { + return 0 +} + + +# Agent-specific actions to take after umount succeeds +# (if required) +do_post_unmount() { + return 0 +} + + +# Agent-specific force unmount logic, if required +# return = 0 if successful, or nonzero if unsuccessful +# (unsuccessful = try harder) +do_force_unmount() { + return 1 +} + + +# +# start_filesystem +# +start_filesystem() { + declare -i ret_val=$OCF_SUCCESS + declare mp="${OCF_RESKEY_mountpoint}" + declare dev="" # device + declare fstype="" + declare opts="" + declare mount_options="" + + # + # Check if fstype is supported + # + verify_fstype + case $? in + $OCF_ERR_ARGS) + ocf_log err "File system type $OCF_RESKEY_fstype not supported" + return $OCF_ERR_ARGS + ;; + *) + ;; + esac + + # + # Check if mount point was specified. If not, no need to continue. 
+ # + case "$mp" in + ""|"[ ]*") # nothing to mount + return $OCF_SUCCESS + ;; + /*) # found it + ;; + *) # invalid format + ocf_log err \ +"start_filesystem: Invalid mount point format (must begin with a '/'): \'$mp\'" + return $OCF_ERR_ARGS + ;; + esac + + # + # Get the device + # + real_device "$OCF_RESKEY_device" + dev="$REAL_DEVICE" + if [ -z "$dev" ]; then + ocf_log err "\ +start_filesystem: Could not match $OCF_RESKEY_device with a real device" + return $OCF_ERR_ARGS + fi + + # + # Ensure we've got a valid directory + # + if [ -e "$mp" ]; then + if ! [ -d "$mp" ]; then + ocf_log err"\ +start_filesystem: Mount point $mp exists but is not a directory" + return $OCF_ERR_ARGS + fi + else + ocf_log err "\ +start_filesystem: Creating mount point $mp for device $dev" + mkdir -p "$mp" + ret_val=$? + if [ $ret_val -ne 0 ]; then + ocf_log err "\ +start_filesystem: Unable to create $mp. Error code: $ret_val" + return $OCF_ERR_GENERIC + fi + fi + + # + # See if the device is already mounted. + # + is_mounted "$dev" "$mp" + case $? in + $YES) # already mounted + ocf_log debug "$dev already mounted" + return $OCF_SUCCESS + ;; + $NO) # not mounted, continue + ;; + *) + return $FAIL + ;; + esac + + + # + # Make sure that neither the device nor the mount point are mounted + # (i.e. they may be mounted in a different location). The'mount_in_use' + # function checks to see if either the device or mount point are in + # use somewhere else on the system. + # + mount_in_use "$dev" "$mp" + case $? in + $YES) # uh oh, someone is using the device or mount point + ocf_log err "\ +Cannot mount $dev on $mp, the device or mount point is already in use!" + return $FAIL + ;; + $NO) # good, no one else is using it + ;; + $FAIL) + return $FAIL + ;; + *) + ocf_log err "Unknown return from mount_in_use" + return $FAIL + ;; + esac + + do_pre_mount + case $? in + 0) + ;; + 1) + return $OCF_ERR_GENERIC + ;; + 2) + return $OCF_SUCCESS + ;; + esac + + do_mount "$dev" "$mp" + case $? 
in + 0) + ;; + 1) + return $OCF_ERR_GENERIC + ;; + 2) + return $OCF_SUCCESS + ;; + esac + + do_post_mount + case $? in + 0) + ;; + 1) + return $OCF_ERR_GENERIC + ;; + 2) + return $OCF_SUCCESS + ;; + esac + + enable_fs_quotas "$opts" "$mp" + + return $OCF_SUCCESS +} + + +# +# stop_filesystem - unmount a file system; calls out to +# +stop_filesystem() { + declare -i ret_val=0 + declare -i try + declare -i sleep_time=5 # time between each umount failure + declare umount_failed="" + declare force_umount="" + declare self_fence="" + declare quotaopts="" + + # + # Get the mount point, if it exists. If not, no need to continue. + # + mp=${OCF_RESKEY_mountpoint} + case "$mp" in + ""|"[ ]*") # nothing to mount + return $OCF_SUCCESS + ;; + /*) # found it + ;; + *) # invalid format + ocf_log err \ +"stop_filesystem: Invalid mount point format (must begin with a '/'): \'$mp\'" + return $FAIL + ;; + esac + + # + # Get the device + # + real_device "$OCF_RESKEY_device" + dev="$REAL_DEVICE" + if [ -z "$dev" ]; then + ocf_log err "\ +stop: Could not match $OCF_RESKEY_device with a real device" + return $OCF_ERR_INSTALLED + fi + + # + # Get the force unmount setting if there is a mount point. + # + case ${OCF_RESKEY_force_unmount} in + $YES_STR) force_umount=$YES ;; + on) force_umount=$YES ;; + true) force_umount=$YES ;; + 1) force_umount=$YES ;; + *) force_umount="" ;; + esac + + # + # self_fence _MUST_ be initialized before calling do_pre_unmount + # The netfs agent depends on the self_fence variable. + # + case ${OCF_RESKEY_self_fence} in + $YES_STR) self_fence=$YES ;; + on) self_fence=$YES ;; + true) self_fence=$YES ;; + 1) self_fence=$YES ;; + *) self_fence="" ;; + esac + + do_pre_unmount + case $? 
in + 0) + ;; + 1) + return $OCF_ERR_GENERIC + ;; + 2) + return $OCF_SUCCESS + ;; + esac + + # + # Preparations: sync, turn off quotas + # + sync + + quotaopts=$(quota_opts $OCF_RESKEY_options) + if [ -n "$quotaopts" ]; then + ocf_log debug "Turning off quotas for $mp" + quotaoff -$quotaopts "$mp" &> /dev/null + fi + + # + # Unmount the device. + # + for try in 1 2 3; do + if [ $try -ne 1 ]; then + sleep $sleep_time + fi + + is_mounted "$dev" "$mp" + case $? in + $NO) + ocf_log info "$dev is not mounted" + umount_failed= + break + ;; + $YES) # fallthrough + ;; + *) + return $FAIL + ;; + esac + + case ${OCF_RESKEY_no_unmount} in + yes|YES|true|TRUE|YES|on|ON|1) + ocf_log debug "Skipping umount on stop because of 'no_unmount' option" + return $OCF_SUCCESS + ;; + *) : ;; + esac + + ocf_log info "unmounting $mp" + umount "$mp" + ret_val=$? + # some versions of umount will exit with status 16 iff + # the umount(2) succeeded but /etc/mtab could not be written. + if [ $ret_val -eq 0 -o $ret_val -eq 16 ]; then + umount_failed= + break + fi + + ocf_log debug "umount failed: $ret_val" + umount_failed=yes + + if [ -z "$force_umount" ]; then + continue + fi + + # Force unmount: try #1: send SIGTERM + if [ $try -eq 1 ]; then + # Try fs-specific force unmount, if provided + do_force_unmount + if [ $? -eq 0 ]; then + # if this succeeds, we should be done + continue + fi + + ocf_log warning "Sending SIGTERM to processes on $mp" + kill_procs_using_mount "$mp" "TERM" + continue + else + ocf_log warning "Sending SIGKILL to processes on $mp" + kill_procs_using_mount "$mp" + + if [ $? -eq 0 ]; then + # someone is still accessing the mount, We've already sent + # SIGTERM, now we've sent SIGKILL and are trying umount again. + continue + fi + # mount has failed, and no one is accessing it. There's + # nothing left for us to try. + break + fi + done # for + + do_post_unmount + case $? 
in + 0) + ;; + 1) + return $OCF_ERR_GENERIC + ;; + 2) + return $OCF_SUCCESS + ;; + esac + + if [ -n "$umount_failed" ]; then + ocf_log err "'umount $mp' failed, error=$ret_val" + + if [ "$self_fence" ]; then + ocf_log alert "umount failed - REBOOTING" + sync + reboot -fn + fi + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + + +do_start() { + declare tries=0 + declare rv + + while [ $tries -lt 3 ]; do + start_filesystem + rv=$? + if [ $rv -eq 0 ]; then + return 0 + fi + + ((tries++)) + sleep 3 + done + return $rv +} + + +do_stop() { + stop_filesystem + return $? +} + + +do_monitor() { + ocf_log debug "Checking fs \"$OCF_RESKEY_name\", Level $OCF_CHECK_LEVEL" + + # + # Get the device + # + real_device "$OCF_RESKEY_device" + dev="$REAL_DEVICE" + if [ -z "$dev" ]; then + ocf_log err "\ +start_filesystem: Could not match $OCF_RESKEY_device with a real device" + return $OCF_NOT_RUNNING + fi + + is_mounted "$dev" "${OCF_RESKEY_mountpoint}" + + if [ $? -ne $YES ]; then + ocf_log err "${OCF_RESOURCE_INSTANCE}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}" + return $OCF_NOT_RUNNING + fi + + if [ "$OCF_RESKEY_quick_status" = "1" ]; then + return 0 + fi + + is_alive "${OCF_RESKEY_mountpoint}" + [ $? -eq $YES ] && return 0 + + ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!" + return $OCF_ERR_GENERIC +} + + +do_restart() { + stop_filesystem + if [ $? -ne 0 ]; then + return $OCF_ERR_GENERIC + fi + + start_filesystem + if [ $? -ne 0 ]; then + return $OCF_ERR_GENERIC + fi + + return 0 +} + + +# MUST BE OVERRIDDEN +do_metadata() { + return 1 +} + + +do_validate() { + return 1 +} + + +main() { + case $1 in + start) + do_start + exit $? + ;; + stop) + do_stop + exit $? + ;; + status|monitor) + do_monitor + exit $? + ;; + restart) + do_restart + exit $? + ;; + meta-data) + do_metadata + exit $? 
+ ;; + validate-all) + do_validate + ;; + *) + echo "usage: $0 {start|stop|status|monitor|restart|meta-data|validate-all}" + exit $OCF_ERR_UNIMPLEMENTED + ;; + esac + exit 0 +} + diff --git a/rgmanager/src/resources/utils/httpd-parse-config.pl b/rgmanager/src/resources/utils/httpd-parse-config.pl new file mode 100755 index 0000000..5b643f6 --- /dev/null +++ b/rgmanager/src/resources/utils/httpd-parse-config.pl @@ -0,0 +1,83 @@ +#!/usr/bin/perl -w +# +# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +## +## This script removes <IfDefine foo> sections from the +## Apache httpd.conf file. This is quite useful because we +## don't have any direct access to the parsed configuration +## file of the httpd server. +## +## Usage: ./httpd-parse-config.pl -Dfoo1 -Dfoo2 < httpd.conf +## where fooX are defines as passed to the httpd server +## +## Note: All whitespace characters at the beginning and end +## of lines are removed. +## +use strict; + +my @defines = (); +## Default behaviour is to show all lines when we are not +## in the <IfDefine foo> sections. 
+my @show = (1);
+
+## Evaluate the condition of an <IfDefine ...> directive.
+##
+## The single argument is the directive's parameter: a define name,
+## optionally prefixed with "!" to negate the test.
+## Returns 1 when the section applies given the names collected in
+## @defines, 0 otherwise.
+sub testIfDefine($) {
+    ## Take the parameter from the argument list.  The previous code read
+    ## the caller's $1 instead, which only worked because the caller had
+    ## just matched the directive.
+    my ($param) = @_;
+    my $positiveTest = 1;
+    if ($param =~ /^!(.*)$/) {
+        $param = $1;
+        $positiveTest = 0;
+    }
+
+    foreach my $def (@defines) {
+        if ($def eq $param) {
+            return $positiveTest;
+        }
+    }
+
+    return (1-$positiveTest);
+}
+
+## Collect the names given as -Dname on the command line.
+foreach my $arg (@ARGV) {
+    if ($arg =~ /^-D(.*)$/) {
+        push(@defines, $1);
+    }
+}
+
+## Parse config file and remove IfDefine sections
+##
+## @show is a stack with one entry per open <IfDefine>; the top entry
+## says whether the current line is printed.  Its first entry is the
+## default (1) and stays there for the whole run.
+while (my $line = <STDIN>) {
+    chomp($line);
+    $line =~ s/^\s*(.*?)\s*$/$1/;
+    if ($line =~ /<IfDefine (.*)>/) {
+        ## A nested section is visible only when the enclosing one is
+        ## visible and its own condition holds.
+        my $visible = ($show[-1] == 1 && testIfDefine($1) == 1) ? 1 : 0;
+        push(@show, $visible);
+    } elsif ($line =~ /<\/IfDefine>/) {
+        ## Ignore an unmatched closing tag rather than popping the
+        ## default entry, which would hide the rest of the file.
+        pop(@show) if (@show > 1);
+    } elsif ($show[-1] == 1) {
+        print $line, "\n";
+    }
+}
+
diff --git a/rgmanager/src/resources/utils/member_util.sh.in b/rgmanager/src/resources/utils/member_util.sh.in
new file mode 100644
index 0000000..0dab7b7
--- /dev/null
+++ b/rgmanager/src/resources/utils/member_util.sh.in
@@ -0,0 +1,116 @@
+#!@BASH_SHELL@
+#
+# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+#
+# Use corosync-quorumtool to figure out if the specified node is a member
+# of the cluster.
Returns 1 if not a member, and
+# 0 if the node is happily running.
+#
+# Tested on RHEL6 and F17 Note that the old version of this function utilized
+# clustat, which had introspection in to the configuration.
+# If a node was not found, the old version would return '2', but the only
+# consumer of this function never cared about that value.
+#
+# Arguments: $1 - node name to look for (short name or FQDN)
+#
+is_node_member_clustat()
+{
+    local node="$1"
+    local output_list
+    # loop variable; kept local so it does not leak into the sourcing agent
+    local alias
+
+    # Still having a tag while (a) online but (b) not running pacemaker
+    # (e.g. crm_node) or rgmanager not considered adequate for things like
+    # the LVM agent - so we use corosync-quorumtool instead. The function
+    # name really should be changed.
+    #
+    # corosync 1.4.1 output looks like:
+    #
+    #  # corosync-quorumtool -l
+    #  Nodeid     Name
+    #     1   rhel6-1
+    #     2   rhel6-2
+    #
+    # corosync 2.0.1 output looks like:
+    #  # corosync-quorumtool -l
+    #
+    #  Membership information
+    #  ----------------------
+    #      Nodeid      Votes Name
+    #           1          1 rhel7-1.priv.redhat.com
+    #           2          1 rhel7-2.priv.redhat.com
+    #
+
+    output_list=$(corosync-quorumtool -l | grep -v "^Nodeid")
+
+    # first try searching for the node in the output as both a FQDN or shortname
+    echo "$output_list" | grep -i -e " $node\$" -e " $node\..*\$" &> /dev/null && return 0
+
+    # if the node was not found in the quorum list, try any known aliases found in /etc/hosts
+    # (only the last matching /etc/hosts line is used; its address field is cut off)
+    for alias in $(grep -e "\s$node\s" -e "\s$node\$" /etc/hosts | tail -n 1 | sed 's/\t/ /g' | cut -f2- -d " ");
+    do
+        echo "$output_list" | grep -i -e " $alias\$" &> /dev/null && return 0
+    done
+
+    return 1
+}
+
+
+#
+# Print the local node name to stdout
+# Returns 0 if could be found, 1 if not, and 2 if neither magma_tool nor
+# cman_tool produced an answer and crm_node is not installed
+# Tested on RHEL6 (cman) and Fedora 17 (corosync/pacemaker)
+#
+local_node_name()
+{
+    local node nid localid
+    # scratch variable for tool output; local so it does not leak to callers
+    local line
+
+    if which magma_tool &> /dev/null; then
+        # Use magma_tool, if available.
+        line=$(magma_tool localname | grep "^Local")
+
+        if [ -n "$line" ]; then
+            echo ${line/* = /}
+            return 0
+        fi
+    fi
+
+    if which cman_tool &> /dev/null; then
+        # Use cman_tool
+
+        # $1 is normally empty; when given it narrows the match
+        line=$(cman_tool status | grep -i "Node name: $1")
+        [ -n "$line" ] || return 1
+        echo ${line/*name: /}
+        return 0
+    fi
+
+    if ! which crm_node &> /dev/null; then
+        # no crm_node? :(
+        return 2
+    fi
+
+    # Look up our own node id in the crm_node listing
+    localid=$(crm_node -i)
+    while read nid node; do
+        if [ "$nid" = "$localid" ]; then
+            echo $node
+            return 0
+        fi
+    done < <(crm_node -l)
+
+    return 1
+}
+
diff --git a/rgmanager/src/resources/utils/messages.sh.in b/rgmanager/src/resources/utils/messages.sh.in
new file mode 100644
index 0000000..1c4f13c
--- /dev/null
+++ b/rgmanager/src/resources/utils/messages.sh.in
@@ -0,0 +1,269 @@
+#!@BASH_SHELL@
+#
+# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+# Operation codes understood by the clog_* helpers below.
+declare CLOG_INIT=100
+declare CLOG_SUCCEED=200
+declare CLOG_SUCCEED_KILL=201
+
+declare CLOG_FAILED=400
+declare CLOG_FAILED_TIMEOUT=401
+# Used by clog_service_stop; it was referenced without being declared, so
+# its case arm only matched when no operation was passed at all.
+declare CLOG_FAILED_NOT_STOPPED=402
+declare CLOG_FAILED_NOT_FOUND=403
+declare CLOG_FAILED_INVALID=404
+declare CLOG_FAILED_NOT_READABLE=405
+declare CLOG_FAILED_KILL=406
+# Used by clog_service_verify; previously undeclared as well.
+declare CLOG_FAILED_NOT_CHILD=407
+
+##
+## Log the progress of a service start.
+## Usage:
+## clog_service_start %operation%
+##
+clog_service_start()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log info "Starting Service $OCF_RESOURCE_INSTANCE"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Starting Service $OCF_RESOURCE_INSTANCE > Succeed"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Starting Service $OCF_RESOURCE_INSTANCE > Failed"
+            ;;
+        $CLOG_FAILED_TIMEOUT)
+            ocf_log error "Starting Service $OCF_RESOURCE_INSTANCE > Failed - Timeout Error"
+            ;;
+    esac
+    return 0
+}
+
+##
+## Log the progress of a service stop (or kill).
+## Usage:
+## clog_service_stop %operation%
+##
+clog_service_stop()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log info "Stopping Service $OCF_RESOURCE_INSTANCE"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log info "Stopping Service $OCF_RESOURCE_INSTANCE > Succeed"
+            ;;
+        $CLOG_SUCCEED_KILL)
+            ocf_log info "Killing Service $OCF_RESOURCE_INSTANCE > Succeed"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Stopping Service $OCF_RESOURCE_INSTANCE > Failed"
+            ;;
+        $CLOG_FAILED_NOT_STOPPED)
+            ocf_log error "Stopping Service $OCF_RESOURCE_INSTANCE > Failed - Application Is Still Running"
+            ;;
+        $CLOG_FAILED_KILL)
+            ocf_log error "Killing Service $OCF_RESOURCE_INSTANCE > Failed"
+            ;;
+    esac
+    return 0
+}
+
+##
+## Log the result of a service status/monitor check.
+## Usage:
+## clog_service_status %operation%
+##
+clog_service_status()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log debug "Monitoring Service $OCF_RESOURCE_INSTANCE"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Monitoring Service $OCF_RESOURCE_INSTANCE > Service Is Running"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Monitoring Service $OCF_RESOURCE_INSTANCE > Service Is Not Running"
+            ;;
+        $CLOG_FAILED_NOT_FOUND)
+            ocf_log error "Monitoring Service $OCF_RESOURCE_INSTANCE > Service Is Not Running - PID File Not Found"
+            ;;
+    esac
+    return 0
+}
+
+##
+## 
Usage:
+## clog_service_verify %operation%
+## clog_service_verify $CLOG_FAILED %reason%
+##
+## Log the progress of a configuration check.  The optional %reason% is
+## appended to the failure message.  The "not a child" arm relies on
+## CLOG_FAILED_NOT_CHILD being defined alongside the other CLOG_* codes.
+##
+clog_service_verify()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log debug "Verifying Configuration Of $OCF_RESOURCE_INSTANCE"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Verifying Configuration Of $OCF_RESOURCE_INSTANCE > Succeed"
+            ;;
+        $CLOG_FAILED_NOT_CHILD)
+            ocf_log error "Service $OCF_RESOURCE_INSTANCE Is Not A Child Of A Service"
+            ;;
+        $CLOG_FAILED)
+            if [ "x$2" = "x" ]; then
+                ocf_log error "Verifying Configuration Of $OCF_RESOURCE_INSTANCE > Failed"
+            else
+                ocf_log error "Verifying Configuration Of $OCF_RESOURCE_INSTANCE > Failed - $2"
+            fi
+            ;;
+    esac
+    return 0
+}
+
+
+##
+## Log the progress of a SHA1 checksum comparison.
+## Usage:
+## clog_check_sha1 %operation% %filename%
+##
+clog_check_sha1()
+{
+    case $1 in
+        $CLOG_INIT)
+            # $2 is the file name; $1 is only the operation code
+            ocf_log debug "Checking SHA1 Checksum Of File $2"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Checking SHA1 Checksum Of File > Succeed"
+            ;;
+        $CLOG_FAILED)
+            ocf_log debug "Checking SHA1 Checksum Of File > Failed - File Changed"
+            ;;
+    esac
+    return 0;
+}
+
+##
+## Log the result of a file existence/readability check.
+## Usage:
+## clog_check_file_exist %operation% %filename%
+##
+clog_check_file_exist()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log debug "Checking Existence Of File $2"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Checking Existence Of File $2 > Succeed"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Checking Existence Of File $2 [$OCF_RESOURCE_INSTANCE] > Failed"
+            ;;
+        $CLOG_FAILED_INVALID)
+            ocf_log error "Checking Existence Of File $2 [$OCF_RESOURCE_INSTANCE] > Failed - Invalid Argument"
+            ;;
+        $CLOG_FAILED_NOT_FOUND)
+            ocf_log error "Checking Existence Of File $2 [$OCF_RESOURCE_INSTANCE] > Failed - File Doesn't Exist"
+            ;;
+        $CLOG_FAILED_NOT_READABLE)
+            ocf_log error "Checking Existence Of File $2 [$OCF_RESOURCE_INSTANCE] > Failed - File Is Not Readable"
+            ;;
+    esac
+    return 0;
+}
+
+##
+## Log the result of checking that no PID file is present.
+## Usage:
+## clog_check_pid %operation% %filename%
+##
+clog_check_pid()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log debug "Checking Non-Existence Of PID File $2"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Checking Non-Existence of PID File $2 > Succeed"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Checking Non-Existence of PID File $2 [$OCF_RESOURCE_INSTANCE] > Failed - PID File Exists For $OCF_RESOURCE_INSTANCE"
+            ;;
+    esac
+    return 0;
+}
+
+##
+## Log the result of a configuration file syntax check.
+## Usage:
+## clog_check_syntax %operation% %filename%
+##
+clog_check_syntax()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log debug "Checking Syntax Of The File $2"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Checking Syntax Of The File $2 > Succeed"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Checking Syntax Of The File $2 [$OCF_RESOURCE_INSTANCE] > Failed"
+            ;;
+    esac
+    return 0;
+}
+
+##
+## Log the generation of a config file from a template.
+## Usage:
+## clog_generate_config %operation% %old filename% %new filename%
+##
+clog_generate_config()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log debug "Generating New Config File $3 From $2"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Generating New Config File $3 From $2 > Succeed"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Generating New Config File $3 From $2 [$OCF_RESOURCE_INSTANCE] > Failed"
+            ;;
+    esac
+    return 0;
+}
+
+##
+## Log a search for resources of a given kind; on success $3 is printed
+## in front of the kind in the message.
+## Usage:
+## clog_looking_for %operation% %resource%
+## clog_looking_for %operation% "IP Addresses"
+## clog_looking_for %operation% "Filesystems"
+##
+clog_looking_for()
+{
+    case $1 in
+        $CLOG_INIT)
+            ocf_log debug "Looking For $2"
+            ;;
+        $CLOG_SUCCEED)
+            ocf_log debug "Looking For $2 > Succeed - $3 $2 Found"
+            ;;
+        $CLOG_FAILED)
+            ocf_log error "Looking For $2 [$OCF_RESOURCE_INSTANCE] > Failed"
+            ;;
+        $CLOG_FAILED_NOT_FOUND)
+            ocf_log error "Looking For $2 [$OCF_RESOURCE_INSTANCE] > Failed - No $2 Found"
+            ;;
+    esac
+    return 0;
+}
diff --git a/rgmanager/src/resources/utils/named-parse-config.pl b/rgmanager/src/resources/utils/named-parse-config.pl
new file mode 100644
index 0000000..a941a60
--- /dev/null
+++ b/rgmanager/src/resources/utils/named-parse-config.pl
@@ -0,0 +1,49 @@
+#!/usr/bin/perl -w
+#
+# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+##
+## Parse named.conf (from STDIN) and add options from cluster.conf
+##
+## ./named-parse-config.pl "directory" "pid-file" "listen-on" "set source <true | false>"
+##
+## The extra options are inserted right after every line that opens an
+## "options {" block; all other lines are copied with trailing
+## whitespace removed.
+##
+use strict;
+
+if ($#ARGV < 3) {
+    die ("Not enough arguments");
+}
+
+## Decide once whether the *-source options are wanted.  The whole
+## argument has to be one of the accepted spellings; the previous
+## unanchored match also accepted values that merely contain one of
+## them (for instance "none" contains "on").
+my $set_source = ($ARGV[3] =~ /\A(?:1|true|TRUE|yes|YES|on|ON)\z/) ? 1 : 0;
+
+while (my $line = <STDIN>) {
+    chomp($line);
+    ## Strip trailing whitespace.
+    $line =~ s/\s+$//;
+    if ($line =~ /^\s*options\s+\{/) {
+        print $line, "\n";
+        print "\tdirectory \"$ARGV[0]\";\n";
+        print "\tpid-file \"$ARGV[1]\";\n";
+        print "\tlisten-on { $ARGV[2] };\n";
+        if ($set_source) {
+            print "\tnotify-source $ARGV[2];\n";
+            print "\ttransfer-source $ARGV[2];\n";
+            print "\tquery-source $ARGV[2];\n";
+        }
+    } else {
+        print $line, "\n";
+    }
+}
+
diff --git a/rgmanager/src/resources/utils/ra-skelet.sh.in b/rgmanager/src/resources/utils/ra-skelet.sh.in
new file mode 100644
index 0000000..ee943b2
--- /dev/null
+++ b/rgmanager/src/resources/utils/ra-skelet.sh.in
@@ -0,0 +1,156 @@
+#!@BASH_SHELL@
+#
+# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+#
+# Check whether the process recorded in a PID file is alive.
+#   $1 - path of the PID file
+# Returns 0 when /proc/<pid> exists, $OCF_NOT_RUNNING when the PID file
+# is missing, and $OCF_ERR_GENERIC for an empty argument, an empty PID
+# file or a PID without a /proc entry.
+#
+status_check_pid()
+{
+    declare pid_file="$1"
+
+    if [ -z "$pid_file" ]; then
+        clog_check_file_exist $CLOG_FAILED_INVALID "$pid_file"
+        return $OCF_ERR_GENERIC
+    fi
+
+    if [ ! -e "$pid_file" ]; then
+        clog_check_file_exist $CLOG_FAILED "$pid_file"
+        return $OCF_NOT_RUNNING
+    fi
+
+    read pid < "$pid_file"
+
+    if [ -z "$pid" ]; then
+        return $OCF_ERR_GENERIC
+    fi
+
+    if [ ! -d /proc/$pid ]; then
+        return $OCF_ERR_GENERIC
+    fi
+
+    return 0
+}
+
+#
+# Signal the process named in a PID file and wait for it to exit.
+#   $1 - path of the PID file
+#   $2 - seconds to wait for the process to disappear (default 20)
+#   $3 - signal option passed to kill (default -TERM)
+# Returns 0 when the process is gone (or there was nothing to stop) and
+# $OCF_ERR_GENERIC when kill fails or the timeout expires.
+#
+stop_generic()
+{
+    declare pid_file="$1"
+    declare stop_timeout="$2"
+    declare stop_sig="$3"
+    declare pid;
+    declare count=0;
+
+    if [ -z "$stop_sig" ]; then
+        stop_sig="-TERM"
+    fi
+
+    if [ ! -e "$pid_file" ]; then
+        clog_check_file_exist $CLOG_FAILED_NOT_FOUND "$pid_file"
+        # In stop-after-stop situation there is no PID file but
+        # it will be nice to check for it in stop-after-start
+        # look at bug #449394
+        return 0
+    fi
+
+    if [ -z "$stop_timeout" ]; then
+        stop_timeout=20
+    fi
+
+    read pid < "$pid_file"
+
+    # @todo: PID file empty -> error?
+    if [ -z "$pid" ]; then
+        return 0;
+    fi
+
+    # @todo: PID is not running -> error?
+    if [ ! -d "/proc/$pid" ]; then
+        return 0;
+    fi
+
+    kill $stop_sig "$pid"
+
+    if [ $? -ne 0 ]; then
+        return $OCF_ERR_GENERIC
+    fi
+
+    # Poll once per second until ps no longer finds the PID (exit
+    # status 1) or the timeout has been exceeded.
+    until [ `ps --pid "$pid" &> /dev/null; echo $?` = '1' ] || [ $count -gt $stop_timeout ]
+    do
+        sleep 1
+        let count=$count+1
+    done
+
+    if [ $count -gt $stop_timeout ]; then
+        clog_service_stop $CLOG_FAILED_NOT_STOPPED
+        return $OCF_ERR_GENERIC
+    fi
+
+    return 0;
+}
+
+#
+#   $1 - path of the PID file
+#   $2 - seconds to wait after the first signal (0 skips that step)
+#   $3 - seconds to wait after kill -QUIT before checking the process
+#   $4 - signal option for the first attempt (default -TERM)
+# Returns 0 when the process is gone, $OCF_ERR_GENERIC otherwise.
+#
+stop_generic_sigkill() {
+    # Use stop_generic (kill -TERM) and if application did not stop
+    # correctly then use kill -QUIT and check if it was killed
+    declare pid_file="$1"
+    declare stop_timeout="$2"
+    declare kill_timeout="$3"
+    declare stop_sig="$4"
+    declare pid
+
+    if [ -z "$stop_sig" ]; then
+        stop_sig="-TERM"
+    fi
+    ## If stop_timeout is equal to zero then we do not want
+    ## to give -TERM signal at all.
+    if [ $stop_timeout -ne 0 ]; then
+        stop_generic "$pid_file" "$stop_timeout" "$stop_sig"
+        if [ $? -eq 0 ]; then
+            return 0;
+        fi
+    fi
+
+    if [ ! -e "$pid_file" ]; then
+        clog_check_file_exist $CLOG_FAILED_NOT_FOUND "$pid_file"
+        # In stop-after-stop situation there is no PID file but
+        # it will be nice to check for it in stop-after-start
+        # look at bug #449394
+        return 0
+    fi
+    read pid < "$pid_file"
+
+    if [ -z "$pid" ]; then
+        return 0;
+    fi
+
+    if [ ! -d "/proc/$pid" ]; then
+        return 0;
+    fi
+
+    kill -QUIT "$pid"
+    if [ $? -ne 0 ]; then
+        # Was "$OCF_GENERIC_ERROR", a name used nowhere else in this
+        # file; a bare "return" would have reported the status of the
+        # test above, i.e. success.
+        return $OCF_ERR_GENERIC
+    fi
+
+    sleep "$kill_timeout"
+    ps --pid "$pid" &> /dev/null
+    if [ $? -eq 0 ]; then
+        clog_service_stop $CLOG_FAILED_KILL
+        return $OCF_ERR_GENERIC
+    fi
+
+    clog_service_stop $CLOG_SUCCEED_KILL
+    return 0
+}
diff --git a/rgmanager/src/resources/utils/rhev-check.sh b/rgmanager/src/resources/utils/rhev-check.sh
new file mode 100644
index 0000000..0529c31
--- /dev/null
+++ b/rgmanager/src/resources/utils/rhev-check.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Poll the RHEV manager health page on host $1.  Exits 0 as soon as the
+# page answers with HTTP 200, 1 after three failed attempts or when no
+# host is given.  Needs bash: it uses "declare" and "(( ))".
+#
+
+MYNAME=`basename $0`
+
+# Print a message to stdout and pass it to clulog.
+#   $1 - numeric severity, remaining arguments - message text
+do_log()
+{
+    declare severity=$1
+
+    shift
+    echo "<$severity> $*"
+    clulog -s $severity "$*"
+}
+
+if [ -z "$1" ]; then
+    do_log 4 No host specified.
+    exit 1
+fi
+
+do_log 6 "Checking RHEV status on $1"
+
+tries=3
+http_code=
+
+while [ $tries -gt 0 ]; do
+
+    # Record start/end times so we can calculate the difference
+    start_time=$(date +%s)
+    http_code="$(curl -m 10 -sk https://$1/RHEVManagerWeb/HealthStatus.aspx -D - | head -1 | cut -f2 -d' ')"
+
+    if [ "$http_code" = "200" ]; then
+        exit 0
+    fi
+
+    # Reduce sleep time if the attempt took a noticeable amount
+    # of time.
+    end_time=$(date +%s)
+    delta=$(((end_time - start_time)))
+    sleep_time=$(((90 - delta)))
+
+    ((tries-=1))
+
+    # if we're going to retry and we have a nonzero sleep time,
+    # go to sleep.
+    if [ $tries -gt 0 ] && [ $sleep_time -gt 0 ]; then
+        sleep $sleep_time
+    fi
+done
+
+if [ -n "$http_code" ]; then
+    do_log 3 "RHEV Status check on $1 failed; last HTTP code: $http_code"
+else
+    do_log 3 "RHEV Status check on $1 failed"
+fi
+
+exit 1
diff --git a/rgmanager/src/resources/utils/tomcat-parse-config.pl b/rgmanager/src/resources/utils/tomcat-parse-config.pl
new file mode 100644
index 0000000..7c9badf
--- /dev/null
+++ b/rgmanager/src/resources/utils/tomcat-parse-config.pl
@@ -0,0 +1,63 @@
+#!/usr/bin/perl -w
+#
+# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+##
+## This script replaces the IP addresses on which the tomcat server
+## should listen. Tomcat can't listen on every IP because that
+## way we could run only one instance.
+##
+## Usage: ./tomcat-parse-config.pl ip1 ip2 < /etc/tomcat/server.xml
+## where ipXX defines an IP address [eg. 127.0.0.1 134.45.11.1]
+##
+## Every <Connector ...> element is printed once per IP given on the
+## command line, each copy with its own address="..." attribute; all
+## other lines are copied unchanged.
+##
+use strict;
+
+while (my $line = <STDIN>) {
+    chomp ($line);
+
+    if ($line =~ /(.*?)<Connector (.*)/) {
+        my $tmp = $2;
+        my $content = "<Connector ";
+        my $start = $1;
+        my $rest = "";
+
+        ## The element may span several lines: keep reading until the
+        ## line that holds the closing ">".
+        while ($tmp !~ />/) {
+            $content .= $tmp . "\n";
+            $tmp = <STDIN>;
+            if (!defined($tmp)) {
+                ## Input ended inside the element.  Stop here instead
+                ## of looping forever on an undefined line.
+                $tmp = "";
+                last;
+            }
+            chomp($tmp);
+        }
+
+        if ($tmp =~ /(.*?)>(.*)/) {
+            $content .= $1 . ">\n";
+            $rest = $2;
+        }
+
+        print $start;
+        foreach my $arg (@ARGV) {
+            ## Drop the address left from the original element (or
+            ## from the previous pass) before inserting this one.
+            $content =~ s/\s+address=".*?"/ /;
+            $content =~ s/Connector /Connector address="$arg" /;
+            print $content;
+        }
+        print $rest;
+    } else {
+        print $line,"\n";
+    }
+}
diff --git a/rgmanager/src/resources/vm.sh.in b/rgmanager/src/resources/vm.sh.in
new file mode 100644
index 0000000..a76495f
--- /dev/null
+++ b/rgmanager/src/resources/vm.sh.in
@@ -0,0 +1,1231 @@
+#!@BASH_SHELL@
+#
+# Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# + +PATH=/bin:/sbin:/usr/bin:/usr/sbin + +# Only allow pid status checks during monitor operations. +# Otherwise we want proceed with failing if virsh is not available. +ALLOW_PID_STATUS_CHECK=0 +if [ "$1" = "monitor" ] || [ "$1" = "status" ]; then + ALLOW_PID_STATUS_CHECK=1 +fi + +export PATH + +. $(dirname $0)/ocf-shellfuncs || exit 1 + +# +# Virtual Machine start/stop script (requires the virsh command) +# + +EMULATOR_STATE="/var/run/vm-${OCF_RESKEY_name}-emu.state" + +# Indeterminate state: xend/libvirtd is down. +export OCF_APP_ERR_INDETERMINATE=150 + +meta_data() +{ + cat <<EOT +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd"> +<resource-agent version="rgmanager 2.0" name="vm"> + <version>1.0</version> + + <longdesc lang="en"> + Defines a Virtual Machine + </longdesc> + <shortdesc lang="en"> + Defines a Virtual Machine + </shortdesc> + + <parameters> + <parameter name="name" primary="1"> + <longdesc lang="en"> + This is the name of the virtual machine. + </longdesc> + <shortdesc lang="en"> + Name + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="domain" reconfig="1"> + <longdesc lang="en"> + Failover domains define lists of cluster members + to try in the event that the host of the virtual machine + fails. + </longdesc> + <shortdesc lang="en"> + Cluster failover Domain + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="autostart" reconfig="1"> + <longdesc lang="en"> + If set to yes, this resource group will automatically be started + after the cluster forms a quorum. If set to no, this virtual + machine will start in the 'disabled' state after the cluster + forms a quorum. 
+ </longdesc> + <shortdesc lang="en"> + Automatic start after quorum formation + </shortdesc> + <content type="boolean" default="1"/> + </parameter> + + <parameter name="exclusive" reconfig="1"> + <longdesc lang="en"> + If set, this resource group will only relocate to + nodes which have no other resource groups running in the + event of a failure. If no empty nodes are available, + this resource group will not be restarted after a failure. + Additionally, resource groups will not automatically + relocate to the node running this resource group. This + option can be overridden by manual start and/or relocate + operations. + </longdesc> + <shortdesc lang="en"> + Exclusive resource group + </shortdesc> + <content type="boolean" default="0"/> + </parameter> + + <parameter name="recovery" reconfig="1"> + <longdesc lang="en"> + This currently has three possible options: "restart" tries + to restart this virtual machine locally before + attempting to relocate (default); "relocate" does not bother + trying to restart the VM locally; "disable" disables + the VM if it fails. + </longdesc> + <shortdesc lang="en"> + Failure recovery policy + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="migration_mapping" reconfig="1"> + <longdesc lang="en"> + Mapping of the hostname of a target cluster member to a different hostname + </longdesc> + <shortdesc lang="en"> + memberhost:targethost,memberhost:targethost .. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="use_virsh"> + <longdesc lang="en"> + Force use of virsh instead of xm on Xen machines. + </longdesc> + <shortdesc lang="en"> + If set to 1, vm.sh will use the virsh command to manage + virtual machines instead of xm. This is required when + using non-Xen virtual machines (e.g. qemu / KVM). + </shortdesc> + <content type="integer" default=""/> + </parameter> + + <parameter name="xmlfile"> + <longdesc lang="en"> + Full path to libvirt XML file describing the domain. 
+ </longdesc> + <shortdesc lang="en"> + Full path to libvirt XML file describing the domain. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="migrate"> + <longdesc lang="en"> + Migration type (live or pause, default = live). + </longdesc> + <shortdesc lang="en"> + Migration type (live or pause, default = live). + </shortdesc> + <content type="string" default="live"/> + </parameter> + + <parameter name="migrate_options"> + <longdesc lang="en"> + Extra options for the guest live migration. + </longdesc> + <shortdesc lang="en"> + Extra options for the guest live migration. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="tunnelled"> + <longdesc lang="en"> + Tunnel data over ssh to securely migrate virtual machines. + </longdesc> + <shortdesc lang="en"> + Tunnel data over ssh to securely migrate virtual machines. + </shortdesc> + <content type="string" default=""/> + </parameter> + + <parameter name="path"> + <longdesc lang="en"> + Path specification vm.sh will search for the specified + VM configuration file. /path1:/path2:... + </longdesc> + <shortdesc lang="en"> + Path to virtual machine configuration files. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="snapshot"> + <longdesc lang="en"> + Path to the snapshot directory where the virtual machine + image will be stored. + </longdesc> + <shortdesc lang="en"> + Path to the snapshot directory where the virtual machine + image will be stored. + </shortdesc> + <content type="string" default=""/> + </parameter> + + <parameter name="depend"> + <longdesc lang="en"> + Service dependency; will not start without the specified + service running. + </longdesc> + <shortdesc lang="en"> + Top-level service this depends on, in service:name format. + </shortdesc> + <content type="string"/> + </parameter> + + <parameter name="depend_mode"> + <longdesc lang="en"> + Service dependency mode. 
+ hard - This service is stopped/started if its dependency + is stopped/started + soft - This service only depends on the other service for + initial startip. If the other service stops, this + service is not stopped. + </longdesc> + <shortdesc lang="en"> + Service dependency mode (soft or hard). + </shortdesc> + <content type="string" default="hard"/> + </parameter> + + <parameter name="max_restarts" reconfig="1"> + <longdesc lang="en"> + Maximum restarts for this service. + </longdesc> + <shortdesc lang="en"> + Maximum restarts for this service. + </shortdesc> + <content type="string" default="0"/> + </parameter> + + <parameter name="restart_expire_time" reconfig="1"> + <longdesc lang="en"> + Restart expiration time. A restart is forgotten + after this time. When combined with the max_restarts + option, this lets administrators specify a threshold + for when to fail over services. If max_restarts + is exceeded in this given expiration time, the service + is relocated instead of restarted again. + </longdesc> + <shortdesc lang="en"> + Restart expiration time; amount of time before a restart + is forgotten. + </shortdesc> + <content type="string" default="0"/> + </parameter> + + <parameter name="status_program" reconfig="1"> + <longdesc lang="en"> + Ordinarily, only the presence/health of a virtual machine + is checked. If specified, the status_program value is + executed during a depth 10 check. The intent of this + program is to ascertain the status of critical services + within a virtual machine. + </longdesc> + <shortdesc lang="en"> + Additional status check program + </shortdesc> + <content type="string" default=""/> + </parameter> + + <parameter name="hypervisor"> + <longdesc lang="en"> + Specify hypervisor tricks to use. Default = auto. + Other supported options are xen and qemu. 
+ </longdesc> + <shortdesc lang="en"> + Hypervisor + </shortdesc > + <content type="string" default="auto"/> + </parameter> + + <parameter name="hypervisor_uri"> + <longdesc lang="en"> + Hypervisor URI. Generally, this is keyed off of the + hypervisor and does not need to be set. + </longdesc> + <shortdesc lang="en"> + Hypervisor URI (normally automatic). + </shortdesc > + <content type="string" default="auto" /> + </parameter> + + <parameter name="migration_uri"> + <longdesc lang="en"> + Migration URI. Generally, this is keyed off of the + hypervisor and does not need to be set. + </longdesc> + <shortdesc lang="en"> + Migration URI (normally automatic). + </shortdesc > + <content type="string" default="auto" /> + </parameter> + + <parameter name="no_kill"> + <longdesc lang="en"> + Do not force kill vm during stop, instead + fail after the timeout expires. + </longdesc> + <shortdesc lang="en"> + Don't force kill vm on stop. + </shortdesc > + <content type="boolean" default="false" /> + </parameter> + + </parameters> + + <actions> + <action name="start" timeout="300"/> + <action name="stop" timeout="120"/> + + <action name="status" timeout="10" interval="30"/> + <action name="monitor" timeout="10" interval="30"/> + + <!-- depth 10 calls the status_program --> + <action name="status" depth="10" timeout="20" interval="60"/> + <action name="monitor" depth="10" timeout="20" interval="60"/> + + <!-- reconfigure - reconfigure with new OCF parameters. + NOT OCF COMPATIBLE AT ALL --> + <action name="reconfig" timeout="10"/> + + <action name="migrate" timeout="10m"/> + + <action name="meta-data" timeout="5"/> + <action name="validate-all" timeout="5"/> + + </actions> + + <special tag="rgmanager"> + <!-- Destroy_on_delete / init_on_add are currently only + supported for migratory resources (no children + and the 'migrate' action; see above. 
Do not try this + with normal services --> + <attributes maxinstances="1" destroy_on_delete="0" init_on_add="0"/> + </special> +</resource-agent> +EOT +} + + +build_virsh_cmdline() +{ + declare cmdline="" + declare operation=$1 + + if [ -n "$OCF_RESKEY_hypervisor_uri" ]; then + cmdline="$cmdline -c $OCF_RESKEY_hypervisor_uri" + fi + + cmdline="$cmdline $operation $OCF_RESKEY_name" + + echo $cmdline +} + + +# this is only used on startup +build_xm_cmdline() +{ + declare operation=$1 + # + # Virtual domains should never restart themselves when + # controlled externally; the external monitoring app + # should. + # + declare cmdline="on_shutdown=\"destroy\" on_reboot=\"destroy\" on_crash=\"destroy\"" + + if [ -n "$OCF_RESKEY_path" ]; then + operation="$operation --path=\"$OCF_RESKEY_path\"" + fi + + if [ -n "$OCF_RESKEY_name" ]; then + cmdline="$operation $OCF_RESKEY_name $cmdline" + fi + + echo $cmdline +} + + +do_xm_start() +{ + # Use /dev/null for the configuration file, if xmdefconfig + # doesn't exist... + # + declare cmdline + + echo -n "Virtual machine $OCF_RESKEY_name is " + do_status && return 0 + + cmdline="`build_xm_cmdline create`" + + ocf_log debug "xm $cmdline" + + eval xm $cmdline + return $? 
+} + + +get_timeout() +{ + declare -i default_timeout=60 + declare -i tout=60 + + if [ -n "$OCF_RESKEY_RGMANAGER_meta_timeout" ]; then + tout=$OCF_RESKEY_RGMANAGER_meta_timeout + elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + tout=$OCF_RESKEY_CRM_meta_timeout + fi + + if [ $tout -eq 0 ]; then + echo $default_timeout + return 0 + fi + if [ $tout -lt 0 ]; then + echo $default_timeout + return 0 + fi + + echo $tout + return 0 +} + +get_emulator() +{ + local emulator="" + + emulator=$(virsh $VIRSH_OPTIONS dumpxml $OCF_RESKEY_name 2>/dev/null | sed -n -e 's/^.*<emulator>\(.*\)<\/emulator>.*$/\1/p') + if [ -z "$emulator" ] && [ -a "$EMULATOR_STATE" ]; then + emulator=$(cat $EMULATOR_STATE) + fi + if [ -z "$emulator" ]; then + emulator=$(cat ${OCF_RESKEY_config} | sed -n -e 's/^.*<emulator>\(.*\)<\/emulator>.*$/\1/p') + fi + + if [ -n "$emulator" ]; then + basename $emulator + else + ocf_log error "Unable to determine emulator for $OCF_RESKEY_name" + fi +} + +update_emulator_cache() +{ + local emulator + + emulator=$(get_emulator) + if [ -n "$emulator" ]; then + echo $emulator > $EMULATOR_STATE + fi +} + +# +# Start a virtual machine given the parameters from +# the environment. +# +do_virsh_start() +{ + declare cmdline + declare snapshotimage + declare rc + + echo -n "Virtual machine $OCF_RESKEY_name is " + do_status && return 0 + + snapshotimage="$OCF_RESKEY_snapshot/$OCF_RESKEY_name" + + if [ -n "$OCF_RESKEY_snapshot" -a -f "$snapshotimage" ]; then + eval virsh restore $snapshotimage + if [ $? -eq 0 ]; then + rm -f $snapshotimage + return 0 + fi + return 1 + fi + + if [ -n "$OCF_RESKEY_xmlfile" -a -f "$OCF_RESKEY_xmlfile" ]; then + # TODO: try to use build_virsh_cmdline for the hypervisor_uri + cmdline="virsh create $OCF_RESKEY_xmlfile" + else + cmdline="virsh $(build_virsh_cmdline start)" + fi + + ocf_log debug "$cmdline" + + $cmdline + rc=$? 
+ + update_emulator_cache + + return $rc +} + +do_xm_stop() +{ + declare -i timeout=60 + declare -i ret=1 + declare st + + for op in $*; do + echo "CMD: xm $op $OCF_RESKEY_name" + xm $op $OCF_RESKEY_name + + timeout=60 + while [ $timeout -gt 0 ]; do + sleep 5 + ((timeout -= 5)) + do_status&>/dev/null || return 0 + while read dom state; do + # + # State is "stopped". Kill it. + # + if [ "$dom" != "$OCF_RESKEY_name" ]; then + continue + fi + if [ "$state" != "---s-" ]; then + continue + fi + xm destroy $OCF_RESKEY_name + done < <(xm list | awk '{print $1, $5}') + done + done + + return 1 +} + + +# +# Stop a VM. Try to shut it down. Wait a bit, and if it +# doesn't shut down, destroy it. +# +do_virsh_stop() +{ + declare -i timeout=$(get_timeout) + declare -i ret=1 + declare state + + state=$(do_status) + [ $? -eq 0 ] || return 0 + + if [ -n "$OCF_RESKEY_snapshot" ]; then + virsh save $OCF_RESKEY_name "$OCF_RESKEY_snapshot/$OCF_RESKEY_name" + fi + + for op in $*; do + echo virsh $op $OCF_RESKEY_name ... + virsh $op $OCF_RESKEY_name + + timeout=$(get_timeout) + while [ $timeout -gt 0 ]; do + sleep 5 + ((timeout -= 5)) + state=$(do_status) + [ $? -eq 0 ] || return 0 + + if [ "$state" = "paused" ]; then + virsh destroy $OCF_RESKEY_name + fi + done + done + + ocf_log err "Stop operation timed out for vm '$OCF_RESKEY_name'" + return 1 +} + + +do_start() +{ + if [ "$OCF_RESKEY_use_virsh" = "1" ]; then + do_virsh_start $* + return $? + fi + + do_xm_start $* + return $? +} + + +do_stop() +{ + declare domstate rv + + domstate=$(do_status) + rv=$? + ocf_log debug "Virtual machine $OCF_RESKEY_name is $domstate" + if [ $rv -eq $OCF_APP_ERR_INDETERMINATE ]; then + ocf_log crit "xend/libvirtd is dead; cannot stop $OCF_RESKEY_name" + return 1 + fi + + if [ "$OCF_RESKEY_use_virsh" = "1" ]; then + do_virsh_stop $* + return $? + fi + + do_xm_stop $* + return $? +} + + +# +# Reconfigure a running VM. 
#
# Currently a no-op that always reports success.
#
reconfigure()
{
	return 0
}


#
# Status check through xm.
#
# Prints "indeterminate", "running" or "not running" and returns
# OCF_APP_ERR_INDETERMINATE (xend not answering), 0 or
# OCF_NOT_RUNNING respectively.  A domain listed under the name
# "migrating-<name>" counts as running.
#
xm_status()
{
	service xend status &> /dev/null
	if [ $? -ne 0 ]; then
		# if xend died
		echo indeterminate
		return $OCF_APP_ERR_INDETERMINATE
	fi

	if xm list "$OCF_RESKEY_name" &> /dev/null ||
	   xm list "migrating-$OCF_RESKEY_name" &> /dev/null; then
		echo "running"
		return 0
	fi

	echo "not running"
	return $OCF_NOT_RUNNING
}

#
# Attempt to check domain status outside of libvirt using the
# emulator process.
#
# Prints "running", "shut off" or "indeterminate".  Returns
# OCF_SUCCESS, OCF_NOT_RUNNING or OCF_APP_ERR_INDETERMINATE; the
# latter when ALLOW_PID_STATUS_CHECK is 0 or the emulator is not one
# of the qemu variants handled below.
#
vm_pid_status()
{
	local rc=$OCF_ERR_GENERIC
	local emulator

	if [ "$ALLOW_PID_STATUS_CHECK" = "0" ]; then
		echo "indeterminate"
		return $OCF_APP_ERR_INDETERMINATE
	fi

	emulator=$(get_emulator)
	case "$emulator" in
	qemu-kvm|qemu-system-*)
		rc=$OCF_NOT_RUNNING
		# The [q] keeps grep from matching its own process entry.
		ps awx | grep -E "[q]emu-(kvm|system).*-name $OCF_RESKEY_name " > /dev/null 2>&1
		if [ $? -eq 0 ]; then
			rc=$OCF_SUCCESS
		fi
		;;
	# This can be expanded to check for additional emulators
	*)
		echo "indeterminate"
		return $OCF_APP_ERR_INDETERMINATE
		;;
	esac

	if [ $rc -eq $OCF_SUCCESS ]; then
		echo "running"
	elif [ $rc -eq $OCF_NOT_RUNNING ]; then
		echo "shut off"
	fi

	return $rc
}

#
# Status check through virsh.
#
# Prints the domain state.  Returns 0 for "running", "paused",
# "no state" and "idle" (refreshing the emulator cache on the way),
# OCF_NOT_RUNNING for "shut off" and OCF_ERR_GENERIC for anything
# else.  With the xen hypervisor a dead xend yields
# OCF_APP_ERR_INDETERMINATE; without a libvirtd process the result of
# vm_pid_status is returned instead.
#
virsh_status()
{
	declare state pid

	if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then
		service xend status &> /dev/null
		if [ $? -ne 0 ]; then
			echo indeterminate
			return $OCF_APP_ERR_INDETERMINATE
		fi
	fi

	#
	# libvirtd is required when using virsh even though
	# not specifically when also using Xen.  This is because
	# libvirtd is required for migration.
	#
	pid=$(pidof libvirtd)
	if [ -z "$pid" ]; then
		# attempt to determine if vm is running from pid file
		vm_pid_status
		return $?
	fi

	state=$(virsh domstate "$OCF_RESKEY_name")

	echo $state

	case "$state" in
	running|paused|"no state"|idle)
		update_emulator_cache
		return 0
		;;
	"shut off")
		return $OCF_NOT_RUNNING
		;;
	esac

	return $OCF_ERR_GENERIC
}


#
# Simple status check: Find the VM in the list of running
# VMs.  Uses virsh when OCF_RESKEY_use_virsh is "1", xm otherwise.
#
do_status()
{
	if [ "$OCF_RESKEY_use_virsh" = "1" ]; then
		virsh_status
		return $?
	fi

	xm_status
	return $?
}


#
# virsh "path" attribute support
#
#   $1 - directory to look in
#
# Prints the path of "<dir>/<name>" or "<dir>/<name>.xml" if one of
# them is a regular file and returns 2 (the "found" signal that
# parse_input stops on); returns 0 when neither exists.
#
check_config_file()
{
	declare path=$1

	if [ -f "$path/$OCF_RESKEY_name" ]; then
		echo "$path/$OCF_RESKEY_name"
		return 2
	elif [ -f "$path/$OCF_RESKEY_name.xml" ]; then
		echo "$path/$OCF_RESKEY_name.xml"
		return 2
	fi

	return 0
}


#
# Split a delimited list and call a function on each non-empty item.
#
#   $1 - delimiter
#   $2 - list to split
#   $3 - function to call with each item
#
# Stops (returning 0) as soon as the function returns 2.
#
parse_input()
{
	declare delim=$1
	declare input=$2
	declare func=$3
	declare inp
	declare value

	while [ -n "$input" ]; do
		# Everything up to the first delimiter.
		value=${input/$delim*/}
		if [ -n "$value" ]; then
			eval $func $value
			if [ $? -eq 2 ]; then
				return 0
			fi
		fi
		# Drop the item just handled together with its delimiter;
		# the last item has no trailing delimiter.
		inp=${input/$value$delim/}
		if [ "$input" = "$inp" ]; then
			inp=${input/$value/}
		fi
		input=$inp
	done
}


#
# Look for the domain's configuration file in the colon separated
# directories of OCF_RESKEY_path.  On success exports the result as
# OCF_RESKEY_xmlfile and returns 0; returns 1 otherwise.
#
search_config_path()
{
	declare config_file=$(parse_input ":" "$OCF_RESKEY_path" check_config_file)

	if [ -n "$config_file" ]; then
		export OCF_RESKEY_xmlfile="$config_file"
		return 0
	fi

	return 1
}


#
# Decide between virsh and xm when the user gave no use_virsh value.
#
# If xmllint is missing, or OCF_RESKEY_xmlfile parses as XML, virsh
# is chosen (use_virsh=1, path unset); otherwise xm is chosen
# (use_virsh=0, xmlfile unset).  Always returns 0.
#
choose_management_tool()
{
	declare -i is_xml

	#
	# Don't override user value for use_virsh if one is given
	#
	if [ -n "$OCF_RESKEY_use_virsh" ]; then
		return 0
	fi

	which xmllint &> /dev/null
	if [ $? -ne 0 ]; then
		ocf_log warning "Could not find xmllint; assuming virsh mode"
		export OCF_RESKEY_use_virsh=1
		unset OCF_RESKEY_path
		return 0
	fi

	xmllint $OCF_RESKEY_xmlfile &> /dev/null
	is_xml=$?

	if [ $is_xml -eq 0 ]; then
		ocf_log debug "$OCF_RESKEY_xmlfile is XML; using virsh"
		export OCF_RESKEY_use_virsh=1
		unset OCF_RESKEY_path
	else
		ocf_log debug "$OCF_RESKEY_xmlfile is not XML; using xm"
		export OCF_RESKEY_use_virsh=0
		unset OCF_RESKEY_xmlfile
	fi

	return 0
}

#
# Print the lower-cased hypervisor name taken from the
# "Running hypervisor:" line of "virsh version".
#
get_hypervisor()
{
	local hypervisor

	hypervisor="$(virsh version | grep "Running hypervisor:" | awk '{print $3}' | tr 'A-Z' 'a-z')"
	# if virsh gives us nothing (likely because libvirt is down), we
	# can still auto detect that the hypervisor is qemu if a qemu
	# emulator is used for this domain.
	if [ -z "$hypervisor" ]; then
		get_emulator | grep "qemu" > /dev/null 2>&1
		if [ $? -eq 0 ]; then
			hypervisor="qemu"
		fi
	fi
	echo $hypervisor
}

#
# Check the environment and fill in derived settings.
#
# Resolves the hypervisor, the management tool (xm or virsh), the
# hypervisor URI and the migration URI, and normalises
# OCF_RESKEY_no_kill to 1 for its "true" spellings.
#
# Returns 0 on success, 1 when not run as root, OCF_ERR_ARGS for
# inconsistent or missing parameters and OCF_ERR_INSTALLED when the
# required tool is not found.
#
validate_all()
{
	if [ "$(id -u)" != "0" ]; then
		ocf_log err "Cannot control VMs. as non-root user."
		return 1
	fi

	#
	# If someone selects a hypervisor, honor it.
	# Otherwise, ask virsh what the hypervisor is.
	#
	if [ -z "$OCF_RESKEY_hypervisor" ] ||
	   [ "$OCF_RESKEY_hypervisor" = "auto" ]; then
		export OCF_RESKEY_hypervisor="$(get_hypervisor)"
		if [ -z "$OCF_RESKEY_hypervisor" ]; then
			ocf_log err "Could not determine Hypervisor"
			return $OCF_ERR_ARGS
		fi
		echo Hypervisor: $OCF_RESKEY_hypervisor
	fi

	#
	# Xen hypervisor only for when use_virsh = 0.
	#
	if [ "$OCF_RESKEY_use_virsh" = "0" ]; then
		if [ "$OCF_RESKEY_hypervisor" != "xen" ]; then
			ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor without using virsh"
			return $OCF_ERR_ARGS
		fi

		if [ -n "$OCF_RESKEY_xmlfile" ]; then
			ocf_log err "Cannot use xmlfile if use_virsh is set to 0"
			return $OCF_ERR_ARGS
		fi
	else

		#
		# Virsh path support.
		#
		if [ -n "$OCF_RESKEY_path" ] &&
		   [ "$OCF_RESKEY_path" != "/etc/xen" ]; then
			if [ -n "$OCF_RESKEY_xmlfile" ]; then
				ocf_log warning "Using $OCF_RESKEY_xmlfile instead of searching $OCF_RESKEY_path"
			else
				search_config_path
				if [ $? -ne 0 ]; then
					ocf_log warning "Could not find $OCF_RESKEY_name or $OCF_RESKEY_name.xml in search path $OCF_RESKEY_path"
					unset OCF_RESKEY_xmlfile
				else
					ocf_log debug "Using $OCF_RESKEY_xmlfile"
				fi
				choose_management_tool
			fi
		else
			export OCF_RESKEY_use_virsh=1
		fi
	fi

	if [ "$OCF_RESKEY_use_virsh" = "0" ]; then

		echo "Management tool: xm"
		which xm &> /dev/null
		if [ $? -ne 0 ]; then
			ocf_log err "Cannot find 'xm'; is it installed?"
			return $OCF_ERR_INSTALLED
		fi

		# Checked again on purpose: choose_management_tool may have
		# selected xm only just now.
		if [ "$OCF_RESKEY_hypervisor" != "xen" ]; then
			ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor without using virsh"
			return $OCF_ERR_ARGS
		fi
	else
		echo "Management tool: virsh"
		which virsh &> /dev/null
		if [ $? -ne 0 ]; then
			ocf_log err "Cannot find 'virsh'; is it installed?"
			return $OCF_ERR_INSTALLED
		fi
	fi

	#
	# Set the hypervisor URI
	#
	if [ -z "$OCF_RESKEY_hypervisor_uri" -o "$OCF_RESKEY_hypervisor_uri" = "auto" ] &&
	   [ "$OCF_RESKEY_use_virsh" = "1" ]; then

		# Virsh makes it easier to do this.  Really.
		if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then
			OCF_RESKEY_hypervisor_uri="qemu:///system"
		fi

		# I just need to believe in it more.
		if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then
			OCF_RESKEY_hypervisor_uri="xen:///"
		fi

		echo Hypervisor URI: $OCF_RESKEY_hypervisor_uri
	fi

	#
	# Set the migration URI
	#
	if [ -z "$OCF_RESKEY_migration_uri" -o "$OCF_RESKEY_migration_uri" = "auto" ] &&
	   [ "$OCF_RESKEY_use_virsh" = "1" ]; then

		# Virsh makes it easier to do this.  Really.
		if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then
			export OCF_RESKEY_migration_uri="qemu+ssh://%s/system"
		fi

		# I just need to believe in it more.
		if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then
			export OCF_RESKEY_migration_uri="xenmigr://%s/"
		fi

		[ -n "$OCF_RESKEY_migration_uri" ] && echo Migration URI format: $(printf "$OCF_RESKEY_migration_uri" target_host)
	fi

	if [ -z "$OCF_RESKEY_name" ]; then
		echo No domain name specified
		return $OCF_ERR_ARGS
	fi

	if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then
		export migrateuriopt="tcp:%s"
	fi

	case "$OCF_RESKEY_no_kill" in
	yes|true|1|YES|TRUE|on|ON)
		OCF_RESKEY_no_kill=1
		;;
	esac
	#virsh list --all | awk '{print $2}' | grep -q "^$OCF_RESKEY_name\$"

	# Everything checked out; do not depend on the status of "case".
	return 0
}


#
# Migrate the domain with virsh.
#
#   $1 - target host
#
# Reads migrate_opt and tunnelled_opt from the calling migrate()
# function.  Returns 0 on success, OCF_ERR_CONFIGURED when virsh
# reports "does not exist" or "Domain not found", and
# OCF_ERR_GENERIC for any other failure.
#
virsh_migrate()
{
	declare target=$1
	declare rv=1
	declare cmd err

	#
	# Xen and qemu have different migration mechanisms
	#
	if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then
		cmd="virsh migrate $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $OCF_RESKEY_hypervisor_uri $(printf "$OCF_RESKEY_migration_uri" "$target")"
		ocf_log debug "$cmd"

		# Keep only the first line of output, but virsh's exit code.
		err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]})
		rv=$?
	elif [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then
		if [ -z "$tunnelled_opt" ]; then
			cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $(printf "$OCF_RESKEY_migration_uri" "$target") $(printf "$migrateuriopt" "$target")"
		else
			cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $(printf "$OCF_RESKEY_migration_uri" "$target")"
		fi
		ocf_log debug "$cmd"

		err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]})
		rv=$?
	fi

	if [ $rv -ne 0 ]; then
		ocf_log err "Migrate $OCF_RESKEY_name to $target failed:"
		ocf_log err "$err"

		case "$err" in
		*"does not exist"*|*"Domain not found"*)
			return $OCF_ERR_CONFIGURED
			;;
		esac

		return $OCF_ERR_GENERIC
	fi

	return $rv
}


#
# XM migrate
#
#   $1 - target host
#
# Returns 0 on success, OCF_NOT_RUNNING when xm reports
# "does not exist", OCF_ERR_CONFIGURED on "Connection refused" and
# OCF_ERR_GENERIC for any other failure.
#
xm_migrate()
{
	declare target=$1
	declare err rv migrate_opt cmd

	rv=1

	if [ "$OCF_RESKEY_migrate" = "live" ]; then
		migrate_opt="-l"
	fi

	# migrate() function sets target using migration_mapping;
	# no need to do it here anymore
	cmd="xm migrate $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $target"
	ocf_log debug "$cmd"

	err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]})
	rv=$?

	if [ $rv -ne 0 ]; then
		ocf_log err "Migrate $OCF_RESKEY_name to $target failed:"
		ocf_log err "$err"

		case "$err" in
		*"does not exist"*)
			return $OCF_NOT_RUNNING
			;;
		*"Connection refused"*)
			return $OCF_ERR_CONFIGURED
			;;
		esac

		return $OCF_ERR_GENERIC
	fi

	return 0
}

#
# Migrate the domain to another host.
#
#   $1 - target host
#
# OCF_RESKEY_migration_mapping is a comma separated list of
# "host:alternate" pairs; when the target has an entry, the alternate
# name is used instead.  A target without an entry is used unchanged.
# Dispatches to virsh_migrate or xm_migrate and returns its status.
#
migrate()
{
	declare target=$1
	declare rv migrate_opt
	declare tunnelled_opt
	declare -a mappings
	declare entry

	if [ "$OCF_RESKEY_migrate" = "live" ]; then
		migrate_opt="--live"
	fi

	case "$OCF_RESKEY_tunnelled" in
	yes|true|1|YES|TRUE|on|ON)
		tunnelled_opt="--tunnelled --p2p"
		;;
	esac

	# Patch from Marcelo Azevedo to migrate over private
	# LANs instead of public LANs
	if [ -n "$OCF_RESKEY_migration_mapping" ]; then
		IFS=',' read -ra mappings <<< "$OCF_RESKEY_migration_mapping"
		for entry in "${mappings[@]}"; do
			entry=${entry//[[:space:]]/}
			# Compare the whole host name so that "b" neither matches
			# an entry for "ab" nor picks up an unrelated entry.
			if [ "${entry%%:*}" = "$target" ]; then
				target=${entry#*:}
				break
			fi
		done
	fi

	if [ "$OCF_RESKEY_use_virsh" = "1" ]; then
		virsh_migrate "$target"
		rv=$?
	else
		xm_migrate "$target"
		rv=$?
	fi

	return $rv
}


#
# After a start, wait until OCF_RESKEY_status_program succeeds.
#
# Returns 0 immediately when no status program is configured.
# Otherwise the program is run roughly every 5 seconds until it
# exits 0 (return 0) or get_timeout seconds have been used up
# (return 1).
#
wait_start()
{
	declare -i timeout_remaining=$(get_timeout)
	declare -i start_time
	declare -i end_time
	declare -i delta
	declare -i sleep_time

	if [ -z "$OCF_RESKEY_status_program" ]; then
		return 0
	fi

	while [ $timeout_remaining -gt 0 ]; do
		start_time=$(date +%s)
		bash -c "$OCF_RESKEY_status_program"
		if [ $? -eq 0 ]; then
			return 0
		fi
		end_time=$(date +%s)
		delta=$((end_time - start_time))
		# Pad each round up to 5 seconds.
		sleep_time=$((5 - delta))

		((timeout_remaining -= delta))
		if [ $sleep_time -gt 0 ]; then
			sleep $sleep_time
			((timeout_remaining -= sleep_time))
		fi
	done

	ocf_log err "Start of $OCF_RESOURCE_INSTANCE has failed"
	ocf_log err "Timeout exceeded while waiting for \"$OCF_RESKEY_status_program\""

	return 1
}

#
# Action dispatch: $1 is the requested action, $2 the target host
# for "migrate".
#

case $1 in
	start)
		validate_all || exit $OCF_ERR_ARGS
		do_start
		rv=$?
		if [ $rv -ne 0 ]; then
			exit $rv
		fi

		wait_start
		exit $?
		;;
	stop)
		validate_all || exit $OCF_ERR_ARGS
		# String comparison: the value may be unset or a word such
		# as "false", which a numeric test cannot handle.
		if [ "$OCF_RESKEY_no_kill" = "1" ]; then
			do_stop shutdown
		else
			do_stop shutdown destroy
		fi
		exit $?
		;;
	kill)
		validate_all || exit $OCF_ERR_ARGS
		do_stop destroy
		exit $?
		;;
	recover|restart)
		exit 0
		;;
	status|monitor)
		validate_all || exit $OCF_ERR_ARGS
		echo -n "Virtual machine $OCF_RESKEY_name is "
		do_status
		rv=$?
		if [ $rv -ne 0 ]; then
			exit $rv
		fi
		# The status program only runs for checks of depth >= 10.
		[ -z "$OCF_RESKEY_status_program" ] && exit 0
		[ -z "$OCF_CHECK_LEVEL" ] && exit 0
		[ "$OCF_CHECK_LEVEL" -lt 10 ] && exit 0

		bash -c "$OCF_RESKEY_status_program" &> /dev/null
		exit $?
		;;
	migrate)
		validate_all || exit $OCF_ERR_ARGS
		migrate "$2" # Send VM to this node
		rv=$?
		if [ $rv -eq $OCF_ERR_GENERIC ]; then
			# Catch-all: If migration failed with
			# an unhandled error, do a status check
			# to see if the VM is really dead.
			#
			# If the VM is still in good health, return
			# a value to rgmanager to indicate the
			# non-critical error
			#
			# OCF states that codes 150-199 are reserved
			# for application use, so we'll use 150
			#
			do_status > /dev/null
			if [ $? -eq 0 ]; then
				rv=150
			fi
		fi
		exit $rv
		;;
	reload)
		exit 0
		;;
	reconfig)
		validate_all || exit $OCF_ERR_ARGS
		echo "$0 RECONFIGURING $OCF_RESKEY_memory"
		reconfigure
		exit $?
		;;
	meta-data)
		meta_data
		exit 0
		;;
	validate-all)
		validate_all
		exit $?
		;;
	*)
		echo "usage: $0 {start|stop|restart|status|reload|reconfig|meta-data|validate-all}"
		exit 1
		;;
esac