Diffstat (limited to 'heartbeat/db2')
-rwxr-xr-x  heartbeat/db2  919
1 files changed, 919 insertions, 0 deletions
diff --git a/heartbeat/db2 b/heartbeat/db2
new file mode 100755
index 0000000..95447ab
--- /dev/null
+++ b/heartbeat/db2
@@ -0,0 +1,919 @@
+#!/bin/sh
+#
+# db2
+#
+# Resource agent that manages a DB2 LUW database in Standard role
+# or in HADR configuration as a promotable resource.
+# Multiple partitions are supported as well.
+#
+# Copyright (c) 2011 Holger Teutsch <holger.teutsch@web.de>
+#
+# This agent incorporates code of a previous release created by
+# Alan Robertson and the community.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Parameter defaults
+
+OCF_RESKEY_instance_default=""
+OCF_RESKEY_admin_default=""
+OCF_RESKEY_dbpartitionnum_default="0"
+
+: ${OCF_RESKEY_instance=${OCF_RESKEY_instance_default}}
+: ${OCF_RESKEY_admin=${OCF_RESKEY_admin_default}}
+: ${OCF_RESKEY_dbpartitionnum=${OCF_RESKEY_dbpartitionnum_default}}
+
+#######################################################################
+
+
+db2_usage() {
+ echo "db2 start|stop|monitor|promote|demote|notify|validate-all|meta-data"
+}
+
+db2_meta_data() {
+cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="db2" version="1.0">
+<version>1.0</version>
+<longdesc lang="en">
+Resource Agent that manages IBM DB2 LUW databases in Standard role as a primitive or in HADR roles in a promotable configuration. Multiple partitions are supported.
+
+Standard mode:
+
+An instance including all or selected databases is made highly available.
+Configure each partition as a separate primitive resource.
+
+HADR mode:
+
+A single database in HADR configuration is made highly available by automating takeover operations.
+Configure a promotable resource with notifications enabled and an
+additional monitoring operation with role "Promoted".
+
+In case of HADR be very deliberate in specifying intervals/timeouts. The detection of a failure including promote must complete within HADR_PEER_WINDOW.
+
+In addition to honoring requirements for crash recovery etc. for your specific database, use the following relations as guidance:
+
+"monitor interval" &lt; HADR_PEER_WINDOW - (appr 30 sec)
+
+"promote timeout" &lt; HADR_PEER_WINDOW + (appr 20 sec)
+
+For further information and examples consult http://www.linux-ha.org/wiki/db2_(resource_agent)
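+
+For illustration only, a minimal crm shell configuration for the HADR case
+might look like this (resource, instance and database names are placeholders;
+line breaks are for readability only):
+
+  crm configure primitive db2_sample ocf:heartbeat:db2
+      params instance="db2inst1" dblist="SAMPLE"
+      op start timeout="120s" op stop timeout="120s"
+      op promote timeout="120s" op demote timeout="120s"
+      op monitor interval="20s" timeout="60s"
+      op monitor interval="22s" timeout="60s" role="Promoted"
+
+  crm configure clone db2_sample_clone db2_sample meta promotable="true" notify="true"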
+</longdesc>
+<shortdesc lang="en">Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles as promotable configuration. Multiple partitions are supported.</shortdesc>
+
+<parameters>
+<parameter name="instance" unique="1" required="1">
+<longdesc lang="en">
+The instance of the database(s).
+</longdesc>
+<shortdesc lang="en">instance</shortdesc>
+<content type="string" default="${OCF_RESKEY_instance_default}" />
+</parameter>
+<parameter name="dblist" unique="0" required="0">
+<longdesc lang="en">
+List of databases to be managed, e.g. "db1 db2".
+Defaults to all databases in the instance. Specify one db for HADR mode.
+</longdesc>
+<shortdesc lang="en">List of databases to be managed</shortdesc>
+<content type="string"/>
+</parameter>
+<parameter name="admin" unique="0" required="0">
+<longdesc lang="en">
+DEPRECATED: The admin user of the instance.
+</longdesc>
+<shortdesc lang="en">DEPRECATED: admin</shortdesc>
+<content type="string" default="${OCF_RESKEY_admin_default}" />
+</parameter>
+<parameter name="dbpartitionnum" unique="0" required="0">
+<longdesc lang="en">
+The number of the partition (DBPARTITIONNUM) to be managed.
+</longdesc>
+<shortdesc lang="en">database partition number (DBPARTITIONNUM)</shortdesc>
+<content type="string" default="${OCF_RESKEY_dbpartitionnum_default}" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="120s"/>
+<action name="stop" timeout="120s"/>
+<action name="promote" timeout="120s"/>
+<action name="demote" timeout="120s"/>
+<action name="notify" timeout="10s"/>
+<action name="monitor" depth="0" timeout="60s" interval="20s"/>
+<action name="monitor" depth="0" timeout="60s" role="Promoted" interval="22s"/>
+<action name="validate-all" timeout="5s"/>
+<action name="meta-data" timeout="5s"/>
+</actions>
+</resource-agent>
+END
+}
+
+#
+# validate
+# .. and set global variables
+#
+# exit on error
+#
+db2_validate() {
+ local db2home db2sql db2instance
+
+ # db2 uses korn shell
+ check_binary "ksh"
+
+ # check required instance vars
+ if [ -z "$OCF_RESKEY_instance" ]
+ then
+ ocf_log err "DB2 required parameter instance is not set!"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ instance=$OCF_RESKEY_instance
+ if [ -n "$OCF_RESKEY_admin" ]
+ then
+ ocf_log warn "DB2 deprecated parameter admin is set, using $OCF_RESKEY_admin as instance."
+ instance=$OCF_RESKEY_admin
+ fi
+
+ db2node=${OCF_RESKEY_dbpartitionnum:-0}
+
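+ # resolve the instance owner's home directory; the instance profile
+ # and binaries are expected under ~instance/sqllib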
+ db2home=$(sh -c "echo ~$instance")
+ db2sql=$db2home/sqllib
+ db2profile=$db2sql/db2profile
+ db2bin=$db2sql/bin
+
+ STATE_FILE=${HA_RSCTMP}/db2-${OCF_RESOURCE_INSTANCE}.state
+
+ # Let's make sure a few important things are there...
+ if ! [ -d "$db2sql" -a -d "$db2bin" -a -f "$db2profile" -a \
+ -x "$db2profile" -a -x "$db2bin/db2" ]
+ then
+ ocf_is_probe && exit $OCF_NOT_RUNNING
+ ocf_log err "DB2 required directories and/or files not found"
+ exit $OCF_ERR_INSTALLED
+ fi
+
+ db2instance=$(runasdb2 'echo $DB2INSTANCE')
+ if [ "$db2instance" != "$instance" ]
+ then
+ ocf_is_probe && exit $OCF_NOT_RUNNING
+ ocf_log err "DB2 parameter instance \"$instance\" != DB2INSTANCE \"$db2instance\""
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ # enough checking for stop to succeed
+ [ $__OCF_ACTION = stop ] && return $OCF_SUCCESS
+
+ dblist=$OCF_RESKEY_dblist
+ if [ -n "$dblist" ]
+ then
+ # support , as separator as well
+ dblist=$(echo "$dblist" | sed -e 's/[,]/ /g')
+ else
+ if ! dblist=$(db2_dblist)
+ then
+ ocf_log err "DB2 $instance($db2node): cannot retrieve db directory"
+ exit $OCF_ERR_INSTALLED
+ fi
+ fi
+
+ # check requirements for the HADR case
+ if ocf_is_ms
+ then
+ set -- $dblist
+ if [ $# != 1 ]
+ then
+ ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have exactly one name in dblist"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ if [ $db2node != 0 ]
+ then
+ ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have dbpartitionnum=0"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+
+ return $OCF_SUCCESS
+}
+
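+#
+# wrapper around crm_master: silently do nothing when crm_master is not
+# available (e.g. when the agent is exercised outside a cluster by ocf-tester)
+#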
+master_score()
+{
+ if ! have_binary "crm_master"; then
+ return
+ fi
+
+ crm_master $*
+}
+
+#
+# Run the given command as db2 instance user
+#
+runasdb2() {
+ su $instance -c ". $db2profile; $*"
+}
+
+#
+# Run a command as the DB2 instance user and log the output
+#
+logasdb2() {
+ local output rc
+
+ output=$(runasdb2 $*)
+ rc=$?
+ if [ $rc -eq 0 ]
+ then
+ ocf_log info "$output"
+ else
+ ocf_log err "$output"
+ fi
+ return $rc
+}
+
+
+#
+# maintain the fal (first active log) attribute
+# db2_fal_attrib DB {set val|get}
+#
+db2_fal_attrib() {
+ local db=$1
+ local attr val rc id node member me
+
+ attr=db2hadr_${instance}_${db}_fal
+
+ case "$2" in
+ set)
+ me=$(ocf_local_nodename)
+
+ # loop over all member nodes and set attribute
+ crm_node -l |
+ while read id node member
+ do
+ [ "$member" = member -a "$node" != "$me" ] || continue
+ crm_attribute -l forever --node=$node -n $attr -v "$3"
+ rc=$?
+ ocf_log info "DB2 instance $instance($db2node/$db: setting attrib for FAL to $FIRST_ACTIVE_LOG @ $node"
+ [ $rc != 0 ] && break
+ done
+ ;;
+
+ get)
+ crm_attribute -l forever -n $attr -G --quiet 2>&1
+ rc=$?
+ if ! ocf_is_true "$OCF_RESKEY_CRM_meta_notify" && [ $rc != 0 ]
+ then
+ ocf_log warn "DB2 instance $instance($db2node/$db: can't retrieve attribute $attr, are you sure notifications are enabled ?"
+ fi
+ ;;
+
+ *)
+ exit $OCF_ERR_CONFIGURED
+ esac
+
+ return $rc
+}
+
+#
+# unfortunately a first connect after a crash may need several minutes
+# for some internal cleanup stuff in DB2.
+# We run a connect in background so other connects (i.e. monitoring!) may proceed.
+#
+db2_run_connect() {
+ local db=$1
+
+ logasdb2 "db2 connect to $db; db2 terminate"
+}
+
+#
+# get some data from the database config
+# sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW
+#
+db2_get_cfg() {
+ local db=$1
+
+ local output hadr_vars
+
+ output=$(runasdb2 db2 get db cfg for $db)
+ [ $? != 0 ] && return $OCF_ERR_GENERIC
+
+ hadr_vars=$(echo "$output" |
+ awk '/HADR database role/ {printf "HADR_ROLE='%s'; ", $NF;}
+ /HADR_TIMEOUT/ {printf "HADR_TIMEOUT='%s'; ", $NF;}
+ /First active log file/ {printf "FIRST_ACTIVE_LOG='%s'\n", $NF;}
+ /HADR_PEER_WINDOW/ {printf "HADR_PEER_WINDOW='%s'\n", $NF;}')
+
+ # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW
+ HADR_ROLE=$(echo "$output" | awk '/HADR database role/ {print $NF;}')
+ HADR_TIMEOUT=$(echo "$output" | awk '/HADR_TIMEOUT/ {print $NF;}')
+ FIRST_ACTIVE_LOG=$(echo "$output" | awk '/First active log file/ {print $NF;}')
+ HADR_PEER_WINDOW=$(echo "$output" | awk '/HADR_PEER_WINDOW/ {print $NF;}')
+
+ # HADR_PEER_WINDOW comes with V9 and is checked later
+ if [ -z "$HADR_ROLE" -o -z "$HADR_TIMEOUT" ]
+ then
+ ocf_log err "DB2 cfg values invalid for $instance($db2node)/$db: $hadr_vars"
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+#
+# return the list of databases in the instance
+#
+db2_dblist() {
+ local output
+
+ output=$(runasdb2 db2 list database directory) || return $OCF_ERR_GENERIC
+
+ echo "$output" | grep -i 'Database name.*=' | sed 's%.*= *%%'
+}
+
+#
+# Delayed check of the compatibility of DB2 instance and pacemaker
+# config.
+# Logically this belongs to validate but certain parameters can only
+# be retrieved once the instance is started.
+#
+db2_check_config_compatibility() {
+ local db=$1
+ local is_ms
+
+ ocf_is_ms
+ is_ms=$?
+
+ case "$HADR_ROLE/$is_ms" in
+ STANDARD/0)
+ ocf_log err "DB2 database $instance/$db is not in a HADR configuration but I am a M/S resource"
+ exit $OCF_ERR_INSTALLED
+ ;;
+
+ STANDARD/1)
+ # OK
+ ;;
+
+ */0)
+ if [ -z "$HADR_PEER_WINDOW" ]
+ then
+ ocf_log err "DB2 database $instance: release to old, need HADR_PEER_WINDOW (>=V9)"
+ exit $OCF_ERR_INSTALLED
+ fi
+ ;;
+
+ */1)
+ ocf_log err "DB2 database $instance/$db is in a HADR configuration but I must be a M/S resource"
+ esac
+
+}
+
+#
+# Start instance and DB.
+# Standard mode is through "db2 activate" in order to start in the previous
+# mode (Standby/Primary).
+# If the database is a Primary AND we can determine that the running master
+# has a higher "first active log" we conclude that we came up after a crash
+# and the previous Standby is now Primary.
+# The db is then started as Standby.
+#
+# Other cases: danger of split brain, log error and do nothing.
+#
+db2_start() {
+ local output start_cmd db
+ local start_opts="dbpartitionnum $db2node"
+
+ # If we detect that db partitions are not in use, and no
+ # partition is explicitly specified, activate without
+ # partition information. This allows db2 instances without
+ # partition support to be managed.
+ if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -e "$db2sql/db2nodes.cfg" ]; then
+ start_opts=""
+ fi
+
+ if output=$(runasdb2 db2start $start_opts)
+ then
+ ocf_log info "DB2 instance $instance($db2node) started: $output"
+ else
+ case $output in
+ *SQL1026N*)
+ ocf_log info "DB2 instance $instance($db2node) already running: $output"
+ ;;
+
+ *)
+ ocf_log err "$output"
+ return $OCF_ERR_GENERIC
+ esac
+ fi
+
+ if ! db2_instance_status
+ then
+ ocf_log err "DB2 instance $instance($db2node) is not active!"
+ return $OCF_ERR_GENERIC
+ fi
+
+ [ $db2node = 0 ] || return $OCF_SUCCESS
+ # activate DB only on node 0
+
+ for db in $dblist
+ do
+ # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW FIRST_ACTIVE_LOG
+ db2_get_cfg $db || return $?
+
+ # Better late than never: can only check this when the instance is already up
+ db2_check_config_compatibility $db
+
+ start_cmd="db2 activate db $db"
+
+ if [ $HADR_ROLE = PRIMARY ]
+ then
+ local master_fal
+
+ # communicate our FAL to other nodes that might start concurrently
+ db2_fal_attrib $db set $FIRST_ACTIVE_LOG
+
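+ # Note: DB2 first active log names have a fixed-width format
+ # (e.g. S0000042.LOG), so a lexical string comparison is sufficient here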
+ # ignore false positive:
+ # error: Can't use > in [ ]. Escape it or use [[..]]. [SC2073]
+ # see https://github.com/koalaman/shellcheck/issues/691
+ # shellcheck disable=SC2073
+ if master_fal=$(db2_fal_attrib $db get) && [ "$master_fal" '>' $FIRST_ACTIVE_LOG ]
+ then
+ ocf_log info "DB2 database $instance($db2node)/$db is Primary and outdated, starting as secondary"
+ start_cmd="db2 start hadr on db $db as standby"
+ HADR_ROLE=STANDBY
+ fi
+ fi
+
+ if output=$(runasdb2 $start_cmd)
+ then
+ ocf_log info "DB2 database $instance($db2node)/$db started/activated"
+ [ $HADR_ROLE != STANDBY ] && db2_run_connect $db &
+ else
+ case $output in
+ SQL1490W*|SQL1494W*|SQL1497W*|SQL1777N*)
+ ocf_log info "DB2 database $instance($db2node)/$db already activated: $output"
+ ;;
+
+ SQL1768N*"Reason code = \"7\""*)
+ ocf_log err "DB2 database $instance($db2node)/$db is a Primary and the Standby is down"
+ ocf_log err "Possible split brain ! Manual intervention required."
+ ocf_log err "If this DB is outdated use \"db2 start hadr on db $db as standby\""
+ ocf_log err "If this DB is the surviving primary use \"db2 start hadr on db $db as primary by force\""
+
+ # might be the Standby is not yet there
+ # might be a timing problem because "First active log" is delayed
+ # on the next start attempt we might succeed when FAL was advanced
+ # might be manual intervention is required
+ # ... so let pacemaker give it another try and we will succeed then
+ return $OCF_ERR_GENERIC
+ ;;
+
+ *)
+ ocf_log err "DB2 database $instance($db2node)/$db didn't start: $output"
+ return $OCF_ERR_GENERIC
+ esac
+ fi
+ done
+
+ # come here with success
+ # Even if we are a db2 Primary, pacemaker requires start to end up in slave mode
+ echo SLAVE > $STATE_FILE
+ return $OCF_SUCCESS
+}
+
+#
+# helper function to be spawned
+# so we can detect a hang of the db2stop command
+#
+db2_stop_bg() {
+ local rc output
+ local stop_opts="dbpartitionnum $db2node"
+
+ rc=$OCF_SUCCESS
+
+ if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -e "$db2sql/db2nodes.cfg" ]; then
+ stop_opts=""
+ fi
+
+ if output=$(runasdb2 db2stop force $stop_opts)
+ then
+ ocf_log info "DB2 instance $instance($db2node) stopped: $output"
+ else
+ case $output in
+ *SQL1032N*)
+ #SQL1032N No start database manager command was issued
+ ocf_log info "$output"
+ ;;
+
+ *)
+ ocf_log err "DB2 instance $instance($db2node) stop failed: $output"
+ rc=$OCF_ERR_GENERIC
+ esac
+ fi
+
+ return $rc
+}
+
+#
+# Stop the given db2 database instance
+#
+db2_stop() {
+ local stop_timeout grace_timeout stop_bg_pid i must_kill
+
+ # remove master score
+ master_score -D -l reboot
+
+ # be very early here in order to avoid stale data
+ rm -f $STATE_FILE
+
+ db2_instance_status
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ ocf_log info "DB2 instance $instance already stopped"
+ return $OCF_SUCCESS
+ fi
+
+ stop_timeout=${OCF_RESKEY_CRM_meta_timeout:-20000}
+
+ # grace_timeout is 4/5 of the stop timeout, converted from ms to seconds (hence /1250)
+ grace_timeout=$((stop_timeout/1250))
+
+ # start db2stop in background as this may hang
+ db2_stop_bg &
+ stop_bg_pid=$!
+
+ # wait for grace_timeout
+ i=0
+ while [ $i -lt $grace_timeout ]
+ do
+ kill -0 $stop_bg_pid 2>/dev/null || break;
+ sleep 1
+ i=$((i+1))
+ done
+
+ # collect exit status but don't hang
+ if kill -0 $stop_bg_pid 2>/dev/null
+ then
+ stoprc=1
+ kill -9 $stop_bg_pid 2>/dev/null
+ else
+ wait $stop_bg_pid
+ stoprc=$?
+ fi
+
+ must_kill=0
+
+ if [ $stoprc -ne 0 ]
+ then
+ ocf_log warn "DB2 instance $instance($db2node): db2stop failed, using db2nkill"
+ must_kill=1
+ elif ! db2_instance_dead
+ then
+ ocf_log warn "DB2 instance $instance($db2node): db2stop indicated success but there a still processes, using db2nkill"
+ must_kill=1
+ fi
+
+ if [ $must_kill -eq 1 ]
+ then
+ # db2nkill kills *all* partitions on the node
+ if [ -x $db2bin/db2nkill ]
+ then
+ logasdb2 $db2bin/db2nkill $db2node
+ elif [ -x $db2bin/db2_kill ]
+ then
+ logasdb2 $db2bin/db2_kill
+ fi
+
+ # loop forever (or lrmd kills us due to timeout) until the
+ # instance is dead
+ while ! db2_instance_dead
+ do
+ ocf_log info "DB2 instance $instance($db2node): waiting for processes to exit"
+ sleep 1
+ done
+
+ ocf_log info "DB2 instance $instance($db2node) is now dead"
+ fi
+
+ return $OCF_SUCCESS
+}
+
+#
+# check whether 'enough' processes for a healthy instance are up
+#
+db2_instance_status() {
+ local pscount
+
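+ # db2nps lists the db2 processes of this partition: treat 4 or more as
+ # healthy, only a few leftovers as broken and none as not running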
+ pscount=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- | grep ' db2[^ ]' | wc -l)
+ if [ $pscount -ge 4 ]; then
+ return $OCF_SUCCESS;
+ elif [ $pscount -ge 1 ]; then
+ return $OCF_ERR_GENERIC
+ fi
+ return $OCF_NOT_RUNNING
+}
+
+#
+# is the given db2 instance dead?
+#
+db2_instance_dead() {
+ local pscount
+
+ pscount=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- | grep ' db2[^ ]' | wc -l)
+ test $pscount -eq 0
+}
+
+#
+# return the status of the db as "Role/Status"
+# e.g. Primary/Peer, Standby/RemoteCatchupPending
+#
+# If not in HADR configuration return "Standard/Standalone"
+#
+db2_hadr_status() {
+ local db=$1
+ local output
+
+ output=$(runasdb2 db2pd -hadr -db $db)
+ if [ $? != 0 ]
+ then
+ echo "Down/Off"
+ return 1
+ fi
+
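+ # db2pd output differs between releases: newer versions print
+ # "HADR_ROLE = ..." key/value pairs, older ones a "Role State ..." header
+ # followed by a data line; handle both formats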
+ echo "$output" |
+ awk '/^\s+HADR_(ROLE|STATE) =/ {printf $3"/"}
+ /^\s+HADR_CONNECT_STATUS =/ {print $3; exit; }
+ /^HADR is not active/ {print "Standard/Standalone"; exit; }
+ /^Role *State */ {getline; printf "%s/%s\n", $1, $2; exit; }'
+}
+
+#
+# Monitor the db
+# And as side effect set crm_master / FAL attribute
+#
+db2_monitor() {
+ local CMD output hadr db
+ local rc
+
+ db2_instance_status
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ # instance is dead remove master score
+ master_score -D -l reboot
+ exit $rc
+ fi
+
+ [ $db2node = 0 ] || return 0
+ # monitoring only for partition 0
+
+ for db in $dblist
+ do
+ hadr=$(db2_hadr_status $db) || return $OCF_ERR_GENERIC
+ ocf_log debug "Monitor: DB2 database $instance($db2node)/$db has HADR status $hadr"
+
+ # set master preference accordingly
+ case "$hadr" in
+ PRIMARY/*|Primary/*|Standard/*)
+ # perform a basic health check
+ CMD="if db2 connect to $db;
+ then
+ db2 select \* from sysibm.sysversions ; rc=\$?;
+ db2 terminate;
+ else
+ rc=\$?;
+ fi;
+ exit \$rc"
+
+ if ! output=$(runasdb2 $CMD)
+ then
+ case "$output" in
+ SQL1776N*)
+ # can't connect/select on standby, may be spurious during takeover
+ ;;
+
+ *)
+ ocf_log err "DB2 database $instance($db2node)/$db is not working"
+ ocf_log err "DB2 message: $output"
+
+ # dead primary, remove master score
+ master_score -D -l reboot
+ return $OCF_ERR_GENERIC
+ esac
+ fi
+
+ ocf_log debug "DB2 database $instance($db2node)/$db appears to be working"
+ ocf_is_ms && master_score -v 10000 -l reboot
+ ;;
+
+ STANDBY/*PEER/*|Standby/*Peer)
+ master_score -v 8000 -l reboot
+ ;;
+
+ STANDBY/*|Standby/*)
+ ocf_log warn "DB2 database $instance($db2node)/$db in status $hadr can never be promoted"
+ master_score -D -l reboot
+ ;;
+
+ *)
+ return $OCF_ERR_GENERIC
+ esac
+ done
+
+ # everything OK, report success (slave) unless the state file records us as master
+ grep MASTER $STATE_FILE >/dev/null 2>&1 || return $OCF_SUCCESS
+
+ return $OCF_RUNNING_MASTER
+}
+
+#
+# Promote db to Primary
+#
+db2_promote() {
+ # validate ensured that dblist contains only one entry
+ local db=$dblist
+ local i hadr output force
+
+ # we run this twice as after a crash of the other node
+ # within HADR_TIMEOUT the status may be still reported as Peer
+ # although a connection no longer exists
+
+ for i in 1 2
+ do
+ hadr=$(db2_hadr_status $db) || return $OCF_ERR_GENERIC
+ ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be promoted"
+
+ case "$hadr" in
+ Standard/Standalone)
+ # this case only to keep ocf-tester happy
+ return $OCF_SUCCESS
+ ;;
+
+ PRIMARY/PEER/*|PRIMARY/REMOTE_CATCHUP/*|PRIMARY/REMOTE_CATCHUP_PENDING/CONNECTED|Primary/Peer)
+ # nothing to do, only update pacemaker's view
+ echo MASTER > $STATE_FILE
+ return $OCF_SUCCESS
+ ;;
+
+ STANDBY/PEER/CONNECTED|Standby/Peer)
+ # must take over
+ ;;
+
+ STANDBY/*PEER/DISCONNECTED|Standby/DisconnectedPeer)
+ # must take over by force peer window only
+ force="by force peer window only"
+ ;;
+
+ # must take over by force
+ STANDBY/REMOTE_CATCHUP_PENDING/DISCONNECTED)
+ force="by force"
+ ;;
+
+ *)
+ return $OCF_ERR_GENERIC
+ esac
+
+ if output=$(runasdb2 db2 takeover hadr on db $db $force)
+ then
+ # update pacemaker's view
+ echo MASTER > $STATE_FILE
+
+ # turn the log so we rapidly get a new FAL
+ logasdb2 "db2 archive log for db $db"
+ return $OCF_SUCCESS
+ fi
+
+ case "$output" in
+ SQL1770N*"Reason code = \"7\""*)
+ # expected, HADR_TIMEOUT is now expired
+ # go for the second try
+ continue
+ ;;
+
+ *)
+ ocf_log err "DB2 database $instance($db2node)/$db promote failed: $output"
+ return $OCF_ERR_GENERIC
+ esac
+ done
+
+ return $OCF_ERR_GENERIC
+}
+
+#
+# Demote db to standby
+#
+db2_demote() {
+ # validate ensured that dblist contains only one entry
+ local db=$dblist
+ local hadr
+
+ # house keeping, set pacemaker's view to slave
+ echo SLAVE > $STATE_FILE
+
+ hadr=$(db2_hadr_status $dblist) || return $OCF_ERR_GENERIC
+ ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be demoted"
+
+ db2_monitor
+ return $?
+}
+
+#
+# handle pre start notification
+# We record our first active log on the other nodes.
+# If two primaries come up after a crash they can safely determine who is
+# the outdated one.
+#
+db2_notify() {
+ local node
+
+ # only interested in pre-start
+ [ $OCF_RESKEY_CRM_meta_notify_type = pre \
+ -a $OCF_RESKEY_CRM_meta_notify_operation = start ] || return $OCF_SUCCESS
+
+ # gets FIRST_ACTIVE_LOG
+ db2_get_cfg $dblist || return $?
+
+ db2_fal_attrib $dblist set $FIRST_ACTIVE_LOG || return $OCF_ERR_GENERIC
+ exit $OCF_SUCCESS
+}
+
+########
+# Main #
+########
+case "$__OCF_ACTION" in
+ meta-data)
+ db2_meta_data
+ exit $OCF_SUCCESS
+ ;;
+
+ usage)
+ db2_usage
+ exit $OCF_SUCCESS
+ ;;
+
+ start)
+ db2_validate
+ db2_start || exit $?
+ db2_monitor
+ exit $?
+ ;;
+
+ stop)
+ db2_validate
+ db2_stop
+ exit $?
+ ;;
+
+ promote)
+ db2_validate
+ db2_promote
+ exit $?
+ ;;
+
+ demote)
+ db2_validate
+ db2_demote
+ exit $?
+ ;;
+
+ notify)
+ db2_validate
+ db2_notify
+ exit $?
+ ;;
+
+ monitor)
+ db2_validate
+ db2_monitor
+ exit $?
+ ;;
+
+ validate-all)
+ db2_validate
+ exit $?
+ ;;
+
+ *)
+ db2_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+esac