#!/bin/sh
#
# db2
#
# Resource agent that manages a DB2 LUW database in Standard role 
# or HADR configuration in promotable configuration.
# Multi partition is supported as well.
#
# Copyright (c) 2011 Holger Teutsch <holger.teutsch@web.de>
#
# This agent incoporates code of a previous release created by
# Alan Robertson and the community.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Parameter defaults

OCF_RESKEY_instance_default=""
OCF_RESKEY_admin_default=""
OCF_RESKEY_dbpartitionnum_default="0"

: ${OCF_RESKEY_instance=${OCF_RESKEY_instance_default}}
: ${OCF_RESKEY_admin=${OCF_RESKEY_admin_default}}
: ${OCF_RESKEY_dbpartitionnum=${OCF_RESKEY_dbpartitionnum_default}}

#######################################################################


db2_usage() {
    echo "db2 start|stop|monitor|promote|demote|notify|validate-all|meta-data"
}

db2_meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="db2" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles in promotable configuration. Multiple partitions are supported.

Standard mode:

An instance including all or selected databases is made highly available.
Configure each partition as a separate primitive resource.

HADR mode:

A single database in HADR configuration is made highly available by automating takeover operations.
Configure a promotable resource with notifications enabled and an
additional monitoring operation with role "Promoted".

In case of HADR be very deliberate in specifying intervals/timeouts. The detection of a failure including promote must complete within HADR_PEER_WINDOW.

In addition to honoring requirements for crash recovery etc. for your specific database use the following relations as guidance:

"monitor interval" &lt; HADR_PEER_WINDOW - (appr 30 sec)

"promote timeout" &lt; HADR_PEER_WINDOW + (appr 20 sec)

For further information and examples consult http://www.linux-ha.org/wiki/db2_(resource_agent)
</longdesc>
<shortdesc lang="en">Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles as promotable configuration. Multiple partitions are supported.</shortdesc>

<parameters>
<parameter name="instance" unique="1" required="1">
<longdesc lang="en">
The instance of the database(s).
</longdesc>
<shortdesc lang="en">instance</shortdesc>
<content type="string" default="${OCF_RESKEY_instance_default}" />
</parameter>
<parameter name="dblist" unique="0" required="0">
<longdesc lang="en">
List of databases to be managed, e.g "db1 db2".
Defaults to all databases in the instance. Specify one db for HADR mode.
</longdesc>
<shortdesc lang="en">List of databases to be managed</shortdesc>
<content type="string"/>
</parameter>
<parameter name="admin" unique="0" required="0">
<longdesc lang="en">
DEPRECATED: The admin user of the instance.
</longdesc>
<shortdesc lang="en">DEPRECATED: admin</shortdesc>
<content type="string" default="${OCF_RESKEY_admin_default}" />
</parameter>
<parameter name="dbpartitionnum" unique="0" required="0">
<longdesc lang="en">
The number of the partition (DBPARTITIONNUM) to be managed.
</longdesc>
<shortdesc lang="en">database partition number (DBPARTITIONNUM)</shortdesc>
<content type="string" default="${OCF_RESKEY_dbpartitionnum_default}" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="120s"/>
<action name="stop" timeout="120s"/>
<action name="promote" timeout="120s"/>
<action name="demote" timeout="120s"/>
<action name="notify" timeout="10s"/>
<action name="monitor" depth="0" timeout="60s" interval="20s"/>
<action name="monitor" depth="0" timeout="60s" role="Promoted" interval="22s"/>
<action name="validate-all" timeout="5s"/>
<action name="meta-data" timeout="5s"/>
</actions>
</resource-agent>
END
}

#
# validate
# .. and set global variables
#
# exit on error
#
db2_validate() {
    local db2home db2sql db2instance

    # db2 uses korn shell
    check_binary "ksh"

    # check required instance vars
    if [ -z "$OCF_RESKEY_instance" ]
    then
        ocf_log err "DB2 required parameter instance is not set!"
        return $OCF_ERR_CONFIGURED
    fi

    instance=$OCF_RESKEY_instance
    if [ -n "$OCF_RESKEY_admin" ]
    then
        ocf_log warn "DB2 deprecated parameter admin is set, using $OCF_RESKEY_admin as instance."
        instance=$OCF_RESKEY_admin
    fi

    db2node=${OCF_RESKEY_dbpartitionnum:-0}

    db2home=$(sh -c "echo ~$instance")
    db2sql=$db2home/sqllib
    db2profile=$db2sql/db2profile
    db2bin=$db2sql/bin

    STATE_FILE=${HA_RSCTMP}/db2-${OCF_RESOURCE_INSTANCE}.state

    #	Let's make sure a few important things are there...
    if ! [ -d "$db2sql" -a  -d "$db2bin" -a -f "$db2profile" -a \
           -x "$db2profile" -a -x "$db2bin/db2" ]
    then
        ocf_is_probe && exit $OCF_NOT_RUNNING
        ocf_log err "DB2 required directories and/or files not found"
        exit $OCF_ERR_INSTALLED
    fi

    db2instance=$(runasdb2 'echo $DB2INSTANCE')
    if [ "$db2instance" != "$instance" ]
    then
        ocf_is_probe && exit $OCF_NOT_RUNNING
        ocf_log err "DB2 parameter instance \"$instance\" != DB2INSTANCE \"$db2instance\""
        exit $OCF_ERR_CONFIGURED
    fi

    # enough checking for stop to succeed
    [ $__OCF_ACTION = stop ] && return $OCF_SUCCESS

    dblist=$OCF_RESKEY_dblist
    if [ -n "$dblist" ]
    then
        # support , as separator as well
        dblist=$(echo "$dblist" | sed -e 's/[,]/ /g')
    else
        if ! dblist=$(db2_dblist)
        then
            ocf_log err "DB2 $instance($db2node): cannot retrieve db directory"
            exit $OCF_ERR_INSTALLED
        fi
    fi

    # check requirements for the HADR case
    if ocf_is_ms
    then
        set -- $dblist
        if [ $# != 1 ]
        then
            ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have exactly one name in dblist"
            exit $OCF_ERR_CONFIGURED
        fi

        if [ $db2node != 0 ]
        then
            ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have dbpartitionnum=0"
            exit $OCF_ERR_CONFIGURED
        fi
    fi

    return $OCF_SUCCESS
}

master_score()
{
    if ! have_binary "crm_master"; then
        return
    fi

    crm_master $*
}

#
# Run the given command as db2 instance user
#
runasdb2() {
    su $instance -c ". $db2profile; $*"
}

#
# Run a command as the DB2 admin, and log the output
#
logasdb2() {
    local output rc

    output=$(runasdb2 $*)
    rc=$?
    if [ $rc -eq 0 ]
    then
        ocf_log info "$output"
    else
        ocf_log err "$output"
    fi
    return $rc
}


#
# maintain the fal (first active log) attribute
# db2_fal_attrib DB {set val|get}
#
db2_fal_attrib() {
    local db=$1
    local attr val rc id node member me

    attr=db2hadr_${instance}_${db}_fal

    case "$2" in
        set)
        me=$(ocf_local_nodename)

        # loop over all member nodes and set attribute
        crm_node -l |
        while read id node member
        do
            [ "$member" = member -a "$node" != "$me" ] || continue
            crm_attribute -l forever --node=$node -n $attr -v "$3"
            rc=$?
            ocf_log info "DB2 instance $instance($db2node/$db: setting attrib for FAL to $FIRST_ACTIVE_LOG @ $node"
            [ $rc != 0 ] && break
        done
        ;;

        get)
        crm_attribute -l forever -n $attr -G --quiet 2>&1
        rc=$?
        if ! ocf_is_true "$OCF_RESKEY_CRM_meta_notify" && [ $rc != 0 ]
        then
            ocf_log warn "DB2 instance $instance($db2node/$db: can't retrieve attribute $attr, are you sure notifications are enabled ?"
        fi
        ;;

        *)
        exit $OCF_ERR_CONFIGURED
    esac

    return $rc
}

#
# unfortunately a first connect after a crash may need several minutes
# for some internal cleanup stuff in DB2.
# We run a connect in background so other connects (i.e. monitoring!) may proceed.
#
db2_run_connect() {
    local db=$1

    logasdb2 "db2 connect to $db; db2 terminate"
}

#
# get some data from the database config
# sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW
#
db2_get_cfg() {
    local db=$1

    local output hadr_vars

    output=$(runasdb2 db2 get db cfg for $db)
    [ $? != 0 ] && return $OCF_ERR_GENERIC

    hadr_vars=$(echo "$output" |
        awk '/HADR database role/ {printf "HADR_ROLE='%s'; ", $NF;}
            /HADR_TIMEOUT/ {printf "HADR_TIMEOUT='%s'; ", $NF;}
            /First active log file/ {printf "FIRST_ACTIVE_LOG='%s'\n", $NF;}
            /HADR_PEER_WINDOW/ {printf "HADR_PEER_WINDOW='%s'\n", $NF;}')

    # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW 
    HADR_ROLE=$(echo "$output" | awk '/HADR database role/ {print $NF;}')
    HADR_TIMEOUT=$(echo "$output" | awk '/HADR_TIMEOUT/ {print $NF;}')
    FIRST_ACTIVE_LOG=$(echo "$output" | awk '/First active log file/ {print $NF;}')
    HADR_PEER_WINDOW=$(echo "$output" | awk '/HADR_PEER_WINDOW/ {print $NF;}')

    # HADR_PEER_WINDOW comes with V9 and is checked later
    if [ -z "$HADR_ROLE" -o -z "$HADR_TIMEOUT" ]
    then
        ocf_log error "DB2 cfg values invalid for $instance($db2node)/$db: $hadr_vars"
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

#
# return the list of databases in the instance
#
db2_dblist() {
    local output

    output=$(runasdb2 db2 list database directory) || return $OCF_ERR_GENERIC
    
    echo "$output" | grep -i 'Database name.*=' | sed 's%.*= *%%'
}

#
# Delayed check of the compatibility of DB2 instance and pacemaker
# config.
# Logically this belongs to validate but certain parameters can only
# be retrieved once the instance is started.
#
db2_check_config_compatibility() {
    local db=$1
    local is_ms

    ocf_is_ms
    is_ms=$?

    case "$HADR_ROLE/$is_ms" in
        STANDARD/0)
        ocf_log err "DB2 database $instance/$db is not in a HADR configuration but I am a M/S resource"
        exit $OCF_ERR_INSTALLED
        ;;

        STANDARD/1)
        # OK
        ;;

        */0)
        if [ -z "$HADR_PEER_WINDOW" ]
        then
            ocf_log err "DB2 database $instance: release to old, need HADR_PEER_WINDOW (>=V9)"
            exit $OCF_ERR_INSTALLED
        fi
        ;;

        */1)
        ocf_log err "DB2 database $instance/$db is in a HADR configuration but I must be a M/S resource"
    esac

}

#
# Start instance and DB.
# Standard mode is through "db2 activate" in order to start in previous
# mode (Standy/Primary).
# If the database is a primary AND we can determine that the running master
# has a higher "first active log" we conclude that we come up after a crash
# an the previous Standby is now Primary.
# The db is then started as Standby.
#
# Other cases: danger of split brain, log error and do nothing.
#
db2_start() {
    local output start_cmd db
    local start_opts="dbpartitionnum $db2node"

    # If we detect that db partitions are not in use, and no
    # partition is explicitly specified, activate without
    # partition information. This allows db2 instances without
    # partition support to be managed. 
    if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -e "$db2sql/db2nodes.cfg" ]; then
        start_opts=""
    fi

    if output=$(runasdb2 db2start $start_opts)
    then
        ocf_log info "DB2 instance $instance($db2node) started: $output"
    else
        case $output in
            *SQL1026N*)
            ocf_log info "DB2 instance $instance($db2node) already running: $output"
            ;;

            *)
            ocf_log err "$output"
            return $OCF_ERR_GENERIC
        esac
    fi

    if ! db2_instance_status
    then
        ocf_log err "DB2 instance $instance($db2node) is not active!"
        return $OCF_ERR_GENERIC
    fi

    [ $db2node = 0 ] || return $OCF_SUCCESS
    # activate DB only on node 0

    for db in $dblist
    do
        # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW FIRST_ACTIVE_LOG
        db2_get_cfg $db || return $?

        # Better late than never: can only check this when the instance is already up
        db2_check_config_compatibility $db

        start_cmd="db2 activate db $db"

        if [ $HADR_ROLE = PRIMARY ]
        then
            local master_fal

            # communicate our FAL to other nodes the might start concurrently
            db2_fal_attrib $db set $FIRST_ACTIVE_LOG

            # ignore false positive:
            # error: Can't use > in [ ]. Escape it or use [[..]]. [SC2073]
            # see https://github.com/koalaman/shellcheck/issues/691
            # shellcheck disable=SC2073
            if master_fal=$(db2_fal_attrib $db get) && [ "$master_fal" '>' $FIRST_ACTIVE_LOG ]
            then
                ocf_log info "DB2 database $instance($db2node)/$db is Primary and outdated, starting as secondary"
                start_cmd="db2 start hadr on db $db as standby"
                HADR_ROLE=STANDBY
            fi
        fi

        if output=$(runasdb2 $start_cmd)
        then
            ocf_log info "DB2 database $instance($db2node)/$db started/activated"
            [ $HADR_ROLE != STANDBY ] && db2_run_connect $db &
        else
            case $output in
                SQL1490W*|SQL1494W*|SQL1497W*|SQL1777N*)
                ocf_log info "DB2 database $instance($db2node)/$db already activated: $output"
                ;;

                SQL1768N*"Reason code = \"7\""*)
                ocf_log err "DB2 database $instance($db2node)/$db is a Primary and the Standby is down"
                ocf_log err "Possible split brain ! Manual intervention required."
                ocf_log err "If this DB is outdated use \"db2 start hadr on db $db as standby\""
                ocf_log err "If this DB is the surviving primary use \"db2 start hadr on db $db as primary by force\""

                # might be the Standby is not yet there
                # might be a timing problem because "First active log" is delayed
                #    on the next start attempt we might succeed when FAL was advanced
                # might be manual intervention is required
                # ... so let pacemaker give it another try and we will succeed then
                return $OCF_ERR_GENERIC
                ;;

                *)
                ocf_log err "DB2 database $instance($db2node)/$db didn't start: $output"
                return $OCF_ERR_GENERIC
            esac
        fi
    done

    # come here with success
    # Even if we are a db2 Primary pacemaker requires start to end up in slave mode
    echo SLAVE > $STATE_FILE
    return $OCF_SUCCESS
}

#
# helper function to be spawned
# so we can detect a hang of the db2stop command
#
db2_stop_bg() {
    local rc output
    local stop_opts="dbpartitionnum $db2node"

    rc=$OCF_SUCCESS

    if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -e "$db2sql/db2nodes.cfg" ]; then
        stop_opts=""
    fi

    if output=$(runasdb2 db2stop force $stop_opts)
    then
        ocf_log info "DB2 instance $instance($db2node) stopped: $output"
    else
        case $output in
            *SQL1032N*)
            #SQL1032N  No start database manager command was issued
            ocf_log info "$output"
            ;;

            *)
            ocf_log err "DB2 instance $instance($db2node) stop failed: $output"
            rc=$OCF_ERR_GENERIC
        esac
    fi

    return $rc
}

#
# Stop the given db2 database instance
#
db2_stop() {
    local stop_timeout grace_timeout stop_bg_pid i must_kill

    # remove master score
    master_score -D -l reboot

    # be very early here in order to avoid stale data
    rm -f $STATE_FILE

    db2_instance_status
    if [ $? -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "DB2 instance $instance already stopped"
        return $OCF_SUCCESS
    fi

    stop_timeout=${OCF_RESKEY_CRM_meta_timeout:-20000}

    # grace_time is 4/5 (unit is ms)
    grace_timeout=$((stop_timeout/1250))

    # start db2stop in background as this may hang
    db2_stop_bg &
    stop_bg_pid=$!

    # wait for grace_timeout
    i=0
    while [ $i -lt $grace_timeout ]
    do
        kill -0 $stop_bg_pid 2>/dev/null || break;
        sleep 1
        i=$((i+1))
    done

    # collect exit status but don't hang
    if kill -0 $stop_bg_pid 2>/dev/null
    then
        stoprc=1
        kill -9 $stop_bg_pid 2>/dev/null
    else
        wait $stop_bg_pid
        stoprc=$?
    fi

    must_kill=0

    if [ $stoprc -ne 0 ]
    then
        ocf_log warn "DB2 instance $instance($db2node): db2stop failed, using db2nkill"
        must_kill=1
    elif ! db2_instance_dead
    then
        ocf_log warn "DB2 instance $instance($db2node): db2stop indicated success but there a still processes, using db2nkill"
        must_kill=1
    fi

    if [ $must_kill -eq 1 ]
    then
        # db2nkill kills *all* partitions on the node
        if [ -x $db2bin/db2nkill ]
        then
            logasdb2 $db2bin/db2nkill $db2node
        elif [ -x $db2bin/db2_kill ]
        then
            logasdb2 $db2bin/db2_kill
        fi

        # loop forever (or lrmd kills us due to timeout) until the
        # instance is dead
        while ! db2_instance_dead
        do
            ocf_log info "DB2 instance $instance($db2node): waiting for processes to exit"
            sleep 1
        done

        ocf_log info "DB2 instance $instance($db2node) is now dead"
    fi

    return $OCF_SUCCESS
}

#
# check whether `enough´ processes for a healthy instance are up
# 
db2_instance_status() {
    local pscount

    pscount=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- |  grep ' db2[^ ]' | wc -l)
    if [ $pscount -ge 4 ]; then
        return $OCF_SUCCESS;
    elif [ $pscount -ge 1 ]; then
        return $OCF_ERR_GENERIC
    fi
    return $OCF_NOT_RUNNING
}

#
# is the given db2 instance dead?
# 
db2_instance_dead() {
    local pscount

    pscount=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- |  grep ' db2[^ ]' | wc -l)
    test $pscount -eq 0
}

#
# return the status of the db as "Role/Status"
# e.g. Primary/Peer, Standby/RemoteCatchupPending
#
# If not in HADR configuration return "Standard/Standalone"
#
db2_hadr_status() {
    local db=$1
    local output

    output=$(runasdb2 db2pd -hadr -db $db)
    if [ $? != 0 ]
    then
        echo "Down/Off"
        return 1 
    fi

    echo "$output" |
    awk '/^\s+HADR_(ROLE|STATE) =/ {printf $3"/"}
         /^\s+HADR_CONNECT_STATUS =/ {print $3; exit; }
         /^HADR is not active/ {print "Standard/Standalone"; exit; }
         /^Role *State */ {getline; printf "%s/%s\n", $1, $2; exit; }'
}

#
# Monitor the db
# And as side effect set crm_master / FAL attribute
#
db2_monitor() {
    local CMD output hadr db
    local rc

    db2_instance_status
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        # instance is dead remove master score
        master_score -D -l reboot
        exit $rc
    fi

    [ $db2node = 0 ] || return 0
    # monitoring only for partition 0

    for db in $dblist
    do
        hadr=$(db2_hadr_status $db) || return $OCF_ERR_GENERIC
        ocf_log debug "Monitor: DB2 database $instance($db2node)/$db has HADR status $hadr"

        # set master preference accordingly
        case "$hadr" in
            PRIMARY/*|Primary/*|Standard/*)
            # perform  a basic health check
            CMD="if db2 connect to $db;
            then 
                db2 select \* from sysibm.sysversions ; rc=\$?;
                db2 terminate;
            else
                rc=\$?;
            fi;
            exit \$rc"

            if ! output=$(runasdb2 $CMD)
            then
                case "$output" in
                    SQL1776N*)
                    # can't connect/select on standby, may be spurious turing takeover
                    ;;

                    *)
                    ocf_log err "DB2 database $instance($db2node)/$db is not working"
                    ocf_log err "DB2 message: $output"

                    # dead primary, remove master score
                    master_score -D -l reboot
                    return $OCF_ERR_GENERIC
                esac
            fi

            ocf_log debug "DB2 database $instance($db2node)/$db appears to be working"
            ocf_is_ms && master_score -v 10000 -l reboot
            ;;

            STANDBY/*PEER/*|Standby/*Peer)
            master_score -v 8000 -l reboot
            ;;

            STANDBY/*|Standby/*)
            ocf_log warn "DB2 database $instance($db2node)/$db in status $hadr can never be promoted"
            master_score -D -l reboot
            ;;

            *)
            return $OCF_ERR_GENERIC
        esac
    done

    # everything OK, return if running as slave
    grep MASTER $STATE_FILE >/dev/null 2>&1 || return $OCF_SUCCESS

    return $OCF_RUNNING_MASTER
}

#
# Promote db to Primary
#
db2_promote() {
    # validate ensured that dblist contains only one entry
    local db=$dblist
    local i hadr output force

    # we run this twice as after a crash of the other node
    # within HADR_TIMEOUT the status may be still reported as Peer
    # although a connection no longer exists

    for i in 1 2
    do
        hadr=$(db2_hadr_status $db) || return $OCF_ERR_GENERIC
        ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be promoted"

        case "$hadr" in
            Standard/Standalone)
            # this case only to keep ocf-tester happy
            return $OCF_SUCCESS
            ;;

            PRIMARY/PEER/*|PRIMARY/REMOTE_CATCHUP/*|PRIMARY/REMOTE_CATCHUP_PENDING/CONNECTED|Primary/Peer)
            # nothing to do, only update pacemaker's view
            echo MASTER > $STATE_FILE
            return $OCF_SUCCESS
            ;;

            STANDBY/PEER/CONNECTED|Standby/Peer)
            # must take over
            ;;

            STANDBY/*PEER/DISCONNECTED|Standby/DisconnectedPeer)
            # must take over by force peer window only
            force="by force peer window only"
            ;;

            # must take over by force
            STANDBY/REMOTE_CATCHUP_PENDING/DISCONNECTED)
            force="by force"
            ;;

            *)
            return $OCF_ERR_GENERIC
        esac

        if output=$(runasdb2 db2 takeover hadr on db $db $force)
        then
            # update pacemaker's view
            echo MASTER > $STATE_FILE

            # turn the log so we rapidly get a new FAL
            logasdb2 "db2 archive log for db $db"
            return $OCF_SUCCESS
        fi

        case "$output" in
            SQL1770N*"Reason code = \"7\""*)
            # expected, HADR_TIMEOUT is now expired
            # go for the second try
            continue
            ;;

            *)
            ocf_log err "DB2 database $instance($db2node)/$db promote failed: $output"
            return $OCF_ERR_GENERIC
        esac
    done

    return $OCF_ERR_GENERIC
}

#
# Demote db to standby
#
db2_demote() {
    # validate ensured that dblist contains only one entry
    local db=$dblist
    local hadr
    
    # house keeping, set pacemaker's view to slave
    echo SLAVE > $STATE_FILE

    hadr=$(db2_hadr_status $dblist) || return $OCF_ERR_GENERIC
    ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be demoted"

    db2_monitor
    return $?
}

#
# handle pre start notification
# We record our first active log on the other nodes.
# If two primaries come up after a crash they can safely determine who is
# the outdated one.
#
db2_notify() {
    local node

    # only interested in pre-start
    [  $OCF_RESKEY_CRM_meta_notify_type = pre \
    -a $OCF_RESKEY_CRM_meta_notify_operation = start ] || return $OCF_SUCCESS

    # gets FIRST_ACTIVE_LOG
    db2_get_cfg $dblist || return $?

    db2_fal_attrib $dblist set $FIRST_ACTIVE_LOG || return $OCF_ERR_GENERIC
    exit $OCF_SUCCESS
}

########
# Main #
########
case "$__OCF_ACTION" in
    meta-data)
    db2_meta_data
    exit $OCF_SUCCESS
    ;;

    usage)
    db2_usage
    exit $OCF_SUCCESS
    ;;

    start)
    db2_validate
    db2_start || exit $?
    db2_monitor
    exit $?
    ;;

    stop)
    db2_validate
    db2_stop
    exit $?
    ;;

    promote)
    db2_validate
    db2_promote
    exit $?
    ;;

    demote)
    db2_validate
    db2_demote
    exit $?
    ;;

    notify)
    db2_validate
    db2_notify
    exit $?
    ;;

    monitor)	
    db2_validate
    db2_monitor
    exit $?
    ;;

    validate-all)
    db2_validate
    exit $?
    ;;

    *)
    db2_usage
    exit $OCF_ERR_UNIMPLEMENTED
esac