From 7de03e4e519705301265c0415b3c0af85263a7ac Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 09:52:36 +0200 Subject: Adding upstream version 1:4.13.0. Signed-off-by: Daniel Baumann --- heartbeat/AoEtarget.in | 245 ++++ heartbeat/AudibleAlarm | 188 +++ heartbeat/CTDB.in | 996 +++++++++++++++ heartbeat/ClusterMon | 271 ++++ heartbeat/Delay | 227 ++++ heartbeat/Dummy | 186 +++ heartbeat/EvmsSCC | 222 ++++ heartbeat/Evmsd | 161 +++ heartbeat/Filesystem | 1128 +++++++++++++++++ heartbeat/ICP | 304 +++++ heartbeat/IPaddr | 912 ++++++++++++++ heartbeat/IPaddr2 | 1357 ++++++++++++++++++++ heartbeat/IPsrcaddr | 631 ++++++++++ heartbeat/IPv6addr.c | 899 ++++++++++++++ heartbeat/IPv6addr_utils.c | 147 +++ heartbeat/LVM | 470 +++++++ heartbeat/LVM-activate | 997 +++++++++++++++ heartbeat/LinuxSCSI | 322 +++++ heartbeat/MailTo | 199 +++ heartbeat/Makefile.am | 249 ++++ heartbeat/ManageRAID.in | 391 ++++++ heartbeat/ManageVE.in | 320 +++++ heartbeat/NodeUtilization | 237 ++++ heartbeat/Pure-FTPd | 260 ++++ heartbeat/README | 46 + heartbeat/README.galera | 148 +++ heartbeat/README.mariadb.md | 156 +++ heartbeat/Raid1 | 586 +++++++++ heartbeat/Route | 348 ++++++ heartbeat/SAPDatabase | 401 ++++++ heartbeat/SAPInstance | 1076 ++++++++++++++++ heartbeat/SendArp | 277 +++++ heartbeat/ServeRAID | 427 +++++++ heartbeat/SphinxSearchDaemon | 230 ++++ heartbeat/Squid.in | 472 +++++++ heartbeat/Stateful | 192 +++ heartbeat/SysInfo.in | 372 ++++++ heartbeat/VIPArip | 314 +++++ heartbeat/VirtualDomain | 1158 ++++++++++++++++++ heartbeat/WAS | 572 +++++++++ heartbeat/WAS6 | 546 +++++++++ heartbeat/WinPopup | 237 ++++ heartbeat/Xen | 653 ++++++++++ heartbeat/Xinetd | 256 ++++ heartbeat/ZFS | 212 ++++ heartbeat/aliyun-vpc-move-ip | 378 ++++++ heartbeat/anything | 344 ++++++ heartbeat/apache | 744 +++++++++++ heartbeat/apache-conf.sh | 196 +++ heartbeat/asterisk | 497 ++++++++ heartbeat/aws-vpc-move-ip | 495 ++++++++ heartbeat/aws-vpc-route53.in | 449 +++++++ 
heartbeat/awseip | 287 +++++ heartbeat/awsvip | 251 ++++ heartbeat/azure-events-az.in | 772 ++++++++++++ heartbeat/azure-events.in | 847 +++++++++++++ heartbeat/azure-lb | 229 ++++ heartbeat/clvm.in | 457 +++++++ heartbeat/conntrackd.in | 335 +++++ heartbeat/corosync-qnetd | 353 ++++++ heartbeat/crypt | 342 ++++++ heartbeat/db2 | 919 ++++++++++++++ heartbeat/dhcpd | 558 +++++++++ heartbeat/dnsupdate.in | 381 ++++++ heartbeat/docker | 605 +++++++++ heartbeat/docker-compose | 290 +++++ heartbeat/dovecot | 338 +++++ heartbeat/dummypy.in | 164 +++ heartbeat/eDir88.in | 476 ++++++++ heartbeat/ethmonitor | 580 +++++++++ heartbeat/exportfs | 492 ++++++++ heartbeat/findif.sh | 260 ++++ heartbeat/fio.in | 178 +++ heartbeat/galera.in | 1097 +++++++++++++++++ heartbeat/garbd | 436 +++++++ heartbeat/gcp-ilb | 344 ++++++ heartbeat/gcp-pd-move.in | 382 ++++++ heartbeat/gcp-vpc-move-ip.in | 374 ++++++ heartbeat/gcp-vpc-move-route.in | 490 ++++++++ heartbeat/gcp-vpc-move-vip.in | 466 +++++++ heartbeat/http-mon.sh | 140 +++ heartbeat/iSCSILogicalUnit.in | 830 +++++++++++++ heartbeat/iSCSITarget.in | 766 ++++++++++++ heartbeat/ids | 751 ++++++++++++ heartbeat/iface-bridge | 843 +++++++++++++ heartbeat/iface-macvlan | 363 ++++++ heartbeat/iface-vlan | 475 +++++++ heartbeat/ipsec | 200 +++ heartbeat/iscsi | 516 ++++++++ heartbeat/jboss | 672 ++++++++++ heartbeat/jira.in | 291 +++++ heartbeat/kamailio.in | 741 +++++++++++ heartbeat/lvm-clvm.sh | 86 ++ heartbeat/lvm-plain.sh | 62 + heartbeat/lvm-tag.sh | 205 ++++ heartbeat/lvmlockd | 401 ++++++ heartbeat/lxc.in | 358 ++++++ heartbeat/lxd-info.in | 156 +++ heartbeat/machine-info.in | 157 +++ heartbeat/mariadb.in | 1040 ++++++++++++++++ heartbeat/mdraid | 584 +++++++++ heartbeat/metadata.rng | 93 ++ heartbeat/minio | 289 +++++ heartbeat/mpathpersist.in | 686 +++++++++++ heartbeat/mysql | 1074 ++++++++++++++++ heartbeat/mysql-common.sh | 332 +++++ heartbeat/mysql-proxy | 741 +++++++++++ heartbeat/nagios | 246 ++++ heartbeat/named | 514 
++++++++ heartbeat/nfsnotify.in | 330 +++++ heartbeat/nfsserver | 1068 ++++++++++++++++ heartbeat/nfsserver-redhat.sh | 177 +++ heartbeat/nginx | 956 +++++++++++++++ heartbeat/nvmet-namespace | 205 ++++ heartbeat/nvmet-port | 238 ++++ heartbeat/nvmet-subsystem | 188 +++ heartbeat/ocf-binaries.in | 75 ++ heartbeat/ocf-directories.in | 22 + heartbeat/ocf-distro | 209 ++++ heartbeat/ocf-rarun | 146 +++ heartbeat/ocf-returncodes | 55 + heartbeat/ocf-shellfuncs.in | 1070 ++++++++++++++++ heartbeat/ocf.py | 486 ++++++++ heartbeat/ocivip | 263 ++++ heartbeat/openstack-cinder-volume | 294 +++++ heartbeat/openstack-common.sh | 173 +++ heartbeat/openstack-floating-ip | 257 ++++ heartbeat/openstack-info.in | 270 ++++ heartbeat/openstack-virtual-ip | 258 ++++ heartbeat/ora-common.sh | 90 ++ heartbeat/oraasm | 183 +++ heartbeat/oracle | 789 ++++++++++++ heartbeat/oralsnr | 293 +++++ heartbeat/ovsmonitor | 469 +++++++ heartbeat/pgagent | 139 +++ heartbeat/pgsql | 2263 ++++++++++++++++++++++++++++++++++ heartbeat/pingd | 297 +++++ heartbeat/podman | 628 ++++++++++ heartbeat/portblock | 666 ++++++++++ heartbeat/postfix | 429 +++++++ heartbeat/pound | 343 ++++++ heartbeat/proftpd | 311 +++++ heartbeat/ra-api-1.dtd | 40 + heartbeat/rabbitmq-cluster.in | 632 ++++++++++ heartbeat/rabbitmq-server-ha | 2444 +++++++++++++++++++++++++++++++++++++ heartbeat/redis.in | 783 ++++++++++++ heartbeat/rkt | 475 +++++++ heartbeat/rsyncd | 280 +++++ heartbeat/rsyslog.in | 264 ++++ heartbeat/sapdb-nosha.sh | 744 +++++++++++ heartbeat/sapdb.sh | 367 ++++++ heartbeat/scsi2reservation | 176 +++ heartbeat/send_ua.c | 133 ++ heartbeat/sfex | 311 +++++ heartbeat/sg_persist.in | 699 +++++++++++ heartbeat/shellfuncs.in | 96 ++ heartbeat/slapd.in | 594 +++++++++ heartbeat/smb-share.in | 494 ++++++++ heartbeat/storage-mon.in | 399 ++++++ heartbeat/sybaseASE.in | 905 ++++++++++++++ heartbeat/symlink | 245 ++++ heartbeat/syslog-ng.in | 467 +++++++ heartbeat/tomcat | 816 +++++++++++++ heartbeat/varnish | 504 
++++++++ heartbeat/vdo-vol | 240 ++++ heartbeat/vmware | 393 ++++++ heartbeat/vsftpd.in | 259 ++++ heartbeat/zabbixserver | 315 +++++ 168 files changed, 76131 insertions(+) create mode 100644 heartbeat/AoEtarget.in create mode 100755 heartbeat/AudibleAlarm create mode 100755 heartbeat/CTDB.in create mode 100755 heartbeat/ClusterMon create mode 100755 heartbeat/Delay create mode 100755 heartbeat/Dummy create mode 100755 heartbeat/EvmsSCC create mode 100755 heartbeat/Evmsd create mode 100755 heartbeat/Filesystem create mode 100755 heartbeat/ICP create mode 100755 heartbeat/IPaddr create mode 100755 heartbeat/IPaddr2 create mode 100755 heartbeat/IPsrcaddr create mode 100644 heartbeat/IPv6addr.c create mode 100644 heartbeat/IPv6addr_utils.c create mode 100755 heartbeat/LVM create mode 100755 heartbeat/LVM-activate create mode 100755 heartbeat/LinuxSCSI create mode 100755 heartbeat/MailTo create mode 100644 heartbeat/Makefile.am create mode 100644 heartbeat/ManageRAID.in create mode 100644 heartbeat/ManageVE.in create mode 100755 heartbeat/NodeUtilization create mode 100755 heartbeat/Pure-FTPd create mode 100644 heartbeat/README create mode 100644 heartbeat/README.galera create mode 100644 heartbeat/README.mariadb.md create mode 100755 heartbeat/Raid1 create mode 100755 heartbeat/Route create mode 100755 heartbeat/SAPDatabase create mode 100755 heartbeat/SAPInstance create mode 100755 heartbeat/SendArp create mode 100755 heartbeat/ServeRAID create mode 100755 heartbeat/SphinxSearchDaemon create mode 100644 heartbeat/Squid.in create mode 100755 heartbeat/Stateful create mode 100644 heartbeat/SysInfo.in create mode 100755 heartbeat/VIPArip create mode 100755 heartbeat/VirtualDomain create mode 100755 heartbeat/WAS create mode 100755 heartbeat/WAS6 create mode 100755 heartbeat/WinPopup create mode 100755 heartbeat/Xen create mode 100755 heartbeat/Xinetd create mode 100755 heartbeat/ZFS create mode 100755 heartbeat/aliyun-vpc-move-ip create mode 100755 heartbeat/anything 
create mode 100755 heartbeat/apache create mode 100644 heartbeat/apache-conf.sh create mode 100755 heartbeat/asterisk create mode 100755 heartbeat/aws-vpc-move-ip create mode 100644 heartbeat/aws-vpc-route53.in create mode 100755 heartbeat/awseip create mode 100755 heartbeat/awsvip create mode 100644 heartbeat/azure-events-az.in create mode 100644 heartbeat/azure-events.in create mode 100755 heartbeat/azure-lb create mode 100644 heartbeat/clvm.in create mode 100644 heartbeat/conntrackd.in create mode 100755 heartbeat/corosync-qnetd create mode 100755 heartbeat/crypt create mode 100755 heartbeat/db2 create mode 100755 heartbeat/dhcpd create mode 100755 heartbeat/dnsupdate.in create mode 100755 heartbeat/docker create mode 100755 heartbeat/docker-compose create mode 100755 heartbeat/dovecot create mode 100755 heartbeat/dummypy.in create mode 100644 heartbeat/eDir88.in create mode 100755 heartbeat/ethmonitor create mode 100755 heartbeat/exportfs create mode 100644 heartbeat/findif.sh create mode 100644 heartbeat/fio.in create mode 100755 heartbeat/galera.in create mode 100755 heartbeat/garbd create mode 100755 heartbeat/gcp-ilb create mode 100644 heartbeat/gcp-pd-move.in create mode 100755 heartbeat/gcp-vpc-move-ip.in create mode 100644 heartbeat/gcp-vpc-move-route.in create mode 100755 heartbeat/gcp-vpc-move-vip.in create mode 100644 heartbeat/http-mon.sh create mode 100644 heartbeat/iSCSILogicalUnit.in create mode 100644 heartbeat/iSCSITarget.in create mode 100755 heartbeat/ids create mode 100755 heartbeat/iface-bridge create mode 100755 heartbeat/iface-macvlan create mode 100755 heartbeat/iface-vlan create mode 100755 heartbeat/ipsec create mode 100755 heartbeat/iscsi create mode 100755 heartbeat/jboss create mode 100644 heartbeat/jira.in create mode 100644 heartbeat/kamailio.in create mode 100644 heartbeat/lvm-clvm.sh create mode 100644 heartbeat/lvm-plain.sh create mode 100644 heartbeat/lvm-tag.sh create mode 100755 heartbeat/lvmlockd create mode 100644 
heartbeat/lxc.in create mode 100644 heartbeat/lxd-info.in create mode 100644 heartbeat/machine-info.in create mode 100644 heartbeat/mariadb.in create mode 100755 heartbeat/mdraid create mode 100644 heartbeat/metadata.rng create mode 100755 heartbeat/minio create mode 100644 heartbeat/mpathpersist.in create mode 100755 heartbeat/mysql create mode 100755 heartbeat/mysql-common.sh create mode 100755 heartbeat/mysql-proxy create mode 100755 heartbeat/nagios create mode 100755 heartbeat/named create mode 100644 heartbeat/nfsnotify.in create mode 100755 heartbeat/nfsserver create mode 100644 heartbeat/nfsserver-redhat.sh create mode 100755 heartbeat/nginx create mode 100755 heartbeat/nvmet-namespace create mode 100755 heartbeat/nvmet-port create mode 100755 heartbeat/nvmet-subsystem create mode 100644 heartbeat/ocf-binaries.in create mode 100644 heartbeat/ocf-directories.in create mode 100644 heartbeat/ocf-distro create mode 100644 heartbeat/ocf-rarun create mode 100644 heartbeat/ocf-returncodes create mode 100644 heartbeat/ocf-shellfuncs.in create mode 100644 heartbeat/ocf.py create mode 100755 heartbeat/ocivip create mode 100755 heartbeat/openstack-cinder-volume create mode 100644 heartbeat/openstack-common.sh create mode 100755 heartbeat/openstack-floating-ip create mode 100755 heartbeat/openstack-info.in create mode 100755 heartbeat/openstack-virtual-ip create mode 100644 heartbeat/ora-common.sh create mode 100755 heartbeat/oraasm create mode 100755 heartbeat/oracle create mode 100755 heartbeat/oralsnr create mode 100755 heartbeat/ovsmonitor create mode 100755 heartbeat/pgagent create mode 100755 heartbeat/pgsql create mode 100755 heartbeat/pingd create mode 100755 heartbeat/podman create mode 100755 heartbeat/portblock create mode 100755 heartbeat/postfix create mode 100755 heartbeat/pound create mode 100755 heartbeat/proftpd create mode 100644 heartbeat/ra-api-1.dtd create mode 100755 heartbeat/rabbitmq-cluster.in create mode 100755 heartbeat/rabbitmq-server-ha 
create mode 100755 heartbeat/redis.in create mode 100755 heartbeat/rkt create mode 100755 heartbeat/rsyncd create mode 100644 heartbeat/rsyslog.in create mode 100644 heartbeat/sapdb-nosha.sh create mode 100755 heartbeat/sapdb.sh create mode 100755 heartbeat/scsi2reservation create mode 100644 heartbeat/send_ua.c create mode 100755 heartbeat/sfex create mode 100644 heartbeat/sg_persist.in create mode 100644 heartbeat/shellfuncs.in create mode 100644 heartbeat/slapd.in create mode 100755 heartbeat/smb-share.in create mode 100644 heartbeat/storage-mon.in create mode 100755 heartbeat/sybaseASE.in create mode 100755 heartbeat/symlink create mode 100644 heartbeat/syslog-ng.in create mode 100755 heartbeat/tomcat create mode 100755 heartbeat/varnish create mode 100755 heartbeat/vdo-vol create mode 100755 heartbeat/vmware create mode 100644 heartbeat/vsftpd.in create mode 100755 heartbeat/zabbixserver (limited to 'heartbeat') diff --git a/heartbeat/AoEtarget.in b/heartbeat/AoEtarget.in new file mode 100644 index 0000000..5a14c1e --- /dev/null +++ b/heartbeat/AoEtarget.in @@ -0,0 +1,245 @@ +#!@BASH_SHELL@ +# +# +# AoEtarget OCF RA. +# Manages an ATA-over-Ethernet (AoE) target utilizing the vblade utility. +# +# (c) 2009-2010 Florian Haas, Dejan Muhamedagic, +# and Linux-HA contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. 
Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +###################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Defaults +OCF_RESKEY_nic_default="eth0" +OCF_RESKEY_pid_default="${HA_RSCTMP}/AoEtarget-${OCF_RESOURCE_INSTANCE}.pid" +OCF_RESKEY_binary_default="/usr/sbin/vblade" + +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} + +####################################################################### + +meta_data() { + cat < + + + 1.0 + +This resource agent manages an ATA-over-Ethernet (AoE) target using vblade. +It exports any block device, or file, as an AoE target using the +specified Ethernet device, shelf, and slot number. + + Manages ATA-over-Ethernet (AoE) target exports + + + +The local block device (or file) to export as an AoE target. + + Device to export + + + + +The local Ethernet interface to use for exporting this AoE target. + + Ethernet interface + + + + +The AoE shelf number to use when exporting this target. + + AoE shelf number + + + + +The AoE slot number to use when exporting this target. + + AoE slot number + + + + +The file to record the daemon pid to. + + Daemon pid file + + + + +Location of the vblade binary. + + vblade binary + + + + + + + + + + + + +EOF +} + +####################################################################### + +AoEtarget_usage() { + cat <&1 & + rc=$? + pid=$! 
+ if [ $rc -ne 0 ]; then + return $OCF_ERR_GENERIC + fi + echo $pid > ${OCF_RESKEY_pid} && return $OCF_SUCCESS + return $OCF_ERR_GENERIC +} + +AoEtarget_stop() { + AoEtarget_monitor + if [ $? -eq $OCF_SUCCESS ]; then + ocf_log info "Unxporting device ${OCF_RESKEY_device} on ${OCF_RESKEY_nic} as shelf ${OCF_RESKEY_shelf}, slot ${OCF_RESKEY_slot}" + pid=$(cat ${OCF_RESKEY_pid}) + kill -TERM $pid + # loop until we're really stopped, wait for the LRM to time us + # out if not + while AoEtarget_monitor; do + sleep 1 + done + fi + # Clean up pid file + rm -f ${OCF_RESKEY_pid} + return $OCF_SUCCESS +} + +AoEtarget_monitor() { + ocf_pidfile_status ${OCF_RESKEY_pid} >/dev/null 2>&1 + rc=$? + if [ $rc -eq 2 ]; then + # no pid file, must assume we're not running + return $OCF_NOT_RUNNING + elif [ $rc -eq 1 ]; then + # stale pid file, assume something went wrong + return $OCF_ERR_GENERIC + fi + return $OCF_SUCCESS +} + +AoEtarget_validate() { + # Is our binary executable? + if [ ! -x ${OCF_RESKEY_binary} ]; then + ocf_log error "${OCF_RESKEY_binary} not found or not executable" + return $OCF_ERR_INSTALLED + fi + + # Do we have all required variables? + for var in device nic shelf slot pid; do + param="OCF_RESKEY_${var}" + if [ -z "${!param}" ]; then + ocf_log error "Missing resource parameter \"$var\"!" + return $OCF_ERR_CONFIGURED + fi + done + + # Is the pid file directory writable? + pid_dir=`dirname "$OCF_RESKEY_pid"` + touch "$pid_dir/$$" + if [ $? != 0 ]; then + ocf_log error "Cannot create pid file in $pid_dir -- check directory permissions" + return $OCF_ERR_INSTALLED + fi + rm "$pid_dir/$$" + + # Does the device we are trying to export exist? + if [ ! 
-e ${OCF_RESKEY_device} ]; then + ocf_log error "${OCF_RESKEY_device} does not exist" + return $OCF_ERR_INSTALLED + fi + return $OCF_SUCCESS +} + +case $1 in + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + usage|help) + AoEtarget_usage + exit $OCF_SUCCESS + ;; +esac + +# Everything except usage and meta-data must pass the validate test +AoEtarget_validate || exit $? + +case $__OCF_ACTION in + start) + AoEtarget_start + ;; + stop) + AoEtarget_stop + ;; + status|monitor) + AoEtarget_monitor + ;; + reload) + ocf_log err "Reloading..." + AoEtarget_start + ;; + validate-all) + AoEtarget_validate + ;; + *) + AoEtarget_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +rc=$? +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc + diff --git a/heartbeat/AudibleAlarm b/heartbeat/AudibleAlarm new file mode 100755 index 0000000..44a3088 --- /dev/null +++ b/heartbeat/AudibleAlarm @@ -0,0 +1,188 @@ +#!/bin/sh +# +# Startup script for the Audible Alarm +# +# author: Kirk Lawson +# Horms +# +# description: sets an audible alarm running by beeping at a set interval +# processname: alarm +# config: /etc/AudibleAlarm/AudibleAlarm.conf - not yet implemented +# +# OCF parameters are as below: +# OCF_RESKEY_nodelist +# +# License: GNU General Public License (GPL) + +####################################################################### +# Source function library. +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### +PIDFILE=${HA_VARRUN}/heartbeat-bell +#For testing +#PIDFILE=/tmp/heartbeat-bell + +# What host are we running on? +us=`uname -n` + +usage() { + echo "Usage: $0 {start|stop|restart|status|monitor|meta-data|validate-all}" + echo " The node list is an optional space delimited" + echo " list of hosts that should never sound the alarm." +} + +meta_data() { + cat < + + +1.0 + + +Resource script for AudibleAlarm. 
It sets an audible alarm running by beeping +at a set interval. + +Emits audible beeps at a configurable interval + + + + +The node list that should never sound the alarm. + +Node list + + + + + + + + + + + + + + +END +} + +audiblealarm_start () { + ocf_log info "$0: Starting" + if [ -f $PIDFILE ]; then + PID=`head -n 1 $PIDFILE` + if [ -n "$PID" ]; then + ocf_log info "$0: Appears to already be running, killing [$PID]" + kill $PID > /dev/null + fi + fi + # Use () to create a subshell to make the redirection be synchronized. + ( while [ 1 ]; do + sleep 1 #Sleep first, incase we bail out + printf "\a" > /dev/console + # Uncomment this line to cause floppy drive light + # to flash (requires fdutils package). + # /usr/bin/floppycontrol --pollstate > /dev/null + # + # To avoid issues when called by lrmd, redirect stdout->stderr. + done & + if echo $! > $PIDFILE; then + : + else + ocf_log info "$0: Could not write to pid file \"$PIDFILE\", bailing" + kill $! + return $OCF_ERR_GENERIC + fi) >&2 + + return $? +} + +audiblealarm_stop () { + ocf_log info "$0: Shutting down" + if [ -f $PIDFILE ]; then + PID=`head -n 1 $PIDFILE` + # ocf_log info "$0: Appears to already be running, killing [$PID]" + # commented by Phost, since the confusion in the log. + + if [ -n "$PID" ]; then + # Donnot remove PIDFILE in case the `kill` fails. + kill $PID > /dev/null && rm -f $PIDFILE + fi + fi + + return $? +} + +audiblealarm_restart () { + audiblealarm_stop + audiblealarm_start + + return $? 
+} + +audiblealarm_status () { + if [ -f $PIDFILE ]; then + PID=`head -n 1 $PIDFILE` + if [ -n "$PID" ]; then + echo running + return $OCF_SUCCESS + fi + fi + + echo stopped + return $OCF_NOT_RUNNING +} + +audiblealarm_validate_all () { + check_binary printf + + echo "Validate OK" + return $OCF_SUCCESS +} +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +case "$1" in + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + start) + for arg in $OCF_RESKEY_nodelist + do + if [ "$us" = "$arg" ]; then + # We should not start because we are on a host + # listed in our argument list. + exit $OCF_SUCCESS + fi + done + audiblealarm_start + ;; + stop) + audiblealarm_stop + ;; + restart) + audiblealarm_restart + ;; + status|monitor) + audiblealarm_status + ;; + validate-all) + audiblealarm_validate_all + ;; + usage) + usage + exit $OCF_SUCCESS + ;; + + *) + usage + exit $OCF_ERR_ARGS + ;; +esac + +exit $? diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in new file mode 100755 index 0000000..b4af66b --- /dev/null +++ b/heartbeat/CTDB.in @@ -0,0 +1,996 @@ +#!@BASH_SHELL@ +# +# OCF Resource Agent for managing CTDB +# +# Copyright (c) 2009-2010 Novell Inc., Tim Serong +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# +# OVERVIEW +# +# When run by itself, CTDB can handle IP failover and includes scripts +# to manage various services (Samba, Winbind, HTTP, etc.). When run as +# a resource in a Pacemaker cluster, this additional functionality +# should not be used; instead one should define separate resources for +# CTDB, Samba, Winbind, IP addresses, etc. +# +# As of 2010-11-17, there is no separate OCF Samba or Winbind RA, so +# it is still possible to configure CTDB so that it manages these +# resources itself. In future, once Samba and Winbind RAs are +# available, this ability will be deprecated and ultimately removed. +# +# This RA intentionally provides no ability to configure CTDB such that +# it manages IP failover, HTTP, NFS, etc. +# +# +# TODO: +# - ctdb_stop doesn't really support multiple independent CTDB instances, +# unless they're running from distinct ctdbd binaries (it uses pkill +# $OCF_RESKEY_ctdbd_binary if "ctdb stop" doesn't work, which it might +# not under heavy load - this will kill all ctdbd instances on the +# system). OTOH, running multiple CTDB instances per node is, well, +# AFAIK, completely crazy. Can't run more than one in a vanilla CTDB +# cluster, with the CTDB init script. So it might be nice to address +# this for complete semantic correctness of the RA, but shouldn't +# actually cause any trouble in real life. +# - As much as possible, get rid of auto config generation +# - Especially smb.conf +# - Verify timeouts are sane +# - Monitor differentiate between error and not running? +# - Do we need to verify globally unique setting? +# - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on +# current nodes) +# - Look at enabling set_ctdb_variables() if necessary. 
+# - Probably possible for sysconfig file to not be restored if
+#   CTDB dies unexpectedly.
+#
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+# Default parameter values:
+
+# Some distros' ctdb packages store the persistent db in /var/lib/ctdb,
+# others in /var/ctdb. Detect which one exists; likewise pick /run or
+# /var/run as the root of the runtime directory (run_prefix) below.
+var_prefix="/var/lib/ctdb"
+if [ ! -d "$var_prefix" ] && [ -d "/var/ctdb" ]; then
+	var_prefix="/var/ctdb"
+fi
+
+run_prefix="/run"	# fall back to /var/run only where /run is absent
+if [ ! -d "$run_prefix" ] && [ -d "/var/run" ]; then
+	run_prefix="/var/run"	# must not touch var_prefix: that is the db dir
+fi
+
+# Parameter defaults
+
+OCF_RESKEY_ctdb_recovery_lock_default=""
+OCF_RESKEY_ctdb_manages_samba_default="no"
+OCF_RESKEY_ctdb_manages_winbind_default="no"
+OCF_RESKEY_ctdb_service_smb_default=""
+OCF_RESKEY_ctdb_service_nmb_default=""
+OCF_RESKEY_ctdb_service_winbind_default=""
+OCF_RESKEY_ctdb_samba_skip_share_check_default="yes"
+OCF_RESKEY_ctdb_monitor_free_memory_default="100"
+OCF_RESKEY_ctdb_start_as_disabled_default="no"
+
+: ${OCF_RESKEY_ctdb_recovery_lock=${OCF_RESKEY_ctdb_recovery_lock_default}}
+: ${OCF_RESKEY_ctdb_manages_samba=${OCF_RESKEY_ctdb_manages_samba_default}}
+: ${OCF_RESKEY_ctdb_manages_winbind=${OCF_RESKEY_ctdb_manages_winbind_default}}
+: ${OCF_RESKEY_ctdb_service_smb=${OCF_RESKEY_ctdb_service_smb_default}}
+: ${OCF_RESKEY_ctdb_service_nmb=${OCF_RESKEY_ctdb_service_nmb_default}}
+: ${OCF_RESKEY_ctdb_service_winbind=${OCF_RESKEY_ctdb_service_winbind_default}}
+: ${OCF_RESKEY_ctdb_samba_skip_share_check=${OCF_RESKEY_ctdb_samba_skip_share_check_default}}
+: ${OCF_RESKEY_ctdb_monitor_free_memory=${OCF_RESKEY_ctdb_monitor_free_memory_default}}
+: ${OCF_RESKEY_ctdb_start_as_disabled=${OCF_RESKEY_ctdb_start_as_disabled_default}}
+
+OCF_RESKEY_ctdb_config_dir_default="/etc/ctdb"
+OCF_RESKEY_ctdb_binary_default="/usr/bin/ctdb" +OCF_RESKEY_ctdbd_binary_default="/usr/sbin/ctdbd" +OCF_RESKEY_ctdb_dbdir_default="${var_prefix}" +OCF_RESKEY_ctdb_logfile_default="/var/log/ctdb/log.ctdb" +OCF_RESKEY_ctdb_rundir_default="${run_prefix}/ctdb" +OCF_RESKEY_ctdb_timeout_default="10" + +: ${OCF_RESKEY_ctdb_config_dir=${OCF_RESKEY_ctdb_config_dir_default}} +: ${OCF_RESKEY_ctdb_binary=${OCF_RESKEY_ctdb_binary_default}} +: ${OCF_RESKEY_ctdbd_binary=${OCF_RESKEY_ctdbd_binary_default}} +: ${OCF_RESKEY_ctdb_dbdir=${OCF_RESKEY_ctdb_dbdir_default}} +: ${OCF_RESKEY_ctdb_logfile=${OCF_RESKEY_ctdb_logfile_default}} +: ${OCF_RESKEY_ctdb_rundir=${OCF_RESKEY_ctdb_rundir_default}} +: ${OCF_RESKEY_ctdb_timeout=${OCF_RESKEY_ctdb_timeout_default}} + +OCF_RESKEY_ctdb_socket_default="${OCF_RESKEY_ctdb_rundir}/ctdbd.socket" +OCF_RESKEY_ctdb_debuglevel_default="2" +OCF_RESKEY_ctdb_max_open_files_default="" + +: ${OCF_RESKEY_ctdb_socket=${OCF_RESKEY_ctdb_socket_default}} +: ${OCF_RESKEY_ctdb_debuglevel=${OCF_RESKEY_ctdb_debuglevel_default}} +: ${OCF_RESKEY_ctdb_max_open_files=${OCF_RESKEY_ctdb_max_open_files_default}} + +OCF_RESKEY_smb_conf_default="/etc/samba/smb.conf" +OCF_RESKEY_smb_private_dir_default="" +OCF_RESKEY_smb_passdb_backend_default="tdbsam" +OCF_RESKEY_smb_idmap_backend_default="tdb2" +OCF_RESKEY_smb_fileid_algorithm_default="" + +: ${OCF_RESKEY_smb_conf=${OCF_RESKEY_smb_conf_default}} +: ${OCF_RESKEY_smb_private_dir=${OCF_RESKEY_smb_private_dir_default}} +: ${OCF_RESKEY_smb_passdb_backend=${OCF_RESKEY_smb_passdb_backend_default}} +: ${OCF_RESKEY_smb_idmap_backend=${OCF_RESKEY_smb_idmap_backend_default}} +: ${OCF_RESKEY_smb_fileid_algorithm=${OCF_RESKEY_smb_fileid_algorithm_default}} + +####################################################################### + +ctdb_version() { + $OCF_RESKEY_ctdb_binary version | awk '{print $NF}' | sed "s/[-\.]\?[[:alpha:]].*//" +} + +meta_data() { + cat < + + +1.0 + + +This resource agent manages CTDB, allowing one to use 
Clustered Samba in a +Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2 or GFS2) on +which the CTDB lock will be stored. Create /etc/ctdb/nodes containing a list +of private IP addresses of each node in the cluster, then configure this RA +as a clone. This agent expects the samba and windbind resources +to be managed outside of CTDB's control as a separate set of resources controlled +by the cluster manager. The optional support for enabling CTDB management of these +daemons will be depreciated. + +For more information see http://linux-ha.org/wiki/CTDB_(resource_agent) + +CTDB Resource Agent + + + + + +The location of a shared lock file or helper binary, common across all nodes. +See CTDB documentation for details. + +CTDB shared lock file + + + + + +Should CTDB manage starting/stopping the Samba service for you? +This will be deprecated in future, in favor of configuring a +separate Samba resource. + +Should CTDB manage Samba? + + + + + +Should CTDB manage starting/stopping the Winbind service for you? +This will be deprecated in future, in favor of configuring a +separate Winbind resource. + +Should CTDB manage Winbind? + + + + + +Name of smb init script. Only necessary if CTDB is managing +Samba directly. Will usually be auto-detected. + +Name of smb init script + + + + + +Name of nmb init script. Only necessary if CTDB is managing +Samba directly. Will usually be auto-detected. + +Name of nmb init script + + + + + +Name of winbind init script. Only necessary if CTDB is managing +Winbind directly. Will usually be auto-detected. + +Name of winbind init script + + + + + +If there are very many shares it may not be feasible to check that all +of them are available during each monitoring interval. In that case +this check can be disabled. + +Skip share check during monitor? + + + + + +If the amount of free memory drops below this value the node will +become unhealthy and ctdb and all managed services will be shutdown. 
+Once this occurs, the administrator needs to find the reason for the +OOM situation, rectify it and restart ctdb with "service ctdb start". +With CTDB 4.4.0 and later this parameter is ignored. + +Minimum amount of free memory (MB) + + + + + +When set to yes, the CTDB node will start in DISABLED mode and not +host any public ip addresses. + +Start CTDB disabled? + + + + + +The directory containing various CTDB configuration files. +The "nodes" and "notify.sh" scripts are expected to be +in this directory. + +CTDB config file directory + + + + + +Full path to the CTDB binary. + +CTDB binary path + + + + + +Full path to the CTDB cluster daemon binary. + +CTDB Daemon binary path + + + + + +Full path to the domain socket that ctdbd will create, used for +local clients to attach and communicate with the ctdb daemon. +With CTDB 4.9.0 and later the socket path is hardcoded at build +time, so this parameter is ignored. + +CTDB socket location (ignored with CTDB 4.9+) + + + + + +The directory to put the local CTDB database files in. +Persistent database files will be put in ctdb_dbdir/persistent. + +CTDB database directory + + + + + +Full path to log file. To log to syslog instead, use the +value "syslog". + +CTDB log file location + + + + + +Full path to ctdb runtime directory, used for storage of socket +lock state. + +CTDB runtime directory location + + + + + +Indicates that ctdb should wait up to TIMEOUT seconds for a response to most commands sent to the CTDB daemon. + +CTDB timeout in seconds + + + + + +What debug level to run at (0-10). Higher means more verbose. + +CTDB debug level + + + + + +Maximum number of open files (for ulimit -n) + +Max open files + + + + + +Path to default samba config file. Only necessary if CTDB +is managing Samba. + +Path to smb.conf + + + + + +The directory for smbd to use for storing such files as +smbpasswd and secrets.tdb. Old versions of CTBD (prior to 1.0.50) +required this to be on shared storage. 
This parameter should not +be set for current versions of CTDB, and only remains in the RA +for backwards compatibility. + +Samba private dir (deprecated) + + + + + +Which backend to use for storing user and possibly group +information. Only necessary if CTDB is managing Samba. + +Samba passdb backend + + + + + +Which backend to use for SID/uid/gid mapping. Only necessary +if CTDB is managing Samba. + +Samba idmap backend + + + + + +Which fileid:algorithm to use with vfs_fileid. The correct +value depends on which clustered filesystem is in use, e.g.: +for OCFS2, this should be set to "fsid". Only necessary if +CTDB is managing Samba. + +Samba VFS fileid algorithm + + + + + + + + + + + + + +END +} + +####################################################################### + +# Figure out path to /etc/sysconfig/ctdb (same logic as +# loadconfig() from /etc/ctdb/functions +if [ -f /etc/sysconfig/ctdb ]; then + CTDB_SYSCONFIG=/etc/sysconfig/ctdb +elif [ -f /etc/default/ctdb ]; then + CTDB_SYSCONFIG=/etc/default/ctdb +elif [ -f "$OCF_RESKEY_ctdb_config_dir/ctdb" ]; then + CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdb +elif [ -f "$OCF_RESKEY_ctdb_config_dir/ctdbd.conf" ]; then + CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdbd.conf +fi + +# Backup paths +CTDB_SYSCONFIG_BACKUP=${CTDB_SYSCONFIG}.ctdb-ra-orig + +invoke_ctdb() { + # CTDB's defaults are: + local timelimit + timelimit=120 + # ...but we override with the timeout for the current op: + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000)) + fi + + local vers=$(ctdb_version) + ocf_version_cmp "$vers" "4.9.0" + + # if version < 4.9.0 specify '--socket' otherwise it's + # a compiled option + if [ "$?" 
-eq "0" ]; then + $OCF_RESKEY_ctdb_binary --socket="$OCF_RESKEY_ctdb_socket" \ + -t ${OCF_RESKEY_ctdb_timeout} -T $timelimit \ + "$@" + else + $OCF_RESKEY_ctdb_binary \ + -t ${OCF_RESKEY_ctdb_timeout} -T $timelimit \ + "$@" + fi +} + +# Enable any event scripts that are explicitly required. +# Any others will ultimately be invoked or not based on how they ship +# with CTDB, but will generally have no effect, because the relevant +# CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb. +enable_event_scripts_chmod() { + local event_dir + event_dir=$OCF_RESKEY_ctdb_config_dir/events.d + + chmod u+x "$event_dir/00.ctdb" # core database health check + + if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then + chmod u+x "$event_dir/10.interface" + else + chmod a-x "$event_dir/10.interface" + fi + if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then + chmod u+x "$event_dir/11.routing" + else + chmod a-x "$event_dir/11.routing" + fi + if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || \ + ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then + chmod u+x "$event_dir/50.samba" + else + chmod a-x "$event_dir/50.samba" + fi +} + +enable_event_scripts_symlink() { + # event scripts are symlinked once enabled, with the link source in... 
+ mkdir -p "$OCF_RESKEY_ctdb_config_dir/events/legacy" 2>/dev/null + + invoke_ctdb event script enable legacy 00.ctdb + + if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then + invoke_ctdb event script enable legacy 10.interface + else + invoke_ctdb event script disable legacy 10.interface + fi + if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then + invoke_ctdb event script enable legacy 11.routing + else + invoke_ctdb event script disable legacy 11.routing + fi + + if ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then + invoke_ctdb event script enable legacy 49.winbind + else + invoke_ctdb event script disable legacy 49.winbind + fi + + if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba"; then + invoke_ctdb event script enable legacy 50.samba + else + invoke_ctdb event script disable legacy 50.samba + fi +} + +# This function has no effect (currently no way to set CTDB_SET_*) +# but remains here in case we need it in future. +set_ctdb_variables() { + rv=$OCF_SUCCESS + set | grep ^CTDB_SET_ | cut -d_ -f3- | + while read v; do + varname=$(echo "$v" | cut -d= -f1) + value=$(echo "$v" | cut -d= -f2) + invoke_ctdb setvar "$varname" "$value" || rv=$OCF_ERR_GENERIC + done || rv=$OCF_ERR_GENERIC + return $rv +} + + +# Add necessary settings to /etc/samba/smb.conf. In a perfect world, +# we'd be able to generate a new, temporary, smb.conf file somewhere, +# something like: +# include = /etc/samba/smb.conf +# [global] +# clustering = yes +# # ...etc... +# Unfortunately, we can't do this, because there's no way to tell the +# smb init script where the temporary config is, so we just edit +# the default config file. +init_smb_conf() { + # Don't screw around with the config if CTDB isn't managing Samba! 
+ ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 + + # replace these things in smb.conf + local repl + repl='# CTDB-RA:|passdb backend|clustering|idmap backend|idmap config[[:space:]]*\*[[:space:]]*:[[:space:]]*backend|private dir|ctdbd socket' + + local private_dir + [ -n "$OCF_RESKEY_smb_private_dir" ] && private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n" + + local vfs_fileid + local do_vfs + do_vfs=0 + if [ -n "$OCF_RESKEY_smb_fileid_algorithm" ]; then + repl="${repl}|fileid:algorithm|fileid:mapping" + vfs_fileid="\tfileid:algorithm = $OCF_RESKEY_smb_fileid_algorithm\n" + if sed -n '/^[[:space:]]*\[global\]/,/^[[:space:]]*\[/p' $OCF_RESKEY_smb_conf | \ + grep -Eq '^[[:space:]]*vfs objects'; then + # vfs objects already specified, will append fileid to existing line + do_vfs=1 + else + vfs_fileid="$vfs_fileid\tvfs objects = fileid\n" + fi + fi + # Preserve permissions of smb.conf + local idmap_config + if grep -Eqs '^[[:space:]]*idmap backend[[:space:]]*=' $OCF_RESKEY_smb_conf; then + idmap_config=old + else + idmap_config=new + fi + cp -a "$OCF_RESKEY_smb_conf" "$OCF_RESKEY_smb_conf.$$" + awk ' + /^[[:space:]]*\[/ { global = 0 } + /^[[:space:]]*\[global\]/ { global = 1 } + { + if(global) { + if ('$do_vfs' && $0 ~ /^[[:space:]]vfs objects/ && $0 !~ /fileid/) { + print $0" fileid" + } else if ($0 !~ /^[[:space:]]*('"$repl"')/) { + print + } + } else { + print + } + }' "$OCF_RESKEY_smb_conf" | sed "/^[[:space:]]*\[global\]/ a\\ +\t# CTDB-RA: Begin auto-generated section (do not change below)\n\ +\tpassdb backend = $OCF_RESKEY_smb_passdb_backend\n\ +\tclustering = yes\n\ +\tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir$vfs_fileid\ +\t# CTDB-RA: End auto-generated section (do not change above)" > "$OCF_RESKEY_smb_conf.$$" + if [ "$idmap_config" = "old" ]; then + sed -i "/^[[:space:]]*clustering = yes/ a\\ +\tidmap backend = $OCF_RESKEY_smb_idmap_backend" $OCF_RESKEY_smb_conf.$$ + else + sed -i "/^[[:space:]]*clustering = yes/ a\\ +\tidmap 
config * : backend = $OCF_RESKEY_smb_idmap_backend" $OCF_RESKEY_smb_conf.$$ + fi + dd conv=notrunc,fsync of="$OCF_RESKEY_smb_conf.$$" if=/dev/null >/dev/null 2>&1 + mv "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf" +} + + +# Get rid of that section we added +cleanup_smb_conf() { + ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 + + # preserve permissions of smb.conf + cp -a "$OCF_RESKEY_smb_conf" "$OCF_RESKEY_smb_conf.$$" + sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' "$OCF_RESKEY_smb_conf" > "$OCF_RESKEY_smb_conf.$$" + mv "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf" +} + +append_conf() { + local file_path="$1" + shift + [ -n "$2" ] && echo "$1=$2" >> "$file_path" +} + +generate_ctdb_config() { + local ctdb_config="$OCF_RESKEY_ctdb_config_dir/ctdb.conf" + + # Backup existing config if we're not already using an auto-generated one + grep -qa '# CTDB-RA: Auto-generated' $ctdb_config || cp -p $ctdb_config ${ctdb_config}.ctdb-ra-orig + if [ $? -ne 0 ]; then + ocf_log warn "Unable to backup $ctdb_config to ${ctdb_config}.ctdb-ra-orig" + fi + + local log_option="file:$OCF_RESKEY_ctdb_logfile" + if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then + log_option="syslog" + fi + + local start_as_disabled="false" + ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" && start_as_disabled="true" + + local dbdir_volatile="$OCF_RESKEY_ctdb_dbdir/volatile" + [ -d "$dbdir_volatile" ] || mkdir -p "$dbdir_volatile" 2>/dev/null + local dbdir_persistent="$OCF_RESKEY_ctdb_dbdir/persistent" + [ -d "$dbdir_persistent" ] || mkdir -p "$dbdir_persistent" 2>/dev/null + local dbdir_state="$OCF_RESKEY_ctdb_dbdir/state" + [ -d "$dbdir_state" ] || mkdir -p "$dbdir_state" 2>/dev/null + +cat >$ctdb_config <$script_options <$CTDB_SYSCONFIG </dev/null + + # set nofile ulimit for ctdbd process + if [ -n "$OCF_RESKEY_ctdb_max_open_files" ]; then + ulimit -n "$OCF_RESKEY_ctdb_max_open_files" + fi + + # Start her up + invoke_ctdbd "$version" + + if [ $? 
-ne 0 ]; then + # cleanup smb.conf + cleanup_smb_conf + + ocf_exit_reason "Failed to execute $OCF_RESKEY_ctdbd_binary." + return $OCF_ERR_GENERIC + else + # Wait a bit for CTDB to stabilize + # (until start times out if necessary) + while true; do + # Initial sleep is intentional (ctdb init script + # has sleep after ctdbd start, but before invoking + # ctdb to talk to it) + sleep 1 + status=$(invoke_ctdb status 2>/dev/null) + if [ $? -ne 0 ]; then + # CTDB will be running, kill it before returning + ctdb_stop + ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary status" + return $OCF_ERR_GENERIC + fi + if ! echo "$status" | grep -qs 'UNHEALTHY (THIS'; then + # Status does not say this node is unhealthy, + # so we're good to go. Do a bit of final + # setup and (hopefully) return success. + set_ctdb_variables + return $? + fi + done + fi + + # ctdbd will (or can) actually still be running at this point, so kill it + ctdb_stop + + ocf_exit_reason "Timeout waiting for CTDB to stabilize" + return $OCF_ERR_GENERIC +} + + +ctdb_stop() { + # Do nothing if already stopped + pkill -0 -f "$OCF_RESKEY_ctdbd_binary" || return $OCF_SUCCESS + + # Tell it to die nicely + invoke_ctdb shutdown >/dev/null 2>&1 + rv=$? + + # No more Mr. Nice Guy + count=0 + while pkill -0 -f "$OCF_RESKEY_ctdbd_binary" ; do + sleep 1 + count=$((count + 1)) + [ $count -gt 10 ] && { + ocf_log info "killing ctdbd " + pkill -9 -f "$OCF_RESKEY_ctdbd_binary" + pkill -9 -f "${OCF_RESKEY_ctdb_config_dir}/events" + } + done + + # Cleanup smb.conf + cleanup_smb_conf + + # It was a clean shutdown, return success + [ $rv -eq $OCF_SUCCESS ] && return $OCF_SUCCESS + + # Unclean shutdown, return success if there's no ctdbds left (we + # killed them forcibly, but at least they're good and dead). + pkill -0 -f "$OCF_RESKEY_ctdbd_binary" || return $OCF_SUCCESS + + # Problem: ctdb shutdown didn't work and neither did some vigorous + # kill -9ing. Only thing to do is report failure. 
+ return $OCF_ERR_GENERIC +} + + +ctdb_monitor() { + local status + # "ctdb status" exits non-zero if CTDB isn't running. + # It can also exit non-zero if there's a timeout (ctdbd blocked, + # stalled, massive load, or otherwise wedged). If it's actually + # not running, STDERR will say "Errno:Connection refused(111)", + # whereas if it's wedged, it'll say various other unpleasant things. + status=$(invoke_ctdb status 2>&1) + if [ $? -ne 0 ]; then + if echo "$status" | grep -qs 'Connection refused'; then + return $OCF_NOT_RUNNING + elif echo "$status" | grep -qs 'No such file or directory'; then + return $OCF_NOT_RUNNING + elif echo $status | grep -qs 'connect() failed'; then + return $OCF_NOT_RUNNING + else + ocf_exit_reason "CTDB status call failed: $status" + return $OCF_ERR_GENERIC + fi + fi + if echo "$status" | grep -Eqs '(OK|DISABLED) \(THIS'; then + return $OCF_SUCCESS + fi + + ocf_exit_reason "CTDB status is bad: $status" + return $OCF_ERR_GENERIC +} + + +ctdb_validate() { + # Required binaries + for binary in pkill; do + check_binary $binary + done + + if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then + ocf_exit_reason "Samba config file '$OCF_RESKEY_smb_conf' does not exist." + return $OCF_ERR_INSTALLED + fi + + if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then + ocf_log info "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!" + fi + + if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then + ocf_exit_reason "$OCF_RESKEY_ctdb_config_dir/nodes does not exist." + return $OCF_ERR_ARGS + fi + + if [ -z "$OCF_RESKEY_ctdb_recovery_lock" ]; then + ocf_exit_reason "ctdb_recovery_lock not specified." + return $OCF_ERR_CONFIGURED + fi + + if [ "${OCF_RESKEY_ctdb_recovery_lock:0:1}" == '!' ]; then + # '!' 
prefix means recovery lock is handled via a helper binary + binary="${OCF_RESKEY_ctdb_recovery_lock:1}" + binary="${binary%% *}" # trim any parameters + if [ -z "$binary" ]; then + ocf_exit_reason "ctdb_recovery_lock invalid helper" + return $OCF_ERR_CONFIGURED + fi + check_binary "${binary}" + else + lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") + touch "$lock_dir/$$" 2>/dev/null + if [ $? != 0 ]; then + ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." + return $OCF_ERR_ARGS + fi + rm "$lock_dir/$$" + fi + + return $OCF_SUCCESS +} + + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +start) ctdb_start;; +stop) ctdb_stop;; +monitor) ctdb_monitor;; +validate-all) ctdb_validate;; +usage|help) ctdb_usage + exit $OCF_SUCCESS + ;; +*) ctdb_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc diff --git a/heartbeat/ClusterMon b/heartbeat/ClusterMon new file mode 100755 index 0000000..161e309 --- /dev/null +++ b/heartbeat/ClusterMon @@ -0,0 +1,271 @@ +#!/bin/sh +# +# +# ClusterMon OCF RA. +# Starts crm_mon in background which logs cluster status as +# html to the specified file. +# +# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Bree +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. 
Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# OCF instance parameters: +# OCF_RESKEY_user +# OCF_RESKEY_pidfile +# OCF_RESKEY_update +# OCF_RESKEY_extra_options +# OCF_RESKEY_htmlfile + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_user_default="root" +OCF_RESKEY_update_default="15000" +OCF_RESKEY_extra_options_default="" +OCF_RESKEY_pidfile_default="${HA_RSCTMP}/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid" +OCF_RESKEY_htmlfile_default="${HA_RSCTMP}/ClusterMon_${OCF_RESOURCE_INSTANCE}.html" + +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_update=${OCF_RESKEY_update_default}} +: ${OCF_RESKEY_extra_options=${OCF_RESKEY_extra_options_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_htmlfile=${OCF_RESKEY_htmlfile_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This is a ClusterMon Resource Agent. +It outputs current cluster status to the html. + +Runs crm_mon in the background, recording the cluster status to an HTML file + + + + + +The user we want to run crm_mon as + +The user we want to run crm_mon as + + + + + +How frequently should we update the cluster status + +Update interval + + + + + +Additional options to pass to crm_mon. Eg. -n -r + +Extra options + + + + + +PID file location to ensure only one instance is running + +PID file + + + + + +Location to write HTML output to. 
+ +HTML output + + + + + + + + + + + + +END +} + +####################################################################### + +ClusterMon_usage() { + cat </dev/null + if [ $? -eq 0 ]; then + : Yes, user exists. We can further check his permission on crm_mon if necessary + else + ocf_log err "The user $OCF_RESKEY_user does not exist!" + exit $OCF_ERR_ARGS + fi + fi + +# Pidfile better be an absolute path + case $OCF_RESKEY_pidfile in + /*) ;; + *) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" ;; + esac + +# Check the update interval + if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then + : + else + ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!" + exit $OCF_ERR_ARGS + fi + + if CheckOptions $OCF_RESKEY_extra_options; then + : + else + ocf_log err "Invalid options $OCF_RESKEY_extra_options!" + exit $OCF_ERR_ARGS + fi + +# Htmlfile better be an absolute path + case $OCF_RESKEY_htmlfile in + /*) ;; + *) ocf_log warn "You should have htmlfile($OCF_RESKEY_htmlfile) of absolute path!" ;; + esac + + + echo "Validate OK" + return $OCF_SUCCESS +} + +if [ $# -ne 1 ]; then + ClusterMon_usage + exit $OCF_ERR_ARGS +fi + +OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000` + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +start) ClusterMon_start + ;; +stop) ClusterMon_stop + ;; +monitor) ClusterMon_monitor + ;; +validate-all) ClusterMon_validate + ;; +usage|help) ClusterMon_usage + exit $OCF_SUCCESS + ;; +*) ClusterMon_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $? diff --git a/heartbeat/Delay b/heartbeat/Delay new file mode 100755 index 0000000..5aa8f46 --- /dev/null +++ b/heartbeat/Delay @@ -0,0 +1,227 @@ +#!/bin/sh +# +# +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# +# This script is a test resource for introducing delay. 
+# +# usage: $0 {start|stop|status|monitor|meta-data|validate-all} +# +# OCF parameters are as below: +# OCF_RESKEY_startdelay +# OCF_RESKEY_stopdelay +# OCF_RESKEY_mondelay +# +# +# OCF_RESKEY_startdelay defaults to 20 (seconds) +# OCF_RESKEY_stopdelay defaults to 30 (seconds) +# OCF_RESKEY_mondelay defaults to 30 (seconds) +# +# +# This is really a test resource script. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_startdelay_default="20" +OCF_RESKEY_stopdelay_default="30" +OCF_RESKEY_mondelay_default="30" + +: ${OCF_RESKEY_startdelay=${OCF_RESKEY_startdelay_default}} +: ${OCF_RESKEY_stopdelay=${OCF_RESKEY_stopdelay_default}} +: ${OCF_RESKEY_mondelay=${OCF_RESKEY_mondelay_default}} + +####################################################################### + +usage() { + cat <<-! + usage: $0 {start|stop|status|monitor|meta-data|validate-all} + ! +} + +meta_data() { + cat < + + +1.0 + + +This script is a test resource for introducing delay. + +Waits for a defined timespan + + + + + +How long in seconds to delay on start operation. + +Start delay + + + + + +How long in seconds to delay on stop operation. + +Stop delay + + + + + +How long in seconds to delay on monitor operation. + +Monitor delay + + + + + + + + + + + + + +END +} + +Delay_stat() { + ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor +} + +Delay_Status() { + if + Delay_stat + then + ocf_log info "Delay is running OK" + return $OCF_SUCCESS + else + ocf_log info "Delay is stopped" + return $OCF_NOT_RUNNING + fi +} + +Delay_Monitor() { + Delay_Validate_All -q + sleep $OCF_RESKEY_mondelay + Delay_Status +} + +Delay_Start() { + if + Delay_stat + then + ocf_log info "Delay already running." + return $OCF_SUCCESS + else + Delay_Validate_All -q + ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start + rc=$? 
+ sleep $OCF_RESKEY_startdelay + if + [ $rc -ne 0 ] + then + return $OCF_ERR_PERM + fi + return $OCF_SUCCESS + fi +} + +Delay_Stop() { + if + Delay_stat + then + Delay_Validate_All -q + ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop + rc=$? + sleep $OCF_RESKEY_stopdelay + if + [ $rc -ne 0 ] + then + return $OCF_ERR_PERM + fi + return $OCF_SUCCESS + else + ocf_log info "Delay already stopped." + return $OCF_SUCCESS + fi +} + +# Check if all the arguments are valid numbers, a string is considered valid if: +# 1. It does not contain any character but digits and period "."; +# 2. The period "." does not occur more than once + +Are_Valid_Numbers() { + for i in "$@"; do + echo $i |grep -v "[^0-9.]" |grep -q -v "[.].*[.]" + if test $? -ne 0; then + return $OCF_ERR_ARGS + fi + done + return $OCF_SUCCESS +} + +Delay_Validate_All() { +# Be quiet when the -q option is specified _and_ validation succeeded + getopts "q" option + + if test $option = "q"; then + quiet=yes + else + quiet=no + fi + shift $(($OPTIND -1)) + + if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \ + $OCF_RESKEY_mondelay; then + if test $quiet = "no"; then + echo "Validate OK" + fi +# _Return_ on validation success + return $OCF_SUCCESS + else + ocf_exit_reason "Some of the instance parameters are invalid" +# _Exit_ on validation failure + exit $OCF_ERR_ARGS + fi +} + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +case $1 in + meta-data) meta_data + exit $OCF_SUCCESS + ;; + start) Delay_Start + ;; + stop) Delay_Stop + ;; + monitor) Delay_Monitor + ;; + status) Delay_Status + ;; + validate-all) Delay_Validate_All + ;; + usage) usage + exit $OCF_SUCCESS + ;; + *) usage + exit $OCF_ERR_ARGS + ;; +esac +exit $? diff --git a/heartbeat/Dummy b/heartbeat/Dummy new file mode 100755 index 0000000..81a675d --- /dev/null +++ b/heartbeat/Dummy @@ -0,0 +1,186 @@ +#!/bin/sh +# +# +# Dummy OCF RA. Does nothing except track its own state. 
+# Use it only as a testing tool or example for how to write +# a resource agent. +# +# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Bree +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_state_default="${HA_RSCTMP}/Dummy-${OCF_RESOURCE_INSTANCE}.state" +OCF_RESKEY_fake_default="dummy" + +: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}} +: ${OCF_RESKEY_fake=${OCF_RESKEY_fake_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This is a Dummy Resource Agent. It does absolutely nothing except +keep track of whether its running or not. +Its purpose in life is for testing and to serve as a template for RA writers. + +NB: Please pay attention to the timeouts specified in the actions +section below. 
They should be meaningful for the kind of resource +the agent manages. They should be the minimum advised timeouts, +but they shouldn't/cannot cover _all_ possible resource +instances. So, try to be neither overly generous nor too stingy, +but moderate. The minimum timeouts should never be below 10 seconds. + +Example stateless resource agent + + + + +Location to store the resource state in. + +State file + + + + + +Fake attribute that can be changed to cause a reload + +Fake attribute that can be changed to cause a reload + + + + + + + + + + + + + + + + +END +} + +####################################################################### + +dummy_usage() { + cat < + + +1.0 + + +Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- +Resource script for EVMS shared cluster container. It runs evms_activate on one node in the cluster. + +Manages EVMS Shared Cluster Containers (SCCs) (deprecated) + + + + +If set to true, suppresses the deprecation warning for this agent. + +Suppress deprecation warning + + + + + + + + + + + + + +END +} + +EvmsSCC_status() +{ + # At the moment we don't support monitoring EVMS activations. We just return "not running" to cope with the pre-start monitor call. + return $OCF_NOT_RUNNING +} + +EvmsSCC_notify() +{ + local n_type="$OCF_RESKEY_CRM_meta_notify_type" + local n_op="$OCF_RESKEY_CRM_meta_notify_operation" + local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" + local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" + local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" + + case "$n_type" in + pre) + case "$n_op" in + start) ocf_log debug "EvmsSCC: Notify: Starting node(s): $n_start." 
+ EvmsSCC_start_notify_common + ;; + esac + ;; + esac + + + return $OCF_SUCCESS +} + +EvmsSCC_start() +{ + local n_type="$OCF_RESKEY_CRM_meta_notify_type" + local n_op="$OCF_RESKEY_CRM_meta_notify_operation" + local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" + local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" + local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" + + ocf_log debug "EvmsSCC: Start: starting node(s): $n_start." + + EvmsSCC_start_notify_common + + return $OCF_SUCCESS +} + +EvmsSCC_stop() +{ + return $OCF_SUCCESS +} + +EvmsSCC_start_notify_common() +{ + local n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)} + ocf_log debug "EvmsSCC: Start_Notify: I am node $n_myself." + + n_active="$n_active $n_start" + case " $n_active " in + *" $n_myself "*) ;; + *) ocf_log err "EvmsSCC: $n_myself (local) not on active list!" + return $OCF_ERR_GENERIC + ;; + esac + + #pick the first node from the starting list + #when the cluster boots this will be one of the many booting nodes + #when a node later joins the cluster, this will be the joining node + local n_first=$(echo $n_start | cut -d ' ' -f 1) + ocf_log debug "EvmsSCC: Start_Notify: First node in starting list is $n_first." + + if [ "$n_myself" = "$n_first" ] ; then + ocf_log debug "EvmsSCC: Start_Notify: I am running ${EVMSACTIVATE}." + while true ; do + if ! ${EVMSACTIVATE} -q 2> /dev/null ; then + SLEEP_TIME=$(($(ocf_maybe_random) % 40)) + ocf_log info "EvmsSCC: Evms call failed - sleeping for $SLEEP_TIME seconds and then trying again." + sleep $SLEEP_TIME + else + break + fi + done + + fi + + return $OCF_SUCCESS +} + +# Check the arguments passed to this script +if + [ $# -ne 1 ] +then + usage + exit $OCF_ERR_ARGS +fi + +OP=$1 + +case $OP in + meta-data) meta_data + exit $OCF_SUCCESS + ;; + usage) usage + exit $OCF_SUCCESS + ;; +esac + +# Be obnoxious, log deprecation warning on every invocation (unless +# suppressed by resource configuration). 
+ocf_deprecated + +check_binary $CUT +check_binary $EVMSACTIVATE + +case $OP in + start) EvmsSCC_start + ;; + notify) EvmsSCC_notify + ;; + stop) EvmsSCC_stop + ;; + status|monitor) EvmsSCC_status + ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +exit $? diff --git a/heartbeat/Evmsd b/heartbeat/Evmsd new file mode 100755 index 0000000..6e30eae --- /dev/null +++ b/heartbeat/Evmsd @@ -0,0 +1,161 @@ +#!/bin/sh +# +# Evmsd OCF RA. +# +# Copyright (c) 2004 SUSE LINUX AG, Jo De Baer +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_ignore_deprecation_default="false" + +: ${OCF_RESKEY_ignore_deprecation=${OCF_RESKEY_ignore_deprecation_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- +This is a Evmsd Resource Agent. + +Controls clustered EVMS volume management +(deprecated) + + + + +If set to true, suppresses the deprecation warning for this agent. + +Suppress deprecation warning + + + + + + + + + + + +END +} + +####################################################################### + +evmsd_usage() { + cat </dev/null + + case "$?" in + # RHEL >= 9 + 1|2) + OCF_RESKEY_force_unmount_default="safe";; + # RHEL < 9 and fallback if ocf_version_cmp() fails + *) + OCF_RESKEY_fast_stop_default="yes";; + esac +fi + + +: ${OCF_RESKEY_device=${OCF_RESKEY_device_default}} +: ${OCF_RESKEY_directory=${OCF_RESKEY_directory_default}} +: ${OCF_RESKEY_fstype=${OCF_RESKEY_fstype_default}} +: ${OCF_RESKEY_options=${OCF_RESKEY_options_default}} +: ${OCF_RESKEY_statusfile_prefix=${OCF_RESKEY_statusfile_prefix_default}} +: ${OCF_RESKEY_run_fsck=${OCF_RESKEY_run_fsck_default}} +if [ -z "${OCF_RESKEY_fast_stop}" ]; then + case "$OCF_RESKEY_fstype" in + gfs2) + OCF_RESKEY_fast_stop="no";; + *) + OCF_RESKEY_fast_stop=${OCF_RESKEY_fast_stop_default};; + esac +fi +: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}} +: ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}} +: ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}} +: ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}} +: ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}} + +# Variables used by multiple methods +HOSTOS=$(uname) +TAB=' ' + +# The status file is going to an extra directory, by default +# 
+prefix=${OCF_RESKEY_statusfile_prefix} +: ${prefix:=$DFLT_STATUSDIR} +suffix="${OCF_RESOURCE_INSTANCE}" +[ "$OCF_RESKEY_CRM_meta_clone" ] && + suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone" +suffix="${suffix}_$(uname -n)" +STATUSFILE="${OCF_RESKEY_directory}/$prefix$suffix" + +####################################################################### + +usage() { + cat <<-EOT + usage: $0 {start|stop|status|monitor|validate-all|meta-data} + EOT +} + +meta_data() { + cat < + + +1.0 + + +Resource script for Filesystem. It manages a Filesystem on a +shared storage medium. + +The standard monitor operation of depth 0 (also known as probe) +checks if the filesystem is mounted. If you want deeper tests, +set OCF_CHECK_LEVEL to one of the following values: + +10: read first 16 blocks of the device (raw read) + +This doesn't exercise the filesystem at all, but the device on +which the filesystem lives. This is noop for non-block devices +such as NFS, SMBFS, or bind mounts. + +20: test if a status file can be written and read + +The status file must be writable by root. This is not always the +case with an NFS mount, as NFS exports usually have the +"root_squash" option set. In such a setup, you must either use +read-only monitoring (depth=10), export with "no_root_squash" on +your NFS server, or grant world write permissions on the +directory where the status file is to be placed. + +Manages filesystem mounts + + + + +The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. + +NOTE: On Linux /dev/disk/by-{uuid,label}/ are preferred to -U/-L. + +block device + + + + + +The mount point for the filesystem. + +mount point + + + + + +The type of filesystem to be mounted. + +filesystem type + + + + + +Any extra options to be given as -o options to mount. + +For bind mounts, add "bind" here and set fstype to "none". +We will do the right thing for options such as "bind,ro". 
+ +options + + + + + +The prefix to be used for a status file for resource monitoring +with depth 20. If you don't specify this parameter, all status +files will be created in a separate directory. + +status file prefix + + + + + +Specify how to decide whether to run fsck or not. + +"auto" : decide to run fsck depending on the fstype(default) +"force" : always run fsck regardless of the fstype +"no" : do not run fsck ever. + +run_fsck + + + + + +Normally, we expect no users of the filesystem and the stop +operation to finish quickly. If you cannot control the filesystem +users easily and want to prevent the stop action from failing, +then set this parameter to "no" and add an appropriate timeout +for the stop operation. + +This defaults to "no" for GFS2 filesystems. + +fast stop + + + + + +The use of a clone setup for local filesystems is forbidden +by default. For special setups like glusterfs, cloning a mount +of a local device with a filesystem like ext4 or xfs independently +on several nodes is a valid use case. + +Only set this to "true" if you know what you are doing! + +allow running as a clone, regardless of filesystem type + + + + + +This option allows specifying how to handle processes that are +currently accessing the mount directory. + +"true" : Kill processes accessing mount point +"safe" : Kill processes accessing mount point using methods that + avoid functions that could potentially block during process + detection +"false" : Do not kill any processes. + +The 'safe' option uses shell logic to walk the /procs/ directory +for pids using the mount point while the default option uses the +fuser cli tool. fuser is known to perform operations that can potentially +block if unresponsive nfs mounts are in use on the system. + +Kill processes before unmount + + + + + +Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action. 
+ +Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action + + + + + +Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action. + +Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action + + + + + +How many seconds to wait after sending term/kill signals to processes in stop-action. + +How many seconds to wait after sending term/kill signals to processes in stop-action + + + + + + + + + + + + + +END +} + +# +# Make sure the kernel does the right thing with the FS buffers +# This function should be called after unmounting and before mounting +# It may not be necessary in 2.4 and later kernels, but it shouldn't hurt +# anything either... +# +# It's really a bug that you have to do this at all... +# +flushbufs() { + if have_binary $BLOCKDEV ; then + if [ "$blockdevice" = "yes" ] ; then + $BLOCKDEV --flushbufs $1 + return $? + fi + fi + return 0 +} + +# Take advantage of /etc/mtab if present, use portable mount command +# otherwise. Normalize format to "dev mountpoint fstype". +is_bind_mount() { + echo "$options" | grep -w bind >/dev/null 2>&1 +} + +list_mounts() { + local inpf="" + local mount_list="" + local check_list="x" + + if [ -e "/proc/mounts" ] && ! is_bind_mount; then + inpf=/proc/mounts + elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then + inpf=/etc/mtab + fi + + # Make sure that the mount list has not been changed while reading. + while [ "$mount_list" != "$check_list" ]; do + check_list="$mount_list" + if [ "$inpf" ]; then + # ... + # Spaces in device or mountpoint are octal \040 in $inpf + # Convert literal spaces (field separators) to tabs + mount_list=$(cut -d' ' -f1,2,3 < $inpf | tr ' ' "$TAB") + else + # on type ... 
+ # Use tabs as field separators + match_string='\(.*\) on \(.*\) type \([^[:space:]]\+\) .*' + replace_string="\\1${TAB}\\2${TAB}\\3" + mount_list=$($MOUNT | sed "s/$match_string/$replace_string/g") + fi + done + + # Convert octal \040 to space characters + printf "$mount_list" +} + +determine_blockdevice() { + if [ $blockdevice = "yes" ]; then + return + fi + + # Get the current real device name, if possible. + # (specified devname could be -L or -U...) + case "$FSTYPE" in + nfs4|nfs|efs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none|lustre) + : ;; + *) + match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}" + DEVICE=$(list_mounts | grep "$match_string" | cut -d"$TAB" -f1) + if [ -b "$DEVICE" ]; then + blockdevice=yes + fi + ;; + esac +} + +# Lists all filesystems potentially mounted under a given path, +# excluding the path itself. +list_submounts() { + list_mounts | grep "${TAB}${1}/" | cut -d"$TAB" -f2 | sort -r +} + +# Lists all bind mounts of the file system mounted on $1, excluding $1 itself, +# deepest path first. The device is matched as the whole first field of the mount list, so /dev/sdb1 never selects mounts of /dev/sdb10. +list_bindmounts() { + if is_bind_mount; then + # skip bind mount + # we should not umount the original file system via a bind mount + return + fi + + match_string="${TAB}${1}${TAB}" + if list_mounts | grep "$match_string" >/dev/null 2>&1; then + mount_disk=$(list_mounts | grep "$match_string" | cut -d"$TAB" -f1) + else + return + fi + + if [ -b "$mount_disk" ]; then + list_mounts | grep "^${mount_disk}${TAB}" | grep -v "$match_string" | cut -d"$TAB" -f2 | sort -r + fi +} + +# kernels < 2.6.26 can't handle bind remounts +bind_kernel_check() { + echo "$options" | grep -w ro >/dev/null 2>&1 || + return + uname -r | awk -F. ' + $1==2 && $2==6 { + sub("[^0-9].*","",$3); + if ($3<26) + exit(1); + }' + [ $?
-ne 0 ] && + ocf_log warn "kernel $(uname -r) cannot handle read only bind mounts" +} + +bind_root_mount_check() { + if [ "$(df -P "$1" | awk 'END{print $6}')" = "/" ]; then + return 1 + else + return 0 + fi +} + +bind_mount() { + if is_bind_mount && [ "$options" != "-o bind" ] + then + bind_kernel_check + bind_opts=$(echo "$options" | sed 's/bind/remount/') + $MOUNT $bind_opts "$MOUNTPOINT" + else + true # make sure to return OK + fi +} + +is_option() { + echo "$OCF_RESKEY_options" | grep -w "$1" >/dev/null 2>&1 +} + +is_fsck_needed() { + case $OCF_RESKEY_run_fsck in + force) true;; + no) false;; + ""|auto) + case "$FSTYPE" in + ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|efs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs) + false;; + *) + true;; + esac;; + *) + ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" + OCF_RESKEY_run_fsck="auto" + is_fsck_needed;; + esac +} + +fstype_supported() +{ + local support="$FSTYPE" + local rc + + if [ "X${HOSTOS}" = "XOpenBSD" ];then + # skip checking /proc/filesystems for obsd + return $OCF_SUCCESS + fi + + if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then + : No FSTYPE specified, rely on the system has the right file-system support already + return $OCF_SUCCESS + fi + + # support fuse-filesystems (e.g. GlusterFS) and Amazon Elastic File + # System (EFS) + case "$FSTYPE" in + fuse.*|glusterfs|rozofs) support="fuse";; + efs) check_binary "mount.efs"; support="nfs4";; + esac + + if [ "$support" != "$FSTYPE" ]; then + ocf_log info "Checking support for $FSTYPE as \"$support\"" + fi + + grep -w "$support"'$' /proc/filesystems >/dev/null + if [ $? -eq 0 ]; then + # found the fs type + return $OCF_SUCCESS + fi + + # if here, we should attempt to load the module and then + # check the if the filesystem support exists again. + $MODPROBE $support >/dev/null + if [ $? 
-ne 0 ]; then + ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems and failed to load kernel module" + return $OCF_ERR_INSTALLED + fi + + # It is possible for the module to load and not be completely initialized + # before we check /proc/filesystems again. Give this a few tries before + # giving up entirely. + for try in $(seq 5); do + grep -w "$support"'$' /proc/filesystems >/dev/null + if [ $? -eq 0 ] ; then + # yes. found the filesystem after doing the modprobe + return $OCF_SUCCESS + fi + ocf_log debug "Unable to find support for $support in /proc/filesystems after modprobe, trying again" + sleep 1 + done + + ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems" + return $OCF_ERR_INSTALLED +} + + +# +# A fresh filesystem that was just created from another +# node on the shared storage may not be visible yet. If so, run +# partprobe to bring /dev/disk/by-{label,uuid}/* up to date. +# +# DEVICE can be /dev/xxx, -U, -L +# +trigger_udev_rules_if_needed() +{ + local refresh_flag="no" + local tmp + local timeout + + if [ $blockdevice = "yes" ]; then + tmp="$DEVICE" + if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then + refresh_flag="yes" + fi + else + tmp="$(echo $DEVICE|awk '{$1=""; print substr($0,2)}')" + case "$DEVICE" in + -U*|--uuid*) + tmp="/dev/disk/by-uuid/$tmp" + ;; + -L*|--label*) + tmp="/dev/disk/by-label/$tmp" + ;; + *) + # bind mount? + return ;; + esac + [ ! -b "$tmp" ] && refresh_flag="yes" + fi + + [ "$refresh_flag" = "no" ] && return + + have_binary partprobe && partprobe >/dev/null 2>&1 + timeout=${OCF_RESKEY_CRM_meta_timeout:="60000"} + timeout=$((timeout/1000)) + have_binary udevadm && udevadm settle -t $timeout --exit-if-exists=$tmp + + return $?
+} + +# +# START: Start up the filesystem +# +Filesystem_start() +{ + # Check if there are any mounts mounted under the mountpoint + match_string="${TAB}${CANONICALIZED_MOUNTPOINT}" + if list_mounts | grep -E "$match_string/\w+" >/dev/null 2>&1; then + ocf_log err "There is one or more mounts mounted under $MOUNTPOINT." + return $OCF_ERR_CONFIGURED + fi + + # See if the device is already mounted. + if Filesystem_status >/dev/null 2>&1 ; then + ocf_log info "Filesystem $MOUNTPOINT is already mounted." + return $OCF_SUCCESS + fi + + fstype_supported || exit $OCF_ERR_INSTALLED + + # Check the filesystem & auto repair. + # NOTE: Some filesystem types don't need this step... Please modify + # accordingly + + trigger_udev_rules_if_needed + + if [ $blockdevice = "yes" ]; then + if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then + ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" + exit $OCF_ERR_INSTALLED + fi + + if is_fsck_needed; then + ocf_log info "Starting filesystem check on $DEVICE" + if [ -z "$FSTYPE" ]; then + $FSCK -p "$DEVICE" + else + $FSCK -t "$FSTYPE" -p "$DEVICE" + fi + + # NOTE: if any errors at all are detected, it returns non-zero + # if the error is >= 4 then there is a big problem + if [ $? -ge 4 ]; then + ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE" + return $OCF_ERR_GENERIC + fi + fi + fi + + [ -d "$MOUNTPOINT" ] || + ocf_run mkdir -p "$MOUNTPOINT" + if [ ! -d "$MOUNTPOINT" ] ; then + ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point" + exit $OCF_ERR_INSTALLED + fi + + flushbufs "$DEVICE" + # Mount the filesystem. + case "$FSTYPE" in + none) $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" && + bind_mount + ;; + "") $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" ;; + *) $MOUNT -t "$FSTYPE" $options $device_opt "$DEVICE" "$MOUNTPOINT" ;; + esac + + if [ $? 
-ne 0 ]; then + ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT" + return $OCF_ERR_GENERIC + fi + return $OCF_SUCCESS +} +# end of Filesystem_start + +get_pids() +{ + local dir=$1 + local procs + local mmap_procs + + if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_root_mount_check "$DEVICE"; then + ocf_log debug "Change force_umount from '$FORCE_UNMOUNT' to 'safe'" + FORCE_UNMOUNT=safe + fi + + if ocf_is_true "$FORCE_UNMOUNT"; then + if [ "X${HOSTOS}" = "XOpenBSD" ];then + fstat | grep $dir | awk '{print $3}' + else + $FUSER -m $dir 2>/dev/null + fi + elif [ "$FORCE_UNMOUNT" = "safe" ]; then + procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}') + mmap_procs=$(grep " ${dir}/" /proc/[0-9]*/maps | awk -F/ '{print $3}') + printf "${procs}\n${mmap_procs}" | sort | uniq + fi +} + +signal_processes() { + local dir=$1 + local sig=$2 + local pids pid + # fuser returns a non-zero return code if none of the + # specified files is accessed or in case of a fatal + # error. + pids=$(get_pids "$dir") + if [ -z "$pids" ]; then + ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'" + return + fi + for pid in $pids; do + ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)" + kill -s $sig $pid + done +} +try_umount() { + local SUB="$1" + $UMOUNT $umount_force "$SUB" + list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1 || { + ocf_log info "unmounted $SUB successfully" + return $OCF_SUCCESS + } + return $OCF_ERR_GENERIC +} +timeout_child() { + local pid="$1" timeout="$2" killer ret + + # start job in the background that will KILL the given process after timeout expires + sleep $timeout && kill -s KILL $pid & + killer=$! + + # block until the child process either exits on its own or gets killed by the above killer pipeline + wait $pid + ret=$? 
+ + # ret would be 128 + signal number (137 for KILL) if the timeout expired + [ $ret -lt 128 ] && kill -s KILL $killer + return $ret +} +fs_stop_loop() { + local SUB="$1" signals="$2" sig + while true; do + for sig in $signals; do + signal_processes "$SUB" $sig + done + sleep $OCF_RESKEY_signal_delay + try_umount "$SUB" && return $OCF_SUCCESS + done +} +fs_stop() { + local SUB="$1" timeout=$2 grace_time ret + grace_time=$((timeout/2)) + + # try gracefully terminating processes for up to half of the configured timeout + fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" & + timeout_child $! $grace_time + ret=$? + [ $ret -eq $OCF_SUCCESS ] && return $ret + + # try killing them for the rest of the timeout + fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" & + timeout_child $! $grace_time + ret=$? + [ $ret -eq $OCF_SUCCESS ] && return $ret + + # timeout expired + ocf_exit_reason "Couldn't unmount $SUB within given timeout" + return $OCF_ERR_GENERIC +} + +# +# STOP: Unmount the filesystem +# +Filesystem_stop() +{ + # See if the device is currently mounted + Filesystem_status >/dev/null 2>&1 + if [ $? -eq $OCF_NOT_RUNNING ]; then + # Already unmounted, wonderful. + rc=$OCF_SUCCESS + else + # Wipe the status file, but continue with a warning if + # removal fails -- the file system might be read only + if [ $OCF_CHECK_LEVEL -eq 20 ]; then + rm -f "${STATUSFILE}" + if [ $? -ne 0 ]; then + ocf_log warn "Failed to remove status file ${STATUSFILE}." + fi + fi + + # Determine the real blockdevice this is mounted on (if + # possible) prior to unmounting. + determine_blockdevice + + # For networked filesystems, there's merit in trying -f: + case "$FSTYPE" in + nfs4|nfs|efs|cifs|smbfs) umount_force="-f" ;; + esac + + # Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
+ local timeout + while read SUB; do + ocf_log info "Trying to unmount $SUB" + if ocf_is_true "$FAST_STOP"; then + timeout=6 + else + timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} + timeout=$((timeout/1000)) + fi + fs_stop "$SUB" $timeout + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Couldn't unmount $SUB, giving up!" + fi + done <<-EOF + $(list_submounts "$CANONICALIZED_MOUNTPOINT"; \ + list_bindmounts "$CANONICALIZED_MOUNTPOINT"; \ + echo $CANONICALIZED_MOUNTPOINT) + EOF + fi + + flushbufs "$DEVICE" + + return $rc +} +# end of Filesystem_stop + +# +# STATUS: is the filesystem mounted or not? +# +Filesystem_status() +{ + match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}" + if list_mounts | grep "$match_string" >/dev/null 2>&1; then + rc=$OCF_SUCCESS + msg="$MOUNTPOINT is mounted (running)" + else + rc=$OCF_NOT_RUNNING + msg="$MOUNTPOINT is unmounted (stopped)" + fi + + # Special case "monitor" to check whether the UUID cached and + # on-disk still match? + case "$OP" in + status) ocf_log info "$msg";; + esac + + return $rc +} +# end of Filesystem_status + + +# Note: the read/write tests below will stall in case the +# underlying block device (or in the case of a NAS mount, the +# NAS server) has gone away. In that case, if I/O does not +# return to normal in time, the operation hits its timeout +# and it is up to the CRM to initiate appropriate recovery +# actions (such as fencing the node). +# +# MONITOR 10: read the device +# +Filesystem_monitor_10() +{ + if [ "$blockdevice" = "no" ] ; then + ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" + return $OCF_SUCCESS + fi + dd_opts="iflag=direct bs=4k count=1" + err_output=$(dd if="$DEVICE" $dd_opts 2>&1 >/dev/null) + if [ $? 
-ne 0 ]; then + ocf_exit_reason "Failed to read device $DEVICE" + ocf_log err "dd said: $err_output" + return $OCF_ERR_GENERIC + fi + return $OCF_SUCCESS +} +# +# MONITOR 20: write and read a status file +# +Filesystem_monitor_20() +{ + if [ "$blockdevice" = "no" ] ; then + # O_DIRECT not supported on cifs/smbfs + dd_opts="oflag=sync bs=4k conv=fsync,sync" + else + # Writing to the device in O_DIRECT mode is imperative + # to bypass caches. + dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" + fi + status_dir=$(dirname "$STATUSFILE") + [ -d "$status_dir" ] || mkdir -p "$status_dir" + err_output=$(echo "${OCF_RESOURCE_INSTANCE}" | dd of="${STATUSFILE}" $dd_opts 2>&1) + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to write status file ${STATUSFILE}" + ocf_log err "dd said: $err_output" + return $OCF_ERR_GENERIC + fi + test -f "${STATUSFILE}" + if [ $? -ne 0 ]; then + ocf_exit_reason "Cannot stat the status file ${STATUSFILE}" + return $OCF_ERR_GENERIC + fi + cat "${STATUSFILE}" > /dev/null + if [ $? -ne 0 ]; then + ocf_exit_reason "Cannot read the status file ${STATUSFILE}" + return $OCF_ERR_GENERIC + fi + return $OCF_SUCCESS +} +Filesystem_monitor() +{ + Filesystem_status + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then + case "$OCF_CHECK_LEVEL" in + 10) Filesystem_monitor_10; rc=$?;; + 20) Filesystem_monitor_20; rc=$?;; + *) + ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL" + rc=$OCF_ERR_CONFIGURED + ;; + esac + fi + return $rc +} +# end of Filesystem_monitor + + +# +# VALIDATE_ALL: Are the instance parameters valid? +# FIXME!! The only part that's useful is the return code. +# This code always returns $OCF_SUCCESS (!) +# FIXME!! Needs some tuning to match fstype_supported() (e.g., for +# fuse). Can we just call fstype_supported() with a flag like +# "no_modprobe" instead? 
+# +Filesystem_validate_all() +{ + # Check if the $FSTYPE is workable + # NOTE: Without inserting the $FSTYPE module, this step may be imprecise + # TODO: This is Linux specific crap. + if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then + cut -f2 /proc/filesystems | grep "^${FSTYPE}$" >/dev/null 2>&1 + if [ $? -ne 0 ]; then + modpath=/lib/modules/$(uname -r) + moddep=$modpath/modules.dep + # Do we have $FSTYPE in modules.dep? + cut -d' ' -f1 $moddep \ + | grep "^${modpath}.*${FSTYPE}\.k\?o:$" >/dev/null 2>&1 + if [ $? -ne 0 ]; then + ocf_log info "It seems we do not have $FSTYPE support" + fi + fi + fi + + # If we are supposed to do monitoring with status files, then + # we need a utility to write in O_DIRECT mode. + if [ $OCF_CHECK_LEVEL -gt 0 ]; then + check_binary dd + # Note: really old coreutils version do not support + # the "oflag" option for dd. We don't check for that + # here. In case dd does not support oflag, monitor is + # bound to fail, with dd spewing an error message to + # the logs. On such systems, we must do without status + # file monitoring. + fi + + #TODO: How to check the $options ? + return $OCF_SUCCESS +} + +# +# set the blockdevice variable to "no" or "yes" +# +set_blockdevice_var() { + blockdevice=no + + # these are definitely not block devices + case "$FSTYPE" in + nfs4|nfs|efs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|lustre) return;; + esac + + if $(is_option "loop"); then + return + fi + + case "$DEVICE" in + --uuid=*|--uuid\ *|--label=*|--label\ *) + device_opt=$(echo $DEVICE | sed "s/\([[:blank:]]\|=\).*//") + DEVICE=$(echo $DEVICE | sed -E "s/$device_opt([[:blank:]]*|=)//") + ;; + -U*|-L*) # short versions of --uuid/--label + device_opt=$(echo $DEVICE | cut -c1-2) + DEVICE=$(echo $DEVICE | sed "s/$device_opt[[:blank:]]*//") + ;; + /dev/null) # Special case for BSC + blockdevice=yes + ;; + *) + if [ ! -b "$DEVICE" -a ! 
-d "$DEVICE" -a "X$OP" != Xstart ] ; then + ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" + fi + if [ ! -d "$DEVICE" ]; then + blockdevice=yes + fi + ;; + esac +} + +# Check the arguments passed to this script +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +# Check the OCF_RESKEY_ environment variables... +FORCE_UNMOUNT="yes" +if [ -n "${OCF_RESKEY_force_unmount}" ]; then + FORCE_UNMOUNT=$OCF_RESKEY_force_unmount +fi + +DEVICE="$OCF_RESKEY_device" +FSTYPE=$OCF_RESKEY_fstype +if [ ! -z "$OCF_RESKEY_options" ]; then + options="-o $OCF_RESKEY_options" +fi +FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} + +OP=$1 + +# These operations do not require instance parameters +case $OP in + meta-data) meta_data + exit $OCF_SUCCESS + ;; + usage) usage + exit $OCF_SUCCESS + ;; +esac + +if [ x = x"$DEVICE" ]; then + ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed" + exit $OCF_ERR_CONFIGURED +fi + +set_blockdevice_var + +# Normalize instance parameters: + +# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". +# But the output of `mount` and /proc/mounts do not. +if [ -z "$OCF_RESKEY_directory" ]; then + if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then + ocf_exit_reason "Please specify the directory" + exit $OCF_ERR_CONFIGURED + fi +else + MOUNTPOINT="$(echo "$OCF_RESKEY_directory" | sed 's/\/*$//')" + : ${MOUNTPOINT:=/} + if [ -e "$MOUNTPOINT" ] ; then + CANONICALIZED_MOUNTPOINT="$(readlink -f "$MOUNTPOINT")" + if [ $? -ne 0 ]; then + ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed" + exit $OCF_ERR_GENERIC + fi + else + CANONICALIZED_MOUNTPOINT="$MOUNTPOINT" + fi + # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" + # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll + # kill the whole system. Is that a good idea? 
+fi + +# Check to make sure the utilites are found +if [ "X${HOSTOS}" != "XOpenBSD" ];then +check_binary $MODPROBE +check_binary $FUSER +fi +check_binary $FSCK +check_binary $MOUNT +check_binary $UMOUNT + +if [ "$OP" != "monitor" ]; then + ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" +fi + +case $OP in + status) Filesystem_status + exit $? + ;; + monitor) Filesystem_monitor + exit $? + ;; + validate-all) Filesystem_validate_all + exit $? + ;; + stop) Filesystem_stop + exit $? + ;; +esac + +CLUSTERSAFE=0 +is_option "ro" && + CLUSTERSAFE=2 + +case "$FSTYPE" in +nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre) + CLUSTERSAFE=1 # this is kind of safe too + systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target" + ;; +# add here CLUSTERSAFE=0 for all filesystems which are not +# cluster aware and which, even if when mounted read-only, +# could still modify parts of it such as journal/metadata +ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) + if ocf_is_true "$OCF_RESKEY_force_clones"; then + CLUSTERSAFE=2 + systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target" + else + CLUSTERSAFE=0 # these are not allowed + fi + ;; +esac + +if ocf_is_clone; then + case $CLUSTERSAFE in + 0) + ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" + ocf_log err "DO NOT RUN IT AS A CLONE!" + ocf_log err "Politely refusing to proceed to avoid data corruption." + exit $OCF_ERR_CONFIGURED + ;; + 2) + ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" + if ocf_is_true "$OCF_RESKEY_force_clones"; then + ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." + else + ocf_log warn "But we'll let it run because it is mounted read-only." + ocf_log warn "Please make sure that it's meta data is read-only too!" + fi + ;; + esac +fi + +case $OP in + start) Filesystem_start + ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; + esac +exit $? 
+ + diff --git a/heartbeat/ICP b/heartbeat/ICP new file mode 100755 index 0000000..0bf37de --- /dev/null +++ b/heartbeat/ICP @@ -0,0 +1,304 @@ +#!/bin/sh +# +# +# ICP +# +# Description: Manages an ICP Vortex clustered host drive as an HA resource +# +# +# Author: Lars Marowsky-Bree +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# Copyright: (C) 2002 SuSE Linux AG +# +# +# An example usage in /etc/ha.d/haresources: +# node1 10.0.0.170 LinuxSCSI::0:0 ICP::c0h1::/dev/sdb1 LVM::myvolname +# +# Notice that you will need to get the utility "icpclucon" from the ICP +# support to use this. +# +# See usage() function below for more details... +# +# OCF parameters are as below: +# OCF_RESKEY_driveid +# OCF_RESKEY_device + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_driveid_default="" +OCF_RESKEY_device_default="" + +: ${OCF_RESKEY_driveid=${OCF_RESKEY_driveid_default}} +: ${OCF_RESKEY_device=${OCF_RESKEY_device_default}} + +####################################################################### + +# +ICPCLUCON=/usr/sbin/icpclucon +# + +usage() { + methods=`ICP_methods | grep -v methods` + methods=`echo $methods | tr ' ' '|'` + cat <<-! + usage: $0 ($methods) + + $0 manages an ICP Vortex clustered host drive. + + The 'start' operation reserves the given host drive. + The 'stop' operation releses the given host drive. + The 'status' operation reports whether the host drive is reserved. + The 'monitor' operation reports whether the host drive is reserved. + The 'validate-all' operation reports whether OCF instance parameters are valid. + The 'methods' operation reports on the methods $0 supports + + ! +} + +meta_data() { + cat < + + +1.0 + + +Resource script for ICP. It Manages an ICP Vortex clustered host drive as an +HA resource. 
+ +Manages an ICP Vortex clustered host drive + + + + +The ICP cluster drive ID. + +ICP cluster drive ID + + + + + +The device name. + +device + + + + + + + + + + + + + +END +} + +# +# methods: What methods/operations do we support? +# +ICP_methods() { + cat <<-! + start + stop + status + monitor + methods + validate-all + meta-data + usage + ! +} + +ICP_status() { + local icp_out + + icp_out=$($ICPCLUCON -v -status $1) + if [ $? -ne 0 ]; then + ocf_log "err" "Hostdrive not reserved by us." + return $OCF_ERR_GENERIC + fi + + if expr match "$icp_out" \ + '.*Drive is reserved by this host.*' >/dev/null 2>&1 ; then + ocf_log "info" "Volume $1 is reserved by us." + return $OCF_SUCCESS + elif expr match "$icp_out" \ + '.*Drive is not reserved by any host.*' >/dev/null 2>&1 ; then + ocf_log "err" "Volume $1 not reserved by any host." + return $OCF_NOT_RUNNING + else + ocf_log "err" "Unknown output from icpclucon. Assuming we do not have a reservation:" + ocf_log "err" "$icp_out" + return $OCF_NOT_RUNNING + fi +} + +ICP_report_status() { + if ICP_status $1 ; then + echo "$1: running" + return $OCF_SUCCESS + else + echo "$1: not running" + return $OCF_NOT_RUNNING + fi +} + + +# +# Monitor the host drive - does it really seem to be working? +# +# +ICP_monitor() { + + if + ICP_status $1 + then + return $? + else + ocf_log "err" "ICP host drive $1 is offline" + return $OCF_NOT_RUNNING + fi + +} + +Clear_bufs() { + $BLOCKDEV --flushbufs $1 +} + +# +# Enable ICP host drive +# +ICP_start() { + + ocf_log "info" "Activating host drive $1" + ocf_run $ICPCLUCON -v -reserve $1 + if [ $? 
-ne 0 ]; then + ocf_log "info" "Forcing reservation of $1" + ocf_run $ICPCLUCON -v -force $1 || return $OCF_ERR_GENERIC + fi + + if + ICP_status $1 + then + : OK + # A reservation isn't as prompt as it should be + sleep 3 + return $OCF_SUCCESS + else + ocf_log "err" "ICP: $1 was not reserved correctly" + return $OCF_ERR_GENERIC + fi +} + +# +# Release the ICP host drive +# +ICP_stop() { + + ocf_log "info" "Releasing ICP host drive $1" + ocf_run $ICPCLUCON -v -release $1 || return $OCF_ERR_GENERIC + + ocf_log "info" "Verifying reservation" + if ICP_status $1 ; then + ocf_log "err" "ICP: $1 was not released correctly" + return $OCF_ERR_GENERIC + fi + return $OCF_SUCCESS +} + +ICP_validate_all() { + check_binary $BLOCKDEV + check_binary $ICPCLUCON + $ICPCLUCON -v -status $driveid >/dev/null 2>&1 + if [ $? -ne 0 ]; then + ocf_log err "Invalid driveid $driveid" + exit $OCF_ERR_ARGS + fi + + if [ ! -b $device ]; then + ocf_log err "Device $device is not a block device" + exit $OCF_ERR_ARGS + fi + +# Do not know how to check the association of $device with $driveid. + + return $OCF_SUCCESS +} + +# +# 'main' starts here... +# + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +# These operations do not require OCF instance parameters to be set +case "$1" in + + meta-data) meta_data + exit $OCF_SUCCESS;; + + methods) ICP_methods + exit $OCF_SUCCESS;; + + usage) usage + exit $OCF_SUCCESS;; + + *) ;; +esac + +if + [ -z "$OCF_RESKEY_driveid" ] +then + ocf_log err "Please specify OCF_RESKEY_driveid" + exit $OCF_ERR_ARGS +fi + +if [ -z "$OCF_RESKEY_device" ]; then + ocf_log err "Please specify OCF_RESKEY_device" + exit $OCF_ERR_ARGS +fi + +driveid=$OCF_RESKEY_driveid +device=$OCF_RESKEY_device + +# What kind of method was invoked? 
+case "$1" in + + start) ICP_validate_all + ICP_start $driveid || exit $? # report a failed reservation instead of the Clear_bufs status + Clear_bufs $device + exit $?;; + + stop) ICP_stop $driveid || exit $? # report a failed release instead of the Clear_bufs status + Clear_bufs $device + exit $?;; + + status) ICP_report_status $driveid + exit $?;; + + monitor) ICP_monitor $driveid + exit $?;; + + validate-all) ICP_validate_all + exit $?;; + + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/heartbeat/IPaddr b/heartbeat/IPaddr new file mode 100755 index 0000000..9b0ea81 --- /dev/null +++ b/heartbeat/IPaddr @@ -0,0 +1,912 @@ +#!/bin/sh +# +# License: GNU General Public License (GPL) +# Support: users@clusterlabs.org +# +# This script manages IP alias IP addresses +# +# It can add an IP alias, or remove one. +# +# usage: $0 {start|stop|status|monitor|validate-all|meta-data} +# +# The "start" arg adds an IP alias. +# +# Surprisingly, the "stop" arg removes one. :-) +# +# OCF parameters are as below +# OCF_RESKEY_ip +# OCF_RESKEY_broadcast +# OCF_RESKEY_nic +# OCF_RESKEY_cidr_netmask +# OCF_RESKEY_lvs_support ( e.g. true, on, 1 ) +# OCF_RESKEY_ARP_INTERVAL_MS +# OCF_RESKEY_ARP_REPEAT +# OCF_RESKEY_ARP_BACKGROUND (e.g. yes ) +# OCF_RESKEY_ARP_NETMASK +# OCF_RESKEY_local_start_script +# OCF_RESKEY_local_stop_script +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +.
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_ip_default="" +OCF_RESKEY_nic_default="eth0" +OCF_RESKEY_cidr_netmask_default="" +OCF_RESKEY_broadcast_default="" +OCF_RESKEY_iflabel_default="" +OCF_RESKEY_lvs_support_default="false" +OCF_RESKEY_local_stop_script_default="" +OCF_RESKEY_local_start_script_default="" +OCF_RESKEY_ARP_INTERVAL_MS_default="500" +OCF_RESKEY_ARP_REPEAT_default="10" +OCF_RESKEY_ARP_BACKGROUND_default="yes" +OCF_RESKEY_ARP_NETMASK_default="ffffffffffff" + +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} +: ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}} +: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}} +: ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} +: ${OCF_RESKEY_local_stop_script=${OCF_RESKEY_local_stop_script_default}} +: ${OCF_RESKEY_local_start_script=${OCF_RESKEY_local_start_script_default}} +: ${OCF_RESKEY_ARP_INTERVAL_MS=${OCF_RESKEY_ARP_INTERVAL_MS_default}} +: ${OCF_RESKEY_ARP_REPEAT=${OCF_RESKEY_ARP_REPEAT_default}} +: ${OCF_RESKEY_ARP_BACKGROUND=${OCF_RESKEY_ARP_BACKGROUND_default}} +: ${OCF_RESKEY_ARP_NETMASK=${OCF_RESKEY_ARP_NETMASK_default}} + +SENDARP=$HA_BIN/send_arp +FINDIF=$HA_BIN/findif +VLDIR=$HA_RSCTMP +SENDARPPIDDIR=$HA_RSCTMP +SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" +USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; + +####################################################################### + +SYSTYPE="`uname -s`" +case "$SYSTYPE" in + SunOS) + # `uname -r` = 5.9 -> SYSVERSION = 9 + SYSVERSION="`uname -r | cut -d. -f 2`" + ;; + Darwin) + # Treat Darwin the same as the other BSD variants (matched as *BSD) + SYSTYPE="${SYSTYPE}BSD" + ;; + *) + ;; +esac + + + +meta_data() { + cat < + + +1.0 + +This script manages IP alias IP addresses +It can add an IP alias, or remove one. 
+ +Manages virtual IPv4 addresses (portable version) + + + + +The IPv4 address to be configured in dotted quad notation, for example +"192.168.1.1". + +IPv4 address + + + + +The base network interface on which the IP address will be brought +online. + +If left empty, the script will try and determine this from the +routing table. + +Do NOT specify an alias interface in the form eth0:1 or anything here; +rather, specify the base interface only. + +Prerequisite: + +There must be at least one static IP address, which is not managed by +the cluster, assigned to the network interface. + +If you can not assign any static IP address on the interface, +modify this kernel parameter: +sysctl -w net.ipv4.conf.all.promote_secondaries=1 +(or per device) + + +Network interface + + + + + +The netmask for the interface in CIDR format. (ie, 24), or in +dotted quad notation 255.255.255.0). + +If unspecified, the script will also try to determine this from the +routing table. + +Netmask + + + + + +Broadcast address associated with the IP. If left empty, the script will +determine this from the netmask. + +Broadcast address + + + + + +You can specify an additional label for your IP address here. + +Interface label + + + + + +Enable support for LVS Direct Routing configurations. In case a IP +address is stopped, only move it to the loopback device to allow the +local node to continue to service requests, but no longer advertise it +on the network. + +Enable support for LVS DR + + + + + +Script called when the IP is released + +Script called when the IP is released + + + + + +Script called when the IP is added + +Script called when the IP is added + + + + + +milliseconds between ARPs + +milliseconds between gratuitous ARPs + + + + + +How many gratuitous ARPs to send out when bringing up a new address + +repeat count + + + + + +run in background (no longer any reason to do this) + +run in background + + + + + +netmask for ARP - in nonstandard hexadecimal format. 
+ +netmask for ARP + + + + + + + + + + + + + +END + exit $OCF_SUCCESS +} + +# The 'ping' command takes highly OS-dependent arguments, so this +# function creates a suitable argument list for the host OS's 'ping'. +# We use a subset of its functionality: +# 1. single packet +# 2. reasonable timeout (say 1 second) +# +# arguments: +# $1: IP address to ping +# result string: +# arguments for ping command +# +# If more flexibility is needed, they could be specified in the environment +# to this function, to adjust the resulting 'ping' arguments. +# David Lee May 2007 +pingargs() { + _baseip=$1 + _timeout=1 # seconds + _pktcount=1 + _systype="`uname -s`" + case $_systype in + Linux) + # Default is perpetual ping: need "-c $_pktcount". + # -c count -t timetolive -q(uiet) -n(umeric) -W timeout + _pingargs="-c $_pktcount -q -n $_baseip" + ;; + SunOS) + # Default is immediate (or timeout) return. + _pingargs="$_baseip $_timeout" + ;; + *) + _pingargs="-c $_pktcount $_baseip" + ;; + esac + + echo "$_pingargs" +} + +# On Linux systems the (hidden) loopback interface may +# conflict with the requested IP address. If so, this +# unoriginal code will remove the offending loopback address +# and save it in VLDIR so it can be added back in later +# when the IPaddr is released. +# +lvs_remove_conflicting_loopback() { + ipaddr="$1" + ifname="$2" + + ocf_log info "Removing conflicting loopback $ifname." + if + echo $ifname > "$VLDIR/$ipaddr" + then + : Saved loopback information in $VLDIR/$ipaddr + else + ocf_log err "Could not save conflicting loopback $ifname." \ + "it will not be restored." + fi + + if [ ! -z "${OCF_RESKEY_local_stop_script}" ]; then + if [ -x "${OCF_RESKEY_local_stop_script}" ]; then + ${OCF_RESKEY_local_stop_script} $* + fi + fi + + delete_interface "$ifname" "$ipaddr" + + # Forcibly remove the route (if it exists) to the loopback. 
+ delete_route "$ipaddr" +} + +# +# On Linux systems the (hidden) loopback interface may +# need to be restored if it has been taken down previously +# by lvs_remove_conflicting_loopback() +# +lvs_restore_loopback() { + ipaddr="$1" + + if [ ! -s "$VLDIR/$ipaddr" ]; then + return + fi + + ifname=`cat "$VLDIR/$ipaddr"` + ocf_log info "Restoring loopback IP Address $ipaddr on $ifname." + + CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF" + if + NICINFO=`eval $CMD` + NICINFO=`echo $NICINFO | tr " " " " | tr -s " "` + then + netmask_text=`echo "$NICINFO" | cut -f3 -d " "` + broadcast=`echo "$NICINFO" | cut -f5 -d " "` + else + echo "ERROR: $CMD failed (rc=$rc)" + exit $OCF_ERR_GENERIC + fi + + add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast + rm -f "$VLDIR/$ipaddr" +} + +# +# Find out which alias serves the given IP address +# The argument is an IP address, and its output +# is an aliased interface name (e.g., "eth0:0"). +# +find_interface_solaris() { + ipaddr="$1" + + $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | + while read ifname linkstuff + do + : ifname = $ifname + read inet addr junk + : inet = $inet addr = $addr + while + read line && [ "X$line" != "X" ] + do + : Nothing + done + + case $ifname in + *:*) ;; + *) continue;; + esac + + # This doesn't look right for a box with multiple NICs. + # It looks like it always selects the first interface on + # a machine. Yet, we appear to use the results for this case too... 
+ ifname=`echo "$ifname" | sed s'%:$%%'` + + case $addr in + addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; + $ipaddr) echo $ifname; return $OCF_SUCCESS;; + esac + done + return $OCF_ERR_GENERIC +} + +find_interface_bsd() { + $IFCONFIG $IFCONFIG_A_OPT | awk -v ip_addr="$ipaddr" ' + /UP,/ && $0 ~ /^[a-z]+[0-9]:/ { + if_name=$1; sub(":$","",if_name); + } + $1 == "inet" && $2 == ip_addr { + print if_name + exit(0) + }' + +} + +# +# Find out which alias serves the given IP address +# The argument is an IP address, and its output +# is an aliased interface name (e.g., "eth0:0"). +# +find_interface_generic() { + ipaddr="$1" + $IFCONFIG $IFCONFIG_A_OPT | + while read ifname linkstuff + do + : Read gave us ifname = $ifname + + read inet addr junk + : Read gave us inet = $inet addr = $addr + + while + read line && [ "X$line" != "X" ] + do + : Nothing + done + + case $ifname in + *:*) ifname=`echo $ifname | sed 's/:$//'`;; + *) continue;; + esac + + : "comparing $ipaddr to $addr (from ifconfig)" + case $addr in + addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; + $ipaddr) echo $ifname; return $OCF_SUCCESS;; + esac + done + return $OCF_ERR_GENERIC +} + +# +# Find out which alias serves the given IP address +# The argument is an IP address, and its output +# is an aliased interface name (e.g., "eth0:0"). +# +find_interface() { + ipaddr="$1" + case "$SYSTYPE" in + SunOS) + NIC=`find_interface_solaris $ipaddr`;; + *BSD) + NIC=`find_interface_bsd $ipaddr`;; + *) + NIC=`find_interface_generic $ipaddr`;; + esac + + echo $NIC + return $OCF_SUCCESS; +} + +# +# Find an unused interface/alias name for us to use for new IP alias +# The argument is an IP address, and the output +# is an aliased interface name (e.g., "eth0:0", "dc0", "le0:0"). 
+#
+# $1 is the base interface name (ip_start passes $OCF_RESKEY_nic).
+# Alias numbers are reserved with hard links $VLDIR/IPaddr-<nic>:<n> to
+# the lock file $VLDIR/IPaddr-<nic>; a number is taken when ln succeeds.
+# On *BSD the base name itself is printed.
+# Returns $OCF_ERR_GENERIC when $1 is empty or no number below 512 is free.
+#
+find_free_interface() {
+  NIC="$1"
+
+  if [ "X$NIC" = "X" ]; then
+    ocf_log err "No free interface found for $OCF_RESKEY_ip"
+    return $OCF_ERR_GENERIC;
+  fi
+
+  NICBASE="$VLDIR/IPaddr-$NIC"
+  touch "$NICBASE"
+
+  case "$SYSTYPE" in
+    *BSD)
+      echo $NIC;
+      return $OCF_SUCCESS;;
+    SunOS)
+      j=1
+      IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \
+        grep "^$NIC:[0-9]" | sed 's%: .*%%'`;;
+    *)
+      j=0
+      IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \
+        grep "^$NIC:[0-9]" | sed 's% .*%%'`
+      # Start the search at the number of existing reservations unless
+      # that very number is already reserved.
+      TRYADRCNT=`ls "${NICBASE}:"* 2>/dev/null | wc -w | tr -d ' '`
+      if [ -f "${NICBASE}:${TRYADRCNT}" ]; then
+        : OK
+      else
+        j="${TRYADRCNT}"
+      fi
+      ;;
+  esac
+
+  # pad with blanks so that every name can be matched as " name "
+  IFLIST=" `echo $IFLIST` "
+  while
+    [ $j -lt 512 ]
+  do
+    case $IFLIST in
+      *" "$NIC:$j" "*)
+        # alias already configured on the system
+        ;;
+      *)
+        NICLINK="$NICBASE:$j"
+        if
+          ln "$NICBASE" "$NICLINK" 2>/dev/null
+        then
+          echo "$NIC:$j"
+          return $OCF_SUCCESS
+        fi
+        ;;
+    esac
+    j=`expr $j + 1`
+  done
+  return $OCF_ERR_GENERIC
+}
+
+#
+# Remove the host route to $1. Nothing is done on SunOS.
+# Returns the status of the route command.
+#
+delete_route () {
+  ipaddr="$1"
+
+  case "$SYSTYPE" in
+    SunOS)  return 0;;
+    *BSD)   CMD="$ROUTE -n delete -host $ipaddr";;
+    *)      CMD="$ROUTE -n del -host $ipaddr";;
+  esac
+
+  $CMD
+
+  return $?
+}
+
+#
+# Take down interface/alias $1 that carries address $2.
+# Returns the status of the ifconfig command.
+#
+delete_interface () {
+  ifname="$1"
+  ipaddr="$2"
+
+  case "$SYSTYPE" in
+    SunOS)
+      if [ "$SYSVERSION" -ge 8 ] ; then
+        CMD="$IFCONFIG $ifname unplumb"
+      else
+        CMD="$IFCONFIG $ifname 0 down"
+      fi;;
+    Darwin*)
+      CMD="$IFCONFIG $ifname $ipaddr delete";;
+    *BSD)
+      CMD="$IFCONFIG $ifname inet $ipaddr delete";;
+    *)
+      CMD="$IFCONFIG $ifname down";;
+  esac
+
+  ocf_log info "$CMD"
+  $CMD
+
+  return $?
+}
+
+
+#
+# Configure an address on an interface/alias.
+# arguments (all five are required, otherwise the agent exits with
+# $OCF_ERR_ARGS):
+#   $1: IP address
+#   $2: base interface
+#   $3: interface/alias to configure
+#   $4: netmask
+#   $5: broadcast address
+# Returns the status of the ifconfig command(s).
+#
+add_interface () {
+  ipaddr="$1"
+  iface_base="$2"
+  iface="$3"
+  netmask="$4"
+  broadcast="$5"
+
+  if [ $# != 5 ]; then
+    ocf_log err "Insufficient arguments to add_interface: $*"
+    exit $OCF_ERR_ARGS
+  fi
+
+  case "$SYSTYPE" in
+    SunOS)
+      if [ "$SYSVERSION" -ge 8 ] ; then
+        $IFCONFIG $iface plumb
+        rc=$?
+        if [ $rc -ne 0 ] ; then
+          ocf_log err "'$IFCONFIG $iface plumb' failed."
+          return $rc
+        fi
+      fi
+      # At Solaris 10, this single-command version sometimes broke.
+      # Almost certainly an S10 bug.
+      #   CMD="$IFCONFIG $iface inet $ipaddr $text up"
+      # So hack the following workaround:
+      CMD="$IFCONFIG $iface inet $ipaddr"
+      CMD="$CMD && $IFCONFIG $iface netmask $netmask"
+      CMD="$CMD && $IFCONFIG $iface up"
+      ;;
+
+    *BSD)
+      # netmask is always set to 255.255.255.255 for an alias
+      CMD="$IFCONFIG $iface inet $ipaddr netmask 255.255.255.255 alias";;
+    *)
+      CMD="$IFCONFIG $iface $ipaddr netmask $netmask broadcast $broadcast";;
+  esac
+
+  # Use "eval $CMD" (not "$CMD"): it might be a chain of two or more commands.
+  ocf_log info "eval $CMD"
+  eval $CMD
+  rc=$?
+  if [ $rc != 0 ]; then
+    ocf_log err "eval $CMD failed (rc=$rc)"
+  fi
+
+  return $rc
+}
+
+#
+# Remove the IP alias for the requested IP address...
+#
+# Also stops a send_arp still running for the address and removes the
+# lock file of the interface. Returns $OCF_SUCCESS when the address is
+# not configured (or, with lvs_support, only on loopback).
+#
+ip_stop() {
+  SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
+  NIC=`find_interface $OCF_RESKEY_ip`
+
+  if [ -f "$SENDARPPIDFILE" ]; then
+    xargs kill < "$SENDARPPIDFILE"
+    rm -f "$SENDARPPIDFILE"
+  fi
+
+  if [ -z "$NIC" ]; then
+    : Requested interface not in use
+    return $OCF_SUCCESS
+  fi
+
+  if [ ${OCF_RESKEY_lvs_support} = 1 ]; then
+    case $NIC in
+      lo*)
+        : Requested interface is on loopback
+        return $OCF_SUCCESS;;
+    esac
+  fi
+
+  delete_route "$OCF_RESKEY_ip"
+  delete_interface "$NIC" "$OCF_RESKEY_ip"
+  # Not kept in "rc": add_interface (reached via lvs_restore_loopback)
+  # assigns the global rc and would overwrite this result.
+  delete_rc=$?
+
+  if [ ${OCF_RESKEY_lvs_support} = 1 ]; then
+    lvs_restore_loopback "$OCF_RESKEY_ip"
+  fi
+
+  # remove lock file...
+  rm -f "$VLDIR/IPaddr-$NIC"
+
+  if [ $delete_rc != 0 ]; then
+    ocf_log warn "IP Address $OCF_RESKEY_ip NOT released: rc=$delete_rc"
+    return $OCF_ERR_GENERIC
+  fi
+  return $OCF_SUCCESS
+}
+
+
+#
+# Add an IP alias for the requested IP address...
+#
+# It could be that we already have taken it, in which case it should
+# do nothing.
+#
+# Returns the result of ip_status_internal after the address has been
+# added and the gratuitous ARPs have been started.
+#
+
+ip_start() {
+  #
+  # Do we already service this IP address?
+  #
+  ip_status_internal
+  if [ $? = $OCF_SUCCESS ]; then
+    # Nothing to do, the IP is already active
+    return $OCF_SUCCESS;
+  fi
+
+  NIC_unique=`find_free_interface $OCF_RESKEY_nic`
+  if [ -n "$NIC_unique" ]; then
+    : OK got interface [$NIC_unique] for $OCF_RESKEY_ip
+  else
+    return $OCF_ERR_GENERIC
+  fi
+
+  # This logic is mostly to support LVS (If I understand it correctly)
+  if [ ${OCF_RESKEY_lvs_support} = 1 ]; then
+    NIC_current=`find_interface $OCF_RESKEY_ip`
+    case $NIC_unique in
+      lo*)
+        if [ x"$NIC_unique" = x"$NIC_current" ]; then
+          # It's already "running" and not moving, nothing to do.
+          ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to"
+          return $OCF_ERR_GENERIC
+        fi;;
+      *)
+        # Nothing conflicts when the address is not configured at all;
+        # calling the helper with an empty name would save an empty
+        # record and run ifconfig without an interface.
+        if [ -n "$NIC_current" ]; then
+          lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current"
+        fi;;
+    esac
+  fi
+
+  if [ ! -z "${OCF_RESKEY_local_start_script}" ]; then
+    if [ -x "${OCF_RESKEY_local_start_script}" ]; then
+      ${OCF_RESKEY_local_start_script} $*
+    fi
+  fi
+
+  add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \
+    "$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast"
+  rc=$?
+  if [ $rc != 0 ]; then
+    ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: rc=$rc"
+    return $rc
+  fi
+
+  # The address is active, now notify others about it using sendarp
+
+  if [ "$SYSTYPE" = "DarwinBSD" -a "$NIC_unique" = "lo0" ]; then
+    # Darwin can't send ARPs on loopback devices
+    SENDARP="x$SENDARP"  # Prevent the binary from being found
+  fi
+
+  # quoted: an empty $SENDARP must not make the test succeed
+  if [ -x "$SENDARP" ]; then
+    TARGET_INTERFACE=`echo $NIC_unique | sed 's%:.*%%'`
+    SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
+
+    ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT"
+    ARGS="$ARGS -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip"
+    ARGS="$ARGS auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK"
+
+    ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]"
+    case $OCF_RESKEY_ARP_BACKGROUND in
+      yes) ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2 ;;
+      *)   $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?";;
+    esac
+  fi
+
+  ip_status_internal
+  return $?
+}
+
+#
+# Report whether this node serves $OCF_RESKEY_ip.
+# Returns:
+#   $OCF_NOT_RUNNING  no interface carries the address (with lvs_support:
+#                     or only a loopback interface does)
+#   $OCF_ERR_GENERIC  the address is on another base interface than the
+#                     configured one
+#   $OCF_SUCCESS      otherwise
+#
+ip_status_internal() {
+  NIC=`find_interface "$OCF_RESKEY_ip"`
+
+  if [ "x$NIC" = x ]; then
+    return $OCF_NOT_RUNNING
+
+  elif [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
+    case $NIC in
+      lo*)  return $OCF_NOT_RUNNING;;
+      *)    return $OCF_SUCCESS;;
+    esac
+  else
+    if [ "x$OCF_RESKEY_nic" != x ]; then
+      # compare base names only (strip any ":alias" suffix)
+      simple_OCF_NIC=`echo $OCF_RESKEY_nic | awk -F: '{print $1}'`
+      simple_NIC=`echo $NIC | awk -F: '{print $1}'`
+      if [ "$simple_OCF_NIC" != "$simple_NIC" ]; then
+        ocf_log err "$OCF_RESKEY_ip is running an interface ($simple_NIC) instead of the configured one ($simple_OCF_NIC)"
+        return $OCF_ERR_GENERIC
+      fi
+    fi
+    return $OCF_SUCCESS
+  fi
+}
+
+#
+# "status" action: prints running/stopped/unknown and returns the code
+# of ip_status_internal.
+#
+ip_status() {
+  ip_status_internal
+  rc=$?
+  if [ $rc = $OCF_SUCCESS ]; then
+    echo "running"
+  elif [ $rc = $OCF_NOT_RUNNING ]; then
+    echo "stopped"
+  else
+    echo "unknown"
+  fi
+  return $rc;
+}
+
+#
+# Determine if this IP address is really being served, or not.
+# Note that we must distinguish if *we're* serving it locally...
+#
+# At check level 0 (also used when OCF_CHECK_LEVEL is not set) only the
+# interface status is reported. At other levels the address is pinged
+# up to ten times; the last ping output is logged when all fail.
+#
+ip_monitor() {
+  ip_status_internal
+  rc=$?
+
+  if [ "${OCF_CHECK_LEVEL:-0}" = 0 -o $rc != 0 ]; then
+    return $rc
+  fi
+
+  ocf_log info "Checking IP stack"
+
+  PINGARGS="`pingargs $OCF_RESKEY_ip`"
+  for j in 1 2 3 4 5 6 7 8 9 10; do
+    MSG=`$PING $PINGARGS 2>&1`
+    if [ $? = 0 ]; then
+      return $OCF_SUCCESS
+    fi
+  done
+
+  ocf_log err "$MSG"
+  return $OCF_ERR_GENERIC
+}
+
+#
+# Succeeds (status 0) when $1 is a decimal number >= 1.
+#
+is_positive_integer() {
+  ocf_is_decimal "$1" && [ "$1" -ge 1 ]
+}
+
+#
+# Validate binaries and parameters and fill in nic, netmask and
+# broadcast from $FINDIF where they were not given.
+# Returns $OCF_SUCCESS, or $OCF_ERR_ARGS / $OCF_ERR_CONFIGURED /
+# $OCF_ERR_GENERIC after logging the reason.
+#
+ip_validate_all() {
+  check_binary $AWK
+  check_binary $IFCONFIG
+  check_binary $ROUTE
+  check_binary $PING
+
+  # "then : else" keeps the script usable with shells lacking "if !"
+  if is_positive_integer "$OCF_RESKEY_ARP_INTERVAL_MS"
+  then
+    :
+  else
+    ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]"
+    return $OCF_ERR_ARGS
+  fi
+
+  if is_positive_integer "$OCF_RESKEY_ARP_REPEAT"
+  then
+    :
+  else
+    ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]"
+    return $OCF_ERR_ARGS
+  fi
+
+  if [ "$SYSTYPE" = "Linux" -o "$SYSTYPE" = "SunOS" ]; then
+    :
+  else
+    if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
+      ocf_log err "$SYSTYPE does not support LVS"
+      return $OCF_ERR_GENERIC
+    fi
+  fi
+
+  case $OCF_RESKEY_ip in
+    "")  ocf_log err "Required parameter OCF_RESKEY_ip is missing"
+         return $OCF_ERR_CONFIGURED;;
+    [0-9]*.[0-9]*.[0-9]*.*[0-9])  : OK;;
+    *)   ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address"
+         return $OCF_ERR_CONFIGURED;;
+  esac
+
+  # Unconditionally do this?
+  case $OCF_RESKEY_nic in
+    *:*)
+      OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'`
+      ;;
+  esac
+
+  NICINFO=`$FINDIF`
+  rc=$?
+
+  if [ $rc != 0 ]; then
+    ocf_log err "$FINDIF failed [rc=$rc]."
+    return $OCF_ERR_GENERIC
+  fi
+
+  tmp=`echo "$NICINFO" | cut -f1`
+  if
+    [ "x$OCF_RESKEY_nic" = "x" ]
+  then
+    ocf_log info "Using calculated nic for ${OCF_RESKEY_ip}: $tmp"
+    OCF_RESKEY_nic=$tmp
+  elif
+    [ "x$tmp" != "x${OCF_RESKEY_nic}" ]
+  then
+    ocf_log err "Invalid parameter value: nic [$OCF_RESKEY_nic] Calculated nic: [$tmp]"
+    return $OCF_ERR_ARGS
+  fi
+
+  tmp=`echo "$NICINFO" | cut -f2 | cut -d ' ' -f2`
+  if
+    [ "x$OCF_RESKEY_cidr_netmask" != "x$tmp" ]
+  then
+    ocf_log info "Using calculated netmask for ${OCF_RESKEY_ip}: $tmp"
+  fi
+
+  # Always use the calculated version because it might have been specified
+  # using CIDR notation which not every system accepts
+  OCF_RESKEY_netmask=$tmp
+  OCF_RESKEY_cidr_netmask=$tmp; export OCF_RESKEY_cidr_netmask
+
+  tmp=`echo "$NICINFO" | cut -f3 | cut -d ' ' -f2`
+  if
+    [ "x$OCF_RESKEY_broadcast" = "x" ]
+  then
+    ocf_log debug "Using calculated broadcast for ${OCF_RESKEY_ip}: $tmp"
+    OCF_RESKEY_broadcast=$tmp
+
+  elif [ "x$tmp" != "x${OCF_RESKEY_broadcast}" ]; then
+    ocf_log err "Invalid parameter value: broadcast [$OCF_RESKEY_broadcast] Calculated broadcast: [$tmp]"
+    return $OCF_ERR_ARGS
+  fi
+
+  return $OCF_SUCCESS
+}
+
+#
+# Print the usage line on stderr and return $1.
+#
+usage() {
+  echo $USAGE >&2
+  return $1
+}
+
+# Exactly one argument (the action) is expected; stop here instead of
+# going on to dispatch an action after reporting the usage error.
+if [ $# -ne 1 ]; then
+  usage $OCF_ERR_ARGS
+  exit $?
+fi
+
+# Normalize the value of lvs_support
+if [ "${OCF_RESKEY_lvs_support}" = "true" \
+  -o "${OCF_RESKEY_lvs_support}" = "on" \
+  -o "${OCF_RESKEY_lvs_support}" = "yes" \
+  -o "${OCF_RESKEY_lvs_support}" = "1" ]; then
+  OCF_RESKEY_lvs_support=1
+else
+  OCF_RESKEY_lvs_support=0
+fi
+
+# Note: We had a version out there for a while which used
+# netmask instead of cidr_netmask. So, don't remove this aliasing code!
+if
+  [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ]
+then
+  OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
+  export OCF_RESKEY_cidr_netmask
+fi
+
+case $1 in
+  meta-data)     meta_data;;
+  start)         ip_validate_all && ip_start;;
+  stop)          ip_stop;;
+  status)        ip_status;;
+  monitor)       ip_monitor;;
+  validate-all)  ip_validate_all;;
+  usage)         usage $OCF_SUCCESS;;
+  *)             usage $OCF_ERR_UNIMPLEMENTED;;
+esac
+
+exit $?
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
new file mode 100755
index 0000000..97a7431
--- /dev/null
+++ b/heartbeat/IPaddr2
@@ -0,0 +1,1357 @@
+#!/bin/sh
+#
+# $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $
+#
+# OCF Resource Agent compliant IPaddr2 script.
+#
+# Based on work by Tuomo Soini, ported to the OCF RA API by Lars
+# Marowsky-Brée. Implements Cluster Alias IP functionality too.
+#
+# Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff
+#
+#
+# Copyright (c) 2003 Tuomo Soini
+# Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + + +# TODO: +# - There ought to be an ocf_run_cmd function which does all logging, +# timeout handling etc for us +# - Make this the standard IP address agent on Linux; the other +# platforms simply should ignore the additional parameters OR can use +# the legacy heartbeat resource script... +# - Check LVS <-> clusterip incompatibilities. +# +# OCF parameters are as below +# OCF_RESKEY_ip +# OCF_RESKEY_broadcast +# OCF_RESKEY_nic +# OCF_RESKEY_cidr_netmask +# OCF_RESKEY_iflabel +# OCF_RESKEY_mac +# OCF_RESKEY_clusterip_hash +# OCF_RESKEY_arp_interval +# OCF_RESKEY_arp_count +# OCF_RESKEY_arp_bg +# OCF_RESKEY_preferred_lft +# +# OCF_RESKEY_CRM_meta_clone +# OCF_RESKEY_CRM_meta_clone_max + + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +. 
${OCF_FUNCTIONS_DIR}/findif.sh + +# Defaults +OCF_RESKEY_ip_default="" +OCF_RESKEY_cidr_netmask_default="" +OCF_RESKEY_broadcast_default="" +OCF_RESKEY_iflabel_default="" +OCF_RESKEY_cidr_netmask_default="" +OCF_RESKEY_lvs_support_default=false +OCF_RESKEY_lvs_ipv6_addrlabel_default=false +OCF_RESKEY_lvs_ipv6_addrlabel_value_default=99 +OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" +OCF_RESKEY_mac_default="" +OCF_RESKEY_unique_clone_address_default=false +OCF_RESKEY_arp_interval_default=200 +OCF_RESKEY_arp_count_default=5 +OCF_RESKEY_arp_count_refresh_default=0 +OCF_RESKEY_arp_bg_default="" +OCF_RESKEY_arp_sender_default="" +OCF_RESKEY_send_arp_opts_default="" +OCF_RESKEY_flush_routes_default="false" +OCF_RESKEY_run_arping_default=false +OCF_RESKEY_nodad_default=false +OCF_RESKEY_noprefixroute_default="false" +OCF_RESKEY_preferred_lft_default="forever" +OCF_RESKEY_network_namespace_default="" + +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} +: ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}} +: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}} +: ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} +: ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} +: ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}} +: ${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}} +: ${OCF_RESKEY_mac=${OCF_RESKEY_mac_default}} +: ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} +: ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} +: ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} +: ${OCF_RESKEY_arp_count_refresh=${OCF_RESKEY_arp_count_refresh_default}} +: ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} +: ${OCF_RESKEY_arp_sender=${OCF_RESKEY_arp_sender_default}} +: ${OCF_RESKEY_send_arp_opts=${OCF_RESKEY_send_arp_opts_default}} +: 
${OCF_RESKEY_flush_routes=${OCF_RESKEY_flush_routes_default}} +: ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}} +: ${OCF_RESKEY_nodad=${OCF_RESKEY_nodad_default}} +: ${OCF_RESKEY_noprefixroute=${OCF_RESKEY_noprefixroute_default}} +: ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}} +: ${OCF_RESKEY_network_namespace=${OCF_RESKEY_network_namespace_default}} + +####################################################################### + +SENDARP=$HA_BIN/send_arp +SENDUA=$HA_BIN/send_ua +FINDIF=findif +VLDIR=$HA_RSCTMP +SENDARPPIDDIR=$HA_RSCTMP +CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} + +IPADDR2_CIP_IPTABLES=$IPTABLES + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This Linux-specific resource manages IP alias IP addresses. +It can add an IP alias, or remove one. +In addition, it can implement Cluster Alias IP functionality +if invoked as a clone resource. + +If used as a clone, "shared address with a trivial, stateless +(autonomous) load-balancing/mutual exclusion on ingress" mode gets +applied (as opposed to "assume resource uniqueness" mode otherwise). +For that, Linux firewall (kernel and userspace) is assumed, and since +recent distributions are ambivalent in plain "iptables" command to +particular back-end resolution, "iptables-legacy" (when present) gets +prioritized so as to avoid incompatibilities (note that respective +ipt_CLUSTERIP firewall extension in use here is, at the same time, +marked deprecated, yet said "legacy" layer can make it workable, +literally, to this day) with "netfilter" one (as in "iptables-nft"). +In that case, you should explicitly set clone-node-max >= 2, +and/or clone-max < number of nodes. In case of node failure, +clone instances need to be re-allocated on surviving nodes. +This would not be possible if there is already an instance +on those nodes, and clone-node-max=1 (which is the default). 
+ +When the specified IP address gets assigned to a respective interface, the +resource agent sends unsolicited ARP (Address Resolution Protocol, IPv4) or NA +(Neighbor Advertisement, IPv6) packets to inform neighboring machines about the +change. This functionality is controlled for both IPv4 and IPv6 by shared +'arp_*' parameters. + + +Manages virtual IPv4 and IPv6 addresses (Linux specific version) + + + + +The IPv4 (dotted quad notation) or IPv6 address (colon hexadecimal notation) +example IPv4 "192.168.1.1". +example IPv6 "2001:db8:DC28:0:0:FC57:D4C8:1FFF". + +IPv4 or IPv6 address + + + + +The base network interface on which the IP address will be brought +online. +If left empty, the script will try and determine this from the +routing table. + +Do NOT specify an alias interface in the form eth0:1 or anything here; +rather, specify the base interface only. +If you want a label, see the iflabel parameter. + +Prerequisite: + +There must be at least one static IP address, which is not managed by +the cluster, assigned to the network interface. +If you can not assign any static IP address on the interface, +modify this kernel parameter: + +sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device) + +Network interface + + + + + +The netmask for the interface in CIDR format +(e.g., 24 and not 255.255.255.0) + +If unspecified, the script will also try to determine this from the +routing table. + +CIDR netmask + + + + + +Broadcast address associated with the IP. It is possible to use the +special symbols '+' and '-' instead of the broadcast address. In this +case, the broadcast address is derived by setting/resetting the host +bits of the interface prefix. + +Broadcast address + + + + + +You can specify an additional label for your IP address here. +This label is appended to your interface name. + +The kernel allows alphanumeric labels up to a maximum length of 15 +characters including the interface name and colon (e.g. 
eth0:foobar1234) + +A label can be specified in nic parameter but it is deprecated. +If a label is specified in nic name, this parameter has no effect. + +Interface label + + + + + +Enable support for LVS Direct Routing configurations. In case a IP +address is stopped, only move it to the loopback device to allow the +local node to continue to service requests, but no longer advertise it +on the network. + +Notes for IPv6: +It is not necessary to enable this option on IPv6. +Instead, enable 'lvs_ipv6_addrlabel' option for LVS-DR usage on IPv6. + +Enable support for LVS DR + + + + + +Enable adding IPv6 address label so IPv6 traffic originating from +the address's interface does not use this address as the source. +This is necessary for LVS-DR health checks to realservers to work. Without it, +the most recently added IPv6 address (probably the address added by IPaddr2) +will be used as the source address for IPv6 traffic from that interface and +since that address exists on loopback on the realservers, the realserver +response to pings/connections will never leave its loopback. +See RFC3484 for the detail of the source address selection. + +See also 'lvs_ipv6_addrlabel_value' parameter. + +Enable adding IPv6 address label. + + + + + +Specify IPv6 address label value used when 'lvs_ipv6_addrlabel' is enabled. +The value should be an unused label in the policy table +which is shown by 'ip addrlabel list' command. +You would rarely need to change this parameter. + +IPv6 address label value. + + + + + +Set the interface MAC address explicitly. Currently only used in case of +the Cluster IP Alias. Leave empty to chose automatically. + + +Cluster IP MAC address + + + + + +Specify the hashing algorithm used for the Cluster IP functionality. 
+ + +Cluster IP hashing function + + + + + +If true, add the clone ID to the supplied value of IP to create +a unique address to manage + +Create a unique address for cloned instances + + + + + +Specify the interval between unsolicited ARP (IPv4) or NA (IPv6) packets in +milliseconds. + +This parameter is deprecated and used for the backward compatibility only. +It is effective only for the send_arp binary which is built with libnet, +and send_ua for IPv6. It has no effect for other arp_sender. + +ARP/NA packet interval in ms (deprecated) + + + + + +Number of unsolicited ARP (IPv4) or NA (IPv6) packets to send at resource +initialization. + +ARP/NA packet count sent during initialization + + + + + +For IPv4, number of unsolicited ARP packets to send during resource monitoring. +Doing so helps mitigate issues of stuck ARP caches resulting from split-brain +situations. + +ARP packet count sent during monitoring + + + + + +Whether or not to send the ARP (IPv4) or NA (IPv6) packets in the background. +The default is true for IPv4 and false for IPv6. + +ARP/NA from background + + + + + +For IPv4, the program to send ARP packets with on start. Available options are: + - send_arp: default + - ipoibarping: default for infiniband interfaces if ipoibarping is available + - iputils_arping: use arping in iputils package + - libnet_arping: use another variant of arping based on libnet + +ARP sender + + + + + +For IPv4, extra options to pass to the arp_sender program. +Available options are vary depending on which arp_sender is used. + +A typical use case is specifying '-A' for iputils_arping to use +ARP REPLY instead of ARP REQUEST as Gratuitous ARPs. + +Options for ARP sender + + + + + +Flush the routing table on stop. This is for +applications which use the cluster IP address +and which run on the same physical host that the +IP address lives on. 
The Linux kernel may force that
+application to take a shortcut to the local loopback
+interface, instead of the interface the address
+is really bound to. Under those circumstances, an
+application may, somewhat unexpectedly, continue
+to use connections for some time even after the
+IP address is deconfigured. Set this parameter in
+order to immediately disable said shortcut when the
+IP address goes away.
+
+Flush kernel routing table on stop
+
+
+
+
+
+For IPv4, whether or not to run arping for collision detection check.
+
+Run arping for IPv4 collision detection check
+
+
+
+
+
+For IPv6, do not perform Duplicate Address Detection when adding the address.
+
+Use nodad flag
+
+
+
+
+
+Use noprefixroute flag (see 'man ip-address').
+
+Use noprefixroute flag
+
+
+
+
+
+For IPv6, set the preferred lifetime of the IP address.
+This can be used to ensure that the created IP address will not
+be used as a source address for routing.
+Expects a value as specified in section 5.5.4 of RFC 4862.
+
+IPv6 preferred lifetime
+
+
+
+
+
+Specifies the network namespace to operate within.
+The namespace must already exist, and the interface to be used must be within
+the namespace.
+
+Network namespace to use
+
+
+
+
+
+
+
+
+
+
+
+
+
+END
+
+  exit $OCF_SUCCESS
+}
+
+#
+# ip_init: check the environment and the parameters and derive the
+# globals the rest of the agent works with. Every failure ends the
+# agent with "exit"; the function only returns on success.
+#
+# Reads: OCF_RESKEY_* parameters, $__OCF_ACTION,
+#        OCF_RESKEY_CRM_meta_clone and OCF_RESKEY_CRM_meta_clone_max
+# Sets:  BASEIP BRDCAST NIC NETMASK IFLABEL IF_MAC IP_INC_GLOBAL
+#        IP_INC_NO FAMILY SENDARPPIDFILE and, when more than one clone
+#        shares the address, IP_CIP IP_CIP_HASH IP_CIP_FILE.
+#        May also rewrite OCF_RESKEY_ip, OCF_RESKEY_nic,
+#        OCF_RESKEY_cidr_netmask and OCF_RESKEY_arp_bg.
+#
+ip_init() {
+  local rc
+
+  if [ X`uname -s` != "XLinux" ]; then
+    ocf_exit_reason "IPaddr2 only supported Linux."
+    exit $OCF_ERR_INSTALLED
+  fi
+
+  # the address may be missing for "stop" only
+  if [ X"$OCF_RESKEY_ip" = "X" ] && [ "$__OCF_ACTION" != "stop" ]; then
+    ocf_exit_reason "IP address (the ip parameter) is mandatory"
+    exit $OCF_ERR_CONFIGURED
+  fi
+
+  # root is required for start and stop only
+  if
+    case $__OCF_ACTION in
+      start|stop)  ocf_is_root;;
+      *)           true;;
+    esac
+  then
+    : YAY!
+  else
+    ocf_exit_reason "You must be root for $__OCF_ACTION operation."
+    exit $OCF_ERR_PERM
+  fi
+
+  BASEIP="$OCF_RESKEY_ip"
+  BRDCAST="$OCF_RESKEY_broadcast"
+  NIC="$OCF_RESKEY_nic"
+  # Note: We had a version out there for a while which used
+  # netmask instead of cidr_netmask. Don't remove this aliasing code!
+  if
+    [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ]
+  then
+    OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
+    export OCF_RESKEY_cidr_netmask
+  fi
+  NETMASK="$OCF_RESKEY_cidr_netmask"
+  IFLABEL="$OCF_RESKEY_iflabel"
+  IF_MAC="$OCF_RESKEY_mac"
+
+  # number of clone instances (1 when not cloned) and the 1-based
+  # number of this instance
+  IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1}
+  IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1`
+
+  # NOTE(review): this numeric test runs before IP_INC_GLOBAL has been
+  # checked with ocf_is_decimal below.
+  if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then
+    ocf_exit_reason "LVS and load sharing do not go together well"
+    exit $OCF_ERR_CONFIGURED
+  fi
+
+  if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then
+    :
+  else
+    ocf_exit_reason "Invalid meta-attribute clone_max [$IP_INC_GLOBAL], should be positive integer"
+    exit $OCF_ERR_CONFIGURED
+  fi
+
+  # an address containing ":" is treated as IPv6, anything else as IPv4
+  echo $OCF_RESKEY_ip | grep -qs ":"
+  if [ $? -ne 0 ];then
+    FAMILY=inet
+    if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
+      ocf_exit_reason "IPv4 does not support lvs_ipv6_addrlabel"
+      exit $OCF_ERR_CONFIGURED
+    fi
+    # arp_bg left empty: background sending is the IPv4 default
+    if [ -z "$OCF_RESKEY_arp_bg" ]; then
+      OCF_RESKEY_arp_bg=true
+    fi
+  else
+    FAMILY=inet6
+    # address sanitization defined in RFC5952
+    # (the address is replaced by the form "ip route get" prints, if any)
+    SANITIZED_IP=$($IP2UTIL route get $OCF_RESKEY_ip 2> /dev/null | awk '$1~/:/ {print $1} $2~/:/ {print $2}')
+    if [ -n "$SANITIZED_IP" ]; then
+      OCF_RESKEY_ip="$SANITIZED_IP"
+    fi
+
+    if ocf_is_true $OCF_RESKEY_lvs_support ;then
+      ocf_exit_reason "The IPv6 does not support lvs_support"
+      exit $OCF_ERR_CONFIGURED
+    fi
+    if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
+      # NOTE(review): the test accepts 0 although the message asks for
+      # a positive integer.
+      if ocf_is_decimal "$OCF_RESKEY_lvs_ipv6_addrlabel_value" && [ $OCF_RESKEY_lvs_ipv6_addrlabel_value -ge 0 ]; then
+        :
+      else
+        ocf_exit_reason "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer"
+        exit $OCF_ERR_CONFIGURED
+      fi
+    fi
+    # arp_bg left empty: foreground sending is the IPv6 default
+    if [ -z "$OCF_RESKEY_arp_bg" ]; then
+      OCF_RESKEY_arp_bg=false
+    fi
+  fi
+
+  # support nic:iflabel format in nic parameter
+  case $NIC in
+    *:*)
+      IFLABEL=`echo $NIC | sed 's/[^:]*://'`
+      NIC=`echo $NIC | sed 's/:.*//'`
+      # only the base name should be passed to findif
+      OCF_RESKEY_nic=$NIC
+      ;;
+  esac
+
+  # $FINDIF takes its parameters from the environment
+  #
+  NICINFO=`$FINDIF`
+  rc=$?
+  if
+    [ $rc -eq 0 ]
+  then
+    # drop the "netmask " and "broadcast " keywords, leaving three
+    # fields: interface, netmask, broadcast
+    NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //'`
+    NIC=`echo "$NICINFO" | cut -d" " -f1`
+    NETMASK=`echo "$NICINFO" | cut -d" " -f2`
+    BRDCAST=`echo "$NICINFO" | cut -d" " -f3`
+  else
+    # findif couldn't find the interface
+    if ocf_is_probe; then
+      ocf_log info "[$FINDIF] failed"
+      exit $OCF_NOT_RUNNING
+    elif [ "$__OCF_ACTION" = stop ]; then
+      ocf_log warn "[$FINDIF] failed"
+      exit $OCF_SUCCESS
+    else
+      ocf_exit_reason "[$FINDIF] failed"
+      exit $rc
+    fi
+  fi
+
+  SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
+
+  if [ -n "$IFLABEL" ]; then
+    # the full label is "<nic>:<label>" and is limited to 15 characters
+    IFLABEL=${NIC}:${IFLABEL}
+    if [ ${#IFLABEL} -gt 15 ]; then
+      ocf_exit_reason "Interface label [$IFLABEL] exceeds maximum character limit of 15"
+      exit $OCF_ERR_CONFIGURED
+    fi
+  fi
+
+  # several clone instances sharing one address: cluster IP mode
+  if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then
+    IP_CIP="yes"
+    IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}"
+    if [ -z "$IF_MAC" ]; then
+      # Choose a MAC
+      # 1. Concatenate some input together
+      # 2. This doesn't need to be a cryptographically
+      #    secure hash.
+      # 3. Drop everything after the first 6 octets (12 chars)
+      # 4. Delimit the octets with ':'
+      # 5. Make sure the first octet is odd,
+      #    so the result is a multicast MAC
+      IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \
+        md5sum | \
+        sed -e 's#\(............\).*#\1#' \
+          -e 's#..#&:#g; s#:$##' \
+          -e 's#^\(.\)[02468aAcCeE]#\11#'`
+    fi
+    IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip"
+  fi
+}
+
+#
+# Find out which interfaces serve the given IP address and netmask.
+# The arguments are an IP address and a netmask.
+# Its output are interface names divided by spaces (e.g., "eth0 eth1").
+# +find_interface() { + local ipaddr="$1" + local netmask="$2" + + # + # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces + # + local iface="`$IP2UTIL -o -f $FAMILY addr show \ + | grep "\ $ipaddr/$netmask" \ + | cut -d ' ' -f2 \ + | grep -v '^ipsec[0-9][0-9]*$'`" + + echo "$iface" + return 0 +} + +# +# Delete an interface +# +delete_interface () { + ipaddr="$1" + iface="$2" + netmask="$3" + + CMD="$IP2UTIL -f $FAMILY addr delete $ipaddr/$netmask dev $iface" + + ocf_run $CMD || return $OCF_ERR_GENERIC + + if ocf_is_true $OCF_RESKEY_flush_routes; then + ocf_run $IP2UTIL route flush cache + fi + + if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then + delete_ipv6_addrlabel $ipaddr + fi + + return $OCF_SUCCESS +} + +# +# Add an interface +# +add_interface () { + local cmd msg extra_opts ipaddr netmask broadcast iface label + + ipaddr="$1" + netmask="$2" + broadcast="$3" + iface="$4" + label="$5" + + if [ "$FAMILY" = "inet" ] && ocf_is_true $OCF_RESKEY_run_arping && + check_binary arping; then + arping -q -c 2 -w 3 -D -I $iface $ipaddr + if [ $? = 1 ]; then + ocf_log err "IPv4 address collision $ipaddr [DAD]" + return $OCF_ERR_GENERIC + fi + fi + + if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then + add_ipv6_addrlabel $ipaddr + fi + + cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask dev $iface" + msg="Adding $FAMILY address $ipaddr/$netmask to device $iface" + if [ "$broadcast" != "none" ]; then + cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask brd $broadcast dev $iface" + msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface" + fi + + extra_opts="" + if [ "$FAMILY" = "inet6" ] && ocf_is_true "${OCF_RESKEY_nodad}"; then + extra_opts="$extra_opts nodad" + fi + + if ocf_is_true "${OCF_RESKEY_noprefixroute}"; then + extra_opts="$extra_opts noprefixroute" + fi + + if [ ! 
-z "$label" ]; then + extra_opts="$extra_opts label $label" + fi + if [ "$FAMILY" = "inet6" ] ;then + extra_opts="$extra_opts preferred_lft $OCF_RESKEY_preferred_lft" + fi + if [ -n "$extra_opts" ]; then + cmd="$cmd$extra_opts" + msg="$msg (with$extra_opts)" + fi + + ocf_log info "$msg" + ocf_run $cmd || return $OCF_ERR_GENERIC + + msg="Bringing device $iface up" + cmd="$IP2UTIL link set $iface up" + ocf_log info "$msg" + ocf_run $cmd || return $OCF_ERR_GENERIC + + return $OCF_SUCCESS +} + +# +# Delete a route +# +delete_route () { + prefix="$1" + iface="$2" + + CMD="$IP2UTIL route delete $prefix dev $iface" + + ocf_log info "$CMD" + $CMD + + return $? +} + +# On Linux systems the (hidden) loopback interface may +# conflict with the requested IP address. If so, this +# unoriginal code will remove the offending loopback address +# and save it in VLDIR so it can be added back in later +# when the IPaddr is released. +# +# TODO: This is very ugly and should be controlled by an additional +# instance parameter. Or even: multi-state, with the IP only being +# "active" on the master!? +# +remove_conflicting_loopback() { + ipaddr="$1" + netmask="$2" + broadcast="$3" + ifname="$4" + + ocf_log info "Removing conflicting loopback $ifname." + if + echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" + then + : Saved loopback information in $VLDIR/$ipaddr + else + ocf_log err "Could not save conflicting loopback $ifname." \ + "it will not be restored." + fi + delete_interface "$ipaddr" "$ifname" "$netmask" + # Forcibly remove the route (if it exists) to the loopback. + delete_route "$ipaddr" "$ifname" +} + +# +# On Linux systems the (hidden) loopback interface may +# need to be restored if it has been taken down previously +# by remove_conflicting_loopback() +# +restore_loopback() { + ipaddr="$1" + + if [ -s "$VLDIR/$ipaddr" ]; then + ifinfo=`cat "$VLDIR/$ipaddr"` + ocf_log info "Restoring loopback IP Address " \ + "$ifinfo." 
+ add_interface $ifinfo + rm -f "$VLDIR/$ipaddr" + fi +} + +add_ipv6_addrlabel() { + local cmd ipaddr value + ipaddr="$1" + value="$OCF_RESKEY_lvs_ipv6_addrlabel_value" + + cmd="$IP2UTIL addrlabel add prefix $ipaddr label $value" + ocf_log info "Adding IPv6 address label prefix $ipaddr label $value" + ocf_run $cmd || ocf_log warn "$cmd failed." +} + +delete_ipv6_addrlabel() { + local cmd ipaddr value + ipaddr="$1" + value="$OCF_RESKEY_lvs_ipv6_addrlabel_value" + + cmd="$IP2UTIL addrlabel del prefix $ipaddr label $value" + ocf_run $cmd # an error can be ignored +} + +is_infiniband() { + $IP2UTIL link show $NIC | grep link/infiniband >/dev/null +} + +log_arp_sender() { + local cmdline + local output + local rc + cmdline="$@" + + output=$($cmdline 2>&1) + rc=$? + if [ $rc -ne 0 ] && \ + [ "$ARP_SENDER" != "libnet_arping" ] ; then + # libnet_arping always return an error as no answers + ocf_log err "Could not send gratuitous arps: rc=$rc" + fi + ocf_log $LOGLEVEL "$output" +} + +# wrapper function to manage PID file to run arping in background +run_with_pidfile() { + local cmdline + local pid + local rc + + cmdline="$@" + + $cmdline & + pid=$! + echo "$pid" > $SENDARPPIDFILE + wait $pid + rc=$? 
+ rm -f $SENDARPPIDFILE + return $rc +} + +build_arp_sender_cmd() { + case "$ARP_SENDER" in + send_arp) + if [ "x$IP_CIP" = "xyes" ] ; then + if [ x = "x$IF_MAC" ] ; then + MY_MAC=auto + else + # send_arp.linux should return without doing anything in this case + MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'` + fi + else + MY_MAC=auto + fi + + ARGS="$OCF_RESKEY_send_arp_opts -i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" + ARP_SENDER_CMD="$SENDARP $ARGS" + ;; + iputils_arping) + ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -I $NIC $OCF_RESKEY_ip" + ARP_SENDER_CMD="run_with_pidfile arping $ARGS" + ;; + libnet_arping) + ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -i $NIC -S $OCF_RESKEY_ip $OCF_RESKEY_ip" + ARP_SENDER_CMD="run_with_pidfile arping $ARGS" + ;; + ipoibarping) + ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" + ARP_SENDER_CMD="ipoibarping $ARGS" + ;; + *) + # should not occur + ocf_exit_reason "unrecognized arp_sender value: $ARP_SENDER" + exit $OCF_ERR_GENERIC + ;; + esac +} + +# +# Send Unsolicited ARPs to update neighbor's ARP cache +# +run_arp_sender() { + if [ "x$1" = "xrefresh" ] ; then + ARP_COUNT=$OCF_RESKEY_arp_count_refresh + LOGLEVEL=debug + else + ARP_COUNT=$OCF_RESKEY_arp_count + LOGLEVEL=info + fi + if [ $ARP_COUNT -eq 0 ] ; then + return + fi + + # do not need to send Gratuitous ARPs in the Cluster IP configuration + # except send_arp.libnet binary to retain the old behavior + if [ "x$IP_CIP" = "xyes" ] && \ + [ "x$ARP_SENDER" != "xsend_arp" ] ; then + ocf_log info "Gratuitous ARPs are not sent in the Cluster IP configuration" + return + fi + + # prepare arguments for each arp sender program + # $ARP_SENDER_CMD should be set + build_arp_sender_cmd + + ocf_log $LOGLEVEL "$ARP_SENDER_CMD" + + if ocf_is_true $OCF_RESKEY_arp_bg; then + log_arp_sender $ARP_SENDER_CMD & + else + log_arp_sender $ARP_SENDER_CMD + fi +} + +log_send_ua() { + local cmdline + local output + local rc + + 
cmdline="$@" + output=$($cmdline 2>&1) + rc=$? + if [ $rc -ne 0 ] ; then + ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements: rc=$rc" + fi + ocf_log info "$output" + return $rc +} + +# +# Run send_ua to note send ICMPv6 Unsolicited Neighbor Advertisements. +# +run_send_ua() { + local i + + # Duplicate Address Detection [DAD] + # Kernel will flag the IP as 'tentative' until it ensured that + # there is no duplicates. + # If there is, it will flag it as 'dadfailed' + for i in $(seq 1 10); do + ipstatus=$($IP2UTIL -o -f $FAMILY addr show dev $NIC to $OCF_RESKEY_ip/$NETMASK) + case "$ipstatus" in + *dadfailed*) + ocf_log err "IPv6 address collision $OCF_RESKEY_ip [DAD]" + $IP2UTIL -f $FAMILY addr del dev $NIC $OCF_RESKEY_ip/$NETMASK + if [ $? -ne 0 ]; then + ocf_log err "Could not delete IPv6 address" + fi + return $OCF_ERR_GENERIC + ;; + *tentative*) + if [ $i -eq 10 ]; then + ocf_log warn "IPv6 address : DAD is still in tentative" + fi + ;; + *) + break + ;; + esac + sleep 1 + done + # Now the address should be usable + + ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC" + ocf_log info "$SENDUA $ARGS" + if ocf_is_true $OCF_RESKEY_arp_bg; then + log_send_ua $SENDUA $ARGS & + else + log_send_ua $SENDUA $ARGS + fi +} + +# Do we already serve this IP address on the given $NIC? 
+# +# returns: +# ok = served (for CIP: + hash bucket) +# partial = served and no hash bucket (CIP only) +# partial2 = served and no CIP iptables rule +# no = nothing +# +# The result is written to stdout as one of the words above; the function +# itself always returns 0, so callers have to inspect the captured output. +# Reads $NIC, $NETMASK, $OCF_RESKEY_ip, $IP_CIP, $IP_CIP_FILE and $IP_INC_NO. +# +ip_served() { + if [ -z "$NIC" ]; then # no nic found or specified + echo "no" + return 0 + fi + + # interfaces that currently carry $OCF_RESKEY_ip/$NETMASK + cur_nic="`find_interface $OCF_RESKEY_ip $NETMASK`" + + if [ -z "$cur_nic" ]; then + echo "no" + return 0 + fi + + if [ -z "$IP_CIP" ]; then + for i in $cur_nic; do + # only mark as served when on the same interfaces as $NIC + [ "$i" = "$NIC" ] || continue + echo "ok" + return 0 + done + # There used to be logic here to pretend "not served", + # if ${OCF_RESKEY_lvs_support} was enabled, and the IP was + # found active on "lo*" only. With lvs_support on, you should + # have NIC != lo, so thats already filtered + # by the continue above. + + echo "no" + return 0 + fi + + # Special handling for the CIP: + # a missing $IP_CIP_FILE is reported as "partial2" + if [ ! -e "$IP_CIP_FILE" ]; then + echo "partial2" + return 0 + fi + # Look for our node number as a whole comma-separated field. + # grep -E is used instead of the obsolescent egrep, which newer + # GNU grep releases warn about on every call. + if grep -Eq "(^|,)${IP_INC_NO}(,|$)" "$IP_CIP_FILE" ; then + echo "ok" + return 0 + else + echo "partial" + return 0 + fi + + # not reached: every path above returns + exit $OCF_ERR_GENERIC +} + +####################################################################### + +ip_usage() { + cat <$IP_CIP_FILE + fi + + if [ "$ip_status" = "no" ]; then + if ocf_is_true ${OCF_RESKEY_lvs_support}; then + for i in `find_interface $OCF_RESKEY_ip 32`; do + case $i in + lo*) + remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo + ;; + esac + done + fi + + add_interface $OCF_RESKEY_ip $NETMASK ${BRDCAST:-none} $NIC $IFLABEL + rc=$? + + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Failed to add $OCF_RESKEY_ip" + exit $rc + fi + fi + + case $NIC in + lo*) + : no need to run send_arp on loopback + ;; + *) + if [ $FAMILY = "inet" ];then + run_arp_sender + else + if [ -x $SENDUA ]; then + run_send_ua + if [ $? -ne 0 ]; then + ocf_exit_reason "run_send_ua failed."
+ exit $OCF_ERR_GENERIC + fi + fi + fi + ;; + esac + exit $OCF_SUCCESS +} + +# +# Stop action: take the address off $NIC. +# +# Kills a send_arp still running for this address (via $SENDARPPIDFILE), +# then acts on what ip_served reports. For a cluster IP the local node +# number is written to $IP_CIP_FILE first; the CLUSTERIP iptables rules and +# the address itself are only removed once that file reads back empty. +# This function never returns: it exits with $OCF_SUCCESS, or with +# $OCF_ERR_GENERIC when delete_interface fails. +# +ip_stop() { + local ip_del_if="yes" + if [ -n "$IP_CIP" ]; then + # Cluster IPs need special processing when the last bucket + # is removed from the node... take a lock to make sure only one + # process executes that code + ocf_take_lock $CIP_lockfile + ocf_release_lock_on_exit $CIP_lockfile + fi + + if [ -f "$SENDARPPIDFILE" ] ; then + kill `cat "$SENDARPPIDFILE"` + if [ $? -ne 0 ]; then + ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip" + else + ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip" + fi + rm -f "$SENDARPPIDFILE" + fi + local ip_status=`ip_served` + ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP" + + if [ $ip_status = "no" ]; then + : Requested interface not in use + exit $OCF_SUCCESS + fi + + # "partial2" skips the CIP bookkeeping and goes straight to + # deleting the address below + if [ -n "$IP_CIP" ] && [ $ip_status != "partial2" ]; then + # "partial": our node number is not listed in $IP_CIP_FILE, + # so there is nothing of ours to release + if [ $ip_status = "partial" ]; then + exit $OCF_SUCCESS + fi + echo "-$IP_INC_NO" >$IP_CIP_FILE + if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then + ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH + i=1 + while [ $i -le $IP_INC_GLOBAL ]; do + ocf_log info $i + $IPADDR2_CIP_IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ + --new \ + --clustermac $IF_MAC \ + --total-nodes $IP_INC_GLOBAL \ + --local-node $i \ + --hashmode $IP_CIP_HASH + i=`expr $i + 1` + done + else + # the file still lists node numbers: keep the address + ip_del_if="no" + fi + fi + + if [ "$ip_del_if" = "yes" ]; then + delete_interface $OCF_RESKEY_ip $NIC $NETMASK + if [ $? -ne 0 ]; then + ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip}] from interface [ $NIC ]" + exit $OCF_ERR_GENERIC + fi + + if ocf_is_true ${OCF_RESKEY_lvs_support}; then + restore_loopback "$OCF_RESKEY_ip" + fi + fi + + exit $OCF_SUCCESS +} + +ip_monitor() { + # TODO: Implement more elaborate monitoring like checking for + # interface health maybe via a daemon like FailSafe etc...
+ + local ip_status=`ip_served` + case $ip_status in + ok) + run_arp_sender refresh + return $OCF_SUCCESS + ;; + partial|no|partial2) + exit $OCF_NOT_RUNNING + ;; + *) + # Errors on this interface? + return $OCF_ERR_GENERIC + ;; + esac +} + +# make sure that we have something to send ARPs with +set_send_arp_program() { + ARP_SENDER=send_arp + if [ -n "$OCF_RESKEY_arp_sender" ]; then + case "$OCF_RESKEY_arp_sender" in + send_arp) + check_binary $SENDARP + ;; + iputils_arping) + check_binary arping + ;; + libnet_arping) + check_binary arping + ;; + ipoibarping) + check_binary ipoibarping + ;; + *) + ocf_exit_reason "unrecognized arp_sender value: $OCF_RESKEY_arp_sender" + exit $OCF_ERR_CONFIGURED + ;; + esac + ARP_SENDER="$OCF_RESKEY_arp_sender" + else + if is_infiniband; then + ARP_SENDER=ipoibarping + if ! have_binary ipoibarping; then + [ "$__OCF_ACTION" = start ] && + ocf_log warn "using send_arp for infiniband because ipoibarping is not available (set arp_sender to \"send_arp\" to suppress this message)" + check_binary $SENDARP + ARP_SENDER=send_arp + fi + fi + fi +} + +ip_validate() { + check_binary $IP2UTIL + IP_CIP= + + if [ -n "$OCF_RESKEY_network_namespace" ]; then + OCF_RESKEY_network_namespace= exec $IP2UTIL netns exec "$OCF_RESKEY_network_namespace" "$0" "$__OCF_ACTION" + fi + + ip_init + + set_send_arp_program + + if [ -n "$IP_CIP" ]; then + if have_binary "$IPTABLES_LEGACY"; then + IPADDR2_CIP_IPTABLES="$IPTABLES_LEGACY" + fi + check_binary "$IPADDR2_CIP_IPTABLES" + check_binary $MODPROBE + fi + +# $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init, +# do not bother here. + + if ocf_is_true "$OCF_RESKEY_unique_clone_address" && + ! 
ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then + ocf_exit_reason "unique_clone_address makes sense only with meta globally_unique set" + exit $OCF_ERR_CONFIGURED + fi + + if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then + : + else + ocf_exit_reason "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]" + exit $OCF_ERR_CONFIGURED + fi + + if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then + : + else + ocf_exit_reason "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]" + exit $OCF_ERR_CONFIGURED + fi + + if [ -z "$OCF_RESKEY_preferred_lft" ]; then + ocf_exit_reason "Empty value is invalid for OCF_RESKEY_preferred_lft" + exit $OCF_ERR_CONFIGURED + fi + + if [ -n "$IP_CIP" ]; then + + local valid=1 + + case $IP_CIP_HASH in + sourceip|sourceip-sourceport|sourceip-sourceport-destport) + ;; + *) + ocf_exit_reason "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]" + exit $OCF_ERR_CONFIGURED + ;; + esac + + if ocf_is_true ${OCF_RESKEY_lvs_support}; then + ocf_exit_reason "LVS and load sharing not advised to try" + exit $OCF_ERR_CONFIGURED + fi + + case $IF_MAC in + [0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]) + ;; + *) + valid=0 + ;; + esac + + if [ $valid -eq 0 ]; then + ocf_exit_reason "Invalid IF_MAC [$IF_MAC]" + exit $OCF_ERR_CONFIGURED + fi + + fi +} + +if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then + prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'` + suffix=`echo $OCF_RESKEY_ip | awk -F. 
'{print $4}'` + suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix` + OCF_RESKEY_ip="$prefix.$suffix" +fi + +case $__OCF_ACTION in +meta-data) meta_data + ;; +usage|help) ip_usage + exit $OCF_SUCCESS + ;; +esac + +ip_validate + +case $__OCF_ACTION in +start) ip_start + ;; +stop) ip_stop + ;; +status) ip_status=`ip_served` + if [ $ip_status = "ok" ]; then + echo "running" + exit $OCF_SUCCESS + else + echo "stopped" + exit $OCF_NOT_RUNNING + fi + ;; +monitor) ip_monitor + ;; +validate-all) ;; +*) ip_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +# vi:sw=4:ts=8: diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr new file mode 100755 index 0000000..c732ce8 --- /dev/null +++ b/heartbeat/IPsrcaddr @@ -0,0 +1,631 @@ +#!/bin/sh +# +# Description: IPsrcaddr - Preferred source(/dest) address modification +# +# Author: John Sutton +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# Copyright: SCL Internet +# +# Based on the IPaddr script. +# +# This script manages the preferred source address associated with +# packets which originate on the localhost and are routed through the +# matching route. By default, i.e. without the use of this script or +# similar, these packets will carry the IP of the primary i.e. the +# non-aliased interface. This can be a nuisance if you need to ensure +# that such packets carry the same IP irrespective of which host in +# a redundant cluster they actually originate from. +# +# It can add a preferred source address, or remove one. +# +# usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data} +# +# The "start" arg adds a preferred source address. +# +# Surprisingly, the "stop" arg removes it. :-) +# +# NOTES: +# +# 1) There must be one and not more than 1 matching route! Mainly because +# I can't see why you should have more than one. And if there is more +# than one, we would have to box clever to find out which one is to be +# modified, or we would have to pass its identity as an argument. 
+# +# 2) The script depends on Alexey Kuznetsov's ip utility from the +# iproute aka iproute2 package. +# +# 3) No checking is done to see if the passed in IP address can +# reasonably be associated with the interface on which the default +# route exists. So unless you want to deliberately spoof your source IP, +# check it! Normally, I would expect that your haresources looks +# something like: +# +# nodename ip1 ip2 ... ipN IPsrcaddr::ipX +# +# where ipX is one of the ip1 to ipN. +# +# OCF parameters are as below: +# OCF_RESKEY_ipaddress + +####################################################################### +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +. ${OCF_FUNCTIONS_DIR}/findif.sh + +# Defaults +OCF_RESKEY_ipaddress_default="" +OCF_RESKEY_cidr_netmask_default="" +OCF_RESKEY_destination_default="0.0.0.0/0" +OCF_RESKEY_proto_default="" +OCF_RESKEY_metric_default="" +OCF_RESKEY_table_default="" + +: ${OCF_RESKEY_ipaddress=${OCF_RESKEY_ipaddress_default}} +: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} +: ${OCF_RESKEY_destination=${OCF_RESKEY_destination_default}} +: ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}} +: ${OCF_RESKEY_metric=${OCF_RESKEY_metric_default}} +: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}} +####################################################################### + +[ -z "$OCF_RESKEY_proto" ] && PROTO="" || PROTO="proto $OCF_RESKEY_proto" +[ -z "$OCF_RESKEY_table" ] && TABLE="" || TABLE="table $OCF_RESKEY_table" + +USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; + + CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination" +CMDCHANGE="$IP2UTIL route change to " + +if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then + CMDSHOW="$CMDSHOW src $OCF_RESKEY_ipaddress" +fi + +if [ "$OCF_RESKEY_table" = "local" ]; then + TABLE="$TABLE local" +fi + +SYSTYPE="`uname -s`" + +usage() { + echo $USAGE >&2 +} + +meta_data() { + cat < + + 
+1.0 + + +Resource script for IPsrcaddr. It manages the preferred source address +modification. + +Note: DHCP should not be enabled for the interface serving the preferred +source address. Enabling DHCP may result in unexpected behavior, such as +the automatic addition of duplicate or conflicting routes. This may +cause the IPsrcaddr resource to fail, or it may produce undesired +behavior while the resource continues to run. + +Manages the preferred source address for outgoing IP packets + + + + +The IP address. + +IP address + + + + + +The netmask for the interface in CIDR format. (ie, 24), or in +dotted quad notation 255.255.255.0). + +Netmask + + + + + +The destination IP/subnet for the route (default: $OCF_RESKEY_destination_default) + +Destination IP/subnet + + + + + +Proto to match when finding network. E.g. "kernel". + +Proto + + + + + +Metric. Only needed if incorrect metric value is used. + +Metric + + + + + +Table to modify and use for interface lookup. E.g. "local". + +The table has to have a route matching the "destination" parameter. + +This can be used for policy based routing. See man ip-rule(8). + +Table + + + + + + + + + + + + + +END +} + +errorexit() { + ocf_exit_reason "$*" + exit $OCF_ERR_GENERIC +} + +# +# We can distinguish 3 cases: no preferred source address, a +# preferred source address exists which matches that specified, and one +# exists but doesn't match that specified. srca_read() returns 1,0,2 +# respectively. 
+# +# The output of route show is something along the lines of: +# +# default via X.X.X.X dev eth1 src Y.Y.Y.Y +# +# where the src clause "src Y.Y.Y.Y" may or may not be present + +WS="[[:blank:]]" +OCTET="[0-9]\{1,3\}" +IPADDR="\($OCTET\.\)\{3\}$OCTET" +SRCCLAUSE="src$WS$WS*\($IPADDR\)" +MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)" +METRICCLAUSE=".*\(metric$WS[^ ]\+\)" +PROTOCLAUSE=".*\(proto$WS[^ ]\+\).*" +FINDIF=findif + +# findif needs that to be set +export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress + +srca_read() { + # Capture matching route - doublequotes prevent word splitting... + ROUTE="`$CMDSHOW dev $INTERFACE 2> /dev/null`" || errorexit "command '$CMDSHOW' failed" + + # ... so we can make sure there is only 1 matching route + [ 1 -eq `echo "$ROUTE" | wc -l` ] || \ + errorexit "more than 1 matching route exists" + + # But there might still be no matching route + [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] && [ -z "$ROUTE" ] && \ + ! ocf_is_probe && [ "$__OCF_ACTION" != stop ] && errorexit "no matching route exists" + + # Sed out the source ip address if it exists + SRCIP=`echo $ROUTE | sed -n "s/$MATCHROUTE/\3/p"` + + # and what remains after stripping out the source ip address clause + ROUTE_WO_SRC=`echo $ROUTE | sed "s/$MATCHROUTE/\1\5/"` + + # using "src " only returns output if there's a match + if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then + [ -z "$ROUTE" ] && return 1 || return 0 + fi + + [ -z "$SRCIP" ] && return 1 + [ $SRCIP = $1 ] && return 0 + [ "$__OCF_ACTION" = "monitor" ] || [ "$__OCF_ACTION" = "status" ] && [ "${ROUTE%% *}" = "default" ] && return 1 + return 2 +} + +# +# Add (or change if it already exists) the preferred source address +# The exit code should conform to LSB exit codes. +# + +srca_start() { + srca_read $1 + + rc=$? 
+ if [ $rc = 0 ]; then + rc=$OCF_SUCCESS + ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)" + else + $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC || \ + errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC' failed" + + if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then + $CMDCHANGE $ROUTE_WO_SRC src $1 || \ + errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed" + fi + rc=$? + fi + + return $rc +} + +# +# Remove (if it exists) the preferred source address. +# If one exists but it's not the same as the one specified, that's +# an error. Maybe that's the wrong behaviour because if this fails +# then when IPaddr releases the associated interface (if there is one) +# your matching route will also get dropped ;-( +# The exit code should conform to LSB exit codes. +# + +srca_stop() { + srca_read $1 + rc=$? + + if [ $rc = 1 ]; then + # We do not have a preferred source address for now + ocf_log info "No preferred source address defined, nothing to stop" + exit $OCF_SUCCESS + fi + + [ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address" + + if [ -z "$TABLE" ] || [ "${TABLE#table }" = "main" ]; then + SCOPE="link" + else + SCOPE="host" + fi + + PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')" + OPTS="proto kernel scope $SCOPE src $PRIMARY_IP" + + $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC || \ + errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC' failed" + + if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then + $CMDCHANGE $ROUTE_WO_SRC src $PRIMARY_IP || \ + errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $PRIMARY_IP' failed" + fi + + return $? +} + +srca_status() { + srca_read $1 + + case $? 
in + 0) echo "OK" + return $OCF_SUCCESS;; + + 1) echo "No preferred source address defined" + return $OCF_NOT_RUNNING;; + + 2) echo "Preferred source address has incorrect value" + return $OCF_ERR_GENERIC;; + esac +} + +# A not reliable IP address checking function, which only picks up those _obvious_ violations... +# +# It accepts IPv4 address in dotted quad notation, for example "192.168.1.1" +# +# 100% confidence whenever it reports "negative", +# but may get false "positive" answer. +# +CheckIP() { + ip="$1" + case $ip in + *[!0-9.]*) #got invalid char + false;; + .*|*.) #begin or end by ".", which is invalid + false;; + *..*) #consecutive ".", which is invalid + false;; + *.*.*.*.*) #four decimal dots, which is too many + false;; + *.*.*.*) #exactly three decimal dots, candidate, evaluate each field + local IFS=. + set -- $ip + if + ( [ $1 -le 254 ] && [ $2 -le 254 ] && [ $3 -le 254 ] && [ $4 -le 254 ] ) + then + if [ $1 -eq 127 ]; then + ocf_exit_reason "IP address [$ip] is a loopback address, thus can not be preferred source address" + exit $OCF_ERR_CONFIGURED + fi + else + true + fi + ;; + *) #less than three decimal dots + false;; + esac + return $? # This return is unnecessary, this comment too :) +} + +# +# Find out which interface or alias serves the given IP address +# The argument is an IP address, and its output +# is an (aliased) interface name (e.g., "eth0" and "eth0:0"). +# +find_interface_solaris() { + + + $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | + while read ifname linkstuff + do + : ifname = $ifname + read inet addr junk + : inet = $inet addr = $addr + while + read line && [ "X$line" != "X" ] + do + : Nothing + done + + # This doesn't look right for a box with multiple NICs. + # It looks like it always selects the first interface on + # a machine. Yet, we appear to use the results for this case too... 
+ ifname=`echo "$ifname" | sed s'%:*$%%'` + + case $addr in + addr:$BASEIP) echo $ifname; return $OCF_SUCCESS;; + $BASEIP) echo $ifname; return $OCF_SUCCESS;; + esac + done + return $OCF_ERR_GENERIC +} + + +# +# Find out which interface or alias serves the given IP address. +# The address is read from $BASEIP (positional arguments are not used). +# Prints the matching interface name(s) and returns $OCF_SUCCESS, or prints +# nothing and returns $OCF_ERR_GENERIC when no interface matches. +# +find_interface_generic() { + + # Match the address as a fixed string that is followed by "/" or a + # blank, so that e.g. 10.0.0.1 does not also match 10.0.0.10/24 and + # the dots are not treated as regex wildcards. + local iface=`$IP2UTIL -o -f inet addr show \ + | grep -F -e " $BASEIP/" -e " $BASEIP " \ + | cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'` + if [ -z "$iface" ]; then + return $OCF_ERR_GENERIC + else + echo $iface + return $OCF_SUCCESS + fi +} + + +# +# Find out which interface or alias serves the given IP address +# The argument is an IP address, and its output +# is an (aliased) interface name (e.g., "eth0" and "eth0:0"). +# +find_interface() { + case "$SYSTYPE" in + SunOS) + IF=`find_interface_solaris $BASEIP` + ;; + *) + IF=`find_interface_generic $BASEIP` + ;; + esac + + echo $IF + return $OCF_SUCCESS; +} + + +ip_status() { + + BASEIP="$1" + case "$SYSTYPE" in + Darwin) + # Treat Darwin the same as the other BSD variants (matched as *BSD) + SYSTYPE="${SYSTYPE}BSD" + ;; + *) + ;; + esac + + + case "$SYSTYPE" in + *BSD) + $IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1 + if [ $? = 0 ]; then + return $OCF_SUCCESS + else + return $OCF_NOT_RUNNING + fi;; + + Linux|SunOS) + IF=`find_interface "$BASEIP"` + if [ -z "$IF" ]; then + return $OCF_NOT_RUNNING + fi + + case $IF in + lo*) + ocf_exit_reason "IP address [$BASEIP] is served by loopback, thus can not be preferred source address" + exit $OCF_ERR_CONFIGURED + ;; + *)return $OCF_SUCCESS;; + esac + ;; + + *) + if [ -z "$IF" ]; then + return $OCF_NOT_RUNNING + else + return $OCF_SUCCESS + fi;; + esac +} + + +srca_validate_all() { + + if [ -z "$OCF_RESKEY_ipaddress" ]; then + # usage + ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
+ return $OCF_ERR_CONFIGURED + fi + + if ! echo "$OCF_RESKEY_destination" | grep -q "/"; then + return $OCF_ERR_CONFIGURED + fi + + + if ! [ "x$SYSTYPE" = "xLinux" ]; then + # checks after this point are only relevant for linux. + return $OCF_SUCCESS + fi + + check_binary $AWK + case "$SYSTYPE" in + *BSD|SunOS) + check_binary $IFCONFIG + ;; + esac + +# The IP address should be in good shape + if CheckIP "$ipaddress"; then + : + else + ocf_exit_reason "Invalid IP address [$ipaddress]" + return $OCF_ERR_CONFIGURED + fi + + if ocf_is_probe; then + return $OCF_SUCCESS + fi + +# We should serve this IP address of course + if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then + if ip_status "$ipaddress"; then + : + else + ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address" + return $OCF_ERR_INSTALLED + fi + fi + return $OCF_SUCCESS +} + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +# These operations do not require the OCF instance parameters to be set +case $1 in + meta-data) meta_data + exit $OCF_SUCCESS + ;; + usage) usage + exit $OCF_SUCCESS + ;; + *) + ;; +esac + +ipaddress="$OCF_RESKEY_ipaddress" + +[ "$__OCF_ACTION" != "validate-all" ] && OCF_CHECK_LEVEL=10 +srca_validate_all +rc=$? +if [ $rc -ne $OCF_SUCCESS ]; then + case $1 in + # if we can't validate the configuration during a stop, that + # means the resources isn't configured correctly. There's no way + # to actually stop the resource in this situation because there's + # no way it could have even started. Return success here + # to indicate that the resource is not running, otherwise the + # stop action will fail causing the node to be fenced just because + # of a mis configuration. + stop) exit $OCF_SUCCESS;; + *) exit $rc;; + esac +fi + +findif_out=`$FINDIF` +rc=$? 
+[ $rc -ne 0 ] && { + ocf_exit_reason "[$FINDIF] failed" + exit $rc +} + +INTERFACE=`echo $findif_out | awk '{print $1}'` +LISTROUTE=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress` +[ -z "$PROTO" ] && PROTO=`echo $LISTROUTE | sed -n "s/$PROTOCLAUSE/\1/p"` +if [ -n "$OCF_RESKEY_metric" ]; then + METRIC="metric $OCF_RESKEY_metric" +elif [ -z "$TABLE" ] || [ "${TABLE#table }" = "main" ]; then + METRIC=`echo $LISTROUTE | sed -n "s/$METRICCLAUSE/\1/p"` +else + METRIC="" +fi +if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then + NETWORK=`echo $LISTROUTE | grep -m 1 -o '^[^ ]*'` + + if [ -z "$NETWORK" ]; then + err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO" + err_str="$err_str match $ipaddress' failed to find a matching route" + + if [ "$__OCF_ACTION" = "start" ]; then + ocf_exit_reason "$err_str" + exit $OCF_ERR_ARGS + elif ! ocf_is_probe; then + ocf_log warn "$err_str" + else + ocf_log debug "$err_str" + fi + fi +else + NETWORK="$OCF_RESKEY_destination" +fi + +case $1 in + start) srca_start $ipaddress + ;; + stop) srca_stop $ipaddress + ;; + status) srca_status $ipaddress + ;; + monitor) srca_status $ipaddress + ;; + validate-all) srca_validate_all + ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $? + +# +# Version 0.3 2002/11/04 17:00:00 John Sutton +# Name changed from IPsrcroute to IPsrcaddr and now reports errors +# using ha_log rather than on stderr. +# +# Version 0.2 2002/11/02 17:00:00 John Sutton +# Changed status output to "OK" to satisfy ResourceManager's +# we_own_resource() function. +# +# Version 0.1 2002/11/01 17:00:00 John Sutton +# First effort but does the job? +# diff --git a/heartbeat/IPv6addr.c b/heartbeat/IPv6addr.c new file mode 100644 index 0000000..2e9e126 --- /dev/null +++ b/heartbeat/IPv6addr.c @@ -0,0 +1,899 @@ + +/* + * This program manages IPv6 address with OCF Resource Agent standard. 
+ * + * Author: Huang Zhen + * Copyright (c) 2004 International Business Machines + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * It can add an IPv6 address, or remove one. + * + * Usage: IPv6addr {start|stop|status|monitor|meta-data} + * + * The "start" arg adds an IPv6 address. + * The "stop" arg removes one. + * The "status" arg shows whether the IPv6 address exists + * The "monitor" arg shows whether the IPv6 address can be pinged (ICMPv6 ECHO) + * The "meta-data" arg shows the meta data (XML) + */ + +/* + * ipv6-address: + * + * currently the following forms are legal: + * address + * address/prefix + * + * E.g. + * 3ffe:ffff:0:f101::3 + * 3ffe:ffff:0:f101::3/64 + * + * It should be passed by environment variables: + * OCF_RESKEY_ipv6addr=3ffe:ffff:0:f101::3 + * OCF_RESKEY_cidr_netmask=64 + * OCF_RESKEY_nic=eth0 + * + */ + +/* + * start: + * 1.IPv6addr will choose a proper interface for the new address. + * 2.Then assign the new address to the interface. + * 3.Wait until the new address is available (reply ICMPv6 ECHO packet) + * 4.Send out the unsolicited advertisements. + * + * return 0(OCF_SUCCESS) for success + * return 1(OCF_ERR_GENERIC) for failure + * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) + * + * + * stop: + * remove the address from the interface. 
+ * + * return 0(OCF_SUCCESS) for success + * return 1(OCF_ERR_GENERIC) for failure + * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) + * + * status: + * return the status of the address. only check whether it exists. + * + * return 0(OCF_SUCCESS) for existing + * return 7(OCF_NOT_RUNNING) for not existing + * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) + * + * + * monitor: + * ping the address by ICMPv6 ECHO request. + * + * return 0(OCF_SUCCESS) for response correctly. + * return 7(OCF_NOT_RUNNING) for no response. + * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include /* for inet_pton */ +#include /* for if_nametoindex */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define PIDFILE_BASE HA_RSCTMPDIR "/IPv6addr-" + +/* +0 No error, action succeeded completely +1 generic or unspecified error (current practice) + The "monitor" operation shall return this for a crashed, hung or + otherwise non-functional resource. +2 invalid or excess argument(s) + Likely error code for validate-all, if the instance parameters + do not validate. Any other action is free to also return this + exit status code for this case. 
+3 unimplemented feature (for example, "reload") +4 user had insufficient privilege +5 program is not installed +6 program is not configured +7 program is not running +8 resource is running in "master" mode and fully operational +9 resource is in "master" mode but in a failed state +*/ +#define OCF_SUCCESS 0 +#define OCF_ERR_GENERIC 1 +#define OCF_ERR_ARGS 2 +#define OCF_ERR_UNIMPLEMENTED 3 +#define OCF_ERR_PERM 4 +#define OCF_ERR_INSTALLED 5 +#define OCF_ERR_CONFIGURED 6 +#define OCF_NOT_RUNNING 7 + +const char* APP_NAME = "IPv6addr"; + +const char* START_CMD = "start"; +const char* STOP_CMD = "stop"; +const char* STATUS_CMD = "status"; +const char* MONITOR_CMD = "monitor"; +const char* ADVT_CMD = "advt"; +const char* RECOVER_CMD = "recover"; +const char* RELOAD_CMD = "reload"; +const char* META_DATA_CMD = "meta-data"; +const char* VALIDATE_CMD = "validate-all"; + +const int QUERY_COUNT = 5; + +struct in6_ifreq { + struct in6_addr ifr6_addr; + uint32_t ifr6_prefixlen; + unsigned int ifr6_ifindex; +}; + +static int start_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); +static int stop_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); +static int status_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); +static int monitor_addr6(struct in6_addr* addr6, int prefix_len); +static int advt_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); +static int meta_data_addr6(void); + + +static void usage(const char* self); +int write_pid_file(const char *pid_file); +int create_pid_directory(const char *pid_file); +static void byebye(int nsig); + +static char* scan_if(struct in6_addr* addr_target, int* plen_target, + int use_mask, char* prov_ifname); +static char* find_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname); +static char* get_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname); +static int assign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name); +static 
int unassign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name); +int is_addr6_available(struct in6_addr* addr6); + +int +main(int argc, char* argv[]) +{ + char pid_file[256]; + char* ipv6addr; + char* cidr_netmask; + int ret; + char* cp; + char* prov_ifname = NULL; + int prefix_len = -1; + struct in6_addr addr6; + struct sigaction act; + + /* Check the count of parameters first */ + if (argc < 2) { + usage(argv[0]); + return OCF_ERR_ARGS; + } + + /* set termination signal */ + memset(&act, 0, sizeof(struct sigaction)); + act.sa_flags &= ~SA_RESTART; /* redundant - to stress syscalls should fail */ + act.sa_handler = byebye; + if ((sigemptyset(&act.sa_mask) < 0) || (sigaction(SIGTERM, &act, NULL) < 0)) { + cl_log(LOG_ERR, "Could not set handler for signal: %s", strerror(errno)); + return OCF_ERR_GENERIC; + } + + /* open system log */ + cl_log_set_entity(APP_NAME); + cl_log_set_facility(LOG_DAEMON); + + /* the meta-data dont need any parameter */ + if (0 == strncmp(META_DATA_CMD, argv[1], strlen(META_DATA_CMD))) { + ret = meta_data_addr6(); + return OCF_SUCCESS; + } + + /* check the OCF_RESKEY_ipv6addr parameter, should be an IPv6 address */ + ipv6addr = getenv("OCF_RESKEY_ipv6addr"); + + if (ipv6addr == NULL) { + cl_log(LOG_ERR, "Please set OCF_RESKEY_ipv6addr to the IPv6 address you want to manage."); + usage(argv[0]); + return OCF_ERR_ARGS; + } + + /* legacy option */ + if ((cp = strchr(ipv6addr, '/'))) { + prefix_len = atol(cp + 1); + if ((prefix_len < 0) || (prefix_len > 128)) { + cl_log(LOG_ERR, "Invalid prefix_len [%s], should be an integer in [0, 128]", cp+1); + usage(argv[0]); + return OCF_ERR_ARGS; + } + *cp=0; + } + + /* get provided netmask (optional) */ + cidr_netmask = getenv("OCF_RESKEY_cidr_netmask"); + + if (cidr_netmask != NULL) { + if ((atol(cidr_netmask) < 0) || (atol(cidr_netmask) > 128)) { + cl_log(LOG_ERR, "Invalid prefix_len [%s], " + "should be an integer in [0, 128]", cidr_netmask); + usage(argv[0]); + return OCF_ERR_ARGS; + } + 
if (prefix_len != -1 && prefix_len != atol(cidr_netmask)) { + cl_log(LOG_DEBUG, "prefix_len(%d) is overwritted by cidr_netmask(%s)", + prefix_len, cidr_netmask); + } + prefix_len = atol(cidr_netmask); + + } else if (prefix_len == -1) { + prefix_len = 0; + } + + /* get provided interface name (optional) */ + prov_ifname = getenv("OCF_RESKEY_nic"); + + if (inet_pton(AF_INET6, ipv6addr, &addr6) <= 0) { + cl_log(LOG_ERR, "Invalid IPv6 address [%s]", ipv6addr); + usage(argv[0]); + return OCF_ERR_ARGS; + } + + /* Check whether this system supports IPv6 */ + if (access(IF_INET6, R_OK)) { + cl_log(LOG_ERR, "No support for INET6 on this system."); + return OCF_ERR_GENERIC; + } + + /* create the pid file so we can make sure that only one IPv6addr + * for this address is running + */ + if (snprintf(pid_file, sizeof(pid_file), "%s%s", PIDFILE_BASE, ipv6addr) + >= (int)sizeof(pid_file)) { + cl_log(LOG_ERR, "Pid file truncated"); + return OCF_ERR_GENERIC; + } + + if (write_pid_file(pid_file) < 0) { + return OCF_ERR_GENERIC; + } + + + /* switch the command */ + if (0 == strncmp(START_CMD,argv[1], strlen(START_CMD))) { + ret = start_addr6(&addr6, prefix_len, prov_ifname); + }else if (0 == strncmp(STOP_CMD,argv[1], strlen(STOP_CMD))) { + ret = stop_addr6(&addr6, prefix_len, prov_ifname); + }else if (0 == strncmp(STATUS_CMD,argv[1], strlen(STATUS_CMD))) { + ret = status_addr6(&addr6, prefix_len, prov_ifname); + }else if (0 ==strncmp(MONITOR_CMD,argv[1], strlen(MONITOR_CMD))) { + ret = monitor_addr6(&addr6, prefix_len); + }else if (0 ==strncmp(RELOAD_CMD,argv[1], strlen(RELOAD_CMD))) { + ret = OCF_ERR_UNIMPLEMENTED; + }else if (0 ==strncmp(RECOVER_CMD,argv[1], strlen(RECOVER_CMD))) { + ret = OCF_ERR_UNIMPLEMENTED; + }else if (0 ==strncmp(VALIDATE_CMD,argv[1], strlen(VALIDATE_CMD))) { + /* ipv6addr has been validated by inet_pton, hence a valid IPv6 address */ + ret = OCF_SUCCESS; + }else if (0 ==strncmp(ADVT_CMD,argv[1], strlen(MONITOR_CMD))) { + ret = advt_addr6(&addr6, prefix_len, 
prov_ifname); + }else{ + usage(argv[0]); + ret = OCF_ERR_ARGS; + } + + /* release the pid file */ + unlink(pid_file); + + return ret; +} +int +start_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) +{ + int i; + char* if_name; + if(OCF_SUCCESS == status_addr6(addr6,prefix_len,prov_ifname)) { + return OCF_SUCCESS; + } + + /* we need to find a proper device to assign the address */ + if_name = find_if(addr6, &prefix_len, prov_ifname); + if (NULL == if_name) { + cl_log(LOG_ERR, "no valid mechanisms"); + return OCF_ERR_GENERIC; + } + + /* Assign the address */ + if (0 != assign_addr6(addr6, prefix_len, if_name)) { + cl_log(LOG_ERR, "failed to assign the address to %s", if_name); + return OCF_ERR_GENERIC; + } + + /* Check whether the address available */ + for (i = 0; i < QUERY_COUNT; i++) { + if (0 == is_addr6_available(addr6)) { + break; + } + sleep(1); + } + if (i == QUERY_COUNT) { + cl_log(LOG_ERR, "failed to ping the address"); + return OCF_ERR_GENERIC; + } + + /* Send unsolicited advertisement packet to neighbor */ + for (i = 0; i < UA_REPEAT_COUNT; i++) { + send_ua(addr6, if_name); + sleep(1); + } + return OCF_SUCCESS; +} + +int +advt_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) +{ + /* First, we need to find a proper device to assign the address */ + char* if_name = get_if(addr6, &prefix_len, prov_ifname); + int i; + if (NULL == if_name) { + cl_log(LOG_ERR, "no valid mechanisms"); + return OCF_ERR_GENERIC; + } + /* Send unsolicited advertisement packet to neighbor */ + for (i = 0; i < UA_REPEAT_COUNT; i++) { + send_ua(addr6, if_name); + sleep(1); + } + return OCF_SUCCESS; +} + +int +stop_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) +{ + char* if_name; + if(OCF_NOT_RUNNING == status_addr6(addr6,prefix_len,prov_ifname)) { + return OCF_SUCCESS; + } + + if_name = get_if(addr6, &prefix_len, prov_ifname); + + if (NULL == if_name) { + cl_log(LOG_ERR, "no valid mechanisms."); + /* I think this should be a success 
exit according to LSB. */ + return OCF_ERR_GENERIC; + } + + /* Unassign the address */ + if (0 != unassign_addr6(addr6, prefix_len, if_name)) { + cl_log(LOG_ERR, "failed to assign the address to %s", if_name); + return OCF_ERR_GENERIC; + } + + return OCF_SUCCESS; +} + +int +status_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) +{ + char* if_name = get_if(addr6, &prefix_len, prov_ifname); + if (NULL == if_name) { + return OCF_NOT_RUNNING; + } + return OCF_SUCCESS; +} + +int +monitor_addr6(struct in6_addr* addr6, int prefix_len) +{ + if(0 == is_addr6_available(addr6)) { + return OCF_SUCCESS; + } + return OCF_NOT_RUNNING; +} + +/* find the network interface associated with an address */ +char* +scan_if(struct in6_addr* addr_target, int* plen_target, int use_mask, char* prov_ifname) +{ + FILE *f; + static char devname[21]=""; + struct in6_addr addr; + struct in6_addr mask; + unsigned int plen, scope, dad_status, if_idx; + unsigned int addr6p[4]; + + /* open /proc/net/if_inet6 file */ + if ((f = fopen(IF_INET6, "r")) == NULL) { + return NULL; + } + + /* Loop for each entry */ + while (1) { + int i; + int n; + int s; + gboolean same = TRUE; + + i = fscanf(f, "%08x%08x%08x%08x %x %02x %02x %02x %20s\n", + &addr6p[0], &addr6p[1], &addr6p[2], &addr6p[3], + &if_idx, &plen, &scope, &dad_status, devname); + if (i == EOF) { + break; + } + else if (i != 9) { + cl_log(LOG_INFO, "Error parsing %s, " + "perhaps the format has changed\n", IF_INET6); + break; + } + + /* Consider link-local addresses (scope == 0x20) only when + * the inerface name is provided, and global addresses + * (scope == 0). Skip everything else. + */ + if (scope != 0) { + if (scope != 0x20 || prov_ifname == 0 + || *prov_ifname == 0) + continue; + } + + /* If specified prefix, only same prefix entry + * would be considered. 
+ */ + if (*plen_target!=0 && plen != *plen_target) { + continue; + } + + /* If interface name provided, only same devname entry + * would be considered + */ + if (prov_ifname!=0 && *prov_ifname!=0) + { + if (strcmp(devname, prov_ifname)) + continue; + } + + for (i = 0; i< 4; i++) { + addr.s6_addr32[i] = htonl(addr6p[i]); + } + + /* Make the mask based on prefix length */ + memset(mask.s6_addr, 0xff, 16); + if (use_mask && plen < 128) { + n = plen / 32; + memset(mask.s6_addr32 + n + 1, 0, (3 - n) * 4); + s = 32 - plen % 32; + if (s == 32) + mask.s6_addr32[n] = 0x0; + else + mask.s6_addr32[n] = 0xffffffff << s; + mask.s6_addr32[n] = htonl(mask.s6_addr32[n]); + } + + /* compare addr and addr_target */ + same = TRUE; + for (i = 0; i < 4; i++) { + if ((addr.s6_addr32[i]&mask.s6_addr32[i]) != + (addr_target->s6_addr32[i]&mask.s6_addr32[i])) { + same = FALSE; + break; + } + } + + /* We found it! */ + if (same) { + fclose(f); + *plen_target = plen; + return devname; + } + } + fclose(f); + return NULL; +} +/* find a proper network interface to assign the address */ +char* +find_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname) +{ + char *best_ifname = scan_if(addr_target, plen_target, 1, prov_ifname); + + /* use the provided ifname and prefix if the address did not match */ + if (best_ifname == NULL && + prov_ifname != 0 && *prov_ifname != 0 && *plen_target != 0) { + cl_log(LOG_INFO, "Could not find a proper interface by the ipv6addr. 
Using the specified nic:'%s' and cidr_netmask:'%d'", prov_ifname, *plen_target); + return prov_ifname; + } + return best_ifname; +} +/* get the device name and the plen_target of a special address */ +char* +get_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname) +{ + return scan_if(addr_target, plen_target, 0, prov_ifname); +} +int +assign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name) +{ + struct in6_ifreq ifr6; + + /* Get socket first */ + int fd; + struct ifreq ifr; + + fd = socket(AF_INET6, SOCK_DGRAM, 0); + if (fd < 0) { + return 1; + } + + /* Query the index of the if */ + strcpy(ifr.ifr_name, if_name); + if (ioctl(fd, SIOGIFINDEX, &ifr) < 0) { + return -1; + } + + /* Assign the address to the if */ + ifr6.ifr6_addr = *addr6; + ifr6.ifr6_ifindex = ifr.ifr_ifindex; + ifr6.ifr6_prefixlen = prefix_len; + if (ioctl(fd, SIOCSIFADDR, &ifr6) < 0) { + return -1; + } + close (fd); + return 0; +} +int +unassign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name) +{ + int fd; + struct ifreq ifr; + struct in6_ifreq ifr6; + + /* Get socket first */ + fd = socket(AF_INET6, SOCK_DGRAM, 0); + if (fd < 0) { + return 1; + } + + /* Query the index of the if */ + strcpy(ifr.ifr_name, if_name); + if (ioctl(fd, SIOGIFINDEX, &ifr) < 0) { + return -1; + } + + /* Unassign the address to the if */ + ifr6.ifr6_addr = *addr6; + ifr6.ifr6_ifindex = ifr.ifr_ifindex; + ifr6.ifr6_prefixlen = prefix_len; + if (ioctl(fd, SIOCDIFADDR, &ifr6) < 0) { + return -1; + } + + close (fd); + return 0; +} + +#define MINPACKSIZE 64 +int +is_addr6_available(struct in6_addr* addr6) +{ + struct sockaddr_in6 addr; + struct icmp6_hdr icmph; + u_char outpack[MINPACKSIZE]; + int icmp_sock; + int ret; + struct iovec iov; + u_char packet[MINPACKSIZE]; + struct msghdr msg; + int i; + struct pollfd pfd; + + if ((icmp_sock = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) == -1) { + return -1; + } + + memset(&icmph, 0, sizeof(icmph)); + icmph.icmp6_type = ICMP6_ECHO_REQUEST; + 
icmph.icmp6_code = 0; + icmph.icmp6_cksum = 0; + icmph.icmp6_seq = htons(0); + icmph.icmp6_id = 0; + + memset(&outpack, 0, sizeof(outpack)); + memcpy(&outpack, &icmph, sizeof(icmph)); + + memset(&addr, 0, sizeof(struct sockaddr_in6)); + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(IPPROTO_ICMPV6); + memcpy(&addr.sin6_addr,addr6,sizeof(struct in6_addr)); + + /* Only the first 8 bytes of outpack are meaningful... */ + ret = sendto(icmp_sock, (char *)outpack, sizeof(outpack), 0, + (struct sockaddr *) &addr, + sizeof(struct sockaddr_in6)); + if (0 >= ret) { + return -1; + } + + iov.iov_base = (char *)packet; + iov.iov_len = sizeof(packet); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + + for (i = 0; i < 3; i++) { + pfd.fd = icmp_sock; + pfd.events = POLLIN; + pfd.revents = 0; + ret = poll(&pfd, 1, 10); + + if (ret < 1) + continue; + + ret = recvmsg(icmp_sock, &msg, MSG_DONTWAIT); + if (ret > 0) + return 0; + if (ret == 0) + break; + + if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) + break; + } + + return -1; +} + +static void usage(const char* self) +{ + printf("usage: %s {start|stop|status|monitor|validate-all|meta-data}\n",self); + return; +} + +/* Following code is copied from send_arp.c, linux-HA project. 
*/ +void +byebye(int nsig) +{ + (void)nsig; + /* Avoid an "error exit" log message if we're killed */ + exit(0); +} + +int +create_pid_directory(const char *pid_file) +{ + int status; + int return_status = -1; + struct stat stat_buf; + char* dir; + + dir = strdup(pid_file); + if (!dir) { + cl_log(LOG_INFO, "Memory allocation failure: %s", + strerror(errno)); + return -1; + } + + dirname(dir); + + status = stat(dir, &stat_buf); + + if (status < 0 && errno != ENOENT && errno != ENOTDIR) { + cl_log(LOG_INFO, "Could not stat pid-file directory " + "[%s]: %s", dir, strerror(errno)); + goto err; + } + + if (!status) { + if (S_ISDIR(stat_buf.st_mode)) { + goto out; + } + cl_log(LOG_INFO, "Pid-File directory exists but is " + "not a directory [%s]", dir); + goto err; + } + + if (mkdir(dir, S_IRUSR|S_IWUSR|S_IXUSR | S_IRGRP|S_IXGRP) < 0) { + cl_log(LOG_INFO, "Could not create pid-file directory " + "[%s]: %s", dir, strerror(errno)); + goto err; + } + +out: + return_status = 0; +err: + free(dir); + return return_status; +} + +int +write_pid_file(const char *pid_file) +{ + + int pidfilefd; + char pidbuf[11]; + unsigned long pid; + ssize_t bytes; + + if (*pid_file != '/') { + cl_log(LOG_INFO, "Invalid pid-file name, must begin with a " + "'/' [%s]\n", pid_file); + return -1; + } + + if (create_pid_directory(pid_file) < 0) { + return -1; + } + + while (1) { + pidfilefd = open(pid_file, O_CREAT|O_EXCL|O_RDWR, + S_IRUSR|S_IWUSR); + if (pidfilefd < 0) { + if (errno != EEXIST) { /* Old PID file */ + cl_log(LOG_INFO, "Could not open pid-file " + "[%s]: %s", pid_file, + strerror(errno)); + return -1; + } + } + else { + break; + } + + pidfilefd = open(pid_file, O_RDONLY, S_IRUSR|S_IWUSR); + if (pidfilefd < 0) { + cl_log(LOG_INFO, "Could not open pid-file " + "[%s]: %s", pid_file, + strerror(errno)); + return -1; + } + + while (1) { + bytes = read(pidfilefd, pidbuf, sizeof(pidbuf)-1); + if (bytes < 0) { + if (errno == EINTR) { + continue; + } + cl_log(LOG_INFO, "Could not read pid-file 
" + "[%s]: %s", pid_file, + strerror(errno)); + return -1; + } + pidbuf[bytes] = '\0'; + break; + } + + if(unlink(pid_file) < 0) { + cl_log(LOG_INFO, "Could not delete pid-file " + "[%s]: %s", pid_file, + strerror(errno)); + return -1; + } + + if (!bytes) { + cl_log(LOG_INFO, "Invalid pid in pid-file " + "[%s]: %s", pid_file, + strerror(errno)); + return -1; + } + + close(pidfilefd); + + pid = strtoul(pidbuf, NULL, 10); + if (pid == ULONG_MAX && errno == ERANGE) { + cl_log(LOG_INFO, "Invalid pid in pid-file " + "[%s]: %s", pid_file, + strerror(errno)); + return -1; + } + + if (kill(pid, SIGKILL) < 0 && errno != ESRCH) { + cl_log(LOG_INFO, "Error killing old process [%lu] " + "from pid-file [%s]: %s", pid, + pid_file, strerror(errno)); + return -1; + } + + cl_log(LOG_INFO, "Killed old send_ua process [%lu]", pid); + } + + if (snprintf(pidbuf, sizeof(pidbuf), "%u" + , getpid()) >= (int)sizeof(pidbuf)) { + cl_log(LOG_INFO, "Pid too long for buffer [%u]", getpid()); + return -1; + } + + while (1) { + bytes = write(pidfilefd, pidbuf, strlen(pidbuf)); + if (bytes != strlen(pidbuf)) { + if (bytes < 0 && errno == EINTR) { + continue; + } + cl_log(LOG_INFO, "Could not write pid-file " + "[%s]: %s", pid_file, + strerror(errno)); + return -1; + } + break; + } + + close(pidfilefd); + + return 0; +} +static int +meta_data_addr6(void) +{ + const char* meta_data= + "\n" + "\n" + "\n" + " 1.0\n" + " \n" + " This script manages IPv6 alias IPv6 addresses,It can add an IP6\n" + " alias, or remove one.\n" + " \n" + " Manages IPv6 aliases\n" + " \n" + " \n" + " \n" + " The IPv6 address this RA will manage \n" + " \n" + " IPv6 address\n" + " \n" + " \n" + " \n" + " \n" + " The netmask for the interface in CIDR format. 
(ie, 24).\n" + " The value of this parameter overwrites the value of _prefix_\n" + " of ipv6addr parameter.\n" + " \n" + " Netmask\n" + " \n" + " \n" + " \n" + " \n" + " The base network interface on which the IPv6 address will\n" + " be brought online.\n" + " \n" + " Network interface\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n"; + printf("%s\n",meta_data); + return OCF_SUCCESS; +} diff --git a/heartbeat/IPv6addr_utils.c b/heartbeat/IPv6addr_utils.c new file mode 100644 index 0000000..7672b70 --- /dev/null +++ b/heartbeat/IPv6addr_utils.c @@ -0,0 +1,147 @@ + +/* + * This program manages IPv6 address with OCF Resource Agent standard. + * + * Author: Huang Zhen + * Copyright (c) 2004 International Business Machines + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include /* for inet_pton */ +#include /* for if_nametoindex */ +#include +#include +#include +#include + +/* Send an unsolicited advertisement packet + * Please refer to rfc4861 / rfc3542 + */ +int +send_ua(struct in6_addr* src_ip, char* if_name) +{ + int status = -1; + int fd; + + int ifindex; + int hop; + struct ifreq ifr; + u_int8_t *payload = NULL; + int payload_size; + struct nd_neighbor_advert *na; + struct nd_opt_hdr *opt; + struct sockaddr_in6 src_sin6; + struct sockaddr_in6 dst_sin6; + + if ((fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) == -1) { + printf("ERROR: socket(IPPROTO_ICMPV6) failed: %s", + strerror(errno)); + return status; + } + /* set the outgoing interface */ + ifindex = if_nametoindex(if_name); + if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_IF, + &ifindex, sizeof(ifindex)) < 0) { + printf("ERROR: setsockopt(IPV6_MULTICAST_IF) failed: %s", + strerror(errno)); + goto err; + } + /* set the hop limit */ + hop = 255; /* 255 is required. 
see rfc4861 7.1.2 */ + if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, + &hop, sizeof(hop)) < 0) { + printf("ERROR: setsockopt(IPV6_MULTICAST_HOPS) failed: %s", + strerror(errno)); + goto err; + } + + /* set the source address */ + memset(&src_sin6, 0, sizeof(src_sin6)); + src_sin6.sin6_family = AF_INET6; + src_sin6.sin6_addr = *src_ip; + src_sin6.sin6_port = 0; + if (IN6_IS_ADDR_LINKLOCAL(&src_sin6.sin6_addr) || + IN6_IS_ADDR_MC_LINKLOCAL(&src_sin6.sin6_addr)) { + src_sin6.sin6_scope_id = ifindex; + } + + if (bind(fd, (struct sockaddr *)&src_sin6, sizeof(src_sin6)) < 0) { + printf("ERROR: bind() failed: %s", strerror(errno)); + goto err; + } + + + /* get the hardware address */ + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name) - 1); + if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { + printf("ERROR: ioctl(SIOCGIFHWADDR) failed: %s", strerror(errno)); + goto err; + } + + /* build a neighbor advertisement message */ + payload_size = sizeof(struct nd_neighbor_advert) + + sizeof(struct nd_opt_hdr) + HWADDR_LEN; + payload = memalign(sysconf(_SC_PAGESIZE), payload_size); + if (!payload) { + printf("ERROR: malloc for payload failed"); + goto err; + } + memset(payload, 0, payload_size); + + /* Ugly typecast from ia64 hell! 
*/ + na = (struct nd_neighbor_advert *)((void *)payload); + na->nd_na_type = ND_NEIGHBOR_ADVERT; + na->nd_na_code = 0; + na->nd_na_cksum = 0; /* calculated by kernel */ + na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE; + na->nd_na_target = *src_ip; + + /* options field; set the target link-layer address */ + opt = (struct nd_opt_hdr *)(payload + sizeof(struct nd_neighbor_advert)); + opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; + opt->nd_opt_len = 1; /* The length of the option in units of 8 octets */ + memcpy(payload + sizeof(struct nd_neighbor_advert) + + sizeof(struct nd_opt_hdr), + &ifr.ifr_hwaddr.sa_data, HWADDR_LEN); + + /* sending an unsolicited neighbor advertisement to all */ + memset(&dst_sin6, 0, sizeof(dst_sin6)); + dst_sin6.sin6_family = AF_INET6; + inet_pton(AF_INET6, BCAST_ADDR, &dst_sin6.sin6_addr); /* should not fail */ + + if (sendto(fd, payload, payload_size, 0, + (struct sockaddr *)&dst_sin6, sizeof(dst_sin6)) + != payload_size) { + printf("ERROR: sendto(%s) failed: %s", + if_name, strerror(errno)); + goto err; + } + + status = 0; + +err: + close(fd); + free(payload); + return status; +} diff --git a/heartbeat/LVM b/heartbeat/LVM new file mode 100755 index 0000000..b587bd8 --- /dev/null +++ b/heartbeat/LVM @@ -0,0 +1,470 @@ +#!/bin/sh +# +# +# LVM +# +# Description: Manages an LVM volume as an HA resource +# +# +# Author: Alan Robertson +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# Copyright: (C) 2002 - 2005 International Business Machines, Inc. +# +# This code significantly inspired by the LVM resource +# in FailSafe by Lars Marowsky-Bree +# +# +# An example usage in /etc/ha.d/haresources: +# node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname +# +# See usage() function below for more details... +# +# OCF parameters are as below: +# OCF_RESKEY_volgrpname +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_volgrpname_default="" +OCF_RESKEY_exclusive_default="false" +OCF_RESKEY_tag_default="pacemaker" +OCF_RESKEY_partial_activation_default="false" + +: ${OCF_RESKEY_volgrpname=${OCF_RESKEY_volgrpname_default}} +: ${OCF_RESKEY_exclusive=${OCF_RESKEY_exclusive_default}} +: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}} +: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}} + +####################################################################### + + +usage() { + methods=`LVM_methods` + methods=`echo $methods | tr ' ' '|'` + cat < + + +1.0 + + +Resource script for LVM. It manages an Linux Volume Manager volume (LVM) +as an HA resource. + +Controls the availability of an LVM Volume Group + + + + +The name of volume group. + +Volume group name + + + + +If set, the volume group will be activated exclusively. This option works one of +two ways. If the volume group has the cluster attribute set, then the volume group +will be activated exclusively using clvmd across the cluster. If the cluster attribute +is not set, the volume group will be activated exclusively using a tag and the volume_list +filter. When the tag option is in use, the volume_list in lvm.con must be initialized. This +can be as simple as setting 'volume_list = []' depending on your setup. + +Exclusive activation + + + + + +If "exclusive" is set on a non clustered volume group, this overrides the tag to be used. + +Exclusive activation tag + + + + + +If set, the volume group will be activated partially even with some +physical volumes missing. It helps to set to true when using mirrored +logical volumes. + +Activate VG partially when missing PVs + + + + + + + + + + + + + + + +EOF +} + +# +# methods: What methods/operations do we support? 
+# +LVM_methods() { + cat < /dev/null 2>&1; then + ocf_log info "Volume group $vg not found" + return $OCF_SUCCESS + fi + + ocf_log info "Deactivating volume group $vg" + + lvm_pre_deactivate || exit + + for i in $(seq 10) + do + ocf_run vgchange $vgchange_deactivate_options $vg + res=$? + if LVM_status $vg; then + ocf_exit_reason "LVM: $vg did not stop correctly" + res=1 + fi + + if [ $res -eq 0 ]; then + break + fi + + res=$OCF_ERR_GENERIC + ocf_log warn "$vg still Active" + ocf_log info "Retry deactivating volume group $vg" + sleep 1 + which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5 + done + + lvm_post_deactivate $res +} + +# +# Check whether the OCF instance parameters are valid +# +LVM_validate_all() { + check_binary $AWK + + ## + # lvmetad is a daemon that caches lvm metadata to improve the + # performance of LVM commands. This daemon should never be used when + # volume groups exist that are being managed by the cluster. The lvmetad + # daemon introduces a response lag, where certain LVM commands look like + # they have completed (like vg activation) when in fact the command + # is still in progress by the lvmetad. This can cause reliability issues + # when managing volume groups in the cluster. For Example, if you have a + # volume group that is a dependency for another application, it is possible + # the cluster will think the volume group is activated and attempt to start + # the application before volume group is really accesible... lvmetad is bad. + ## + lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1 + if [ $? -eq 0 ]; then + # for now warn users that lvmetad is enabled and that they should disable it. In the + # future we may want to consider refusing to start, or killing the lvmetad daemon. + ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process" + fi + + ## + # Off-the-shelf tests... 
+ ## + VGOUT=`vgck ${VOLUME} 2>&1` + if [ $? -ne 0 ]; then + # Inconsistency might be due to missing physical volumes, which doesn't + # automatically mean we should fail. If partial_activation=true then + # we should let start try to handle it, or if no PVs are listed as + # "unknown device" then another node may have marked a device missing + # where we have access to all of them and can start without issue. + if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then + case $(vgs -o attr --noheadings $OCF_RESKEY_volgrpname | tr -d ' ') in + ???p??*) + if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then + # We are missing devices and cannot activate partially + ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially" + exit $OCF_ERR_GENERIC + else + # We are missing devices but are allowed to activate partially. + # Assume that caused the vgck failure and carry on + ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action." + fi + ;; + esac + # else the vg is partial but all devices are accounted for, so another + # node must have marked the device missing. Proceed. + else + # vgck failure was for something other than missing devices + ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" + exit $OCF_ERR_GENERIC + fi + fi + + ## + # Does the Volume Group exist? + ## + if [ "$LVM_MAJOR" = "1" ]; then + VGOUT=`vgdisplay ${VOLUME} 2>&1` + else + VGOUT=`vgdisplay -v ${VOLUME} 2>&1` + fi + if [ $? -ne 0 ]; then + ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" + exit $OCF_ERR_GENERIC + fi + + if lvs --noheadings -o segtype | grep -q "cache"; then + if ! lvs --noheadings -o cache_mode "$OCF_RESKEY_volgrpname" | grep -q "writethrough"; then + ocf_log warn "LVM CACHE IS NOT IN WRITETHROUGH MODE. 
THIS IS NOT A SUPPORTED CONFIGURATION." + fi + fi + + if ocf_is_clone && ocf_is_true "$OCF_RESKEY_exclusive"; then + ocf_exit_reason "cloned lvm resources can not be activated exclusively" + exit $OCF_ERR_CONFIGURED + fi + + lvm_validate_all +} + +# +# 'main' starts here... +# + +if + [ $# -ne 1 ] +then + usage + exit $OCF_ERR_ARGS +fi + +case $1 in + meta-data) meta_data + exit $OCF_SUCCESS;; + + methods) LVM_methods + exit $?;; + + usage) usage + exit $OCF_SUCCESS;; + *) ;; +esac + +if + [ -z "$OCF_RESKEY_volgrpname" ] +then + ocf_exit_reason "You must identify the volume group name!" + exit $OCF_ERR_CONFIGURED +fi + +# Get the LVM version number, for this to work we assume(thanks to panjiam): +# +# LVM1 outputs like this +# +# # vgchange --version +# vgchange: Logical Volume Manager 1.0.3 +# Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10) +# +# LVM2 and higher versions output in this format +# +# # vgchange --version +# LVM version: 2.00.15 (2004-04-19) +# Library version: 1.00.09-ioctl (2004-03-31) +# Driver version: 4.1.0 + +LVM_VERSION=`vgchange --version 2>&1 | \ + $AWK '/Logical Volume Manager/ {print $5"\n"; exit; } + /LVM version:/ {printf $3"\n"; exit;}'` +rc=$? + +if + ( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] ) +then + ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?" + exit $OCF_ERR_INSTALLED +fi +LVM_MAJOR="${LVM_VERSION%%.*}" + +VOLUME=$OCF_RESKEY_volgrpname +OP_METHOD=$1 + +set_lvm_mode +lvm_init +if ocf_is_true "$OCF_RESKEY_partial_activation" ; then + vgchange_activate_options="${vgchange_activate_options} --partial" +fi + +# What kind of method was invoked? 
+case "$1" in + + start) + LVM_validate_all + LVM_start $VOLUME + exit $?;; + + stop) LVM_stop $VOLUME + exit $?;; + + status) LVM_status $VOLUME $1 + exit $?;; + + monitor) LVM_status $VOLUME + exit $?;; + + validate-all) LVM_validate_all + ;; + + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate new file mode 100755 index 0000000..f6f24a3 --- /dev/null +++ b/heartbeat/LVM-activate @@ -0,0 +1,997 @@ +#!/bin/sh +# +# +# Copyright (c) 2017 SUSE LINUX, Eric Ren +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# LVM-activate OCF Resource Agent: +# +# Logical volume manager (LVM) provides new features for cluster enviroment: +# lvmlockd and system ID, which aims to replace clvmd and tagged-exclusive +# activation. Accordingly, we have created a new resource agent named "lvmlockd" +# to manage lvmlockd daemon. In addition, this new resource agent "LVM-activate" +# is created to take care of LVM activation/deactivation work. 
This agent supports +# the new features: lvmlockd and system ID, and also supports the old features: +# clvmd and lvm tag. +# +# Thanks David Teigland! He is the author of these LVM features, giving valuable +# idea/feedback about this resource agent. +############################################################################ + +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_vgname_default="" +OCF_RESKEY_lvname_default="" +OCF_RESKEY_vg_access_mode_default="" +OCF_RESKEY_activation_mode_default="exclusive" +OCF_RESKEY_tag_default="pacemaker" +OCF_RESKEY_partial_activation_default="false" +OCF_RESKEY_degraded_activation_default="false" +OCF_RESKEY_majority_pvs_default="false" + +: ${OCF_RESKEY_vgname=${OCF_RESKEY_vgname_default}} +: ${OCF_RESKEY_lvname=${OCF_RESKEY_lvname_default}} +: ${OCF_RESKEY_vg_access_mode=${OCF_RESKEY_vg_access_mode_default}} +: ${OCF_RESKEY_activation_mode=${OCF_RESKEY_activation_mode_default}} +: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}} +: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}} +: ${OCF_RESKEY_degraded_activation=${OCF_RESKEY_degraded_activation_default}} +: ${OCF_RESKEY_majority_pvs=${OCF_RESKEY_majority_pvs_default}} + +# If LV is given, only activate this named LV; otherwise, activate all +# LVs in the named VG. +VG=${OCF_RESKEY_vgname} +LV=${OCF_RESKEY_lvname} + +# How LVM controls access to the VG: +# +# 0: place-holder for any incorrect cases; To be safe, we enforce the VG +# must use any of the following protection methods in cluster environment. 
+# 1: vg is shared - lvmlockd (new) +# 2: vg is clustered - clvmd (old) +# 3: vg has system_id (new) +# 4: vg has tagging (old) +VG_access_mode=${OCF_RESKEY_vg_access_mode} +VG_access_mode_num=0 + +# Activate LV(s) with "shared" lock for cluster fs +# or "exclusive" lock for local fs +LV_activation_mode=${OCF_RESKEY_activation_mode} + +# For system ID feature +SYSTEM_ID="" + +# For tagging activation mode +OUR_TAG=${OCF_RESKEY_tag} + +####################################################################### + +meta_data() { + cat < + + + + +1.0 + + +This agent manages LVM activation/deactivation work for a given volume group. + +It supports the following modes, controlled by the vg_access_mode parameter: + +* lvmlockd +* system_id +* clvmd +* tagging + +Notes: + +1. There are two possible configuration combinations: lvmlockd+LVM-activate and +clvm+LVM-activate. However, it is not possible to use both at the same time! + +2. Put all "lvmlockd"/"clvmd" volume groups into auto management by the agent +if using the cluster to manage at least one of them. If you manage some manually, +the stop action of the lvmlockd agent may fail and the node may get fenced, +because some DLM lockspaces might be in use and cannot be closed automatically. + +3. The autoactivation property of volume group will be disabled when vg_access_mode +is set to system_id. + +Option: OCF_CHECK_LEVEL + +The standard monitor operation of depth 0 checks if the VG or LV is valid. +If you want deeper tests, set OCF_CHECK_LEVEL to 10: + + 10: read first 1 byte of the underlying device (raw read) + +If there are many underlying devs in VG, it will only read one of the devs. +This is not perfect solution for detecting underlying devices livable. +e.g. iscsi SAN IO timeout will return EIO, and it makes monitor failed. + + +This agent activates/deactivates logical volumes. + + + + +The volume group name. + +The volume group name + + + + + +If set, only the specified LV will be activated. 
+ +Only activate the given LV + + + + + +This option decides which solution will be used to protect the volume group in +cluster environment. Optional solutions are: lvmlockd, clvmd, system_id and +tagging. + +The VG access mode + + + + + +The activation mode decides the visibility of logical volumes in the cluster. There +are two different modes: "shared" for cluster filesystem and "exclusive" for local +filesystem. With "shared", an LV can be activated concurrently from multiple nodes. +With "exclusive", an LV can be activated by one node at a time. + +This option only has effect on "lvmlockd"/"clvmd" vg_access_mode. For "system_id" +and "tagging", they always mean exclusive activation. + +Logical volume activation mode + + + + + +The tag used for tagging activation mode. + +The tag used for tagging activation mode + + + + + +If set, the volume group will be activated partially even with some +physical volumes missing. It helps to set to true when using mirrored +logical volumes. + +Activate VG partially when missing PVs + + + + + +Activate RAID LVs using the "degraded" activation mode. This allows RAID +LVs to be activated with missing PVs if all data can be provided with +RAID redundancy. The RAID level determines the number of PVs that are +required for degraded activation to succeed. If fewer PVs are available, +then degraded activation will fail. Also enable majority_pvs. + +Activate RAID LVs in degraded mode when missing PVs + + + + + +If set, the VG system ID can be reassigned to a new host if a majority +of PVs in the VG are present. Otherwise, VG failover with system ID +will fail when the VG is missing PVs. Also enable degraded_activation +when RAID LVs are used. 
+ +Allow changing the system ID of a VG with a majority of PVs + + + + + + + + + + + + + +END +} + +####################################################################### + +usage() { + cat </dev/null | tr -d \') + export ${kvs} + vg_locktype=${LVM2_VG_LOCK_TYPE} + vg_clustered=${LVM2_VG_CLUSTERED} + vg_systemid=${LVM2_VG_SYSTEMID} + vg_tags=${LVM2_VG_TAGS} + + # We know this VG is using lvmlockd if the lock type is dlm. + if [ "$vg_locktype" = "dlm" ]; then + access_mode=1 + elif [ "$vg_clustered" = "clustered" ]; then + access_mode=2 + elif [ -n "$vg_systemid" ]; then + SYSTEM_ID=$(lvm systemid 2>/dev/null | cut -d':' -f2 | tr -d '[:blank:]') + access_mode=3 + elif [ -n "$vg_tags" ]; then + # TODO: + # We don't have reliable way to test if tagging activation is used. + access_mode=4 + else + access_mode=0 + fi + + return $access_mode +} + +# TODO: All tagging activation code is almost copied from LVM RA!!! +# But, the old LVM RA just uses the ordinary tags, not the "hosttag" feature +# which may be a better method for active-inactive cluster scenario. +# +# We have two choice: +# 1. Continue to use the LVM way, which may work well on old system. +# 2. Change to use the real hosttag feature, but it looks very same +# to systemID. +# Anyway, we can easily change this if anyone requests with good reasons. + +# does this vg have our tag +check_tags() +{ + owner=$(vgs -o tags --noheadings ${VG} | tr -d '[:blank:]') + + if [ -z "$owner" ]; then + # No-one owns this VG yet + return 1 + fi + + if [ "$OUR_TAG" = "$owner" ]; then + # yep, this is ours + return 0 + fi + + # some other tag is set on this vg + return 2 +} + +strip_tags() +{ + for tag in $(vgs --noheadings -o tags $OCF_RESKEY_volgrpname | sed s/","/" "/g); do + ocf_log info "Stripping tag, $tag" + + # LVM version 2.02.98 allows changing tags if PARTIAL + vgchange --deltag "$tag" ${VG} + done + + if [ ! 
-z $(vgs -o tags --noheadings ${VG} | tr -d '[:blank:]') ]; then + ocf_exit_reason "Failed to remove ownership tags from ${VG}" + exit $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +set_tags() +{ + case check_tags in + 0) + # we already own it. + return $OCF_SUCCESS + ;; + 2) + # other tags are set, strip them before setting + if ! strip_tags; then + return $OCF_ERR_GENERIC + fi + ;; + *) + : ;; + esac + + if ! vgchange --addtag $OUR_TAG ${VG} ; then + ocf_exit_reason "Failed to add ownership tag to ${VG}" + return $OCF_ERR_GENERIC + fi + + ocf_log info "New tag \"${OUR_TAG}\" added to ${VG}" + return $OCF_SUCCESS +} + +# Parameters: +# 1st: config item name +# 2nd: expected config item value +config_verify() +{ + name=$1 + expect=$2 + + real=$(lvmconfig "$name" | cut -d'=' -f2) + if [ "$real" != "$expect" ]; then + ocf_exit_reason "config item $name: expect=$expect but real=$real" + exit $OCF_ERR_ARGS + fi + + return $OCF_SUCCESS +} + +lvmlockd_check() +{ + config_verify "global/use_lvmlockd" "1" + + # locking_type was removed from config in v2.03 + ocf_version_cmp "$(lvmconfig --version | awk '/LVM ver/ {sub(/\(.*/, "", $3); print $3}')" "2.03" + if [ "$?" -eq 0 ]; then + config_verify "global/locking_type" "1" + fi + + # We recommend to activate one LV at a time so that this specific volume + # binds to a proper filesystem to protect the data + # TODO: + # Will this warn message be too noisy? + if [ -z "$LV" ]; then + ocf_log warn "You are recommended to activate one LV at a time or use exclusive activation mode." + fi + + # Good: lvmlockd is running, and clvmd is not running + if ! pgrep lvmlockd >/dev/null 2>&1 ; then + if ocf_is_probe; then + ocf_log info "initial probe: lvmlockd is not running yet." + exit $OCF_NOT_RUNNING + fi + + ocf_exit_reason "lvmlockd daemon is not running!" + exit $OCF_ERR_GENERIC + fi + + if pgrep clvmd >/dev/null 2>&1 ; then + ocf_exit_reason "clvmd daemon is running unexpectedly." 
+ exit $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +clvmd_check() +{ + config_verify "global/use_lvmetad" "0" + config_verify "global/use_lvmlockd" "0" + config_verify "global/locking_type" "3" + + # TODO: + # David asked a question: does lvchange -aey works well enough with clvmd? + # + # Corey said: I think it does work well enough. We do a fair amount of exclusive + # activation clvm testing, and my experience is you'll get the LV activated on + # the node you ran the command on. But, I think the specific scenario and issue + # that surprised us all was when the LV was *already* exclusively active on say + # nodeA, and nodeB then attempts to also exclusively activate it as well. Instead + # of failing, the activation succeeds even though nodeB activation didn't occur. + # This is documented in the following bug: + # https://bugzilla.redhat.com/show_bug.cgi?id=1191724#c8 + # Technically, you're not guaranteed to have it activated on the node you run + # the cmd on, but again, that's not been my experience. + # + # Eric: Put the interesting discussion here so that we can be more careful on this. + + # Good: clvmd is running, and lvmlockd is not running + if ! pgrep clvmd >/dev/null 2>&1 ; then + ocf_exit_reason "clvmd daemon is not running!" + exit $OCF_ERR_GENERIC + fi + + if pgrep lvmetad >/dev/null 2>&1 ; then + ocf_exit_reason "Please stop lvmetad daemon when clvmd is running." + exit $OCF_ERR_GENERIC + fi + + if pgrep lvmlockd >/dev/null 2>&1 ; then + ocf_exit_reason "lvmlockd daemon is running unexpectedly." + exit $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +systemid_check() +{ + # system_id_source is set in lvm.conf + source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2) + if [ "$source" = "" ] || [ "$source" = "none" ]; then + ocf_exit_reason "system_id_source in lvm.conf is not set correctly!" + exit $OCF_ERR_ARGS + fi + + if [ -z ${SYSTEM_ID} ]; then + ocf_exit_reason "local/system_id is not set!" 
+ exit $OCF_ERR_ARGS + fi + + return $OCF_SUCCESS +} + +# Verify tags setup +tagging_check() +{ + # The volume_list must be initialized to something in order to + # guarantee our tag will be filtered on startup + if ! lvm dumpconfig activation/volume_list; then + ocf_log err "LVM: Improper setup detected" + ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd" + exit $OCF_ERR_ARGS + fi + + # Our tag must _NOT_ be in the volume_list. This agent + # overrides the volume_list during activation using the + # special tag reserved for cluster activation + if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then + ocf_log err "LVM: Improper setup detected" + ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}" + exit $OCF_ERR_ARGS + fi + + return $OCF_SUCCESS +} + +read_parameters() +{ + if [ -z "$VG" ] + then + ocf_exit_reason "You must identify the volume group name!" + exit $OCF_ERR_CONFIGURED + fi + + if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ] + then + ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode" + exit $OCF_ERR_CONFIGURED + fi + + # Convert VG_access_mode from string to index + case ${VG_access_mode} in + lvmlockd) + VG_access_mode_num=1 + ;; + clvmd) + VG_access_mode_num=2 + ;; + system_id) + VG_access_mode_num=3 + ;; + tagging) + VG_access_mode_num=4 + ;; + *) + # dont exit with error-code here or nodes will get fenced on + # e.g. 
"pcs resource create" + ocf_exit_reason "You specified an invalid value for vg_access_mode: $VG_access_mode" + ;; + esac +} + +lvm_validate() { + read_parameters + + check_binary pgrep + # Every LVM command is just symlink to lvm binary + check_binary lvm + check_binary dmsetup + + # This is necessary when using system ID to update lvm hints, + # or in older versions of lvm, this is necessary to update the + # lvmetad cache. + pvscan --cache + + if ! vgs --foreign ${VG} >/dev/null 2>&1 ; then + # stop action exits successfully if the VG cannot be accessed... + if [ $__OCF_ACTION = "stop" ]; then + ocf_log warn "VG [${VG}] cannot be accessed, stop action exits successfully." + exit $OCF_SUCCESS + fi + + if ocf_is_probe; then + ocf_log info "initial probe: VG [${VG}] is not found on any block device yet." + exit $OCF_NOT_RUNNING + fi + + # Could be a transient error (e.g., iSCSI connection + # issue) so use OCF_ERR_GENERIC + ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!" + exit $OCF_ERR_GENERIC + fi + + vg_missing_pv_count=$(vgs -o missing_pv_count --noheadings ${VG} 2>/dev/null) + + if [ $vg_missing_pv_count -gt 0 ]; then + ocf_log warn "Volume Group ${VG} is missing $vg_missing_pv_count PVs." + + # Setting new system ID will succeed if over half of PVs remain. + # Don't try to calculate here if a majority is present, + # but leave this up to the vgchange command to determine. + if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then + ocf_log warn "Attempting fail over with missing PVs (majority.)" + + # Setting new system ID will fail, and behavior is undefined for + # other access modes. + elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then + ocf_log warn "Attempting fail over with missing PVs (partial.)" + + else + ocf_exit_reason "Volume group [$VG] has devices missing. Consider majority_pvs=true" + exit $OCF_ERR_GENERIC + fi + fi + + # Get the access mode from VG metadata and check if it matches the input + # value. 
Skip to check "tagging" mode because there's no reliable way to + # automatically check if "tagging" mode is being used. + get_VG_access_mode_num + mode=$? + if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then + ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!" + exit $OCF_ERR_CONFIGURED + fi + + # Nothing to do if the VG has no logical volume + lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null) + if [ $lv_count -lt 1 ]; then + ocf_exit_reason "Volume group [$VG] doesn't contain any logical volume!" + exit $OCF_ERR_CONFIGURED + fi + + # Check if the given $LV is in the $VG + if [ -n "$LV" ]; then + output=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1) + if [ $? -ne 0 ]; then + ocf_log err "lvs: ${output}" + ocf_exit_reason "LV ($LV) is not in the given VG ($VG)." + exit $OCF_ERR_CONFIGURED + fi + fi + + # VG_access_mode_num specific checking goes here + case ${VG_access_mode_num} in + 1) + lvmlockd_check + ;; + 2) + clvmd_check + ;; + 3) + systemid_check + ;; + 4) + tagging_check + ;; + *) + ocf_exit_reason "Incorrect VG access mode detected!" + exit $OCF_ERR_CONFIGURED + esac + + if [ $? -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Improper configuration issue is detected!" + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +# To activate LV(s) with different "activation mode" parameters +do_activate() { + do_activate_opt=$1 + + if ocf_is_true "$OCF_RESKEY_degraded_activation" ; then + # This will allow a RAID LV to be activated if sufficient + # devices are available to allow the LV to be usable + do_activate_opt="${do_activate_opt} --activationmode degraded" + + elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then + # This will allow a mirror LV to be activated if any + # devices are missing, but the activated LV may not be + # usable, so it is not recommended. Also, other LV + # types without data redundancy will be activated + # when partial is set. 
+ # RAID LVs and degraded_activation should be used instead. + do_activate_opt="${do_activate_opt} --partial" + fi + + # Only activate the specific LV if it's given + if [ -n "$LV" ]; then + ocf_run lvchange $do_activate_opt ${VG}/${LV} + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + else + ocf_run lvchange $do_activate_opt ${VG} + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + fi + + return $OCF_SUCCESS +} + +lvmlockd_activate() { + if [ "$LV_activation_mode" = "shared" ]; then + activate_opt="-asy" + else + activate_opt="-aey" + fi + + # lvmlockd requires shared VGs to be started before they're used + ocf_run vgchange --lockstart ${VG} + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Failed to start shared VG(s), exit code: $rc" + return $OCF_ERR_GENERIC + fi + + do_activate "$activate_opt" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# clvmd must be running to activate clustered VG +clvmd_activate() { + if [ "$LV_activation_mode" = "shared" ]; then + activate_opt="-asy" + else + activate_opt="-aey" + fi + + do_activate "$activate_opt" + if [ $? 
-ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +systemid_activate() { + majority_opt="" + set_autoactivation=0 + cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]') + + if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then + vgchange --help | grep '\--majoritypvs' >/dev/null 2>&1 && majority_opt="--majoritypvs" + fi + + # Put our system ID on the VG + vgchange -y $majority_opt --config "local/extra_system_ids=[\"${cur_systemid}\"]" \ + --systemid ${SYSTEM_ID} ${VG} + vgchange --help | grep '\--setautoactivation' >/dev/null 2>&1 && set_autoactivation=1 + + if [ $set_autoactivation -ne 0 ]; then + if vgs -o autoactivation ${VG} | grep enabled >/dev/null 2>&1 ; then + ocf_log info "disable the autoactivation property of ${VG}" + ocf_run vgchange --setautoactivation n ${VG} + fi + fi + + do_activate "-ay" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +tagging_activate() { + if ! set_tags ; then + ocf_exit_reason "Failed to set tags on ${VG}." + return $OCF_ERR_GENERIC + fi + + do_activate "-ay --config activation{volume_list=[\"@${OUR_TAG}\"]}" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +lvmlockd_deactivate() { + do_activate "-an" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + OUT=$(lvs --noheadings -S lv_active=active ${VG} 2>/dev/null) + [ -n "$OUT" ] && return $OCF_SUCCESS + + # Close the lockspace of this VG if there is no active LV + ocf_run vgchange --lockstop ${VG} + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_exit_reason "Failed to close the shared VG lockspace, exit code: $rc" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +clvmd_deactivate() { + do_activate "-an" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +systemid_deactivate() { + do_activate "-an" + if [ $? 
-ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +tagging_deactivate() { + do_activate "-an --config activation{volume_list=[\"@${OUR_TAG}\"]}" + if [ $? -ne $OCF_SUCCESS ]; then + return $OCF_ERR_GENERIC + fi + + if ! strip_tags ; then + ocf_exit_reason "Failed to remove tags on ${VG}." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# TODO: +# How can we accurately check if LVs in the given VG are all active? +# +# David: +# If we wanted to check that all LVs in the VG are active, then we would +# probably need to use the lvs/lv_live_table command here since dmsetup +# won't know about inactive LVs that should be active. +# +# Eric: +# But, lvs/lv_live_table command doesn't work well now. I tried the following +# method: +# +# lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null | tr -d '[:blank:]') +# dm_count=$(dmsetup --noheadings info -c -S "vg_name=${VG}" 2>/dev/null | grep -c "${VG}-") +# test $lv_count -eq $dm_count +# +# It works, but we cannot afford to use LVM command in lvm_status. LVM command is expensive +# because it may potencially scan all disks on the system, update the metadata even using +# lvs/vgs when the metadata is somehow inconsistent. +# +# So, we have to make compromise that the VG is assumably active if any LV of the VG is active. +# +# Paul: +# VGS + LVS with "-" in their name get mangled with double dashes in dmsetup. +# Switching to wc and just counting lines while depending on the vgname + lvname filter +# in dmsetup gets around the issue with dmsetup reporting correctly but grep failing. +# +# Logic for both test cases and dmsetup calls changed so they match too. +# +# This is AllBad but there isn't a better way that I'm aware of yet. +lvm_status() { + if [ -n "${LV}" ]; then + # dmsetup ls? It cannot accept device name. It's + # too heavy to list all DM devices. 
+		dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG} && lv_name=${LV}" | grep -c -v '^No devices found')
+	else
+		dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG}" | grep -c -v '^No devices found')
+	fi
+
+	if [ $dm_count -eq 0 ]; then
+		if ocf_is_probe ;then
+			return $OCF_NOT_RUNNING
+		else
+			return $OCF_ERR_GENERIC
+		fi
+	fi
+
+	case "$OCF_CHECK_LEVEL" in
+		0)
+			;;
+		10)
+			# if there are many lv in vg dir, pick the first name
+			dm_name="/dev/${VG}/$(ls -1 /dev/${VG} | head -n 1)"
+
+			# read 1 byte to check the dev is alive
+			dd if=${dm_name} of=/dev/null bs=1 count=1 >/dev/null \
+				2>&1
+			if [ $? -ne 0 ]; then
+				return $OCF_ERR_GENERIC
+			fi
+			return $OCF_SUCCESS
+			;;
+		*)
+			ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
+			return $OCF_ERR_CONFIGURED
+			;;
+	esac
+}
+
+# Activate the configured VG (or the single LV, when one is given) using
+# the activation routine that matches VG_access_mode_num.
+# Returns OCF_SUCCESS when lvm_status reports the volume active afterwards,
+# otherwise a non-zero OCF error code.
+lvm_start() {
+	if systemd_is_running ; then
+		# Create drop-in to deactivate VG before stopping
+		# storage services during shutdown/reboot.
+		systemctl show resource-agents-deps.target \
+			--property=After | cut -d'=' -f2 | \
+			grep -qE "(^|\s)blk-availability.service(\s|$)"
+
+		if [ "$?" -ne 0 ]; then
+			systemd_drop_in "99-LVM-activate" "After" \
+				"blk-availability.service"
+		fi
+
+		# If blk-availability isn't started, the "After="
+		# directive has no effect.
+		if ! systemctl is-active blk-availability.service ; then
+			systemctl start blk-availability.service
+		fi
+	fi
+
+	# Name the target before the first message that uses it; it was
+	# previously assigned only after the "already active" log line,
+	# which therefore printed an empty volume name.
+	[ -z "${LV}" ] && vol=${VG} || vol=${VG}/${LV}
+
+	if lvm_status ; then
+		ocf_log info "${vol}: is already active."
+		return $OCF_SUCCESS
+	fi
+
+	ocf_log info "Activating ${vol}"
+
+	case ${VG_access_mode_num} in
+	1)
+		lvmlockd_activate
+		;;
+	2)
+		clvmd_activate
+		;;
+	3)
+		systemid_activate
+		;;
+	4)
+		tagging_activate
+		;;
+	*)
+		ocf_exit_reason "VG [${VG}] is not properly configured in cluster. It's unsafe!"
+		exit $OCF_ERR_CONFIGURED
+		;;
+	esac
+
+	rc=$?
+	if lvm_status ; then
+		ocf_log info "${vol}: activated successfully."
+		return $OCF_SUCCESS
+	else
+		ocf_exit_reason "${vol}: failed to activate."
+		# The activation routine may have returned success even though
+		# the volume is not active; never report that as a successful start.
+		[ "$rc" -eq "$OCF_SUCCESS" ] && rc=$OCF_ERR_GENERIC
+		return $rc
+	fi
+}
+
+# Deactivate LVM volume(s)
+lvm_stop() {
+	[ -z ${LV} ] && vol=${VG} || vol=${VG}/${LV}
+
+	if ! lvm_status ; then
+		ocf_log info "${vol}: has already been deactivated."
+		return $OCF_SUCCESS
+	fi
+
+	ocf_log info "Deactivating ${vol}"
+
+	case ${VG_access_mode_num} in
+	1)
+		lvmlockd_deactivate
+		;;
+	2)
+		clvmd_deactivate
+		;;
+	3)
+		systemid_deactivate
+		;;
+	4)
+		tagging_deactivate
+		;;
+	*)
+		ocf_exit_reason "VG [${VG}] is not properly configured in cluster. It's unsafe!"
+		exit $OCF_SUCCESS
+		;;
+	esac
+
+	if ! lvm_status ; then
+		ocf_log info "${vol}: deactivated successfully."
+		return $OCF_SUCCESS
+	else
+		ocf_exit_reason "${vol}: failed to deactivate."
+		return $OCF_ERR_GENERIC
+	fi
+}
+
+#
+# MAIN
+#
+
+case $__OCF_ACTION in
+start)
+	lvm_validate
+	lvm_start
+	;;
+stop)
+	read_parameters
+	lvm_stop
+	;;
+monitor)
+	lvm_status
+	;;
+validate-all)
+	lvm_validate
+	;;
+meta-data)
+	meta_data
+	;;
+usage|help)
+	usage
+	;;
+*)
+	usage
+	exit $OCF_ERR_UNIMPLEMENTED
+	;;
+esac
+rc=$?
+ +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc diff --git a/heartbeat/LinuxSCSI b/heartbeat/LinuxSCSI new file mode 100755 index 0000000..e9038cd --- /dev/null +++ b/heartbeat/LinuxSCSI @@ -0,0 +1,322 @@ +#!/bin/sh +# +# +# LinuxSCSI +# +# Description: Enables/Disables SCSI devices to protect them from being +# used by mistake +# +# +# Author: Alan Robertson +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# Copyright: (C) 2002 - 2005 IBM +# +# CAVEATS: See the usage message for some important warnings +# +# usage: ./LinuxSCSI (start|stop|status|monitor|meta-data|validate-all|methods) +# +# OCF parameters are as below: +# OCF_RESKEY_scsi +# +# An example usage in /etc/ha.d/haresources: +# node1 10.0.0.170 LinuxSCSI:0:0:11 +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_scsi_default="" +OCF_RESKEY_ignore_deprecation_default="false" + +: ${OCF_RESKEY_scsi=${OCF_RESKEY_scsi_default}} +: ${OCF_RESKEY_ignore_deprecation=${OCF_RESKEY_ignore_deprecation_default}} + +####################################################################### + +zeropat="[ 0]0" + +PROCSCSI=/proc/scsi/scsi + +usage() { + cat < + + +1.0 + + +Deprecation warning: This agent makes use of Linux SCSI hot-plug +functionality which has been superseded by SCSI reservations. It is +deprecated and may be removed from a future release. See the +scsi2reservation and sfex agents for alternatives. -- +This is a resource agent for LinuxSCSI. It manages the availability of a +SCSI device from the point of view of the linux kernel. It make Linux +believe the device has gone away, and it can make it come back again. + +Enables and disables SCSI devices through the +kernel SCSI hot-plug subsystem (deprecated) + + + + +The SCSI instance to be managed. 
+ +SCSI instance + + + + + +If set to true, suppresses the deprecation warning for this agent. + +Suppress deprecation warning + + + + + + + + + + + + + + + +EOF +} + +scsi_methods() { + cat <>$PROCSCSI + echo "scsi add-single-device $host $channel $target $lun" >>$PROCSCSI + if + scsi_status "$1" + then + return $OCF_SUCCESS + else + ocf_exit_reason "SCSI device $1 not active!" + return $OCF_ERR_GENERIC + fi +} + + +# +# stop: Disable the given SCSI device in the kernel +# +scsi_stop() { + parseinst "$1" +# [ $target = error ] && exit 1 + echo "scsi remove-single-device $host $channel $target $lun" >>$PROCSCSI + if + scsi_status "$1" + then + ocf_exit_resaon "SCSI device $1 still active!" + return $OCF_ERR_GENERIC + else + return $OCF_SUCCESS + fi +} + + +# +# status: is the given device now available? +# +scsi_status() { + parseinst "$1" +# [ $target = error ] && exit 1 + [ $channel -eq 0 ] && channel=$zeropat + [ $target -eq 0 ] && target=$zeropat + [ $lun -eq 0 ] && lun=$zeropat + greppat="Host: *scsi$host *Channel: *$channel *Id: *$target *Lun: *$lun" + grep -i "$greppat" $PROCSCSI >/dev/null + if [ $? -eq 0 ]; then + return $OCF_SUCCESS + else + return $OCF_NOT_RUNNING + fi +} + +# +# validate_all: Check the OCF instance parameters +# +scsi_validate_all() { + parseinst $instance + return $OCF_SUCCESS +} + +if + ( [ $# -ne 1 ] ) +then + ocf_exit_reason "Parameter number error." + usage + exit $OCF_ERR_GENERIC +fi + +#if +# [ -z "$OCF_RESKEY_scsi" ] && [ "X$1" = "Xmethods" ] +#then +# scsi_methods +# exit #? +#fi +case $1 in + methods) scsi_methods + exit $OCF_SUCCESS + ;; + meta-data) meta_data + exit $OCF_SUCCESS + ;; + usage) usage + exit $OCF_SUCCESS + ;; + *) ;; +esac + +# Be obnoxious, log deprecation warning on every invocation (unless +# suppressed by resource configuration). +ocf_deprecated + +if + [ -z "$OCF_RESKEY_scsi" ] +then + ocf_exit_reason "You have to set a valid scsi id at least!" 
+# usage + exit $OCF_ERR_GENERIC +fi + +instance=$OCF_RESKEY_scsi + +case $1 in + start) scsi_start $instance + ;; + stop) scsi_stop $instance + ;; + status|monitor) + if + scsi_status $instance + then + ocf_log info "SCSI device $instance is running" + exit $OCF_SUCCESS + else + ocf_log info "SCSI device $instance is stopped" + exit $OCF_NOT_RUNNING + fi + ;; + validate-all) scsi_validate_all + ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +exit $? diff --git a/heartbeat/MailTo b/heartbeat/MailTo new file mode 100755 index 0000000..56940ba --- /dev/null +++ b/heartbeat/MailTo @@ -0,0 +1,199 @@ +#!/bin/sh +# +# Resource script for MailTo +# +# Author: Alan Robertson +# +# Description: sends email to a sysadmin whenever a takeover occurs. +# +# Note: This command requires an argument, unlike normal init scripts. +# +# This can be given in the haresources file as: +# +# You can also give a mail subject line or even multiple addresses +# MailTo::alanr@unix.sh::BigImportantWebServer +# MailTo::alanr@unix.sh,spoppi@gmx.de::BigImportantWebServer +# +# This will then be put into the message subject and body. +# +# OCF parameters are as below: +# OCF_RESKEY_email +# OCF_RESKEY_subject +# +# License: GNU General Public License (GPL) +# +# Copyright: (C) 2005 International Business Machines + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_email_default="" +OCF_RESKEY_subject_default="Resource Group" + +: ${OCF_RESKEY_email=${OCF_RESKEY_email_default}} +: ${OCF_RESKEY_subject=${OCF_RESKEY_subject_default}} + +####################################################################### + +ARGS="$0 $*" + +us=`uname -n` + +usage() { + echo "Usage: $0 {start|stop|status|monitor|meta-data|validate-all}" +} + +meta_data() { + cat < + + +1.0 + + +This is a resource agent for MailTo.
It sends email to a sysadmin whenever +a takeover occurs. + +Notifies recipients by email in the event of resource takeover + + + + +The email address of sysadmin. + +Email address + + + + + +The subject of the email. + +Subject + + + + + + + + + + + + + +END +} + +MailProgram() { + $MAILCMD -s "$1" "$email" </dev/null \ + | xsltproc $(top_srcdir)/make/extract_text.xsl - \ + | aspell pipe list -d en_US --ignore-case \ + --home-dir=$(top_srcdir)/make -p spellcheck-ignore \ + | sed -n 's|^&\([^:]*\):.*|\1|p'; +spellcheck: + @$(foreach agent,$(ocf_SCRIPTS), $(do_spellcheck)) + +clean-local: + rm -rf __pycache__ *.pyc diff --git a/heartbeat/ManageRAID.in b/heartbeat/ManageRAID.in new file mode 100644 index 0000000..bf5c745 --- /dev/null +++ b/heartbeat/ManageRAID.in @@ -0,0 +1,391 @@ +#!@BASH_SHELL@ +# +# Name ManageRAID +# Author Matthias Dahl, m.dahl@designassembly.de +# License GPL version 2 +# +# (c) 2006 The Design Assembly GmbH. +# +# +# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +# +# This resource agent is most likely function complete but not error free. Please +# consider it BETA quality for the moment until it has proven itself stable... +# +# USE AT YOUR OWN RISK. +# +# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +# +# +# partly based on/inspired by original Heartbeat2 OCF resource agents +# +# Description +# +# Manages starting, mounting, unmounting, stopping and monitoring of RAID devices +# which are preconfigured in /etc/conf.d/HB-ManageRAID. +# +# +# Created 11. Sep 2006 +# Updated 18. Sep 2006 +# +# rev. 1.00.2 +# +# Changelog +# +# 18/Sep/06 1.00.1 more cleanup +# 12/Sep/06 1.00.1 add more functionality +# add sanity check for config parameters +# general cleanup all over the place +# 11/Sep/06 1.00.0 it's alive... muahaha... ALIVE... 
:-) +# +# +# TODO +# +# - check if at least one disk out of PREFIX_LOCALDISKS is still active +# in RAID otherwise consider RAID broken and stop it. +# +# The reason behind this: consider a RAID-1 which contains iSCSI devices +# shared over Ethernet which get dynamically added/removed to/from the RAID. +# Once all local disks have failed and only those iSCSI disks remain, the RAID +# should really stop to prevent bad performance and possible data loss. +# + +### +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_raidname_default="" + +: ${OCF_RESKEY_raidname=${OCF_RESKEY_raidname_default}} + +### + +# required utilities + +# required files/devices +RAID_MDSTAT=/proc/mdstat + +# +# check_file() +# +check_file () +{ + if [[ ! -e $1 ]]; then + ocf_log err "setup problem: file $1 does not exist." + exit $OCF_ERR_GENERIC + fi +} + +# +# usage() +# +usage() +{ + cat <<-EOT + usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} + EOT +} + +# +# meta_data() +# +meta_data() +{ + cat < + + + 1.0 + + + Manages starting, stopping and monitoring of RAID devices which + are preconfigured in /etc/conf.d/HB-ManageRAID. + + + Manages RAID devices + + + + + Name (case sensitive) of RAID to manage. (preconfigured in /etc/conf.d/HB-ManageRAID) + + RAID name + + + + + + + + + + + + + +END +} + +# +# start_raid() +# +start_raid() +{ + declare -i retcode + + status_raid + retcode=$? + if [[ $retcode == $OCF_SUCCESS ]]; then + return $OCF_SUCCESS + elif [[ $retcode != $OCF_NOT_RUNNING ]]; then + return $retcode + fi + + for ldev in "${RAID_LOCALDISKS[@]}"; do + if [[ ! -b $ldev ]]; then + ocf_log err "$ldev is not a (local) block device." + return $OCF_ERR_ARGS + fi + done + + $MDADM -A $RAID_DEVPATH -a yes -u ${!RAID_UUID} "${RAID_LOCALDISKS[@]}" &> /dev/null + if [[ $? != 0 ]]; then + ocf_log err "starting ${!RAID_DEV} with ${RAID_LOCALDISKS[*]} failed." 
+ return $OCF_ERR_GENERIC + fi + + $MOUNT -o ${!RAID_MOUNTOPTIONS} $RAID_DEVPATH ${!RAID_MOUNTPOINT} &> /dev/null + if [[ $? != 0 ]]; then + $MDADM -S $RAID_DEVPATH &> /dev/null + + if [[ $? != 0 ]]; then + ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed as well as stopping the RAID itself." + else + ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed. RAID stopped again." + fi + + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# +# stop_raid() +# +stop_raid() +{ + status_raid + if [[ $? == $OCF_NOT_RUNNING ]]; then + return $OCF_SUCCESS + fi + + $UMOUNT ${!RAID_MOUNTPOINT} &> /dev/null + if [[ $? != 0 ]]; then + ocf_log err "unmounting ${!RAID_MOUNTPOINT} failed. not stopping ${!RAID_DEV}!" + return $OCF_ERR_GENERIC + fi + + $MDADM -S $RAID_DEVPATH &> /dev/null + if [[ $? != 0 ]]; then + ocf_log err "stopping RAID ${!RAID_DEV} failed." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# +# status_raid() +# +status_raid() +{ + declare -i retcode_raidcheck + declare -i retcode_uuidcheck + + $CAT $RAID_MDSTAT | $GREP -e "${!RAID_DEV}[\ ]*:[\ ]*active" &> /dev/null + if [ $? -ne 0 ]; then + return $OCF_NOT_RUNNING + fi + + if [ ! -e $RAID_DEVPATH ]; then + return $OCF_ERR_GENERIC + fi + + $MDADM --detail -t $RAID_DEVPATH &> /dev/null + retcode_raidcheck=$? + $MDADM --detail -t $RAID_DEVPATH | $GREP -qEe "^[\ ]*UUID[\ ]*:[\ ]*${!RAID_UUID}" &> /dev/null + retcode_uuidcheck=$? + + if [ $retcode_raidcheck -gt 3 ]; then + ocf_log err "mdadm returned error code $retcode_raidcheck while checking ${!RAID_DEV}." + return $OCF_ERR_GENERIC + elif [ $retcode_raidcheck -eq 3 ]; then + ocf_log err "${!RAID_DEV} has failed." + return $OCF_ERR_GENERIC + elif [ $retcode_raidcheck -lt 3 ] && [ $retcode_uuidcheck != 0 ]; then + ocf_log err "active RAID ${!RAID_DEV} and configured UUID (!$RAID_UUID) do not match." + return $OCF_ERR_GENERIC + fi + + $MOUNT | $GREP -e "$RAID_DEVPATH on ${!RAID_MOUNTPOINT}" &> /dev/null + if [[ $? 
!= 0 ]]; then + ocf_log err "${!RAID_DEV} seems to be no longer mounted at ${!RAID_MOUNTPOINT}" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +# +# validate_all_raid() +# +validate_all_raid() +{ + # + # since all parameters are checked every time ManageRAID is + # invoked, there is not much more to check... + # + # status_raid should cover the rest. + # + declare -i retcode + + status_raid + retcode=$? + + if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then + return $retcode + fi + + return $OCF_SUCCESS +} + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +case "$1" in + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + usage) + usage + exit $OCF_SUCCESS + ;; + *) + ;; +esac + +## required configuration +# +[ -f /etc/conf.d/HB-ManageRAID ] || { + ocf_log err "/etc/conf.d/HB-ManageRAID missing" + exit $OCF_ERR_INSTALLED +} +. /etc/conf.d/HB-ManageRAID +# +## + +# +# check relevant environment variables for sanity and security +# + +declare -i retcode_test +declare -i retcode_grep + +$TEST -z "$OCF_RESKEY_raidname" +retcode_test=$? +echo "$OCF_RESKEY_raidname" | $GREP -qEe "^[[:alnum:]\_]+$" +retcode_grep=$? + +if [[ $retcode_test != 1 || $retcode_grep != 0 ]]; then + ocf_log err "OCF_RESKEY_raidname not set or invalid." + exit $OCF_ERR_ARGS +fi + +RAID_UUID=${OCF_RESKEY_raidname}_UUID + +echo ${!RAID_UUID} | $GREP -qEe "^[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}$" +if [[ $? != 0 ]]; then + ocf_log err "${OCF_RESKEY_raidname}_UUID is invalid." + exit $OCF_ERR_ARGS +fi + +RAID_DEV=${OCF_RESKEY_raidname}_DEV + +echo ${!RAID_DEV} | $GREP -qEe "^md[0-9]+$" +if [[ $? != 0 ]]; then + ocf_log err "${OCF_RESKEY_raidname}_DEV is invalid." + exit $OCF_ERR_ARGS +fi + +RAID_DEVPATH=/dev/${!RAID_DEV/md/md\/} +RAID_MOUNTPOINT=${OCF_RESKEY_raidname}_MOUNTPOINT + +echo ${!RAID_MOUNTPOINT} | $GREP -qEe "^[[:alnum:]\/\_\"\ ]+$" +if [[ $? != 0 ]]; then + ocf_log err "${OCF_RESKEY_raidname}_MOUNTPOINT is invalid."
+ exit $OCF_ERR_ARGS +fi + +RAID_MOUNTOPTIONS=${OCF_RESKEY_raidname}_MOUNTOPTIONS + +echo ${!RAID_MOUNTOPTIONS} | $GREP -qEe "^[[:alpha:]\,]+$" +if [[ $? != 0 ]]; then + ocf_log err "${OCF_RESKEY_raidname}_MOUNTOPTIONS is invalid." + exit $OCF_ERR_ARGS +fi + +RAID_LOCALDISKS=${OCF_RESKEY_raidname}_LOCALDISKS[@] +RAID_LOCALDISKS=( "${!RAID_LOCALDISKS}" ) + +if [ ${#RAID_LOCALDISKS[@]} -lt 1 ]; then + ocf_log err "you have to specify at least one local disk." + exit $OCF_ERR_ARGS +fi + +# +# check that all relevant utilities are available +# +check_binary $MDADM +check_binary $MOUNT +check_binary $UMOUNT +check_binary $GREP +check_binary $CAT +check_binary $TEST +check_binary echo + + +# +# check that all relevant devices are available +# +check_file $RAID_MDSTAT + +# +# finally... let's see what we are ordered to do :-) +# +case "$1" in + start) + start_raid + ;; + stop) + stop_raid + ;; + status|monitor) + status_raid + ;; + validate-all) + validate_all_raid + ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $? + diff --git a/heartbeat/ManageVE.in b/heartbeat/ManageVE.in new file mode 100644 index 0000000..f07ca5b --- /dev/null +++ b/heartbeat/ManageVE.in @@ -0,0 +1,320 @@ +#!@BASH_SHELL@ +# +# ManageVE OCF RA. Manages OpenVZ Virtual Environments (VEs) +# +# (c) 2006-2010 Matthias Dahl, Florian Haas, +# and Linux-HA contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. 
Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# +# This OCF compliant resource agent manages OpenVZ VEs and thus requires +# a proper OpenVZ installation including a recent vzctl util. +# +# rev. 1.00.4 +# +# Changelog +# +# 21/Oct/10 1.00.4 implement migrate_from/migrate_to +# 12/Sep/06 1.00.3 more cleanup +# 12/Sep/06 1.00.2 fixed some logic in start_ve +# general cleanup all over the place +# 11/Sep/06 1.00.1 fixed some typos +# 07/Sep/06 1.00.0 it's alive... muahaha... ALIVE... :-) +# + +### +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_veid_default="" + +: ${OCF_RESKEY_veid=${OCF_RESKEY_veid_default}} + +### + +# required utilities +VZCTL=/usr/sbin/vzctl + +# +# usage() +# +usage() +{ + cat <<-EOF + usage: $0 {start|stop|status|monitor|migrate_from|migrate_to|validate-all|usage|meta-data} + EOF +} + +# +# meta_data() +# +meta_data() +{ + cat < + + + 1.0 + + + This OCF compliant resource agent manages OpenVZ VEs and thus requires + a proper OpenVZ installation including a recent vzctl util. + + + Manages an OpenVZ Virtual Environment (VE) + + + + + OpenVZ ID of virtual environment (see output of vzlist -a for all assigned IDs) + + OpenVZ ID of VE + + + + + + + + + + + + + + + +END +} + +# +# start_ve() +# +# Starts a VE, or simply logs a message if the VE is already running. +# +start_ve() +{ + if status_ve; then + ocf_log info "VE $VEID already running." 
+ return $OCF_SUCCESS + fi + + ocf_run $VZCTL start $VEID || exit $OCF_ERR_GENERIC + + return $OCF_SUCCESS +} + +# +# stop_ve() +# +# ATTENTION: The following code relies on vzctl's exit codes, especially: +# +# 0 : success +# +# In case any of those exit codes change, this function will need fixing. +# +stop_ve() +{ + status_ve + if [ $? -eq $OCF_NOT_RUNNING ]; then + ocf_log info "VE $VEID already stopped." + return $OCF_SUCCESS + fi + + ocf_run $VZCTL stop $VEID || exit $OCF_ERR_GENERIC + + return $OCF_SUCCESS +} + +# +# migrate_to_ve() +# +# In the process of a resource migration, checkpoints the VE. For this +# to work, vzctl must obviously create the dump file in a place which +# the migration target has access to (an NFS mount, a DRBD device, +# etc.). +# +migrate_to_ve() +{ + if ! status_ve; then + ocf_log err "VE $VEID is not running, aborting" + exit $OCF_ERR_GENERIC + fi + ocf_run $VZCTL chkpnt $VEID || exit $OCF_ERR_GENERIC + return $OCF_SUCCESS +} + +# +# migrate_from_ve() +# +# In the process of a resource migration, restores the VE. For this to +# work, vzctl must obviously have access to the dump file which was +# created on the migration source (on an NFS mount, a DRBD device, +# etc.). +# +migrate_from_ve() +{ + ocf_run $VZCTL restore $VEID || exit $OCF_ERR_GENERIC + return $OCF_SUCCESS +} + +# +# status_ve() +# +# ATTENTION: The following code relies on vzctl's status output. The fifth +# column is interpreted as the VE status (either running or down). +# +# In case the output format should change, this function will need fixing. +# +status_ve() +{ + declare -i retcode + + veexists=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $3}'` + vestatus=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $5}'` + retcode=$? + + if [[ $retcode != 0 ]]; then + # log error only if expected to find running + if [ "$__OCF_ACTION" = "monitor" ] && !
ocf_is_probe; then + ocf_log err "vzctl status $VEID returned: $retcode" + fi + exit $OCF_ERR_GENERIC + fi + + if [[ $veexists != "exist" ]]; then + ocf_log err "vzctl status $VEID returned: $VEID does not exist." + return $OCF_NOT_RUNNING + fi + + case "$vestatus" in + running) + return $OCF_SUCCESS + ;; + down) + return $OCF_NOT_RUNNING + ;; + *) + ocf_log err "vzctl status $VEID, wrong output format. (5th column: $vestatus)" + exit $OCF_ERR_GENERIC + ;; + esac +} + +# +# validate_all_ve() +# +# ATTENTION: The following code relies on vzctl's status output. The fifth +# column is interpreted as the VE status (either up or down). +# +# In case the output format should change, this function will need fixing. +# +validate_all_ve() +{ + declare -i retcode + + # VEID should be a valid VE + `status_ve` + retcode=$? + + if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then + return $retcode + fi + + return $OCF_SUCCESS +} + + +if [[ $# != 1 ]]; then + usage + exit $OCF_ERR_ARGS +fi + +case "$1" in + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + usage) + usage + exit $OCF_SUCCESS + ;; + *) + ;; +esac + +# +# check relevant environment variables for sanity and security +# + +# empty string? +`test -z "$OCF_RESKEY_veid"` + +declare -i veidtest1=$? + +# really a number? +`echo "$OCF_RESKEY_veid" | egrep -q '^[[:digit:]]+$'` + +if [[ $veidtest1 != 1 || $? != 0 ]]; then + ocf_log err "OCF_RESKEY_veid not set or not a number." + exit $OCF_ERR_ARGS +fi + +declare -i VEID=$OCF_RESKEY_veid + +# +# check that all relevant utilities are available +# +check_binary $VZCTL +check_binary $AWK + +# +# finally... let's see what we are ordered to do :-) +# +case "$1" in + start) + start_ve + ;; + stop) + stop_ve + ;; + status|monitor) + status_ve + ;; + migrate_to) + migrate_to_ve + ;; + migrate_from) + migrate_from_ve + ;; + validate-all) + validate_all_ve + ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $? 
+ diff --git a/heartbeat/NodeUtilization b/heartbeat/NodeUtilization new file mode 100755 index 0000000..f98ab13 --- /dev/null +++ b/heartbeat/NodeUtilization @@ -0,0 +1,237 @@ +#!/bin/sh +# +# +# NodeUtilization OCF Resource Agent +# +# Copyright (c) 2011 SUSE LINUX, John Shi +# Copyright (c) 2016 SUSE LINUX, Kristoffer Gronlund +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_pidfile_default="$HA_VARRUN/NodeUtilization-${OCF_RESOURCE_INSTANCE}" +OCF_RESKEY_dynamic_default="true" +OCF_RESKEY_utilization_cpu_default="true" +OCF_RESKEY_utilization_cpu_reservation_default="1" +OCF_RESKEY_utilization_host_memory_default="true" +OCF_RESKEY_utilization_host_memory_reservation_default="512" +OCF_RESKEY_utilization_hv_memory_default="true" +OCF_RESKEY_utilization_hv_memory_reservation_default="512" + +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_dynamic=${OCF_RESKEY_dynamic_default}} +: ${OCF_RESKEY_utilization_cpu=${OCF_RESKEY_utilization_cpu_default}} +: ${OCF_RESKEY_utilization_cpu_reservation=${OCF_RESKEY_utilization_cpu_reservation_default}} +: ${OCF_RESKEY_utilization_host_memory=${OCF_RESKEY_utilization_host_memory_default}} +: ${OCF_RESKEY_utilization_host_memory_reservation=${OCF_RESKEY_utilization_host_memory_reservation_default}} +: ${OCF_RESKEY_utilization_hv_memory=${OCF_RESKEY_utilization_hv_memory_default}} +: ${OCF_RESKEY_utilization_hv_memory_reservation=${OCF_RESKEY_utilization_hv_memory_reservation_default}} + +####################################################################### + +NodeUtilization_meta_data() { + cat < + + +1.0 + + +The Node Utilization agent detects system parameters like available CPU, host +memory and hypervisor memory availability, and adds them into the CIB for each +node using crm_attribute. Run the agent as a clone resource to have it populate +these parameters on each node. +Note: Setting hv_memory only works with Xen at the moment, using the xl or xm +command line tools. + +Node Utilization + + + + +If set, parameters will be updated if there are differences between the HA +parameters and the system values when running the monitor action. +If not set, the parameters will be set once when the resource instance starts. 
+ +Dynamically update parameters in monitor + + + + +Enable setting node CPU utilization limit. +Set node CPU utilization limit. + + + + +Subtract this value when setting the CPU utilization parameter. +CPU reservation. + + + + +Enable setting available host memory. +Set available host memory. + + + + +Subtract this value when setting host memory utilization, in MB. +Host memory reservation, in MB. + + + + +Enable setting available hypervisor memory. +Set available hypervisor memory. + + + + +Subtract this value when setting hypervisor memory utilization, in MB. +Hypervisor memory reservation, in MB. + + + + + + + + + + + + +END +} + +Host_Total_Memory() { + local xentool + + xentool=$(which xl 2> /dev/null || which xm 2> /dev/null) + + if [ -x "$xentool" ]; then + "$xentool" info | awk '/total_memory/{printf("%d\n",$3);exit(0)}' + else + ocf_log debug "Can only set hv_memory for Xen hypervisor" + echo "0" + fi +} + + +set_utilization() { + host_name="$(ocf_local_nodename)" + + if ocf_is_true "$OCF_RESKEY_utilization_cpu"; then + sys_cpu=$(( $(grep -c processor /proc/cpuinfo) - $OCF_RESKEY_utilization_cpu_reservation )) + uti_cpu=$(crm_attribute --quiet -t nodes --node "$host_name" -z -n cpu 2>/dev/null) + + if [ "$sys_cpu" != "$uti_cpu" ]; then + if ! crm_attribute -t nodes --node "$host_name" -z -n cpu -v $sys_cpu; then + ocf_log err "Failed to set the cpu utilization attribute for $host_name using crm_attribute." + return 1 + fi + fi + fi + + if ocf_is_true "$OCF_RESKEY_utilization_host_memory"; then + sys_mem=$(( $(awk '/MemTotal/{printf("%d\n",$2/1024);exit(0)}' /proc/meminfo) - $OCF_RESKEY_utilization_host_memory_reservation )) + uti_mem=$(crm_attribute --quiet -t nodes --node "$host_name" -z -n host_memory 2>/dev/null) + + if [ "$sys_mem" != "$uti_mem" ]; then + if ! crm_attribute -t nodes --node "$host_name" -z -n host_memory -v $sys_mem; then + ocf_log err "Failed to set the host_memory utilization attribute for $host_name using crm_attribute." 
+ return 1 + fi + fi + fi + + if ocf_is_true "$OCF_RESKEY_utilization_hv_memory"; then + hv_mem=$(( $(Host_Total_Memory) - OCF_RESKEY_utilization_hv_memory_reservation )) + uti_mem=$(crm_attribute --quiet -t nodes --node "$host_name" -z -n hv_memory 2>/dev/null) + + [ $hv_mem -lt 0 ] && hv_mem=0 + + if [ "$hv_mem" != "$uti_mem" ]; then + if ! crm_attribute -t nodes --node "$host_name" -z -n hv_memory -v $hv_mem; then + ocf_log err "Failed to set the hv_memory utilization attribute for $host_name using crm_attribute." + return 1 + fi + fi + fi +} + +NodeUtilization_usage() { + cat < : Pure-FTPd script +# Author: Raoul Bhatia : Minor Cleanup. Added Debian GNU/Linux Support +# License: GNU General Public License (GPL) +# +# +# usage: $0 {start|stop|status|monitor|validate-all|meta-data} +# +# The "start" arg starts Pure-FTPd. +# +# The "stop" arg stops it. +# +# OCF parameters: +# OCF_RESKEY_script +# OCF_RESKEY_conffile +# OCF_RESKEY_daemon_type +# OCF_RESKEY_pidfile +# +########################################################################## +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_script_default="/sbin/pure-config.pl" +OCF_RESKEY_conffile_default="/etc/pure-ftpd/pure-ftpd.conf" +OCF_RESKEY_daemon_type_default="" +OCF_RESKEY_pidfile_default="${HA_RSCTMP}/pure-ftpd-${OCF_RESOURCE_INSTANCE}.pid" + +: ${OCF_RESKEY_script=${OCF_RESKEY_script_default}} +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_daemon_type=${OCF_RESKEY_daemon_type_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} + +script_basename=`basename $OCF_RESKEY_script` + +USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; + +########################################################################## + +usage() { + echo $USAGE >&2 +} + +meta_data() { + cat < + + +1.0 + +This script manages Pure-FTPd in an Active-Passive setup + +Manages a Pure-FTPd FTP server instance + + + + + +The full path to the Pure-FTPd startup script. +For example, "/sbin/pure-config.pl" + +Script name with full path + + + + + +The Pure-FTPd configuration file name with full path. +For example, "/etc/pure-ftpd/pure-ftpd.conf" + +Configuration file name with full path + + + + + +The Pure-FTPd daemon to be called by pure-ftpd-wrapper. +Valid options are "" for pure-ftpd, "mysql" for pure-ftpd-mysql, +"postgresql" for pure-ftpd-postgresql and "ldap" for pure-ftpd-ldap + +Configuration file name with full path + + + + +PID file +PID file + + + + + + + + + + + + + +END + exit $OCF_SUCCESS +} + +isRunning() +{ + kill -s 0 "$1" > /dev/null +} + +PureFTPd_status() +{ + if [ -f $OCF_RESKEY_pidfile ] + then + # Pure-FTPd is probably running + PID=`head -n 1 $OCF_RESKEY_pidfile` + if [ ! -z $PID ] ; then + isRunning "$PID" && [ `ps -p $PID | grep pure-ftpd | wc -l` -eq 1 ] + return $? + fi + fi + + # Pure-FTPd is not running + false +} + +PureFTPd_start() +{ + local pid_dir + + # + # make a few checks and start Pure-FTPd + # + if ocf_is_root ; then : ; else + ocf_log err "You must be root." 
+ exit $OCF_ERR_PERM + fi + + # if Pure-FTPd is running return success + + if PureFTPd_status ; then + exit $OCF_SUCCESS + fi + + # check that the Pure-FTPd script exists and can be executed + if [ ! -x "$OCF_RESKEY_script" ]; then + ocf_log err "Pure-FTPd script '$OCF_RESKEY_script' does not exist or cannot be executed" + exit $OCF_ERR_GENERIC + fi + + # make sure that the pid directory exists + pid_dir=`dirname $OCF_RESKEY_pidfile` + if [ ! -d $pid_dir ] ; then + ocf_log info "Creating PID directory '$pid_dir'." + mkdir -p $pid_dir + fi + + # test for pure-ftpd-wrapper (e.g. Debian GNU/Linux Systems) + if [ "$script_basename" = "pure-ftpd-wrapper" ]; then + # pure-ftpd-wrapper expects STANDALONE_OR_INETD to be set to standalone + STANDALONE_OR_INETD=standalone $OCF_RESKEY_script $OCF_RESKEY_daemon_type + else + # check that the Pure-FTPd config file exist + if [ ! -f "$OCF_RESKEY_conffile" ]; then + ocf_log err "Pure_FTPd config file '$OCF_RESKEY_conffile' does not exist" + exit $OCF_ERR_GENERIC + fi + + $OCF_RESKEY_script $OCF_RESKEY_conffile -g $OCF_RESKEY_pidfile + fi + + rc=$?; if [ "$rc" -ne 0 ]; then + ocf_log info "Pure-FTPd returned error" $rc + exit $OCF_ERR_GENERIC + fi + + exit $OCF_SUCCESS +} + + +PureFTPd_stop() +{ + if PureFTPd_status ; then + PID=`head -n 1 $OCF_RESKEY_pidfile` + if [ ! -z $PID ] ; then + kill $PID + fi + fi + + exit $OCF_SUCCESS +} + +PureFTPd_monitor() +{ + if PureFTPd_status ; then + return $OCF_SUCCESS + fi + + return $OCF_NOT_RUNNING +} + +PureFTPd_validate_all() +{ + return $OCF_SUCCESS +} + +# +# Main +# + +if [ $# -ne 1 ] +then + usage + exit $OCF_ERR_ARGS +fi + +case $1 in + start) PureFTPd_start + ;; + + stop) PureFTPd_stop + ;; + + status) if PureFTPd_status + then + ocf_log info "Pure-FTPd is running" + exit $OCF_SUCCESS + else + ocf_log info "Pure-FTPd is stopped" + exit $OCF_NOT_RUNNING + fi + ;; + + monitor) PureFTPd_monitor + exit $? + ;; + + validate-all) PureFTPd_validate_all + exit $?
+ ;; + + meta-data) meta_data + ;; + + usage) usage + exit $OCF_SUCCESS + ;; + + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/heartbeat/README b/heartbeat/README new file mode 100644 index 0000000..6042956 --- /dev/null +++ b/heartbeat/README @@ -0,0 +1,46 @@ +The OCF RA shared code directory + +If an RA is too big to be comfortably maintained, split it into +several source files. Obviously, if two or more RAs share some +code, move that code out to a file which can be shared. + +These files will be installed in $OCF_ROOT/lib/heartbeat with +permissions 644. + +Naming practice + +Use names such as <RA>.sh or <RA>-check.sh or anything-else.sh +where "anything-else" should be related to both the RA and the +code it contains. By adding an extension (.sh) it is going to be +easier to notice that these files are not complete resource +agents. + +For instance, the oracle and oralsnr RAs can both use code in +ora-common.sh. + +Of course, if the RA is implemented in another programming +language, use the appropriate extension. + +RA tracing + +RA tracing may be turned on by setting OCF_TRACE_RA. The trace +output will be saved to OCF_TRACE_FILE, if set. If not, +then the trace will be saved to OCF_RESKEY_trace_dir. +If it's also not defined, the log will be saved by default to + + $HA_VARLIB/trace_ra/<type>/<id>.<action>.<timestamp> + +e.g. $HA_VARLIB/trace_ra/oracle/db.start.2012-11-27.08:37:08 + +HA_VARLIB is typically set to /var/lib/heartbeat. + +OCF_TRACE_FILE can be set to a path or file descriptor: + +- FD (small integer [3-9]) in that case it is up to the callers + to capture output; the FD _must_ be open for writing + +- absolute path + +NB: FD 9 may be used for tracing with bash >= v4 in case +OCF_TRACE_FILE is set to a path.
+ diff --git a/heartbeat/README.galera b/heartbeat/README.galera new file mode 100644 index 0000000..dd45618 --- /dev/null +++ b/heartbeat/README.galera @@ -0,0 +1,148 @@ +Notes regarding the Galera resource agent +--- + +In the resource agent, the action of bootstrapping a Galera cluster is +implemented as a series of small steps, by using: + + * Two CIB attributes `last-committed` and `bootstrap` to elect a + bootstrap node that will restart the cluster. + + * One CIB attribute `sync-needed` that will identify that joining + nodes are in the process of synchronizing their local database + via SST. + + * A Master/Slave pacemaker resource which helps split the boot + into steps, up to a point where a galera node is available. + + * the recurring monitor action to coordinate the switch from one + state to another. + +How boot works +==== + +There are two things to know to understand how the resource agent +restarts a Galera cluster. + +### Bootstrap the cluster with the right node + +When synced, the nodes of a galera cluster have in common a last seqno, +which identifies the last transaction considered successful by a +majority of nodes in the cluster (think quorum). + +To restart a cluster, the resource agent must ensure that it will +bootstrap the cluster from a node which is up-to-date, i.e. which has +the highest seqno of all nodes. + +As a result, if the resource agent cannot retrieve the seqno on all +nodes, it won't be able to safely identify a bootstrap node, and +will simply refuse to start the galera cluster. + +### Synchronizing nodes can be a long operation + +Starting a bootstrap node is relatively fast, so it's performed +during the "promote" operation, which is a one-off, time-bounded +operation. + +Subsequent nodes will need to synchronize via SST, which consists +of "pushing" an entire Galera DB from one node to another. + +There is no perfect time-out, as time spent during synchronization +depends on the size of the DB.
Thus, joiner nodes are started during +the "monitor" operation, which is a recurring operation that can +better track the progress of the SST. + + +State flow +==== + +General idea for starting Galera: + + * Before starting the Galera cluster each node needs to go into Slave + state so that the agent records its last seqno into the CIB. + __ This uses attribute last-committed __ + + * When all nodes are in Slave, the agent can safely determine the + last seqno and elect a bootstrap node (`detect_first_master()`). + __ This uses attribute bootstrap __ + + * The agent then sets the score of the elected bootstrap node to + Master so that pacemaker promotes it and starts the first Galera + server. + + * Once the first Master is running, the agent can start joiner + nodes during the "monitor" operation, and starts monitoring + their SST sync. + __ This uses attribute sync-needed __ + + * Only when SST is over on joiner nodes, the agent promotes them + to Master. At this point, the entire Galera cluster is up. + + +Attribute usage and liveness +==== + +Here is how attributes are created on a per-node basis. If you +modify the resource agent make sure those properties still hold. + +### last-committed + +It is just a temporary hint for the resource agent to help +elect a bootstrap node. Once the bootstrap attribute is set on one +of the nodes, we can get rid of last-committed. + + - Used : during Slave state to compare seqno + - Created: before entering Slave state: + . at startup in `galera_start()` + . or when a Galera node is stopped in `galera_demote()` + - Deleted: just before node starts in `galera_start_local_node()`; + cleaned-up during `galera_demote()` and `galera_stop()` + +We delete last-committed before starting Galera, to avoid race +conditions that could arise due to discrepancies between the CIB and +Galera. + +### bootstrap + +Attribute set on the node that is elected to bootstrap Galera.
+ +- Used : during promotion in `galera_start_local_node()` +- Created: at startup once all nodes have `last-committed`; + or during monitor if all nodes have failed +- Deleted: in `galera_start_local_node()`, just after the bootstrap + node started and is ready; + cleaned-up during `galera_demote()` and `galera_stop()` + +There cannot be more than one bootstrap node at any time, otherwise +the Galera cluster would stop replicating properly. + +### sync-needed + +While this attribute is set on a node, the Galera node is in JOIN +state, i.e. SST is in progress and the node cannot serve queries. + +The resource agent relies on the underlying SST method to monitor +the progress of the SST. For instance, with `wsrep_sst_rsync`, a +timeout would be reported by rsync and the Galera node would go into +Non-primary state, which would make `galera_monitor()` fail. + +- Used : during recurring slave monitor in `check_sync_status()` +- Created: in `galera_start_local_node()`, just after the joiner + node started and entered the Galera cluster +- Deleted: during recurring slave monitor in `check_sync_status()` + as soon as the Galera node reports to be SYNC-ed. + +### no-grastate + +If a galera node was unexpectedly killed in the middle of a replication, +InnoDB can retain the equivalent of an XA transaction in prepared state +in its redo log. If so, mysqld cannot recover state (nor last seqno) +automatically, and a special recovery heuristic has to be used to +unblock the node. + +This transient attribute is used to keep track of forced recoveries to +prevent bootstrapping a cluster from a recovered node when possible. + +- Used : during `detect_first_master()` to elect the bootstrap node +- Created: in `detect_last_commit()` if the node has a pending XA + transaction to recover in the redo log +- Deleted: when a node is promoted to Master.
diff --git a/heartbeat/README.mariadb.md b/heartbeat/README.mariadb.md new file mode 100644 index 0000000..da35a03 --- /dev/null +++ b/heartbeat/README.mariadb.md @@ -0,0 +1,156 @@ +Setting up the MariaDB resource agent +===================================== + +This resource agent requires corosync version >= 2 and mariadb version > 10.2. + +Before embarking on this quest one should read the MariaDB pages on replication +and global transaction IDs, GTID. This will greatly help in understanding what +is going on and why. + +Replication: https://mariadb.com/kb/en/mariadb/setting-up-replication/ +GTID: https://mariadb.com/kb/en/mariadb/gtid/ +semi-sync: https://mariadb.com/kb/en/mariadb/semisynchronous-replication/ + +Some reading on failures under enhanced semi-sync can be found here: +https://jira.mariadb.org/browse/MDEV-162 + +Part 1: MariaDB Setup +--------------------- + +It is best to initialize your MariaDB and do a failover before trying to use +Pacemaker to manage MariaDB. This will both verify the MariaDB configuration +and help you understand what is going on. + +### Configuration Files + +In your MariaDB config file for the server on node 1, place the following +entry (replacing my_database and other names as needed): +``` +[mariadb] +log-bin +server_id=1 +log-basename=master +binlog_do_db=my_database +``` + +Then for each other node create the same entry, but increment the server_id. + +### Replication User + +Now create the replication user (be sure to change the password!): +``` +GRANT ALL PRIVILEGES ON *.* TO 'slave_user'@'%' IDENTIFIED BY 'password'; +GRANT ALL PRIVILEGES ON *.* TO 'slave_user'@'localhost' IDENTIFIED BY 'password'; +``` + +The second entry may not be necessary, but simplifies other steps. Change +user name and password as needed.
+ + +### Initialize from a database backup + +Initialize all nodes from an existing backup, or create a backup from the +first node if needed: + +On the current database: +``` +mysqldump -u root --master-data --databases my_database1 my_database2 > backup.sql +``` + +At the top of this file is a commented out line: +SET GLOBAL gtid_slave_pos='XXXX...' + +Uncomment this line. + +On all new nodes: +``` +mysql -u root < backup.sql +``` + +### Initialize replication + +Choose a node as master, in this example node1. + +On all slaves, execute: +``` +RESET MASTER; + +CHANGE MASTER TO master_host="node1", master_port=3306, \ + master_user="slave_user", master_password="password", \ + master_use_gtid=current_pos; + +SET GLOBAL rpl_semi_sync_master_enabled='ON', rpl_semi_sync_slave_enabled='ON'; + +START SLAVE; + +SHOW SLAVE STATUS\G +``` + +In an ideal world this will show that replication is now fully working. + +Once replication is working, verify the configuration by doing some updates +and verifying that they are replicated. + +Now try changing the master. On each slave perform: +``` +STOP SLAVE +``` + +Choose a new master, node2 in our example. On all slave nodes execute: +``` +CHANGE MASTER TO master_host="node2", master_port=3306, \ + master_user="slave_user", master_password="password", \ + master_use_gtid=current_pos; + +START SLAVE; +``` + +And again, check that replication is working and changes are synchronized. + + +Part 2: Pacemaker Setup +----------------------- + +This is pretty straightforward. The example uses pcs.
+ +``` +# Dump the cib +pcs cluster cib mariadb_cfg + +# Create the mariadb_server resource +pcs -f mariadb_cfg resource create mariadb_server mariadb \ + binary="/usr/sbin/mysqld" \ + replication_user="slave_user" \ + replication_passwd="password" \ + node_list="node1 node2 node3" \ + op start timeout=120 interval=0 \ + op stop timeout=120 interval=0 \ + op promote timeout=120 interval=0 \ + op demote timeout=120 interval=0 \ + op monitor role=Master timeout=30 interval=10 \ + op monitor role=Slave timeout=30 interval=20 \ + op notify timeout="60s" interval="0s" + +# Create the master slave resource +pcs -f mariadb_cfg resource master msMariadb mariadb_server \ + master-max=1 master-node-max=1 clone-max=3 clone-node-max=1 notify=true + +# Avoid running this on some nodes, only if needed +pcs -f mariadb_cfg constraint location msMariadb avoids \ + node4=INFINITY node5=INFINITY + +# Push the cib +pcs cluster cib-push mariadb_cfg +``` + +You should now have a running MariaDB cluster: +``` +pcs status + +... + Master/Slave Set: msMariadb [mariadb_server] + Masters: [ node1 ] + Slaves: [ node2 node3 ] +... +``` + diff --git a/heartbeat/Raid1 b/heartbeat/Raid1 new file mode 100755 index 0000000..924d94c --- /dev/null +++ b/heartbeat/Raid1 @@ -0,0 +1,586 @@ +#!/bin/sh +# +# +# License: GNU General Public License (GPL) +# Support: users@clusterlabs.org +# +# Raid1 +# Description: Manages a Linux software RAID device on a shared storage medium. +# Original Author: Eric Z. Ayers (eric.ayers@compgen.com) +# Original Release: 25 Oct 2000 +# RAID patches: http://people.redhat.com/mingo/raid-patches/ +# Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 +# Sympathetic Ear: mailto:linux-raid@vger.kernel.org +# +# usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} +# +# +# EXAMPLE config file /etc/raidtab.md0 +# This file must exist on both machines! 
+# +# raiddev /dev/md0 +# raid-level 1 +# nr-raid-disks 2 +# chunk-size 64k +# persistent-superblock 1 +# #nr-spare-disks 0 +# device /dev/sda1 +# raid-disk 0 +# device /dev/sdb1 +# raid-disk 1 +# +# EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) +# +# DEVICE /dev/sdb1 /dev/sdc1 +# ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_raidconf_default="" +OCF_RESKEY_raiddev_default="" +OCF_RESKEY_homehost_default="" +OCF_RESKEY_force_stop_default="true" +OCF_RESKEY_udev_default="true" +OCF_RESKEY_force_clones_default="false" + +: ${OCF_RESKEY_raidconf=${OCF_RESKEY_raidconf_default}} +: ${OCF_RESKEY_raiddev=${OCF_RESKEY_raiddev_default}} +: ${OCF_RESKEY_homehost=${OCF_RESKEY_homehost_default}} +: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} +: ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}} +: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}} + +####################################################################### + +usage() { # print the one-line command synopsis + cat <<-EOT + usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} + EOT +} + +meta_data() { + cat < + + +1.0 + + +This resource agent manages Linux software RAID (MD) devices on +a shared storage medium. It uses mdadm(8) to start, stop, and +monitor the MD devices. Raidtools are supported, but deprecated. +See https://raid.wiki.kernel.org/index.php/Linux_Raid for more +information. + +Manages Linux software RAID (MD) devices on shared storage + + + + +The RAID configuration file, e.g. /etc/mdadm.conf. + +RAID config file + + + + + +One or more block devices to use, space separated. Alternatively, +set to "auto" to manage all devices specified in raidconf.
+ +block device + + + + + +The value for the homehost directive; this is an mdadm feature to +protect RAIDs against being activated by accident. It is recommended to +create RAIDs managed by the cluster with "homehost" set to a special +value, so they are not accidentally auto-assembled by nodes not +supposed to own them. + +Homehost for mdadm + + + + + +If processes or kernel threads are using the array, it cannot be +stopped. We will try to stop processes, first by sending TERM and +then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL. +The lsof(8) program is required to get the list of array users. +Of course, the kernel threads cannot be stopped this way. +If the processes are critical for data integrity, then set this +parameter to false. Note that in that case the stop operation +will fail and the node will be fenced. + +force stop processes using the array + + + + + +Wait until udevd creates a device in the start operation. On a +normally loaded host this should happen quickly, but you may be +unlucky. If you are not using udev set this to "no". + +udev + + + + + +Activating the same md RAID array on multiple nodes at the same time +will result in data corruption and thus is forbidden by default. + +A safe example could be an array that is only named identically across +all nodes, but is in fact distinct. + +Only set this to "true" if you know what you are doing! + +force ability to run as a clone + + + + + + + + + + + + + + +END +} + +udev_settle() { # run "udevadm settle" with the caller's options, only when udev waiting is enabled + if ocf_is_true $WAIT_FOR_UDEV; then + udevadm settle "$@" + fi +} +list_conf_arrays() { # print the 2nd field of every ARRAY line in $RAIDCONF; exits if the file is missing + test -f $RAIDCONF || { + ocf_exit_reason "$RAIDCONF gone missing!"
+ exit $OCF_ERR_GENERIC + } + grep ^ARRAY $RAIDCONF | awk '{print $2}' +} +forall() { # run function $1 on each device in $RAIDDEVS, OR-ing the exit codes; stops at the first failure unless $2 is "all" + local func=$1 + local checkall=$2 + local mddev rc=0 + for mddev in $RAIDDEVS; do + $func $mddev + rc=$(($rc | $?)) + [ "$checkall" = all ] && continue + [ $rc -ne 0 ] && return $rc + done + return $rc +} +are_arrays_stopped() { # true only if every device in $RAIDDEVS monitors as $OCF_NOT_RUNNING + local rc mddev + for mddev in $RAIDDEVS; do + raid1_monitor_one $mddev + rc=$? + [ $rc -ne $OCF_NOT_RUNNING ] && break + done + test $rc -eq $OCF_NOT_RUNNING +} + +md_assemble() { # assemble array $1 from $RAIDCONF, then wait for udev until the device node exists + local mddev=$1 + $MDADM --assemble $mddev --config=$RAIDCONF $MDADM_HOMEHOST + udev_settle --exit-if-exists=$mddev +} +# +# START: Start up the RAID device +# +raid1_start() { + local rc + raid1_monitor + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + # md already online, nothing to do. + return $OCF_SUCCESS + fi + if [ $rc -ne $OCF_NOT_RUNNING ]; then + # If the array is in a broken state, this agent doesn't + # know how to repair that. + ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$rc)" + return $OCF_ERR_GENERIC + fi + + if [ $HAVE_RAIDTOOLS = "true" ]; then + # Run raidstart to start up the RAID array + $RAIDSTART --configfile $RAIDCONF $MDDEV + else + forall md_assemble all + fi + + raid1_monitor + if [ $? -eq $OCF_SUCCESS ]; then + return $OCF_SUCCESS + else + ocf_exit_reason "Couldn't start RAID for $RAIDDEVS" + return $OCF_ERR_GENERIC + fi +} + +# +# STOP: stop the RAID device +# +mark_readonly() { # switch array $1 to read-only; returns mdadm's exit code + local mddev=$1 + local rc + ocf_log info "Attempting to mark array $mddev readonly" + $MDADM --readonly $mddev --config=$RAIDCONF + rc=$? + if [ $rc -ne 0 ]; then + ocf_exit_reason "Failed to set $mddev readonly (rc=$rc)" + fi + return $rc +} +mknod_raid1_stop() { + # first create a block device file, then try to stop the + # array + local rc n tmp_block_file + n=`echo $1 | sed 's/[^0-9]*//'` + if !
ocf_is_decimal "$n"; then + ocf_log warn "could not get the minor device number from $1" + return 1 + fi + tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-`basename $1`" + rm -f $tmp_block_file + ocf_log info "block device file $1 missing, creating one in order to stop the array" + mknod $tmp_block_file b 9 $n + $MDADM --stop $tmp_block_file --config=$RAIDCONF + rc=$? + rm -f $tmp_block_file + return $rc +} +raid1_stop_one() { # stop array $1 with mdadm, falling back to a temporary device node when $1 is not a block device + ocf_log info "Stopping array $1" + if [ -b "$1" ]; then + $MDADM --stop $1 --config=$RAIDCONF && + return + else + # newer mdadm releases can stop arrays when given the + # basename; try that first + $MDADM --stop `basename $1` --config=$RAIDCONF && + return + # otherwise create a block device file + mknod_raid1_stop $1 + fi +} +get_users_pids() { # print the PIDs lsof reports for $1 and log every lsof line as a warning + local mddev=$1 + local outp l + ocf_log debug "running lsof to list $mddev users..." + outp=`lsof $mddev | tail -n +2` + echo "$outp" | awk '{print $2}' | sort -u + echo "$outp" | while read l; do + ocf_log warn "$l" + done +} +stop_raid_users() { # hand the PIDs using the arrays to ocf_stop_processes; returns 2 when lsof finds none + local pids + pids=`forall get_users_pids all | sort -u` + if [ -z "$pids" ]; then + ocf_log warn "lsof reported no users holding arrays" + return 2 + else + ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids + fi +} +stop_arrays() { # stop all arrays via raidstop or mdadm, depending on $HAVE_RAIDTOOLS + if [ $HAVE_RAIDTOOLS = "true" ]; then + $RAIDSTOP --configfile $RAIDCONF $MDDEV + else + forall raid1_stop_one all + fi +} +showusers() { # log lsof output and the sysfs holders of each device argument; sysfs is keyed by the name without its directory part + local disk + for disk; do + if have_binary lsof; then + ocf_log info "running lsof to list $disk users..." + ocf_run -warn lsof $disk + fi + if [ -d /sys/block/${disk##*/}/holders ]; then + ocf_log info "ls -l /sys/block/${disk##*/}/holders" + ocf_run -warn ls -l /sys/block/${disk##*/}/holders + fi + done +} +raid1_stop() { + local rc + # See if the MD device is already cleanly stopped: + if are_arrays_stopped; then + return $OCF_SUCCESS + fi + + # Turn off raid + if ! stop_arrays; then + if ocf_is_true $FORCESTOP; then + if have_binary lsof; then + stop_raid_users + case $?
in + 2) false;; + *) stop_arrays;; + esac + else + ocf_log warn "install lsof(8) to list users holding the disk" + false + fi + else + false + fi + fi + rc=$? + + if [ $rc -ne 0 ]; then + ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)" + showusers $RAIDDEVS + if [ $HAVE_RAIDTOOLS != "true" ]; then + forall mark_readonly all + fi + return $OCF_ERR_GENERIC + fi + + if are_arrays_stopped; then + return $OCF_SUCCESS + fi + + ocf_exit_reason "RAID $RAIDDEVS still active after stop command!" + return $OCF_ERR_GENERIC +} + +# +# monitor: a less noisy status +# +raid1_monitor_one() { + local mddev=$1 + local md= + local rc + local TRY_READD=0 + local pbsize + # check if the md device exists first + # but not if we are in the stop operation + # device existence is important only for the running arrays + if [ "$__OCF_ACTION" != "stop" ]; then + if [ -h "$mddev" ]; then + md=$(ls -l $mddev | awk -F'/' '{print $NF}') # last path component of the symlink target + elif [ -b "$mddev" ]; then + md=$(echo $mddev | sed 's,/dev/,,') + else + ocf_log info "$mddev is not a block device" + return $OCF_NOT_RUNNING + fi + fi + if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then + ocf_log info "$md not found in /proc/mdstat" + return $OCF_NOT_RUNNING + fi + if [ $HAVE_RAIDTOOLS != "true" ]; then + $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? + case $rc in + 0) ;; + 1) ocf_log warn "$mddev has at least one failed device." + TRY_READD=1 + ;; + 2) ocf_exit_reason "$mddev has failed." + return $OCF_ERR_GENERIC + ;; + 4) + if [ "$__OCF_ACTION" = "stop" ] ; then + # There may be a transient invalid device after + # we stop MD due to uevent processing, the + # original device is stopped though. + return $OCF_NOT_RUNNING + else + ocf_exit_reason "mdadm failed on $mddev." + return $OCF_ERR_GENERIC + fi + ;; + *) ocf_exit_reason "mdadm returned an unknown result ($rc)."
+ return $OCF_ERR_GENERIC + ;; + esac + fi + if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ + -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then + ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" + $MDADM $mddev --fail detached + $MDADM $mddev --remove failed + $MDADM $mddev --re-add missing + # TODO: At this stage, there's nothing to actually do + # here. Either this worked or it did not. + fi + + pbsize=`(blockdev --getpbsz $mddev || stat -c "%o" $mddev) 2>/dev/null` + if [ -z "$pbsize" ]; then + ocf_log warn "both blockdev and stat could not get the block size (will use 4k)" + pbsize=4096 # try with 4k + fi + if ! dd if=$mddev count=1 bs=$pbsize of=/dev/null \ + iflag=direct >/dev/null 2>&1 ; then + ocf_exit_reason "$mddev: I/O error on read" + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +raid1_monitor() { # monitor each device in $RAIDDEVS, returning at the first non-zero result + forall raid1_monitor_one +} + +# +# STATUS: is the raid device online or offline? +# +raid1_status() { + # See if the MD device is online + local rc + raid1_monitor + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + echo "stopped" + else + echo "running" + fi + return $rc +} + +raid1_validate_all() { # performs no checks of its own; always reports success + return $OCF_SUCCESS +} + +PROC_CLEANUP_TIME=3 + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +case "$1" in + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + usage) + usage + exit $OCF_SUCCESS + ;; + *) + ;; +esac + +RAIDCONF="$OCF_RESKEY_raidconf" +MDDEV="$OCF_RESKEY_raiddev" +FORCESTOP="${OCF_RESKEY_force_stop}" +WAIT_FOR_UDEV="${OCF_RESKEY_udev}" + +if [ -z "$RAIDCONF" ] ; then + ocf_exit_reason "Please set OCF_RESKEY_raidconf!" + exit $OCF_ERR_CONFIGURED +fi + +if [ ! -r "$RAIDCONF" ] ; then + ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opened!" + exit $OCF_ERR_INSTALLED +fi + +if [ -z "$MDDEV" ] ; then + ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
+ exit $OCF_ERR_CONFIGURED +fi + +if ocf_is_clone && ! ocf_is_true "$OCF_RESKEY_force_clones"; then + ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!" + ocf_log err "Please read the comment on the force_clones parameter." + exit $OCF_ERR_CONFIGURED +fi + +if ocf_is_true $WAIT_FOR_UDEV && ! have_binary udevadm; then + if [ "$__OCF_ACTION" = "start" ]; then + ocf_log warn "either install udevadm or set udev to false" + ocf_log info "setting udev to false!" + fi + WAIT_FOR_UDEV=0 +fi + +if ! ocf_is_true $WAIT_FOR_UDEV; then + export MDADM_NO_UDEV=1 +fi + +if ocf_is_true $FORCESTOP && ! have_binary lsof; then + ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..." +fi + +HAVE_RAIDTOOLS=false +if have_binary $MDADM >/dev/null 2>&1 ; then + if [ -n "$OCF_RESKEY_homehost" ]; then + MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" + else + MDADM_HOMEHOST="" + fi +else + check_binary $RAIDSTART + HAVE_RAIDTOOLS=true +fi + +if [ $HAVE_RAIDTOOLS = true ]; then + if [ "$MDDEV" = "auto" ]; then + ocf_exit_reason "autoconf supported only with mdadm!" + exit $OCF_ERR_INSTALLED + elif [ `echo $MDDEV|wc -w` -gt 1 ]; then + ocf_exit_reason "multiple devices supported only with mdadm!" + exit $OCF_ERR_INSTALLED + fi +fi + +if [ "$MDDEV" = "auto" ]; then + RAIDDEVS=`list_conf_arrays` +else + RAIDDEVS="$MDDEV" +fi + +# At this stage, +# [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, +# otherwise we have raidtools (raidstart and raidstop) + +# Look for how we are called +case "$1" in + start) + raid1_start + ;; + stop) + raid1_stop + ;; + status) + raid1_status + ;; + monitor) + raid1_monitor + ;; + validate-all) + raid1_validate_all + ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $? diff --git a/heartbeat/Route b/heartbeat/Route new file mode 100755 index 0000000..7db41d0 --- /dev/null +++ b/heartbeat/Route @@ -0,0 +1,348 @@ +#!/bin/sh +# +# Route OCF RA. Enables and disables network routes.
+# +# (c) 2008-2010 Florian Haas, Dejan Muhamedagic, +# and Linux-HA contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Default values +OCF_RESKEY_device_default="" +OCF_RESKEY_gateway_default="" +OCF_RESKEY_source_default="" +OCF_RESKEY_table_default="" +OCF_RESKEY_family_default="detect" + +: ${OCF_RESKEY_device=${OCF_RESKEY_device_default}} +: ${OCF_RESKEY_gateway=${OCF_RESKEY_gateway_default}} +: ${OCF_RESKEY_source=${OCF_RESKEY_source_default}} +: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}} +: ${OCF_RESKEY_family=${OCF_RESKEY_family_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +Enables and disables network routes. + +Supports host and net routes, routes via a gateway address, +and routes using specific source addresses.
+ +This resource agent is useful if a node's routing table +needs to be manipulated based on node role assignment. + +Consider the following example use case: + + - One cluster node serves as an IPsec tunnel endpoint. + + - All other nodes use the IPsec tunnel to reach hosts + in a specific remote network. + +Then, here is how you would implement this scheme making use +of the Route resource agent: + + - Configure an ipsec LSB resource. + + - Configure a cloned Route OCF resource. + + - Create an order constraint to ensure + that ipsec is started before Route. + + - Create a colocation constraint between the + ipsec and Route resources, to make sure no instance + of your cloned Route resource is started on the + tunnel endpoint itself. + +Manages network routes + + + + + +The destination network (or host) to be configured for the route. +Specify the netmask suffix in CIDR notation (e.g. "/24"). +If no suffix is given, a host route will be created. +Specify "0.0.0.0/0" or "default" if you want this resource to set +the system default route. + +Destination network + + + + + +The outgoing network device to use for this route. + +Outgoing network device + + + + + +The gateway IP address to use for this route. + +Gateway IP address + + + + + +The source IP address to be configured for the route. + +Source IP address + + + + + +The routing table to be configured for the route. + +Routing table + + + + + +The address family to be used for the route +ip4 IP version 4 +ip6 IP version 6 +detect Detect from 'destination' address.
+ +Address Family + + + + + + + + + + + + + + +END +} + +####################################################################### + +create_route_spec() { + # Creates a route specification for use by "ip route (add|del|show)" + route_spec="to ${OCF_RESKEY_destination}" # the destination is always included; every part below is added only when set + if [ -n "${OCF_RESKEY_device}" ]; then + route_spec="${route_spec} dev ${OCF_RESKEY_device}" + fi + if [ -n "${OCF_RESKEY_gateway}" ]; then + route_spec="${route_spec} via ${OCF_RESKEY_gateway}" + fi + if [ -n "${OCF_RESKEY_source}" ]; then + route_spec="${route_spec} src ${OCF_RESKEY_source}" + fi + if [ -n "${OCF_RESKEY_table}" ]; then + route_spec="${route_spec} table ${OCF_RESKEY_table}" + fi + echo "$route_spec" # the specification is returned on stdout +} + +route_usage() { + cat </dev/null 2>&1; then + ocf_exit_reason "Network device ${OCF_RESKEY_device} appears not to be available on this system." + # OCF_ERR_ARGS prevents the resource from running anywhere at all, + # maybe another node has the interface? + # OCF_ERR_INSTALLED just prevents starting on this particular node. + return $OCF_ERR_INSTALLED + fi + fi + + # The following tests must return $OCF_ERR_INSTALLED, but only if + # the resource is actually running (i.e., not during probes) + if ! ocf_is_probe; then + # If a source address has been configured, is it available on + # this system? + if [ -n "${OCF_RESKEY_source}" ]; then + if ! ip address show | grep -w ${OCF_RESKEY_source} >/dev/null 2>&1; then + ocf_exit_reason "Source address ${OCF_RESKEY_source} appears not to be available on this system." + # same reason as with _device: + return $OCF_ERR_INSTALLED + fi + fi + # If a gateway address has been configured, is it reachable? + if [ -n "${OCF_RESKEY_gateway}" ]; then + if ! ip route get ${OCF_RESKEY_gateway} >/dev/null 2>&1; then + ocf_exit_reason "Gateway address ${OCF_RESKEY_gateway} is unreachable."
+ # same reason as with _device: + return $OCF_ERR_INSTALLED + fi + fi + fi + return $OCF_SUCCESS +} + +# These two actions must always succeed +case $__OCF_ACTION in +meta-data) meta_data + # OCF variables are not set when querying meta-data + exit 0 + ;; +usage|help) route_usage + exit $OCF_SUCCESS + ;; +esac + +# Don't do anything if the necessary utilities aren't present +for binary in ip grep; do + check_binary $binary +done + +case $OCF_RESKEY_family in # choose the -4 / -6 value stored in addr_family + ip4) addr_family="-4" ;; + ip6) addr_family="-6" ;; + detect) + case $OCF_RESKEY_destination in + *:*) addr_family="-6" ;; # a colon in the destination selects IPv6 + *.*) addr_family="-4" ;; # otherwise a dot selects IPv4 + *) ocf_exit_reason "Address family detection requires a numeric destination address." ;; # NOTE(review): no exit follows, so addr_family appears to stay unset here - confirm this is intended + esac ;; + *) ocf_exit_reason "Address family '${OCF_RESKEY_family}' not recognized." ;; # NOTE(review): likewise no exit after this message - confirm +esac + +case $__OCF_ACTION in +start) route_start;; +stop) route_stop;; +status|monitor) route_status;; +reload) ocf_log info "Reloading..." + route_start + ;; +validate-all) route_validate;; +*) route_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? # exit status of the action dispatched above +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" +exit $rc diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase new file mode 100755 index 0000000..563a6f3 --- /dev/null +++ b/heartbeat/SAPDatabase @@ -0,0 +1,401 @@ +#!/bin/sh +# +# SAPDatabase +# +# Description: Manages any type of SAP supported database instance +# as a High-Availability OCF compliant resource. +# +# Author: Alexander Krauth, October 2006 +# Support: linux@sap.com +# License: GNU General Public License (GPL) +# Copyright: (c) 2006, 2007, 2010, 2012 Alexander Krauth +# +# An example usage: +# See usage() function below for more details...
+# +# OCF instance parameters: +# OCF_RESKEY_SID +# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) +# OCF_RESKEY_DBTYPE (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB) +# OCF_RESKEY_DBINSTANCE (optional, Database instance name, if not equal to SID) +# OCF_RESKEY_DBOSUSER (optional, the Linux user that owns the database processes on operating system level) +# OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) +# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) +# OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services) +# OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) +# OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) +# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) +# OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) +# Deprecated parameters: +# OCF_RESKEY_NETSERVICENAME +# OCF_RESKEY_DBJ2EE_ONLY +# OCF_RESKEY_JAVA_HOME +# OCF_RESKEY_DIR_BOOTSTRAP +# OCF_RESKEY_DIR_SECSTORE +# OCF_RESKEY_DB_JARS +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_SID_default="" +OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe" +OCF_RESKEY_DBTYPE_default="" +OCF_RESKEY_DBINSTANCE_default="" +OCF_RESKEY_DBOSUSER_default="" +OCF_RESKEY_NETSERVICENAME_default="" +OCF_RESKEY_DBJ2EE_ONLY_default="" +OCF_RESKEY_JAVA_HOME_default="" +OCF_RESKEY_STRICT_MONITORING_default="false" +OCF_RESKEY_AUTOMATIC_RECOVER_default="false" +OCF_RESKEY_MONITOR_SERVICES_default="" +OCF_RESKEY_DIR_BOOTSTRAP_default="" +OCF_RESKEY_DIR_SECSTORE_default="" +OCF_RESKEY_DB_JARS_default="" +OCF_RESKEY_PRE_START_USEREXIT_default="" +OCF_RESKEY_POST_START_USEREXIT_default="" +OCF_RESKEY_PRE_STOP_USEREXIT_default="" +OCF_RESKEY_POST_STOP_USEREXIT_default="" + +: ${OCF_RESKEY_SID=${OCF_RESKEY_SID_default}} +: ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} +: ${OCF_RESKEY_DBTYPE=${OCF_RESKEY_DBTYPE_default}} +: ${OCF_RESKEY_DBINSTANCE=${OCF_RESKEY_DBINSTANCE_default}} +: ${OCF_RESKEY_DBOSUSER=${OCF_RESKEY_DBOSUSER_default}} +: ${OCF_RESKEY_NETSERVICENAME=${OCF_RESKEY_NETSERVICENAME_default}} +: ${OCF_RESKEY_DBJ2EE_ONLY=${OCF_RESKEY_DBJ2EE_ONLY_default}} +: ${OCF_RESKEY_JAVA_HOME=${OCF_RESKEY_JAVA_HOME_default}} +: ${OCF_RESKEY_STRICT_MONITORING=${OCF_RESKEY_STRICT_MONITORING_default}} +: ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}} +: ${OCF_RESKEY_MONITOR_SERVICES=${OCF_RESKEY_MONITOR_SERVICES_default}} +: ${OCF_RESKEY_DIR_BOOTSTRAP=${OCF_RESKEY_DIR_BOOTSTRAP_default}} +: ${OCF_RESKEY_DIR_SECSTORE=${OCF_RESKEY_DIR_SECSTORE_default}} +: ${OCF_RESKEY_DB_JARS=${OCF_RESKEY_DB_JARS_default}} +: ${OCF_RESKEY_PRE_START_USEREXIT=${OCF_RESKEY_PRE_START_USEREXIT_default}} +: ${OCF_RESKEY_POST_START_USEREXIT=${OCF_RESKEY_POST_START_USEREXIT_default}} +: ${OCF_RESKEY_PRE_STOP_USEREXIT=${OCF_RESKEY_PRE_STOP_USEREXIT_default}} +: ${OCF_RESKEY_POST_STOP_USEREXIT=${OCF_RESKEY_POST_STOP_USEREXIT_default}} + 
+####################################################################### + +SH=/bin/sh + +usage() { + methods=`sapdatabase_methods` + methods=`echo $methods | tr ' ' '|'` + cat <<-EOF + usage: $0 ($methods) + + $0 manages a SAP database of any type as an HA resource. + Currently Oracle, MaxDB, DB/2 UDB, Sybase ASE and SAP HANA Database are supported. + ABAP databases as well as JAVA only databases are supported. + + The 'start' operation starts the instance. + The 'stop' operation stops the instance. + The 'status' operation reports whether the instance is running + The 'monitor' operation reports whether the instance seems to be working + The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!) + The 'validate-all' operation reports whether the parameters are valid + The 'methods' operation reports on the methods $0 supports + + EOF +} + +meta_data() { + cat < + + +1.0 + + +Resource script for SAP databases. It manages a SAP database of any type as an HA resource. + +The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB. +The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure. +The resource agent supports the following databases: +- Oracle 10.2, 11.2 and 12 +- DB/2 UDB for Windows and Unix 9.x +- SAP-DB / MaxDB 7.x +- Sybase ASE 15.7 +- SAP HANA Database since 1.00 - with SAP note 1625203 (http://sdn.sap.com) + +In fact this resource agent does not run any database commands directly. It uses the SAP standard process SAPHostAgent to control the database. +The SAPHostAgent must be installed on each cluster node locally. It will not work, if you try to run the SAPHostAgent also as a HA resource. +Please follow SAP note 1031096 for the installation of SAPHostAgent. 
+The required minimum version of SAPHostAgent is: +Release: 7.20 +Patch Number: 90 +or compile time after: Dec 17 2011 + +To exemplify the usage, for a HANA database with SID "TST" and instance number "10", the resource configuration using crmsh syntax looks like: + +primitive rsc_SAPDatabase_TST_HDB10 ocf:heartbeat:SAPDatabase \\ + params DBTYPE="HDB" SID="TST" \\ + op start interval="0" timeout="3600" \\ + op monitor interval="120" timeout="700" \\ + op stop interval="0" timeout="600" + +Make sure to tune the operations timeout values accordingly with your chosen Database and available infrastructure. + +Note that the same configuration can be achieved using any other CLI tool for cluster configuration available, like pcs or cibadmin. + +Manages a SAP database instance as an HA resource. + + + The unique database system identifier. e.g. P01 + Database system ID + + + + The full qualified path where to find saphostexec and saphostctrl. +Usually you can leave this empty. Then the default: ${OCF_RESKEY_DIR_EXECUTABLE_default} is used. + + path of saphostexec and saphostctrl + + + + The name of the database vendor you use. Set either: ADA, DB6, ORA, SYB, HDB + database vendor + + + + Must be used for special database implementations, when database instance name is not equal to the SID (e.g. Oracle DataGuard) + Database instance name, if not equal to SID + + + + The parameter can be set, if the database processes on operating system level are not executed with the default user of the used database type. Defaults: ADA=taken from /etc/opt/sdb, DB6=db2SID, ORA=oraSID and oracle, SYB=sybSID, HDB=SIDadm + the Linux user that owns the database processes on operating system level + + + + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore + + + + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. 
+ deprecated - do not use anymore + + + + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore + + + + This controls how the resource agent monitors the database. If set to true, it will use 'saphostctrl -function GetDatabaseStatus' to test the database state. If set to false, only operating system processes are monitored. + Activates application level monitoring + + + + If you set this to true, 'saphostctrl -function StartDatabase' will always be called with the '-force' option. + Enable or disable automatic startup recovery + + + + Defines which services are monitored by the SAPDatabase resource agent. Service names must correspond with the output of the 'saphostctrl -function GetDatabaseStatus' command. +The default MONITOR_SERVICES value is derived from the database type DBTYPE. For reference: + +- DBTYPE "ORA" sets MONITOR_SERVICES="Instance|Database|Listener"; +- DBTYPE "HDB" sets MONITOR_SERVICES="hdbindexserver|hdbnameserver"; +- DBTYPE "ADA" sets MONITOR_SERVICES="Database"; +- DBTYPE "DB6" sets MONITOR_SERVICES="{SID}|{db2sid}"; +- DBTYPE "SYB" sets MONITOR_SERVICES="Server". + +This parameter should be set ONLY if is needed to monitor different services than the ones listed above. + + Database services to monitor + + + + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore + + + + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore + + + + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. + deprecated - do not use anymore + + + + The full qualified path where to find a script or program which should be executed before this resource gets started. 
+ path to a pre-start script + + + + The full qualified path where to find a script or program which should be executed after this resource got started. + path to a post-start script + + + + The full qualified path where to find a script or program which should be executed before this resource gets stopped. + path to a pre-start script + + + + The full qualified path where to find a script or program which should be executed after this resource got stopped. + path to a post-start script + + + + + + + + + + + + + + +END +} + + +# +# methods: What methods/operations do we support? +# +sapdatabase_methods() { + cat <<-EOF + start + stop + status + monitor + recover + validate-all + methods + meta-data + usage + EOF +} + + +# +# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. +# This specialties do not allow a totally generic SAP cluster resource agent. +# Someone should write a resource agent for each additional process you need, if it +# is required to monitor that process within the cluster manager. To enable +# you to extent this resource agent without developing a new one, this user exit +# was introduced. +# +sapuserexit() { + NAME="$1" + VALUE="$2" + + if [ -n "$VALUE" ] + then + if have_binary "$VALUE" + then + ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" + "$VALUE" >/dev/null 2>&1 + ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" + else + ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" + fi + fi + return $OCF_SUCCESS +} + + +# +# saphostctrl_installed +# +saphostctrl_installed() { + SAPHOSTCTRL="${OCF_RESKEY_DIR_EXECUTABLE}/saphostctrl" + SAPHOSTEXEC="${OCF_RESKEY_DIR_EXECUTABLE}/saphostexec" + SAPHOSTSRV="${OCF_RESKEY_DIR_EXECUTABLE}/sapstartsrv" + SAPHOSTOSCOL="${OCF_RESKEY_DIR_EXECUTABLE}/saposcol" + + have_binary $SAPHOSTCTRL && have_binary $SAPHOSTEXEC +} + + +# +# 'main' starts here... 
+# + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +# These operations don't require OCF instance parameters to be set +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + + usage) usage + exit $OCF_SUCCESS;; + + methods) sapdatabase_methods + exit $?;; + + *);; +esac + +if ! ocf_is_root +then + ocf_log err "$0 must be run as root" + exit $OCF_ERR_PERM +fi + +# mandatory parameter check +if [ -z "$OCF_RESKEY_SID" ]; then + ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!" + exit $OCF_ERR_ARGS +fi +SID=`echo "$OCF_RESKEY_SID"` + +if [ -z "$OCF_RESKEY_DBTYPE" ]; then + ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ADA,DB6,ORA,SYB,HDB)!" + exit $OCF_ERR_ARGS +fi +DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'` + + +# source functions and initialize global variables +if saphostctrl_installed; then + . ${OCF_FUNCTIONS_DIR}/sapdb.sh +else + if [ -n "${OCF_RESKEY_DBOSUSER}" ]; then + ocf_exit_reason "Usage of parameter OCF_RESKEY_DBOSUSER is not possible without having SAP Host-Agent installed" + exit $OCF_ERR_ARGS + fi + . ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh +fi +sapdatabase_init + + +# we always want to fall to the faster status method in case of a probe by the cluster +ACTION=$1 +if ocf_is_probe +then + ACTION=status +fi + +# What kind of method was invoked? +case "$ACTION" in + + start|stop|status|recover) sapdatabase_$ACTION + exit $?;; + monitor) sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING + exit $?;; + validate-all) sapdatabase_validate + exit $?;; + *) sapdatabase_methods + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance new file mode 100755 index 0000000..26fd541 --- /dev/null +++ b/heartbeat/SAPInstance @@ -0,0 +1,1076 @@ +#!/bin/sh +# +# SAPInstance +# +# Description: Manages a single SAP Instance as a High-Availability +# resource. One SAP Instance is defined by one +# SAP Instance-Profile. 
start/stop handles all services +# of the START-Profile, status and monitor care only +# about essential services. +# +# Author: Alexander Krauth, June 2006 +# Support: linux@sap.com +# License: GNU General Public License (GPL) +# Copyright: (c) 2006-2008 Alexander Krauth +# +# An example usage: +# See usage() function below for more details... +# +# OCF instance parameters: +# OCF_RESKEY_InstanceName +# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) +# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) +# OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default) +# OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start) +# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false) +# OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor critical services only) +# OCF_RESKEY_SHUTDOWN_METHOD (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk) +# OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Promotable configuration) +# OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Promotable configuration) +# OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) +# OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) +# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) +# OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) +# OCF_RESKEY_IS_ERS (needed for ENQ/REPL NW 740) +# OCF_RESKEY_MINIMAL_PROBE (optional but needed for simple mount structure architecure) +# +# TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes 
only with OS tools (sapinstance_status) +# - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque) +# - Option for cleanup abandoned enqueue replication tables +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_InstanceName_default="" +OCF_RESKEY_DIR_EXECUTABLE_default="" +OCF_RESKEY_DIR_PROFILE_default="" +OCF_RESKEY_START_PROFILE_default="" +OCF_RESKEY_START_WAITTIME_default="3600" +OCF_RESKEY_AUTOMATIC_RECOVER_default="false" +OCF_RESKEY_MONITOR_SERVICES_default="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart|enq_server|enq_replicator" +OCF_RESKEY_SHUTDOWN_METHOD_default="normal" +OCF_RESKEY_ERS_InstanceName_default="" +OCF_RESKEY_ERS_START_PROFILE_default="" +OCF_RESKEY_PRE_START_USEREXIT_default="" +OCF_RESKEY_POST_START_USEREXIT_default="" +OCF_RESKEY_PRE_STOP_USEREXIT_default="" +OCF_RESKEY_POST_STOP_USEREXIT_default="" +OCF_RESKEY_IS_ERS_default="false" +OCF_RESKEY_MINIMAL_PROBE_default="false" + +: ${OCF_RESKEY_InstanceName=${OCF_RESKEY_InstanceName_default}} +: ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} +: ${OCF_RESKEY_DIR_PROFILE=${OCF_RESKEY_DIR_PROFILE_default}} +: ${OCF_RESKEY_START_PROFILE=${OCF_RESKEY_START_PROFILE_default}} +: ${OCF_RESKEY_START_WAITTIME=${OCF_RESKEY_START_WAITTIME_default}} +: ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}} +: ${OCF_RESKEY_MONITOR_SERVICES=${OCF_RESKEY_MONITOR_SERVICES_default}} +: ${OCF_RESKEY_SHUTDOWN_METHOD=${OCF_RESKEY_SHUTDOWN_METHOD_default}} +: ${OCF_RESKEY_ERS_InstanceName=${OCF_RESKEY_ERS_InstanceName_default}} +: ${OCF_RESKEY_ERS_START_PROFILE=${OCF_RESKEY_ERS_START_PROFILE_default}} +: ${OCF_RESKEY_PRE_START_USEREXIT=${OCF_RESKEY_PRE_START_USEREXIT_default}} +: ${OCF_RESKEY_POST_START_USEREXIT=${OCF_RESKEY_POST_START_USEREXIT_default}} +: 
${OCF_RESKEY_PRE_STOP_USEREXIT=${OCF_RESKEY_PRE_STOP_USEREXIT_default}} +: ${OCF_RESKEY_POST_STOP_USEREXIT=${OCF_RESKEY_POST_STOP_USEREXIT_default}} +: ${OCF_RESKEY_IS_ERS=${OCF_RESKEY_IS_ERS_default}} +: ${OCF_RESKEY_IS_MINIMAL_PROBE=${OCF_RESKEY_IS_MINIMAL_PROBE_default}} + +####################################################################### + +SH=/bin/sh + +sapinstance_usage() { + methods=`sapinstance_methods` + methods=`echo $methods | tr ' ' '|'` + cat <<-EOF + usage: $0 ($methods) + + $0 manages a SAP Instance as an HA resource. + + The 'start' operation starts the instance or the ERS instance in a Promotable configuration + The 'stop' operation stops the instance + The 'status' operation reports whether the instance is running + The 'monitor' operation reports whether the instance seems to be working + The 'promote' operation starts the primary instance in a Promotable configuration + The 'demote' operation stops the primary instance and starts the ERS instance + The 'reload' operation allows changed parameters (non-unique only) without restarting the service + The 'notify' operation always returns SUCCESS + The 'validate-all' operation reports whether the parameters are valid + The 'methods' operation reports on the methods $0 supports + + EOF +} + +sapinstance_meta_data() { + cat < + + +1.0 + + +Usually a SAP system consists of one database and at least one or more SAP instances (sometimes called application servers). One SAP Instance is defined by having exactly one instance profile. The instance profiles can usually be found in the directory /sapmnt/SID/profile. Each instance must be configured as it's own resource in the cluster configuration. 
+The resource agent supports the following SAP versions: +- SAP WebAS ABAP Release 6.20 - 7.40 +- SAP WebAS Java Release 6.40 - 7.40 +- SAP WebAS ABAP + Java Add-In Release 6.20 - 7.40 (Java is not monitored by the cluster in that case) +When using a SAP Kernel 6.40 please check and implement the actions from the section "Manual postprocessing" from SAP note 995116 (http://sdn.sap.com). +Other versions may also work with this agent, but have not been verified. + +All operations of the SAPInstance resource agent are done by using the startup framework called SAP Management Console or sapstartsrv that was introduced with SAP kernel release 6.40. Find more information about the SAP Management Console in SAP note 1014480. Using this framework defines a clear interface for the Heartbeat cluster, how it sees the SAP system. The options for monitoring the SAP system are also much better than other methods like just watching the ps command for running processes or doing some pings to the application. sapstartsrv uses SOAP messages to request the status of running SAP processes. Therefore it can actually ask a process itself what it's status is, independent from other problems that might exist at the same time. + +sapstartsrv knows 4 status colours: +- GREEN = everything is fine +- YELLOW = something is wrong, but the service is still working +- RED = the service does not work +- GRAY = the service has not been started + +The SAPInstance resource agent will interpret GREEN and YELLOW as OK. That means that minor problems will not be reported to the Heartbeat cluster. This prevents the cluster from doing an unwanted failover. +The statuses RED and GRAY are reported as NOT_RUNNING to the cluster. Depending on the status the cluster expects from the resource, it will do a restart, failover or just nothing. + +Manages a SAP instance as an HA resource. + + + The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci. Usually this is the name of the SAP instance profile. 
+ Instance name: SID_INSTANCE_VIR-HOSTNAME + + + + The full qualified path where to find sapstartsrv and sapcontrol. Specify this parameter, if you have changed the SAP kernel directory location after the default SAP installation. + Path of sapstartsrv and sapcontrol + + + + The full qualified path where to find the SAP START profile. Specify this parameter, if you have changed the SAP profile directory location after the default SAP installation. + Path of start profile + + + + The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. + Start profile name + + + + After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start ishandled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.Usually the resource agent waits until all services are started and the SAP Management Console reports a GREEN status. A double stack installation (ABAP + Java AddIn) consists of an ABAP dispatcher and a JAVA instance. Normally the start of the JAVA instance takes much longer than the start of the ABAP instance. For a JAVA Instance you may need to configure a much higher timeout for the start operation of the resource in Heartbeat. The disadvantage here is, that the discovery of a failed start by the cluster takes longer. Somebody might say: For me it is important, that the ABAP instance is up and running. A failure of the JAVA instance shall not cause a failover of the SAP instance. +Actually the SAP MC reports a YELLOW status, if the JAVA instance of a double stack system fails. From the resource agent point of view YELLOW means:everything is OK. Setting START_WAITTIME to a lower value determines the resource agent to check the status of the instance during a start operation after that time. 
As it would wait normally for a GREEN status, now it reports SUCCESS to the cluster in case of a YELLOW status already after the specified time. + +That is only useful for double stack systems. + + Check the successful start after that time (do not wait for J2EE-Addin) + + + + The SAPInstance resource agent tries to recover a failed start attempt automatically one time. This is done by killing running instance processes, removing the kill.sap file and executing cleanipc. Sometimes a crashed SAP instance leaves some processes and/or shared memory segments behind. Setting this option to true will try to remove those leftovers during a start operation. That is to reduce manual work for the administrator. + Enable or disable automatic startup recovery + + + + Within a SAP instance there can be several services. Usually you will find the defined services in the START profile of the related instance (Attention: with SAP Release 7.10 the START profile content was moved to the instance profile). Not all of those services are worth to monitor by the cluster. For example you properly do not like to failover your SAP instance, if the central syslog collector daemon fails. +Those services are monitored within the SAPInstance resource agent: + +- disp+work +- msg_server +- enserver (ENSA1) +- enq_server (ENSA2) +- enrepserver (ENSA1) +- enq_replicator (ENSA2) +- jcontrol +- jstart + +Some other services could be monitored as well. They have to be +given with the parameter MONITOR_SERVICES, e.g.: + + - sapwebdisp + - TREXDaemon.x + +That names match the strings used in the output of the command 'sapcontrol -nr [Instance-Nr] -function GetProcessList'. +The default should fit most cases where you want to manage a SAP Instance from the cluster. You may change this with this parameter, if you like to monitor more/less or other services that sapstartsrv supports. 
+You may specify multiple services separated by a | (pipe) sign in this parameter: disp+work|msg_server|enserver + + Services to monitor + + + + Usually a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the graceful stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !! + Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL) + + + + Only used in a Promotable resource configuration: +The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile. +The enqueue replication instance must be installed, before you want to configure a promotable cluster resource. + +The promotable configuration in the cluster must use this properties: +clone_max = 2 +clone_node_max = 1 +master_node_max = 1 +master_max = 1 + + Enqueue replication instance name: SID_INSTANCE_VIR-HOSTNAME + + + + Only used in a Promotable resource configuration: +The parameter ERS_InstanceName must also be set in this configuration. +The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. + + Enqueue replication start profile name + + + + The full qualified path where to find a script or program which should be executed before this resource gets started. + Path to a pre-start script + + + + The full qualified path where to find a script or program which should be executed after this resource got started. 
+ Path to a post-start script + + + + The full qualified path where to find a script or program which should be executed before this resource gets stopped. + Path to a pre-start script + + + + The full qualified path where to find a script or program which should be executed after this resource got stopped. + Path to a post-start script + + + + Only used for ASCS/ERS SAP Netweaver installations without implementing a promotable resource to + allow the ASCS to 'find' the ERS running on another cluster node after a resource failure. This parameter should be set + to true 'only' for the ERS instance for implementations following the SAP NetWeaver 7.40 HA certification (NW-HA-CLU-740). This includes also + systems for NetWeaver less than 7.40, if you like to implement the NW-HA-CLU-740 scenario. + + Mark SAPInstance as ERS instance + + + + Setting MINIMAL_PROBE=true forces the resource agent to do only minimal check during a probe. This is needed for special + file system setups. The MINIMAL_PROBE=true is only supported, if requested either by your vendor's support or if described in an architecture document + from your HA vendor. + + Switch probe action from full to minimal check + + + + + + + + + + + + + + + + + + + +END +} + + +# +# methods: What methods/operations do we support? +# +sapinstance_methods() { + cat <<-EOF + start + stop + status + monitor + promote + demote + reload + notify + validate-all + methods + meta-data + usage + EOF +} + + + +# +# is_clone : find out if we are configured to run in a Master/Slave configuration +# +is_clone() { + if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ] \ + && [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ] + then + if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] || \ + [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] || \ + [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] || \ + [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ] + then + ocf_log err "Clone options misconfigured. 
(expect: clone_max=2,clone_node_max=1,master_node_max=1,master_max=1)" + exit $OCF_ERR_CONFIGURED + fi + + if [ -z "$OCF_RESKEY_ERS_InstanceName" ] + then + ocf_log err "In a Master/Slave configuration the ERS_InstanceName parameter is mandatory." + exit $OCF_ERR_ARGS + fi + else + return 0 + fi + return 1 +} + + +# +# abnormal_end : essential things are missing, but in the natur of a SAP installation - which can be very different +# from customer to customer - we cannot handle this always as an error +# This would be the case, if the software is installed on shared disks and not visible +# to all cluster nodes at all times. +# +abnormal_end() { + local err_msg=$1 + + ocf_is_probe && { + sapinstance_status + exit $? + } + + ocf_log err $err_msg + if [ "$ACTION" = "stop" ] + then + cleanup_instance + exit $OCF_SUCCESS + fi + + exit $OCF_ERR_CONFIGURED +} + +# +# sapinstance_init : Define global variables with default values, if optional parameters are not set +# +# +sapinstance_init() { + + local myInstanceName="$1" + + SID=`echo "$myInstanceName" | cut -d_ -f1` + InstanceName=`echo "$myInstanceName" | cut -d_ -f2` + InstanceNr=`echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/'` + SAPVIRHOST=`echo "$myInstanceName" | cut -d_ -f3` + + # make sure that we don't care the content of variable from previous run of sapinstance_init + DIR_EXECUTABLE="" + SYSTEMCTL="systemctl" + # optional OCF parameters, we try to guess which directories are correct + if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] + then + if have_binary /usr/sap/$SID/$InstanceName/exe/sapstartsrv && have_binary /usr/sap/$SID/$InstanceName/exe/sapcontrol + then + DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe" + SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv" + SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol" + elif have_binary /usr/sap/$SID/SYS/exe/run/sapstartsrv && have_binary /usr/sap/$SID/SYS/exe/run/sapcontrol + then + DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run" + 
SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv" + SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol" + fi + else + if have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" && have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" + then + DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE" + SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" + SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" + fi + fi + + sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" + + [ -z "$DIR_EXECUTABLE" ] && abnormal_end "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!" + + if [ -z "$OCF_RESKEY_DIR_PROFILE" ] + then + DIR_PROFILE="/usr/sap/$SID/SYS/profile" + else + DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" + fi + + if [ "$myInstanceName" != "$OCF_RESKEY_InstanceName" ] + then + currentSTART_PROFILE=$OCF_RESKEY_ERS_START_PROFILE + else + currentSTART_PROFILE=$OCF_RESKEY_START_PROFILE + fi + + if [ -z "$OCF_RESKEY_IS_ERS" ]; then + is_ers="no" + else + is_ers="$OCF_RESKEY_IS_ERS" + fi + + if [ -z "$currentSTART_PROFILE" ] + then + if [ ! 
-r "$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -a -r "$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" ]; then + SAPSTARTPROFILE="$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" + else + SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" + fi + else + SAPSTARTPROFILE="$currentSTART_PROFILE" + fi + + if [ -z "$OCF_RESKEY_START_WAITTIME" ] + then + export OCF_RESKEY_START_WAITTIME="${OCF_RESKEY_START_WAITTIME_default}" + fi + + if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ] + then + export OCF_RESKEY_MONITOR_SERVICES="${OCF_RESKEY_MONITOR_SERVICES_default}" + fi + + # as root user we need the library path to the SAP kernel to be able to call sapcontrol + if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then + LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH + export LD_LIBRARY_PATH + fi + + return $OCF_SUCCESS +} + +# +# check_systemd_integration : Check, if SAP instance is controlled by systemd unit file SAP_.service +# rc == 0 : sap instance is controlled by the unit file (file at least exists) +# rc == 1 : sap instance is NOT controlled by the unit file (file does not exist) +# +check_systemd_integration() { + local systemd_unit_name="SAP${SID}_${InstanceNr}" + local rc=1 + + if which "$SYSTEMCTL" 1>/dev/null 2>/dev/null; then + if $SYSTEMCTL list-unit-files | \ + awk '$1 == service { found=1 } END { if (! found) {exit 1}}' service="${systemd_unit_name}.service"; then + rc=0 + else + rc=1 + fi + fi + return "$rc" +} + +# +# check_sapstartsrv : Before using sapcontrol we make sure that the sapstartsrv is running for the correct instance. +# We cannot use sapinit and the /usr/sap/sapservices file in case of an enquerep instance, +# because then we have two instances with the same instance number. 
#
# Globals read: SID, InstanceName, InstanceNr, SYSTEMCTL, SAPCONTROL,
#               SAPSTARTSRV, SAPSTARTPROFILE, sidadm
# Globals set:  DIR_PROFILE (only when sapstartsrv has to be restarted)
#
# Returns:
#   0                - the instance has a systemd unit (the unit is started
#                      if it was not active; the result of that start is
#                      not checked)
#   $OCF_SUCCESS     - sapstartsrv answers for this instance, or was
#                      restarted successfully
#   $OCF_ERR_GENERIC - sapstartsrv could not be restarted
#                      ($OCF_NOT_RUNNING instead during a probe)
# Ends the agent through abnormal_end when the profile directory or the
# start profile is missing.
#
check_sapstartsrv() {
  local restart=0
  local runninginst=""
  local chkrc=$OCF_SUCCESS
  local output=""

  # check for sapstartsrv/systemd integration

  if check_systemd_integration; then
    # do it the systemd way
    local systemd_unit_name="SAP${SID}_${InstanceNr}"

    if $SYSTEMCTL status "$systemd_unit_name" 1>/dev/null 2>/dev/null; then
      ocf_log info "systemd service $systemd_unit_name is active"
    else
      ocf_log warn "systemd service $systemd_unit_name is not active, it will be started using systemd"
      $SYSTEMCTL start "$systemd_unit_name" 1>/dev/null 2>/dev/null
      # use start, because restart does also stop sap instance
    fi

    return 0
  else # otherwise continue with old code...
    # the unix domain socket /tmp/.sapstream5<NR>13 is taken as the sign
    # that a sapstartsrv exists for this instance number
    if [ ! -S /tmp/.sapstream5${InstanceNr}13 ]; then
      ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName (no UDS), it will be started now"
      restart=1
    else
      output=`$SAPCONTROL -nr $InstanceNr -function ParameterValue INSTANCE_NAME -format script`
      if [ $? -eq 0 ]
      then
        # third blank separated field of the line starting with "0 : "
        runninginst=`echo "$output" | grep '^0 : ' | cut -d' ' -f3`
        if [ "$runninginst" != "$InstanceName" ]
        then
          ocf_log warn "sapstartsrv is running for instance $runninginst, that service will be killed"
          restart=1
        else
          output=`$SAPCONTROL -nr $InstanceNr -function AccessCheck Start`
          if [ $? -ne 0 ]; then
            ocf_log warn "FAILED : sapcontrol -nr $InstanceNr -function AccessCheck Start (`ls -ld1 /tmp/.sapstream5${InstanceNr}13`)"
            ocf_log warn "sapstartsrv will be restarted to try to solve this situation, otherwise please check sapstartsrv setup (SAP Note 927637)"
            restart=1
          fi
        fi
      else
        ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now"
        restart=1
      fi
    fi

    # nothing (or nothing identifiable) is running: kill/wait patterns below
    # then refer to our own instance name
    if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi

    if [ $restart -eq 1 ]
    then
      # NOTE(review): this insists on the default profile directory and
      # overwrites DIR_PROFILE even if the DIR_PROFILE parameter was set,
      # although the message asks to set that parameter - confirm intent.
      if [ -d /usr/sap/$SID/SYS/profile/ ]
      then
        DIR_PROFILE="/usr/sap/$SID/SYS/profile"
      else
        abnormal_end "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!"
      fi

      # quoted: an empty or blank-containing value must fail the test
      # instead of changing the meaning of the '[' expression
      [ ! -r "$SAPSTARTPROFILE" ] && abnormal_end "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!"

      pkill -9 -f "sapstartsrv.*$runninginst"

      # removing the unix domain socket files as they might have wrong permissions
      # or ownership - they will be recreated by sapstartsrv during next start
      rm -f /tmp/.sapstream5${InstanceNr}13
      rm -f /tmp/.sapstream5${InstanceNr}14

      $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm

      # now make sure the daemon has been started and is able to respond
      # (poll once per second while sapcontrol still exits with 1 and a
      # matching sapstartsrv process exists)
      local srvrc=1
      while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ]
      do
        sleep 1
        $SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1
        srvrc=$?
      done

      if [ $srvrc -ne 1 ]
      then
        ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !"
        chkrc=$OCF_SUCCESS
      else
        ocf_log err "sapstartsrv for instance $SID-$InstanceName could not be started!"
        chkrc=$OCF_ERR_GENERIC
        ocf_is_probe && chkrc=$OCF_NOT_RUNNING
      fi
    fi

    return $chkrc
  fi
}


#
# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems.
#               This specialties do not allow a totally generic SAP cluster resource agent.
#               Someone should write a resource agent for each additional process you need, if it
#               is required to monitor that process within the cluster manager. To enable
#               you to extend this resource agent without developing a new one, this user exit
#               was introduced.
#
# $1 - name of the user-exit attribute (used for logging only)
# $2 - path of the customer script; nothing happens when it is empty
# Always returns 0; the script's return code is only logged.
#
sapuserexit() {
  local NAME="$1"
  local VALUE="$2"

  [ -n "$VALUE" ] || return 0

  if have_binary "$VALUE"; then
    ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
    "$VALUE" >/dev/null 2>&1
    ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?"
  else
    ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
  fi
  return 0
}


#
# cleanup_instance : remove resources (processes and shared memory) from a crashed instance)
#
cleanup_instance() {
  pkill -9 -f -U $sidadm $InstanceName
  ocf_log info "Terminated instance using 'pkill -9 -f -U $sidadm $InstanceName'"

  # it is necessary to call cleanipc as user sidadm if the system has 'vmcj/enable = ON' set - otherwise SHM-segments in /dev/shm/SAP_ES2* cannot be removed
  su - $sidadm -c "cleanipc $InstanceNr remove"
  ocf_log info "Tried to remove shared memory resources using 'cleanipc $InstanceNr remove' as user $sidadm"

  # leftover control and pid files of the instance
  ocf_run rm -fv /usr/sap/$SID/$InstanceName/work/kill.sap
  ocf_run rm -fv /usr/sap/$SID/$InstanceName/work/shutdown.sap
  ocf_run rm -fv /usr/sap/$SID/$InstanceName/data/rslgcpid
  ocf_run rm -fv /usr/sap/$SID/$InstanceName/data/rslgspid

  return 0
}

#
# sapinstance_start : Start the SAP instance
#
# Makes at most two start attempts; the second one only happens when the
# first failed before START_WAITTIME elapsed and AUTOMATIC_RECOVER is true,
# in which case cleanup_instance runs in between.
# Returns OCF_SUCCESS, OCF_NOT_RUNNING, or OCF_ERR_GENERIC when the Start
# request itself (or check_sapstartsrv) failed.
#
sapinstance_start() {

  sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"

  local rc=$OCF_NOT_RUNNING
  local output=""
  local loopcount=0
  local startrc
  local waittime_start
  local waittime_stop

  while [ $loopcount -lt 2 ]
  do
    loopcount=$(($loopcount + 1))

    check_sapstartsrv
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
      output=$($SAPCONTROL -nr $InstanceNr -function Start)
      rc=$?
      ocf_log info "Starting SAP Instance $SID-$InstanceName: $output"
    fi

    if [ $rc -ne 0 ]
    then
      ocf_log err "SAP Instance $SID-$InstanceName start failed."
      return $OCF_ERR_GENERIC
    fi

    startrc=1
    while [ $startrc -gt 0 ]
    do
      waittime_start=$(date +%s)
      output=$($SAPCONTROL -nr $InstanceNr -function WaitforStarted $OCF_RESKEY_START_WAITTIME 10)
      startrc=$?
      waittime_stop=$(date +%s)

      if [ $startrc -eq 0 ]
      then
        loopcount=2
      elif [ $(($waittime_stop - $waittime_start)) -ge $OCF_RESKEY_START_WAITTIME ]
      then
        # WaitforStarted ran into its timeout: ask the monitor; if that is
        # not successful either, startrc stays > 0 and we wait again
        sapinstance_monitor NOLOG
        if [ $? -eq $OCF_SUCCESS ]
        then
          output="START_WAITTIME ($OCF_RESKEY_START_WAITTIME) has elapsed, but instance monitor returned SUCCESS. Instance considered running."
          startrc=0; loopcount=2
        fi
      else
        # WaitforStarted failed before the timeout: real start failure
        if [ $loopcount -eq 1 ] && ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER
        then
          ocf_log warn "SAP Instance $SID-$InstanceName start failed: $output"
          ocf_log warn "Try to recover $SID-$InstanceName"
          cleanup_instance
        else
          loopcount=2
        fi
        startrc=-1
      fi
    done
  done

  if [ $startrc -eq 0 ]
  then
    ocf_log info "SAP Instance $SID-$InstanceName started: $output"
    rc=$OCF_SUCCESS
    sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
    if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 1 -l reboot; fi
  else
    ocf_log err "SAP Instance $SID-$InstanceName start failed: $output"
    rc=$OCF_NOT_RUNNING
    if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi
  fi

  return $rc
}


#
# sapinstance_recover: Try startup of failed instance by cleaning up resources
#
sapinstance_recover() {
  cleanup_instance
  sapinstance_start
  return $?
}


#
# sapinstance_stop: Stop the SAP instance
#
# With SHUTDOWN_METHOD=KILL the instance is only cleaned up and OCF_SUCCESS
# is returned; otherwise returns OCF_SUCCESS or OCF_ERR_GENERIC.
#
sapinstance_stop() {
  local output=""
  local rc

  sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"

  if [ "$OCF_RESKEY_SHUTDOWN_METHOD" = "KILL" ]
  then
    ocf_log info "Stopping SAP Instance $SID-$InstanceName with shutdown method KILL!"
    cleanup_instance
    return $OCF_SUCCESS
  fi

  check_sapstartsrv
  rc=$?
  if [ $rc -eq $OCF_SUCCESS ]; then
    output=$($SAPCONTROL -nr $InstanceNr -function Stop)
    rc=$?
    ocf_log info "Stopping SAP Instance $SID-$InstanceName: $output"
  fi

  if [ $rc -eq 0 ]
  then
    output=$($SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1)
    if [ $? -eq 0 ]
    then
      ocf_log info "SAP Instance $SID-$InstanceName stopped: $output"
      rc=$OCF_SUCCESS
    else
      ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
      rc=$OCF_ERR_GENERIC
    fi
  else
    ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
    rc=$OCF_ERR_GENERIC
  fi

  sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"
  if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi

  return $rc
}


#
# sapinstance_monitor: Can the given SAP instance do anything useful?
#
# $1 - pass NOLOG to suppress the error log messages
#
sapinstance_monitor() {
  local MONLOG=$1
  local rc

  if ocf_is_probe && ocf_is_true "$OCF_RESKEY_MINIMAL_PROBE"; then
    # code for minimal probe: # grep for sapstartsrv and maybe also for sapstart
    # TODO: Do we need to improve this minimal test?
    if pgrep -f -l "sapstartsrv .*pf=.*${SID}_${InstanceName}_${SAPVIRHOST}"; then
      rc="$OCF_SUCCESS"
    elif pgrep -f -l "sapstart .*pf=.*${SID}_${InstanceName}_${SAPVIRHOST}"; then
      rc="$OCF_SUCCESS"
    else
      rc="$OCF_NOT_RUNNING"
    fi
  else
    # standard probe and monitoring code
    check_sapstartsrv
    rc=$?
  fi

  if [ $rc -eq $OCF_SUCCESS ]
  then
    local count=0
    local SERVNO
    local output
    local COLOR
    local SERVICE
    local STATE
    local SEARCH

    output=$($SAPCONTROL -nr $InstanceNr -function GetProcessList -format script)

    # escape '+' and '.' of the configured service list; the pattern does not
    # depend on the service being inspected, so build it once
    SEARCH=$(echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g')

    # we have to parse the output, because the returncode doesn't tell anything about the instance status
    for SERVNO in $(echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u)
    do
      COLOR=$(echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3)
      SERVICE=$(echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3)

      case $COLOR in
        GREEN|YELLOW) STATE=$OCF_SUCCESS;;
        *)            STATE=$OCF_NOT_RUNNING;;
      esac

      # 'grep -E' replaces the deprecated 'egrep' spelling
      if [ $(echo "$SERVICE" | grep -E -c "$SEARCH") -eq 1 ]
      then
        if [ $STATE -eq $OCF_NOT_RUNNING ]
        then
          [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !"
          rc=$STATE
        fi
        count=1
      fi
    done

    # none of the reported services is one we are asked to monitor
    if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ]
    then
      if ocf_is_probe
      then
        rc=$OCF_NOT_RUNNING
      else
        [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!"
        rc=$OCF_ERR_GENERIC
      fi
    fi
  fi

  return $rc
}


#
# sapinstance_status: Lightweight check of SAP instance only with OS tools
#
# Returns OCF_SUCCESS when a PID taken from the third column of kill.sap
# belongs to a process of $sidadm whose command line matches the instance
# name, OCF_NOT_RUNNING otherwise.
#
sapinstance_status() {
  local pid
  local pids
  local running

  [ ! -f "/usr/sap/$SID/$InstanceName/work/kill.sap" ] && return $OCF_NOT_RUNNING
  pids=$(awk '$3 ~ "^[0-9]+$" { print $3 }' /usr/sap/$SID/$InstanceName/work/kill.sap)
  running=$(pgrep -f -U $sidadm $InstanceName)
  for pid in $pids
  do
    # whole-line fixed-string match: a plain 'grep -c $pid' also counted
    # PIDs that merely contain $pid (12 matched 123, 4125, ...)
    echo "$running" | grep -qxF "$pid" && return $OCF_SUCCESS
  done
  return $OCF_NOT_RUNNING
}


#
# sapinstance_validate: Check the semantics of the input parameters
#
sapinstance_validate() {
  local rc=$OCF_SUCCESS

  if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!"
    rc=$OCF_ERR_ARGS
  fi

  if [ $(echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$') -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!"
    rc=$OCF_ERR_ARGS
  fi

  if [ $(echo "$InstanceNr" | grep -c '^[0-9][0-9]$') -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!"
    rc=$OCF_ERR_ARGS
  fi

  if [ $(echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$') -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!"
    rc=$OCF_ERR_ARGS
  fi

  return $rc
}


#
# sapinstance_start_clone
#
sapinstance_start_clone() {
  sapinstance_init $OCF_RESKEY_ERS_InstanceName
  ${HA_SBIN_DIR}/crm_master -v 50 -l reboot
  sapinstance_start
  return $?
}


#
# sapinstance_stop_clone
#
sapinstance_stop_clone() {
  sapinstance_init $OCF_RESKEY_ERS_InstanceName
  ${HA_SBIN_DIR}/crm_master -v 0 -l reboot
  sapinstance_stop
  return $?
}


#
# sapinstance_monitor_clone
#
sapinstance_monitor_clone() {
  # first check with the status function (OS tools) if there could be something like a SAP instance running
  # as we do not know here, if we are in master or slave state we do not want to start our monitoring
  # agents (sapstartsrv) on the wrong host
  local rc

  sapinstance_init $OCF_RESKEY_InstanceName
  if sapinstance_status; then
    if sapinstance_monitor; then
      ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
      return $OCF_RUNNING_MASTER
    fi
    # by nature of the SAP enqueue server we have to make sure
    # that we do a failover to the slave (enqueue replication server)
    # in case the enqueue process has failed. We signal this to the
    # cluster by setting our master preference to a lower value than the slave.
    ${HA_SBIN_DIR}/crm_master -v 10 -l reboot
    return $OCF_FAILED_MASTER
  fi

  sapinstance_init $OCF_RESKEY_ERS_InstanceName
  sapinstance_status && sapinstance_monitor
  rc=$?
  if [ $rc -eq $OCF_SUCCESS ]; then
    ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
  fi
  return $rc
}


#
# sapinstance_promote_clone: In a Master/Slave configuration get Master by starting the SCS instance and stopping the ERS instance
#                            The order is important here to behave correct from the application levels view
#
sapinstance_promote_clone() {
  local rc

  sapinstance_init $OCF_RESKEY_InstanceName
  ocf_log info "Promoting $SID-$InstanceName to running Master."
  sapinstance_start
  rc=$?

  if [ $rc -eq $OCF_SUCCESS ]; then
    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    sapinstance_stop
    rc=$?
  fi

  return $rc
}


#
# sapinstance_demote_clone: In a Master/Slave configuration get Slave by stopping the SCS instance and starting the ERS instance
#
sapinstance_demote_clone() {
  local rc

  sapinstance_init $OCF_RESKEY_InstanceName
  ocf_log info "Demoting $SID-$InstanceName to a slave."
  sapinstance_stop
  rc=$?

  if [ $rc -eq $OCF_SUCCESS ]; then
    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    sapinstance_start
    rc=$?
  fi

  return $rc
}


#
# sapinstance_notify: Handle master scoring - to make sure a slave gets the next master
#
sapinstance_notify() {
  local n_type="$OCF_RESKEY_CRM_meta_notify_type"
  local n_op="$OCF_RESKEY_CRM_meta_notify_operation"

  if [ "${n_type}_${n_op}" = "post_promote" ]; then
    # After promotion of one master in the cluster, we make sure that all clones reset their master
    # value back to 100. This is because a failed monitor on a master might have lowered one clone
    # instance's score to 10.
    ${HA_SBIN_DIR}/crm_master -v 100 -l reboot
  elif [ "${n_type}_${n_op}" = "pre_demote" ]; then
    # if we are a slave and a demote event is announced, make sure we are highest on the list to become master
    # that is, when a slave resource was started after the promote event of an already running master (e.g. node of slave was down)
    # We also have to make sure to overrule the globally set resource_stickiness or any fail-count factors => INFINITY
    local n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname"
    # quoted operands: an empty notify variable must not turn this test
    # into a syntax error
    if [ "${n_uname}" != "${NODENAME}" ]; then
      ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot
    fi
  fi
}


#
# 'main' starts here...
#

## GLOBALS
SID=""
sidadm=""
InstanceName=""
InstanceNr=""
SAPVIRHOST=""
DIR_EXECUTABLE=""
SAPSTARTSRV=""
SAPCONTROL=""
DIR_PROFILE=""
SAPSTARTPROFILE=""
CLONE=0
NODENAME=$(ocf_local_nodename)


# exactly one argument (the action) is expected
if [ $# -ne 1 ]
then
  sapinstance_usage
  exit $OCF_ERR_ARGS
fi

ACTION=$1
if [ "$ACTION" = "status" ]; then
  ACTION=monitor
fi

# These operations don't require OCF instance parameters to be set
case "$ACTION" in
  usage|methods)  sapinstance_$ACTION
                  exit $OCF_SUCCESS;;
  meta-data)      sapinstance_meta_data
                  exit $OCF_SUCCESS;;
  notify)         sapinstance_notify
                  exit $OCF_SUCCESS;;
  *);;
esac

if ! ocf_is_root
then
  ocf_log err "$0 must be run as root"
  exit $OCF_ERR_PERM
fi

# parameter check
if [ -z "$OCF_RESKEY_InstanceName" ]
then
  ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!"
  exit $OCF_ERR_ARGS
fi

# in a clone setup the *_clone variants of the actions are dispatched and
# they call sapinstance_init themselves
is_clone; CLONE=$?
if [ ${CLONE} -eq 1 ]
then
  CLACT=_clone
else
  if [ "$ACTION" = "promote" -o "$ACTION" = "demote" ]
  then
    ocf_log err "$ACTION called in a non master/slave environment"
    exit $OCF_ERR_ARGS
  fi
  sapinstance_init $OCF_RESKEY_InstanceName
fi

# What kind of method was invoked?
+case "$ACTION" in + start|stop|monitor|promote|demote) sapinstance_$ACTION$CLACT + exit $?;; + validate-all) sapinstance_validate + exit $?;; + reload ) + ocf_log info "reloading SAPInstance parameters" + exit $OCF_SUCCESS;; + *) sapinstance_methods + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/heartbeat/SendArp b/heartbeat/SendArp new file mode 100755 index 0000000..5af7bec --- /dev/null +++ b/heartbeat/SendArp @@ -0,0 +1,277 @@ +#!/bin/sh +# +# +# Copyright (c) 2006, Huang Zhen +# Converting original heartbeat RA to OCF RA. +# +# Copyright (C) 2004 Horms +# +# Based on IPaddr2: Copyright (C) 2003 Tuomo Soini +# +# License: GNU General Public License (GPL) +# Support: users@clusterlabs.org +# +# This script send out gratuitous Arp for an IP address +# +# It can be used _instead_ of the IPaddr2 or IPaddr resource +# to send gratuitous arp for an IP address on a given interface, +# without adding the address to that interface. I.e. if for +# some reason you want to send gratuitous arp for addresses +# managed by IPaddr2 or IPaddr on an additional interface. +# +# OCF parameters are as below: +# OCF_RESKEY_ip +# OCF_RESKEY_nic +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_ip_default="" +OCF_RESKEY_nic_default="" +OCF_RESKEY_background_default="true" + +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_background=${OCF_RESKEY_background_default}} + +SENDARP=$HA_BIN/send_arp +SENDARPPIDDIR=${HA_RSCTMP} + +BASEIP="$OCF_RESKEY_ip" +INTERFACE="$OCF_RESKEY_nic" +RESIDUAL="" +SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP" +BACKGROUND=${OCF_RESKEY_background} + +# Set default values + + : ${ARP_INTERVAL_MS=200} # milliseconds between ARPs + : ${ARP_REPEAT=5} # repeat count + : ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground + : ${ARP_NETMASK=ffffffffffff} # netmask for ARP + +####################################################################### + +sendarp_meta_data() { + cat < + + +1.0 + + +This RA can be used _instead_ of the IPaddr2 or IPaddr RA to +send gratuitous ARP for an IP address on a given interface, +without adding the address to that interface. For example, +if for some reason you wanted to send gratuitous ARP for +addresses managed by IPaddr2 or IPaddr on an additional +interface. + +Broadcasts unsolicited ARP announcements + + + + +The IP address for sending ARP packet. + +IP address + + + + + +The NIC for sending ARP packet. + +NIC + + + + + +Send ARPs in background. Set to false if you want to test if +sending ARPs succeeded. 
+ +Send ARPs in background + + + + + + + + + + + + + +END +} + +####################################################################### + +sendarp_usage() { + cat < + + +1.0 + + +Resource script for ServeRAID. It enables/disables shared ServeRAID merge groups. + +Enables and disables shared ServeRAID merge groups + + + + +The adapter number of the ServeRAID adapter. + +serveraid + + + + + +The logical drive under consideration. + +mergegroup + + + + + + + + + + + + + + +END +} + +ServeRAID_methods() { + cat <<-! + start + stop + status + validate-all + methods + usage + meta-data + ! +} + +ServeRAIDSCSI="/proc/scsi/ips" + + +IPS=ipssend +proc_scsi=/proc/scsi/scsi + + +parseinst() { + sr_adapter=error + sr_mergegroup=error + hostid=error + sr_logicaldrivenumber=error + if + [ $# -ne 2 ] + then + ocf_log err "Invalid ServeRAID instance: $*" + exit $OCF_ERR_ARGS + fi + PerlScript='next unless /^Host/; $_ .= <>.<>; print "$1 " if /SERVERAID/ and /Proces/ and /scsi(\d+)/' + # Get the list of host ids of the ServeRAID host adapters + hostlist=`$PERL -ne "${PerlScript}" <$proc_scsi` + # Figure the host id of the desired ServeRAID adapter + hostid=`echo $hostlist | cut -d' ' -f$1` + if + [ ! -f "$ServeRAIDSCSI/$hostid" ] + then + ocf_log err "No such ServeRAID adapter: $1" + exit $OCF_ERR_ARGS + fi + + case $2 in + [1-8]);; + *) ocf_log err "Invalid Shared Merge Group Number: $2" + exit $OCF_ERR_ARGS;; + esac + sr_adapter=$1 + sr_mergegroup=$2 + CheckRaidLevel + return $? 
+} + +SRLogicalDriveConfig() { + $IPS getconfig $sr_adapter ld +} + +MergeGroupToSCSI_ID() { + + PerlScript="while (<>) { + /logical drive number *([0-9]+)/i && (\$ld=\$1); + /part of merge group *: *$sr_mergegroup *\$/i && print \$ld - 1, \"\n\"; + }" + + ID=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` + case $ID in + [0-9]*) echo "$ID"; return 0;; + *) return 1;; + esac +} + +MergeGroupRaidLevel() { + + PerlScript="while (<>) { + /RAID level *: *([0-9]+[A-Za-z]*)/i && (\$ld=\$1); + /part of merge group *: *$sr_mergegroup *\$/i && print \$ld, \"\n\"; + }" + + Level=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` + case $Level in + ?*) echo "$Level"; return 0;; + *) return 1;; + esac +} + +CheckRaidLevel() { + RAIDlevel=`MergeGroupRaidLevel` + case $RAIDlevel in + *5*) + ocf_log err "ServeRAID device $sr_adapter $sr_mergegroup is RAID level $RAIDlevel" + ocf_log err "This level of ServeRAID RAID is not supported for failover by the firmware." + exit $OCF_ERR_GENERIC;; + esac + return $OCF_SUCCESS +} + + + + +ReleaseSCSI() { + targetid=`MergeGroupToSCSI_ID` + echo "${SCSI}remove-single-device $hostid 0 $targetid 0" > $proc_scsi +} + +AddSCSI() { + targetid=`MergeGroupToSCSI_ID` + echo "${SCSI}add-single-device $hostid 0 $targetid 0" > $proc_scsi +} + +# +# start: Enable the given ServeRAID device +# +ServeRAID_start() { + if + ServeRAID_status $serveraid $mergegroup + then + ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." + return $OCF_SUCCESS + else + if + # + # Normally we do a MERGE PARTNER, but if we still own the drive for + # some reason, then we'll need to do a MERGE OWN instead... + # + out=`$IPS MERGE $sr_adapter $sr_mergegroup PARTNER 2>&1` + if + [ $? -eq $srsuccess ] + then + ocf_log info "$out" + else + ocf_run $IPS MERGE $sr_adapter $sr_mergegroup OWN + fi + then + : OK All is well! 
+ targetid=`MergeGroupToSCSI_ID` + sr_logicaldrivenumber=`expr $targetid + 1` + #run $IPS SYNCH $sr_adapter $sr_logicaldrivenumber & + # This version of the SYNCH command requires the 6.10 or later + # ServeRAID support CD. + # To avoid issues when called by lrmd, redirect stdout->stderr. + # Use () to create a subshell to make the redirection be synchronized. + ( ocf_run $IPS SYNCH $sr_adapter $sr_mergegroup & ) >&2 + AddSCSI + else + return $OCF_ERR_GENERIC + fi + fi + if + ServeRAID_status "$@" + then + return $OCF_SUCCESS + else + ocf_log err "ServeRAID device $1 not active!" + exit $OCF_ERR_GENERIC + fi +} + + +# +# stop: Disable the given ServeRAID device +# +ServeRAID_stop() { + parseinst "$@" + ReleaseSCSI + if + ocf_run $IPS UNMERGE $sr_adapter $sr_mergegroup + then + : UNMERGE $sr_adapter $sr_mergegroup worked + fi + if + ServeRAID_status "$@" + then + ocf_log err "ServeRAID device $* is still active!" + return $OCF_ERR_GENERIC + else + return $OCF_SUCCESS + fi +} + + +# +# status: is the given device now available? +# +ServeRAID_status() { + parseinst "$@" + # + # The output we're looking for + # Part of merge group : 2 + # + SRLogicalDriveConfig \ + | grep -i "part of merge group[ ]*: *$sr_mergegroup *\$" >/dev/null +} + +# +# validate_all: are the OCF instance parameters valid? +# +ServeRAID_validate_all() { + check_binary $PERL + +# parseinst() will do all the work... + parseinst "$@" + return $? +} + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +# These operations don't require OCF instance parameters to be set +case "$1" in + meta-data) + meta_data + exit $OCF_SUCCESS;; +# +# methods: What methods do we support? +# + methods) + ServeRAID_methods + exit $?;; + usage) + usage + exit $OCF_SUCCESS;; + *) + ;; +esac + +if + ( [ -z "$OCF_RESKEY_serveraid" ] || [ -z "$OCF_RESKEY_mergegroup" ] ) +then + ocf_log err "You have to set the OCF_RESKEY_serveraid and OCF_RESKEY_mergegroup\n + enviroment virables before running $0 !" 
+# usage + exit $OCF_ERR_GENERIC +fi + +: Right Number of arguments.. +serveraid=$OCF_RESKEY_serveraid +mergegroup=$OCF_RESKEY_mergegroup + +# Look for the start, stop, status, or methods calls... +case "$1" in + stop) + ServeRAID_stop $serveraid $mergegroup + exit $?;; + start) + ServeRAID_start $serveraid $mergegroup + exit $?;; + status|monitor) + if + ServeRAID_status $serveraid $mergegroup + then + ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." + exit $OCF_SUCCESS + else + ocf_log debug "ServeRAID merge group $serveraid $mergegroup is stopped." + exit $OCF_NOT_RUNNING + fi + exit $?;; + validate-all) + ServeRAID_validate_all $serveraid $mergegroup + exit $?;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED;; + +esac diff --git a/heartbeat/SphinxSearchDaemon b/heartbeat/SphinxSearchDaemon new file mode 100755 index 0000000..d4e9e85 --- /dev/null +++ b/heartbeat/SphinxSearchDaemon @@ -0,0 +1,230 @@ +#!/bin/sh +# +# +# Searchd OCF RA. +# Manages the Sphinx search daemon +# +# Copyright (c) 2007 Christian Rishoj (christian@rishoj.net) +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_config_default="/etc/sphinx/sphinx.conf" +OCF_RESKEY_searchd_default="/usr/local/bin/searchd" +OCF_RESKEY_search_default="/usr/local/bin/search" +OCF_RESKEY_testQuery_default="Heartbeat_Monitor_Query_Match_string" + +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_searchd=${OCF_RESKEY_searchd_default}} +: ${OCF_RESKEY_search=${OCF_RESKEY_search_default}} +: ${OCF_RESKEY_testQuery=${OCF_RESKEY_testQuery_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This is a searchd Resource Agent. It manages the Sphinx Search Daemon. + +Manages the Sphinx search daemon. + + + + + +searchd configuration file + +Configuration file + + + + + +searchd binary + +searchd binary + + + + + +Search binary for functional testing in the monitor action. + +search binary + + + + + +Test query for functional testing in the monitor action. +The query does not need to match any documents in the index. +The purpose is merely to test whether the search daemon is +is able to query its indices and respond properly. + +test query + + + + + + + + + + + + + +END +} + +####################################################################### + +searchd_usage() { + cat < /dev/null && [ `ps -p "$1" | grep searchd | wc -l` -eq 1 ] +} + +searchd_status() { + pidfile=`grep -v "^#" "$OCF_RESKEY_config" | grep -w pid_file | awk -F "[ \t]*=[ \t]*" '{ print $2 }'` + if [ -f "$pidfile" ] ; then + PID=`head -n 1 $pidfile` + if [ ! -z "$PID" ] ; then + isRunning "$PID" + if [ $? 
= 0 ] ; then + return 0 + fi + fi + fi + false +} + +searchd_check() { + $OCF_RESKEY_search --config $OCF_RESKEY_config --noinfo "$OCF_RESKEY_testQuery" > /dev/null +} + +searchd_monitor() { + if ! searchd_validate ; then + return $OCF_NOT_RUNNING + fi + if searchd_status ; then + if searchd_check ; then + return $OCF_SUCCESS + else + return $OCF_ERR_GENERIC + fi + else + return $OCF_NOT_RUNNING + fi +} + +searchd_validate() { + if [ ! -x "$OCF_RESKEY_search" ]; then + ocf_log err "search binary '$OCF_RESKEY_search' does not exist or cannot be executed" + return $OCF_ERR_ARGS + fi + + if [ ! -x "$OCF_RESKEY_searchd" ]; then + ocf_log err "searchd binary '$OCF_RESKEY_searchd' does not exist or cannot be executed" + return $OCF_ERR_ARGS + fi + + if [ ! -f "$OCF_RESKEY_config" ]; then + ocf_log err "config file '$OCF_RESKEY_config' does not exist" + return $OCF_ERR_ARGS + fi + + return $OCF_SUCCESS +} + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +start) searchd_start;; +stop) searchd_stop;; +monitor) searchd_monitor;; +validate-all) searchd_validate;; +usage|help) searchd_usage + exit $OCF_SUCCESS + ;; +*) searchd_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc + diff --git a/heartbeat/Squid.in b/heartbeat/Squid.in new file mode 100644 index 0000000..e574ad0 --- /dev/null +++ b/heartbeat/Squid.in @@ -0,0 +1,472 @@ +#!@BASH_SHELL@ +# +# Description: Manages a Squid Server provided by NTT OSSC as an +# OCF High-Availability resource under Heartbeat/LinuxHA control +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# Copyright (c) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION +# +####################################################################### +# OCF parameters: +# OCF_RESKEY_squid_exe : Executable file +# OCF_RESKEY_squid_conf : Configuration file +# OCF_RESKEY_squid_opts : Start options +# OCF_RESKEY_squid_pidfile: Process id file +# OCF_RESKEY_squid_port : Port number +# OCF_RESKEY_debug_mode : Debug mode +# OCF_RESKEY_debug_log : Debug log file +# OCF_RESKEY_squid_stop_timeout: +# Number of seconds to await to confirm a +# normal stop method +# +# OCF_RESKEY_squid_exe, OCF_RESKEY_squid_conf, OCF_RESKEY_squid_pidfile +# and OCF_RESKEY_squid_port must be specified. Each of the rests +# has its default value or refers OCF_RESKEY_squid_conf to make +# its value when no explicit value is given. +############################################################################### + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_squid_exe_default="" +OCF_RESKEY_squid_conf_default="" +OCF_RESKEY_squid_opts_default="" +OCF_RESKEY_squid_pidfile_default="" +OCF_RESKEY_squid_port_default="" +OCF_RESKEY_squid_stop_timeout_default="10" +OCF_RESKEY_debug_mode_default="" +OCF_RESKEY_debug_log_default="" + +: ${OCF_RESKEY_squid_exe=${OCF_RESKEY_squid_exe_default}} +: ${OCF_RESKEY_squid_conf=${OCF_RESKEY_squid_conf_default}} +: ${OCF_RESKEY_squid_pidfile=${OCF_RESKEY_squid_pidfile_default}} +: ${OCF_RESKEY_squid_port=${OCF_RESKEY_squid_port_default}} +: ${OCF_RESKEY_squid_stop_timeout=${OCF_RESKEY_squid_stop_timeout_default}} +: ${OCF_RESKEY_debug_mode=${OCF_RESKEY_debug_mode_default}} +: ${OCF_RESKEY_debug_log=${OCF_RESKEY_debug_log_default}} + +usage() +{ + cat <<-! +usage: $0 action + +action: + start : start a new squid instance + + stop : stop the running squid instance + + status : return the status of squid, run or down + + monitor : return TRUE if the squid appears to be working. + + meta-data : show meta data message + + validate-all: validate the instance parameters +! + return $OCF_ERR_ARGS +} + +metadata_squid() +{ + cat < + + +1.0 + + +The resource agent of Squid. +This manages a Squid instance as an HA resource. + +Manages a Squid proxy server instance + + + + + +This is a required parameter. This parameter specifies squid's +executable file. + +Executable file + + + + + +This is a required parameter. This parameter specifies a configuration file +for a squid instance managed by this RA. + +Configuration file + + + + + +This is a optional parameter. This parameter specifies the start options. + +Start options + + + + +Deprecated - do not use anymore +deprecated - do not use anymore + + + + + +This is a required parameter. This parameter specifies a port number +for a squid instance managed by this RA. If multiple ports are used, +you must specify only one of them. 
+ +Port number + + + + + +On stop, a squid shutdown is invoked first. If the resource +doesn't stop within this timeout, we resort to stopping +processes by sending signals and finally KILLing them. + +how long to wait for squid shutdown to stop the +instance before resorting to kill + + + + + +This is an optional parameter. +This RA runs in debug mode when this parameter includes 'x' or 'v'. +If 'x' is included, both of STDOUT and STDERR redirect to the logfile +specified by "debug_log", and then the builtin shell option 'x' is turned on. +It is similar about 'v'. + +Debug mode + + + + + +This is an optional parameter. +This parameter specifies a destination file for debug logs +and works only if this RA run in debug mode. Refer to "debug_mode" +about debug mode. If no value is given but is required, it's constructed +according to the following rules: "/var/log/" as a directory part, +the basename of the configuration file given by "syslog_ng_conf" +as a basename part, ".log" as a suffix. + +A destination of the debug log + + + + + + + + + + + + + + +END + + return $OCF_SUCCESS +} + +get_pids() +{ + SQUID_PIDS=( ) + + # Seek by pattern + SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN") + + # Seek by child process + if [[ -n "${SQUID_PIDS[0]}" ]]; then + SQUID_PIDS[1]=$(pgrep -P ${SQUID_PIDS[0]}) + fi + + if [[ -n "${SQUID_PIDS[1]}" ]]; then + typeset exe + exe=$(ls -l "/proc/${SQUID_PIDS[1]}/exe") + if [[ $? = 0 ]]; then + exe=${exe##*-> } + if ! 
[[ "$exe" = $SQUID_EXE ]]; then + SQUID_PIDS[1]="" + fi + else + SQUID_PIDS[1]="" + fi + fi + + # Seek by port + if have_binary netstat; then + SQUID_PIDS[2]=$( + netstat -apn | + awk '/tcp.*:'$SQUID_PORT' .*LISTEN/ && $7~/^[1-9]/ { + sub("\\/.*", "", $7); print $7; exit}') + else + SQUID_PIDS[2]=$( + ss -apn | + awk '/tcp.*LISTEN.*:'$SQUID_PORT'/ { + sub(".*pid=", "", $7); sub(",fd=.*", "", $7); print $7 }') + fi +} + +are_all_pids_found() +{ + if + [[ -n "${SQUID_PIDS[0]}" ]] && + [[ -n "${SQUID_PIDS[1]}" ]] && + [[ -n "${SQUID_PIDS[2]}" ]] + then + return 0 + else + return 1 + fi +} + +are_pids_sane() +{ + if [[ "${SQUID_PIDS[1]}" = "${SQUID_PIDS[2]}" ]]; then + return $OCF_SUCCESS + else + ocf_exit_reason "$SQUID_NAME:Pid unmatch" + return $OCF_ERR_GENERIC + fi +} + +is_squid_dead() +{ + if + [[ -z "${SQUID_PIDS[0]}" ]] && + [[ -z "${SQUID_PIDS[2]}" ]] + then + return 0 + else + return 1 + fi +} + +monitor_squid() +{ + typeset trialcount=0 + + while true; do + get_pids + + if are_all_pids_found; then + are_pids_sane + return $OCF_SUCCESS + fi + + if is_squid_dead; then + return $OCF_NOT_RUNNING + fi + + ocf_log info "$SQUID_NAME:Inconsistent processes:" \ + "${SQUID_PIDS[0]},${SQUID_PIDS[1]},${SQUID_PIDS[2]}" + (( trialcount = trialcount + 1 )) + if (( trialcount > SQUID_CONFIRM_TRIALCOUNT )); then + ocf_exit_reason "$SQUID_NAME:Inconsistency of processes remains unsolved" + return $OCF_ERR_GENERIC + fi + sleep 1 + done +} + +start_squid() +{ + typeset status + + monitor_squid + status=$? + + if [[ $status != $OCF_NOT_RUNNING ]]; then + return $status + fi + + set -- "$SQUID_OPTS" + ocf_run $SQUID_EXE -f "$SQUID_CONF" "$@" + status=$? 
+ if [[ $status != $OCF_SUCCESS ]]; then + return $OCF_ERR_GENERIC + fi + + while true; do + get_pids + if are_all_pids_found && are_pids_sane; then + return $OCF_SUCCESS + fi + ocf_log info "$SQUID_NAME:Waiting for squid to be invoked" + sleep 1 + done + + return $OCF_ERR_GENERIC +} + +stop_squid() +{ + typeset lapse_sec + + if ocf_run $SQUID_EXE -f $SQUID_CONF -k shutdown; then + lapse_sec=0 + while true; do + get_pids + if is_squid_dead; then + return $OCF_SUCCESS + fi + (( lapse_sec = lapse_sec + 1 )) + if (( lapse_sec > SQUID_STOP_TIMEOUT )); then + break + fi + sleep 1 + ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ + "stop NORM $lapse_sec/$SQUID_STOP_TIMEOUT" + done + fi + + while true; do + get_pids + ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ + "try to stop by SIGKILL:${SQUID_PIDS[0]} ${SQUID_PIDS[2]}" + kill -KILL ${SQUID_PIDS[0]} ${SQUID_PIDS[2]} + sleep 1 + if is_squid_dead; then + return $OCF_SUCCESS + fi + done + + return $OCF_ERR_GENERIC +} + +status_squid() +{ + return $OCF_SUCCESS +} + + +validate_all_squid() +{ + ocf_log info "validate_all_squid[$SQUID_NAME]" + return $OCF_SUCCESS +} + +: "=== Debug ${0##*/} $1 ===" + +if [[ "$1" = "meta-data" ]]; then + metadata_squid + exit $? +fi + +SQUID_CONF="${OCF_RESKEY_squid_conf}" +if [[ -z "$SQUID_CONF" ]]; then + ocf_exit_reason "SQUID_CONF is not defined" + exit $OCF_ERR_CONFIGURED +fi + +SQUID_NAME="${SQUID_CONF##*/}" +SQUID_NAME="${SQUID_NAME%.*}" + +DEBUG_LOG="${OCF_RESKEY_debug_log-/var/log/squid_${SQUID_NAME}_debug}.log" + +DEBUG_MODE="" +case $OCF_RESKEY_debug_mode in + *x*) DEBUG_MODE="${DEBUG_MODE}x";; +esac +case $OCF_RESKEY_debug_mode in + *v*) DEBUG_MODE="${DEBUG_MODE}v";; +esac + +if [ -n "$DEBUG_MODE" ]; then + PS4='\d \t \h '"${1-unknown} " + export PS4 + exec 1>>$DEBUG_LOG 2>&1 + set -$DEBUG_MODE +fi + +SQUID_EXE="${OCF_RESKEY_squid_exe}" +if [[ -z "$SQUID_EXE" ]]; then + ocf_exit_reason "SQUID_EXE is not defined" + exit $OCF_ERR_CONFIGURED +fi +if [[ ! 
-x "$SQUID_EXE" ]]; then + ocf_exit_reason "$SQUID_EXE is not found" + exit $OCF_ERR_CONFIGURED +fi + +SQUID_PORT="${OCF_RESKEY_squid_port}" +if [[ -z "$SQUID_PORT" ]]; then + ocf_exit_reason "SQUID_PORT is not defined" + exit $OCF_ERR_CONFIGURED +fi + +SQUID_OPTS="${OCF_RESKEY_squid_opts}" + +SQUID_PIDS=( ) + +SQUID_CONFIRM_TRIALCOUNT="${OCF_RESKEY_squid_confirm_trialcount-3}" + +SQUID_STOP_TIMEOUT="${OCF_RESKEY_squid_stop_timeout-10}" +SQUID_SUSPEND_TRIALCOUNT="${OCF_RESKEY_squid_suspend_trialcount-10}" + +PROCESS_PATTERN="$SQUID_EXE -f $SQUID_CONF" + +COMMAND=$1 + +case "$COMMAND" in + start) + ocf_log debug "[$SQUID_NAME] Enter squid start" + start_squid + func_status=$? + ocf_log debug "[$SQUID_NAME] Leave squid start $func_status" + exit $func_status + ;; + stop) + ocf_log debug "[$SQUID_NAME] Enter squid stop" + stop_squid + func_status=$? + ocf_log debug "[$SQUID_NAME] Leave squid stop $func_status" + exit $func_status + ;; + status) + status_squid + exit $? + ;; + monitor) + #ocf_log debug "[$SQUID_NAME] Enter squid monitor" + monitor_squid + func_status=$? + #ocf_log debug "[$SQUID_NAME] Leave squid monitor $func_status" + exit $func_status + ;; + validate-all) + validate_all_squid + exit $? + ;; + *) + usage + ;; +esac + +# vim: set sw=4 ts=4 : + diff --git a/heartbeat/Stateful b/heartbeat/Stateful new file mode 100755 index 0000000..72dd550 --- /dev/null +++ b/heartbeat/Stateful @@ -0,0 +1,192 @@ +#!/bin/sh +# +# +# Example of a stateful OCF Resource Agent. +# +# Copyright (c) 2006 Andrew Beekhof +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_state_default="${HA_RSCTMP}/Stateful-${OCF_RESOURCE_INSTANCE}.state" + +: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This is an example resource agent that implements two states + +Example stateful resource agent + + + + + +Location to store the resource state in + +State file + + + + + + + + + + + + + + + +END + exit $OCF_SUCCESS +} + +####################################################################### + +stateful_usage() { + cat < ${OCF_RESKEY_state} +} + +stateful_check_state() { + target=$1 + if [ -f ${OCF_RESKEY_state} ]; then + state=`cat ${OCF_RESKEY_state}` + if [ "x$target" = "x$state" ]; then + return $OCF_SUCCESS + fi + + else + if [ "x$target" = "x" ]; then + return $OCF_SUCCESS + fi + fi + + return $OCF_ERR_GENERIC +} + +stateful_start() { + stateful_check_state master + if [ $? = 0 ]; then + # CRM Error - Should never happen + return $OCF_RUNNING_MASTER + fi + stateful_update slave + ocf_promotion_score -v 5 + return $OCF_SUCCESS +} + +stateful_demote() { + stateful_check_state + if [ $? 
= 0 ]; then + # CRM Error - Should never happen + return $OCF_NOT_RUNNING + fi + stateful_update slave + ocf_promotion_score -v 5 + return $OCF_SUCCESS +} + +stateful_promote() { + stateful_check_state + if [ $? = 0 ]; then + return $OCF_NOT_RUNNING + fi + stateful_update master + ocf_promotion_score -v 10 + return $OCF_SUCCESS +} + +stateful_stop() { + ocf_promotion_score -D + stateful_check_state master + if [ $? = 0 ]; then + # CRM Error - Should never happen + return $OCF_RUNNING_MASTER + fi + if [ -f ${OCF_RESKEY_state} ]; then + rm ${OCF_RESKEY_state} + fi + return $OCF_SUCCESS +} + +stateful_monitor() { + stateful_check_state "master" + if [ $? = 0 ]; then + return $OCF_RUNNING_MASTER + fi + + stateful_check_state "slave" + if [ $? = 0 ]; then + return $OCF_SUCCESS + fi + + if [ -f ${OCF_RESKEY_state} ]; then + echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents" + cat ${OCF_RESKEY_state} + return $OCF_ERR_GENERIC + fi + return $OCF_NOT_RUNNING +} + +stateful_validate() { + exit $OCF_SUCCESS +} + +case $__OCF_ACTION in +meta-data) meta_data;; +start) stateful_start;; +promote) stateful_promote;; +demote) stateful_demote;; +stop) stateful_stop;; +monitor) stateful_monitor;; +validate-all) stateful_validate;; +usage|help) stateful_usage $OCF_SUCCESS;; +*) stateful_usage $OCF_ERR_UNIMPLEMENTED;; +esac + +exit $? diff --git a/heartbeat/SysInfo.in b/heartbeat/SysInfo.in new file mode 100644 index 0000000..c57b7b6 --- /dev/null +++ b/heartbeat/SysInfo.in @@ -0,0 +1,372 @@ +#!@BASH_SHELL@ +# +# +# SysInfo OCF Resource Agent +# It records (in the CIB) various attributes of a node +# +# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Bree +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
+# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_pidfile_default="$HA_RSCTMP/SysInfo-${OCF_RESOURCE_INSTANCE}" +OCF_RESKEY_delay_default="0s" +OCF_RESKEY_clone_default="0" + +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_delay=${OCF_RESKEY_delay_default}} +: ${OCF_RESKEY_clone=${OCF_RESKEY_clone_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This is a SysInfo Resource Agent. 
+It records (in the CIB) various attributes of a node +Sample Linux output: + arch: i686 + os: Linux-2.4.26-gentoo-r14 + free_swap: 1999 + cpu_info: Intel(R) Celeron(R) CPU 2.40GHz + cpu_speed: 4771.02 + cpu_cores: 1 + cpu_load: 0.00 + ram_total: 513 + ram_free: 117 + root_free: 2.4 + +Sample Darwin output: + arch: i386 + os: Darwin-8.6.2 + cpu_info: Intel Core Duo + cpu_speed: 2.16 + cpu_cores: 2 + cpu_load: 0.18 + ram_total: 2016 + ram_free: 787 + root_free: 13 + +Units: + free_swap: Mb + ram_*: Mb + root_free: Gb + cpu_speed (Linux): bogomips + cpu_speed (Darwin): Ghz + + +Records various node attributes in the CIB + + + + +PID file +PID file + + + + +Interval to allow values to stabilize +Dampening Delay + + + + + + + + + + + + +END +} + +####################################################################### + +UpdateStat() { + name=$1; shift + value="$*" + echo -e "$name:\t$value" + ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value" +} + +SysInfoStats() { + + UpdateStat arch "`uname -m`" + UpdateStat os "`uname -s`-`uname -r`" + + case `uname -s` in + "Darwin") + mem=`top -l 1 | grep Mem: | awk '{print $10}'` + mem_used=`top -l 1 | grep Mem: | awk '{print $8}'` + mem=`SysInfo_mem_units $mem` + mem_used=`SysInfo_mem_units $mem_used` + mem_total=`expr $mem_used + $mem` + cpu_type=`system_profiler SPHardwareDataType | grep "CPU Type:"` + cpu_type=${cpu_type/*: /} + cpu_speed=`system_profiler SPHardwareDataType | grep "CPU Speed:" | awk '{print $3}'` + cpu_cores=`system_profiler SPHardwareDataType | grep "Number Of"` + cpu_cores=${cpu_cores/*: /} + ;; + "Linux") + if [ -f /proc/cpuinfo ]; then + cpu_type=`grep "model name" /proc/cpuinfo | head -n 1` + cpu_type=${cpu_type/*: /} + cpu_speed=`grep "bogomips" /proc/cpuinfo | head -n 1` + cpu_speed=${cpu_speed/*: /} + cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l` + fi + + if [ -f /proc/meminfo ]; then + # meminfo results are in kB + mem=`grep "SwapFree" /proc/meminfo | awk '{print 
$2"k"}'` + if [ ! -z $mem ]; then + UpdateStat free_swap `SysInfo_mem_units $mem` + fi + mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'` + mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'` + else + mem=`top -n 1 | grep Mem: | awk '{print $7}'` + fi + ;; + *) + esac + + if [ x != x"$cpu_type" ]; then + UpdateStat cpu_info "$cpu_type" + fi + + if [ x != x"$cpu_speed" ]; then + UpdateStat cpu_speed "$cpu_speed" + fi + + if [ x != x"$cpu_cores" ]; then + UpdateStat cpu_cores "$cpu_cores" + fi + + loads=`uptime` + load15=`echo ${loads} | awk '{print $10}'` + UpdateStat cpu_load $load15 + + if [ ! -z "$mem" ]; then + # Massage the memory values + UpdateStat ram_total `SysInfo_mem_units $mem_total` + UpdateStat ram_free `SysInfo_mem_units $mem` + fi + + # Portability notes: + # o df: -h flag not available on Solaris 8. (OK on 9, 10, ...) #FIXME# + # o tail: explicit "-n" not available in Solaris; instead simplify + # 'tail -n ' to the equivalent 'tail -'. + disk=`df -h / | tail -1 | awk '{print $4}'` + if [ x != x"$disk" ]; then + UpdateStat root_free `SysInfo_hdd_units $disk` + fi +} + +SysInfo_mem_units() { + mem=$1 + + if [ -z $1 ]; then + return + fi + + memlen=`expr ${#mem} - 1` + memlen_alt=`expr ${#mem} - 2` + if [ ${mem:$memlen:1} = "G" ]; then + mem="${mem:0:$memlen}" + if [ $mem != ${mem/./} ]; then + mem_before=${mem/.*/} + mem_after=${mem/*./} + mem=$[mem_before*1024] + if [ ${#mem_after} = 0 ]; then + : + elif [ ${#mem_after} = 1 ]; then + mem=$[mem+100*$mem_after] + elif [ ${#mem_after} = 2 ]; then + mem=$[mem+10*$mem_after] + elif [ ${#mem_after} = 3 ]; then + mem=$[mem+$mem_after] + else + mem_after=${mem_after:0:3} + mem=$[mem+$mem_after] + fi + fi + elif [ ${mem:$memlen:1} = "M" ]; then + mem=${mem/.*/} + mem="${mem:0:$memlen}" + elif [ ${mem:$memlen:1} = "k" ]; then + mem="${mem:0:$memlen}" + mem=${mem/.*/} + mem=`expr $mem / 1024` + elif [ ${mem:$memlen_alt:2} = "kB" ]; then + mem="${mem:0:$memlen_alt}" + mem=${mem/.*/} + 
mem=`expr $mem / 1024` + elif [ ${mem:$memlen_alt:2} = "Mb" ]; then + mem="${mem:0:$memlen_alt}" + mem=${mem/.*/} + elif [ ${mem:$memlen_alt:2} = "MB" ]; then + mem="${mem:0:$memlen_alt}" + mem=${mem/.*/} + fi + + # Round to the next multiple of 50 + memlen=`expr ${#mem} - 2` + mem_round="${mem:$memlen:2}" + if [ x$mem_round = x ]; then + : + elif [ $mem_round = "00" ]; then + : + else + mem_round=`echo $mem_round | sed 's/^0//'` + if [ $mem_round -lt "50" ]; then + mem=$[mem+50] + mem=$[mem-$mem_round] + + else + mem=$[mem+100] + mem=$[mem-$mem_round] + fi + fi + echo $mem +} + +SysInfo_hdd_units() { + disk=$1 + disklen=`expr ${#disk} - 1` + disklen_alt=`expr ${#disk} - 2` + if [ ${disk:$disklen:1} = "G" ]; then + disk="${disk:0:$disklen}" + elif [ ${disk:$disklen:1} = "M" ]; then + disk="${disk:0:$disklen}" + disk=${disk/.*/} + disk=`expr $disk / 1024` + elif [ ${disk:$disklen:1} = "k" ]; then + disk="${disk:0:$disklen}" + disk=${disk/.*/} + disk=`expr $disk / 1048576` + elif [ ${disk:$disklen_alt:2} = "kB" ]; then + disk="${disk:0:$disklen_alt}" + disk=${disk/.*/} + disk=`expr $disk / 1048576` + elif [ ${disk:$disklen_alt:2} = "Mb" ]; then + disk="${disk:0:$disklen_alt}" + disk=${disk/.*/} + disk=`expr $disk / 1024` + elif [ ${disk:$disklen_alt:2} = "MB" ]; then + disk="${disk:0:$disklen_alt}" + disk=${disk/.*/} + disk=`expr $disk / 1024` + fi + echo $disk +} + +SysInfo_usage() { + cat < $OCF_RESKEY_pidfile + SysInfoStats + exit $OCF_SUCCESS +} + +SysInfo_stop() { + rm $OCF_RESKEY_pidfile + exit $OCF_SUCCESS +} + +SysInfo_monitor() { + if [ -f $OCF_RESKEY_pidfile ]; then + clone=`cat $OCF_RESKEY_pidfile` + fi + + if [ x$clone = x ]; then + rm $OCF_RESKEY_pidfile + exit $OCF_NOT_RUNNING + + elif [ $clone = $OCF_RESKEY_clone ]; then + SysInfoStats + exit $OCF_SUCCESS + + elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue ] || + [ x$OCF_RESKEY_CRM_meta_globally_unique = xTrue ] || + [ x$OCF_RESKEY_CRM_meta_globally_unique = xyes ] || + [ 
x$OCF_RESKEY_CRM_meta_globally_unique = xYes ]; then + SysInfoStats + exit $OCF_SUCCESS + fi + exit $OCF_NOT_RUNNING +} + +SysInfo_validate() { + return $OCF_SUCCESS +} + +if [ $# -ne 1 ]; then + SysInfo_usage + exit $OCF_ERR_ARGS +fi + +if [ x != x${OCF_RESKEY_delay} ]; then + OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" +fi + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +start) SysInfo_start + ;; +stop) SysInfo_stop + ;; +monitor) SysInfo_monitor + ;; +validate-all) SysInfo_validate + ;; +usage|help) SysInfo_usage + exit $OCF_SUCCESS + ;; +*) SysInfo_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac + +exit $? diff --git a/heartbeat/VIPArip b/heartbeat/VIPArip new file mode 100755 index 0000000..688237d --- /dev/null +++ b/heartbeat/VIPArip @@ -0,0 +1,314 @@ +#!/bin/sh +# +# License: GNU General Public License (GPL) +# Support: users@clusterlabs.org +# Author: Huang Zhen +# Copyright (c) 2006 International Business Machines +# +# Virtual IP Address by RIP2 protocol. +# This script manages IP alias in different subnet with quagga/ripd. +# It can add an IP alias, or remove one. +# +# The quagga package should be installed to run this RA +# +# usage: $0 {start|stop|status|monitor|validate-all|meta-data} +# +# The "start" arg adds an IP alias. +# Surprisingly, the "stop" arg removes one. :-) +# +# OCF parameters are as below +# OCF_RESKEY_ip The IP address in different subnet +# OCF_RESKEY_nic The nic for broadcast the route information +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +RIPDCONF=$HA_RSCTMP/VIPArip-ripd.conf +ZEBRA=/usr/sbin/zebra +RIPD=/usr/sbin/ripd +USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; + +# Parameter defaults + +OCF_RESKEY_ip_default="" +OCF_RESKEY_nic_default="eth0" +OCF_RESKEY_zebra_binary_default="${ZEBRA}" +OCF_RESKEY_ripd_binary_default="${RIPD}" + +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_zebra_binary=${OCF_RESKEY_zebra_binary_default}} +: ${OCF_RESKEY_ripd_binary=${OCF_RESKEY_ripd_binary_default}} + +####################################################################### + +meta_data() { +cat < + + +1.0 + + +Virtual IP Address by RIP2 protocol. +This script manages IP alias in different subnet with quagga/ripd. +It can add an IP alias, or remove one. + +Manages a virtual IP address through RIP2 + + + + + +The IPv4 address in different subnet, for example "192.168.1.1". + +The IP address in different subnet + + + + + +The nic for broadcast the route information. +The ripd uses this nic to broadcast the route information to others + +The nic for broadcast the route information + + + + + +Absolute path to the zebra binary. + +zebra binary + + + + + +Absolute path to the ripd binary. 
+ +ripd binary + + + + + + + + + + + + + +END +exit $OCF_SUCCESS +} + +usage() { + echo $USAGE >&2 +} + +new_config_file() { + echo new_config_file $1 $2 $3 + cat >$RIPDCONF < $RIPDCONF.tmp + cp $RIPDCONF.tmp $RIPDCONF +} + +add_ip() { + echo add_ip $1 + sed "s/ip_tag/ip_tag\naccess-list private permit $1\/32/g" $RIPDCONF > $RIPDCONF.tmp + cp $RIPDCONF.tmp $RIPDCONF +} + +del_ip() { + echo del_ip $1 + sed "/$1/d" $RIPDCONF > $RIPDCONF.tmp + cp $RIPDCONF.tmp $RIPDCONF + if $GREP "access-list private permit" $RIPDCONF>/dev/null + then + echo some other IP is running + reload_config + else + stop_quagga + echo remove $RIPDCONF + rm $RIPDCONF + fi + +} + +add_nic() { + echo add_nic $1 + if $GREP "network $1" $RIPDCONF >/dev/null + then + echo the nic is already in the config file + else + sed "s/nic_tag/nic_tag\n no passive-interface $1\n network $1\n distribute-list private out $1\n distribute-list private in $1/g" $RIPDCONF > $RIPDCONF.tmp + cp $RIPDCONF.tmp $RIPDCONF + fi +} + +reload_config() { + echo reload_config + echo $RIPDCONF: + cat $RIPDCONF + echo killall -SIGHUP ripd + killall -SIGHUP ripd +} + +start_quagga() { + echo start_quagga + echo $RIPDCONF: + cat $RIPDCONF + echo $ZEBRA -d + $ZEBRA -d + echo $RIPD -d -f $RIPDCONF + $RIPD -d -f $RIPDCONF +} + +stop_quagga() { + echo stop_quagga + echo $RIPDCONF: + cat $RIPDCONF + echo killall -SIGTERM ripd + killall -SIGTERM ripd + echo killall -SIGTERM zebra + killall -SIGTERM zebra +} + +start_rip_ip() { + echo start_rip_ip + check_params + + if [ x"$OCF_RESKEY_nic" = x ] + then + echo OCF_RESKEY_nic is null, set to ${OCF_RESKEY_nic_default} + OCF_RESKEY_nic="${OCF_RESKEY_nic_default}" + fi + + status_rip_ip + case $? 
in + $OCF_SUCCESS) + ocf_log info "already running" + exit $OCF_SUCCESS + ;; + $OCF_NOT_RUNNING) + ;; + *) + ocf_log info "state undefined, stopping first" + stop_rip_ip + ;; + esac + + $IP2UTIL addr add $OCF_RESKEY_ip/32 dev lo + if [ -f "$RIPDCONF" ] + then + # there is a config file, add new data(IP,nic,metric) + # to the existing config file. + add_ip $OCF_RESKEY_ip + add_nic $OCF_RESKEY_nic + set_metric 1 + reload_config + echo sleep 3 + sleep 3 + set_metric 3 + reload_config + else + new_config_file $OCF_RESKEY_ip $OCF_RESKEY_nic 1 + start_quagga + echo sleep 3 + sleep 3 + set_metric 3 + reload_config + fi + return $OCF_SUCCESS +} + +stop_rip_ip() { + echo stop_rip_ip + check_params + status_rip_ip + if [ $? = $OCF_NOT_RUNNING ] + then + exit $OCF_SUCCESS + fi + $IP2UTIL addr del $OCF_RESKEY_ip dev lo + echo sleep 2 + sleep 2 + del_ip $OCF_RESKEY_ip + return $OCF_SUCCESS +} + +status_rip_ip() { + check_params + if $IP2UTIL addr | $GREP $OCF_RESKEY_ip >/dev/null + then + if $GREP $OCF_RESKEY_ip $RIPDCONF >/dev/null + then + if pidof ripd >/dev/null + then + return $OCF_SUCCESS + fi + fi + return $OCF_ERR_GENERIC + fi + return $OCF_NOT_RUNNING +} + +if + [ $# -ne 1 ] +then + usage + exit $OCF_ERR_ARGS +fi + +[ x != x"$OCF_RESKEY_zebra_binary" ] && + ZEBRA=$OCF_RESKEY_zebra_binary +[ x != x"$OCF_RESKEY_ripd_binary" ] && + RIPD=$OCF_RESKEY_ripd_binary + +case $1 in + start) start_rip_ip;; + stop) stop_rip_ip;; + status) status_rip_ip;; + monitor) status_rip_ip;; + validate-all) check_binary $IP2UTIL + exit $OCF_SUCCESS;; + meta-data) meta_data;; + usage) usage; exit $OCF_SUCCESS;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain new file mode 100755 index 0000000..3905695 --- /dev/null +++ b/heartbeat/VirtualDomain @@ -0,0 +1,1158 @@ +#!/bin/sh +# +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# +# Resource Agent for domains managed by the libvirt API. 
+# Requires a running libvirt daemon (libvirtd). +# +# (c) 2008-2010 Florian Haas, Dejan Muhamedagic, +# and Linux-HA contributors +# +# usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} +# +####################################################################### +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Defaults +OCF_RESKEY_config_default="" +OCF_RESKEY_migration_transport_default="" +OCF_RESKEY_migration_downtime_default=0 +OCF_RESKEY_migration_speed_default=0 +OCF_RESKEY_migration_network_suffix_default="" +OCF_RESKEY_force_stop_default=0 +OCF_RESKEY_monitor_scripts_default="" +OCF_RESKEY_autoset_utilization_cpu_default="true" +OCF_RESKEY_autoset_utilization_host_memory_default="true" +OCF_RESKEY_autoset_utilization_hv_memory_default="true" +OCF_RESKEY_unset_utilization_cpu_default="false" +OCF_RESKEY_unset_utilization_host_memory_default="false" +OCF_RESKEY_unset_utilization_hv_memory_default="false" +OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 )) +OCF_RESKEY_CRM_meta_timeout_default=90000 +OCF_RESKEY_save_config_on_stop_default=false +OCF_RESKEY_sync_config_on_stop_default=false +OCF_RESKEY_snapshot_default="" +OCF_RESKEY_backingfile_default="" +OCF_RESKEY_stateless_default="false" +OCF_RESKEY_copyindirs_default="" +OCF_RESKEY_shutdown_mode_default="" +OCF_RESKEY_start_resources_default="false" + +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_migration_transport=${OCF_RESKEY_migration_transport_default}} +: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}} +: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}} +: ${OCF_RESKEY_migration_network_suffix=${OCF_RESKEY_migration_network_suffix_default}} +: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} +: ${OCF_RESKEY_monitor_scripts=${OCF_RESKEY_monitor_scripts_default}} +: 
${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} +: ${OCF_RESKEY_autoset_utilization_host_memory=${OCF_RESKEY_autoset_utilization_host_memory_default}} +: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} +: ${OCF_RESKEY_unset_utilization_cpu=${OCF_RESKEY_unset_utilization_cpu_default}} +: ${OCF_RESKEY_unset_utilization_host_memory=${OCF_RESKEY_unset_utilization_host_memory_default}} +: ${OCF_RESKEY_unset_utilization_hv_memory=${OCF_RESKEY_unset_utilization_hv_memory_default}} +: ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}} +: ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}} +: ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}} +: ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}} +: ${OCF_RESKEY_snapshot=${OCF_RESKEY_snapshot_default}} +: ${OCF_RESKEY_backingfile=${OCF_RESKEY_backingfile_default}} +: ${OCF_RESKEY_stateless=${OCF_RESKEY_stateless_default}} +: ${OCF_RESKEY_copyindirs=${OCF_RESKEY_copyindirs_default}} +: ${OCF_RESKEY_shutdown_mode=${OCF_RESKEY_shutdown_mode_default}} +: ${OCF_RESKEY_start_resources=${OCF_RESKEY_start_resources_default}} + +if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}; then + OCF_RESKEY_save_config_on_stop="true" +fi +####################################################################### + +## I'd very much suggest to make this RA use bash, +## and then use magic $SECONDS. +## But for now: +NOW=$(date +%s) + +usage() { + echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" +} + +VirtualDomain_meta_data() { + cat < + + +1.0 + + +Resource agent for a virtual domain (a.k.a. domU, virtual machine, +virtual environment etc., depending on context) managed by libvirtd. + +Manages virtual domains through the libvirt virtualization framework + + + + + +Absolute path to the libvirt configuration file, +for this virtual domain. 
+ +Virtual domain configuration file + + + + + +Hypervisor URI to connect to. See the libvirt documentation for +details on supported URI formats. The default is system dependent. +Determine the system's default uri by running 'virsh --quiet uri'. + +Hypervisor URI + + + + + +Always forcefully shut down ("destroy") the domain on stop. The default +behavior is to resort to a forceful shutdown only after a graceful +shutdown attempt has failed. You should only set this to true if +your virtual domain (or your virtualization backend) does not support +graceful shutdown. + +Always force shutdown on stop + + + + + +Transport used to connect to the remote hypervisor while +migrating. Please refer to the libvirt documentation for details on +transports available. If this parameter is omitted, the resource will +use libvirt's default transport to connect to the remote hypervisor. + +Remote hypervisor transport + + + + + +The username will be used in the remote libvirt remoteuri/migrateuri. No user will be +given (which means root) in the username if omitted + +If remoteuri is set, migration_user will be ignored. + +Remote username for the remoteuri + + + + + +Define max downtime during live migration in milliseconds + +Live migration downtime + + + + + +Define live migration speed per resource in MiB/s + +Live migration speed + + + + + +Use a dedicated migration network. The migration URI is composed by +adding this parameters value to the end of the node name. If the node +name happens to be an FQDN (as opposed to an unqualified host name), +insert the suffix immediately prior to the first period (.) in the FQDN. +At the moment Qemu/KVM and Xen migration via a dedicated network is supported. + +Note: Be sure this composed host name is locally resolvable and the +associated IP is reachable through the favored network. This suffix will +be added to the remoteuri and migrateuri parameters. + +See also the migrate_options parameter below. 
+ +Migration network host name suffix + + + + + +You can also specify here if the calculated migrate URI is unsuitable for your +environment. + +If migrateuri is set then migration_network_suffix, migrateport and +--migrateuri in migrate_options are effectively ignored. Use "%n" as the +placeholder for the target node name. + +Please refer to the libvirt documentation for details on guest +migration. + +Custom migrateuri for migration state transfer + + + + + +Extra virsh options for the guest live migration. You can also specify +here --migrateuri if the calculated migrate URI is unsuitable for your +environment. If --migrateuri is set then migration_network_suffix +and migrateport are effectively ignored. Use "%n" as the placeholder +for the target node name. + +Please refer to the libvirt documentation for details on guest +migration. + +live migrate options + + + + + +To additionally monitor services within the virtual domain, add this +parameter with a list of scripts to monitor. + +Note: when monitor scripts are used, the start and migrate_from operations +will complete only when all monitor scripts have completed successfully. +Be sure to set the timeout of these operations to accommodate this delay. + +space-separated list of monitor scripts + + + + + +If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it +into the CPU utilization of the resource when the monitor is executed. + +Enable auto-setting the CPU utilization of the resource + + + + + +If set true, the agent will detect the number of *Max memory* from virsh, and put it +into the host_memory utilization of the resource when the monitor is executed. + +Enable auto-setting the host_memory utilization of the resource + + + + + +If set true, the agent will detect the number of *Max memory* from virsh, and put it +into the hv_memory utilization of the resource when the monitor is executed. 
+ +Enable auto-setting the hv_memory utilization of the resource + + + + + +If set true then the agent will remove the cpu utilization resource when the monitor +is executed. + +Enable auto-removing the CPU utilization of the resource + + + + + +If set true then the agent will remove the host_memory utilization resource when the monitor +is executed. + +Enable auto-removing the host_memory utilization of the resource + + + + + +If set true then the agent will remove the hv_memory utilization resource when the monitor +is executed. + +Enable auto-removing the hv_memory utilization of the resource + + + + + +This port will be used in the qemu migrateuri. If unset, the port will be a random highport. + +Port for migrateuri + + + + + +Use this URI as virsh connection URI to commuicate with a remote hypervisor. + +If remoteuri is set then migration_user and migration_network_suffix are +effectively ignored. Use "%n" as the placeholder for the target node name. + +Please refer to the libvirt documentation for details on guest +migration. + +Custom remoteuri to communicate with a remote hypervisor + + + + + +Changes to a running VM's config are normally lost on stop. +This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter. + +Save running VM's config back to its config file + + + + + +Setting this automatically enables save_config_on_stop. +When enabled this parameter instructs the RA to +call csync2 -x to synchronize the file to all nodes. +csync2 must be properly set up for this to work. + +Save running VM's config back to its config file + + + + + +Path to the snapshot directory where the virtual machine image will be stored. When this +parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot +directory when stopped. If on start a state file is present for the domain, the domain +will be restored to the same state it was in right before it stopped last. 
This option +is incompatible with the 'force_stop' option. + + +Restore state on start/stop + + + + + + +When the VM is used in Copy-On-Write mode, this is the backing file to use (with its full path). +The VM's image will be created based on this backing file. +This backing file will never be changed during the life of the VM. + +If the VM is wanted to work with Copy-On-Write mode, this is the backing file to use (with its full path) + + + + + +If set to true and backingfile is defined, the start of the VM will systematically create a new qcow2 based on +the backing file, therefore the VM will always be stateless. If set to false, the start of the VM will use the +COW (<vmname>.qcow2) file if it exists, otherwise the first start will create a new qcow2 based on the backing +file given as backingfile. + +If set to true, the (<vmname>.qcow2) file will be re-created at each start, based on the backing file (if defined) + + + + + +List of directories for the virt-copy-in before booting the VM. Used only in stateless mode. + +List of directories for the virt-copy-in before booting the VM in stateless mode. + + + + + +virsh shutdown method to use. Please verify that it is supported by your virsh toolset with 'virsh help shutdown'. +When this parameter is set --mode shutdown_mode is passed as an additional argument to the 'virsh shutdown' command. +One can use this option in case default acpi method does not work. Verify that this mode is supported +by your VM. By default --mode is not passed. + + +Instruct virsh to use specific shutdown mode + + + + + + +Start the virtual storage pools and networks used by the virtual machine before starting it or before live migrating it. + + +Ensure the needed virtual storage pools and networks are started + + + + + + + + + + + + + + + + + +EOF +} + +set_util_attr() { + local attr=$1 val=$2 + local cval outp + + cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) + if [ $? 
-ne 0 ] && [ -z "$cval" ]; then + crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -e "not connected" > /dev/null 2>&1 + if [ $? -eq 0 ]; then + ocf_log debug "Unable to set utilization attribute, cib is not available" + return + fi + fi + + if [ "$cval" != "$val" ]; then + outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || + ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp" + fi +} + +unset_util_attr() { + local attr=$1 + local cval outp + + outp=$(crm_resource --resource=$OCF_RESOURCE_INSTANCE --utilization --delete-parameter=$attr 2>&1) || + ocf_log warn "crm_resource failed to unset utilization attribute $attr: $outp" +} + +update_utilization() { + local dom_cpu dom_mem + + if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then + dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/CPU\(s\)/{print $2}') + test -n "$dom_cpu" && set_util_attr cpu $dom_cpu + elif ocf_is_true "$OCF_RESKEY_unset_utilization_cpu"; then + unset_util_attr cpu + fi + + if ocf_is_true "$OCF_RESKEY_autoset_utilization_host_memory"; then + dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}') + test -n "$dom_mem" && set_util_attr host_memory "$dom_mem" + elif ocf_is_true "$OCF_RESKEY_unset_utilization_host_memory"; then + unset_util_attr host_memory + fi + + if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then + dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}') + test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" + elif ocf_is_true "$OCF_RESKEY_unset_utilization_hv_memory"; then + unset_util_attr hv_memory + fi +} + +get_emulator() +{ + local emulator="" + + emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') + if [ -z "$emulator" ] && [ -e "$EMULATOR_STATE" ]; then + emulator=$(cat 
$EMULATOR_STATE) + fi + if [ -z "$emulator" ]; then + emulator=$(cat ${OCF_RESKEY_config} | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') + fi + + if [ -n "$emulator" ]; then + basename $emulator + fi +} + +update_emulator_cache() +{ + local emulator + + emulator=$(get_emulator) + if [ -n "$emulator" ]; then + echo $emulator > $EMULATOR_STATE + fi +} + +# attempt to check domain status outside of libvirt using the emulator process +pid_status() +{ + local rc=$OCF_ERR_GENERIC + local emulator=$(get_emulator) + # An emulator is not required, so only report message in debug mode + local loglevel="debug" + + if ocf_is_probe; then + loglevel="notice" + fi + + case "$emulator" in + qemu-kvm|qemu-dm|qemu-system-*) + rc=$OCF_NOT_RUNNING + ps awx | grep -E "[q]emu-(kvm|dm|system).*-name ($DOMAIN_NAME|[^ ]*guest=$DOMAIN_NAME(,[^ ]*)?) " > /dev/null 2>&1 + if [ $? -eq 0 ]; then + rc=$OCF_SUCCESS + fi + ;; + libvirt_lxc) + rc=$OCF_NOT_RUNNING + ps awx | grep -E "[l]ibvirt_lxc.*-name ($DOMAIN_NAME|[^ ]*guest=$DOMAIN_NAME(,[^ ]*)?) " > /dev/null 2>&1 + if [ $? -eq 0 ]; then + rc=$OCF_SUCCESS + fi + ;; + # This can be expanded to check for additional emulators + *) + # We may be running xen with PV domains, they don't + # have an emulator set. try xl list or xen-lists + if have_binary xl; then + rc=$OCF_NOT_RUNNING + xl list $DOMAIN_NAME >/dev/null 2>&1 + if [ $? -eq 0 ]; then + rc=$OCF_SUCCESS + fi + elif have_binary xen-list; then + rc=$OCF_NOT_RUNNING + xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null + if [ $? -eq 0 ]; then + rc=$OCF_SUCCESS + fi + else + ocf_log $loglevel "Unable to determine emulator for $DOMAIN_NAME" + fi + ;; + esac + + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log debug "Virtual domain $DOMAIN_NAME is currently running." + elif [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running." 
+ fi + + return $rc +} + +VirtualDomain_status() { + local try=0 + rc=$OCF_ERR_GENERIC + status="no state" + while [ "$status" = "no state" ]; do + try=$(($try + 1 )) + status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z') + case "$status" in + *"error:"*"domain not found"|*"error:"*"failed to get domain"*|"shut off") + # shut off: domain is defined, but not started, will not happen if + # domain is created but not defined + # "Domain not found" or "failed to get domain": domain is not defined + # and thus not started + ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)" + rc=$OCF_NOT_RUNNING + ;; + running|paused|idle|blocked|"in shutdown") + # running: domain is currently actively consuming cycles + # paused: domain is paused (suspended) + # idle: domain is running but idle + # blocked: synonym for idle used by legacy Xen versions + # in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed. + ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." + rc=$OCF_SUCCESS + ;; + ""|*"failed to "*"connect to the hypervisor"*|"no state") + # Empty string may be returned when virsh does not + # receive a reply from libvirtd. + # "no state" may occur when the domain is currently + # being migrated (on the migration target only), or + # whenever virsh can't reliably obtain the domain + # state. + status="no state" + if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then + # During the stop operation, we want to bail out + # quickly, so as to be able to force-stop (destroy) + # the domain if necessary. + ocf_exit_reason "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out." + return $OCF_ERR_GENERIC; + elif [ "$__OCF_ACTION" = "monitor" ]; then + pid_status + rc=$? 
+ if [ $rc -ne $OCF_ERR_GENERIC ]; then + # we've successfully determined the domains status outside of libvirt + return $rc + fi + + else + # During all other actions, we just wait and try + # again, relying on the CRM/LRM to time us out if + # this takes too long. + ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." + fi + sleep 1 + ;; + *) + # any other output is unexpected. + ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" + sleep 1 + ;; + esac + done + return $rc +} + +# virsh undefine removes configuration files if they are in +# directories which are managed by libvirt. such directories +# include also subdirectories of /etc (for instance +# /etc/libvirt/*) which may be surprising. VirtualDomain didn't +# include the undefine call before, hence this wasn't an issue +# before. +# +# There seems to be no way to find out which directories are +# managed by libvirt. +# +verify_undefined() { + local tmpf + if virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -wqs "$DOMAIN_NAME" + then + tmpf=$(mktemp -t vmcfgsave.XXXXXX) + if [ ! -r "$tmpf" ]; then + ocf_log warn "unable to create temp file, disk full?" + # we must undefine the domain + virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 + else + cp -p $OCF_RESKEY_config $tmpf + virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 + [ -f $OCF_RESKEY_config ] || cp -f $tmpf $OCF_RESKEY_config + rm -f $tmpf + fi + fi +} + +start_resources() { + local virsh_opts="--connect=$1 --quiet" + local pool_state net_state + for pool in `sed -n "s/^.*pool=['\"]\([^'\"]\+\)['\"].*\$/\1/gp" ${OCF_RESKEY_config} | sort | uniq`; do + pool_state=`LANG=C virsh ${virsh_opts} pool-info ${pool} | sed -n 's/^State: \+\(.*\)$/\1/gp'` + if [ "$pool_state" != "running" ]; then + virsh ${virsh_opts} pool-start $pool + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to start required virtual storage pool ${pool}." 
+ return $OCF_ERR_GENERIC + fi + else + virsh ${virsh_opts} pool-refresh $pool + fi + done + + for net in `sed -n "s/^.*network=['\"]\([^'\"]\+\)['\"].*\$/\1/gp" ${OCF_RESKEY_config} | sort | uniq`; do + net_state=`LANG=C virsh ${virsh_opts} net-info ${net} | sed -n 's/^Active: \+\(.*\)$/\1/gp'` + if [ "$net_state" != "yes" ]; then + virsh ${virsh_opts} net-start $net + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to start required virtual network ${net}." + return $OCF_ERR_GENERIC + fi + fi + done + + return $OCF_SUCCESS +} + +VirtualDomain_start() { + local snapshotimage + + if VirtualDomain_status; then + ocf_log info "Virtual domain $DOMAIN_NAME already running." + return $OCF_SUCCESS + fi + + # systemd drop-in to stop domain before libvirtd terminates services + # during shutdown/reboot + if systemd_is_running ; then + systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service" + systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target" + systemctl start virt-guest-shutdown.target + fi + + snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" + if [ -n "$OCF_RESKEY_snapshot" -a -f "$snapshotimage" ]; then + virsh restore $snapshotimage + if [ $? -eq 0 ]; then + rm -f $snapshotimage + return $OCF_SUCCESS + fi + ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory." + return $OCF_ERR_GENERIC + fi + + # Make sure domain is undefined before creating. + # The 'create' command guarantees that the domain will be + # undefined on shutdown, but requires the domain to be undefined. + # if a user defines the domain + # outside of this agent, we have to ensure that the domain + # is restored to an 'undefined' state before creating. + verify_undefined + + if ocf_is_true "${OCF_RESKEY_start_resources}"; then + start_resources ${OCF_RESKEY_hypervisor} + rc=$? 
+ if [ $rc -eq $OCF_ERR_GENERIC ]; then + return $rc + fi + fi + + if [ -z "${OCF_RESKEY_backingfile}" ]; then + virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config} + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}." + return $OCF_ERR_GENERIC + fi + else + if ocf_is_true "${OCF_RESKEY_stateless}" || [ ! -s "${OCF_RESKEY_config%%.*}.qcow2" ]; then + # Create the Stateless image + dirconfig=`dirname ${OCF_RESKEY_config}` + qemu-img create -f qcow2 -b ${OCF_RESKEY_backingfile} ${OCF_RESKEY_config%%.*}.qcow2 + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed qemu-img create ${DOMAIN_NAME} with backing file ${OCF_RESKEY_backingfile}." + return $OCF_ERR_GENERIC + fi + + virsh define ${OCF_RESKEY_config} + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}." + return $OCF_ERR_GENERIC + fi + + if [ -n "${OCF_RESKEY_copyindirs}" ]; then + # Inject copyindirs directories and files + virt-copy-in -d ${DOMAIN_NAME} ${OCF_RESKEY_copyindirs} / + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed on virt-copy-in command ${DOMAIN_NAME}." + return $OCF_ERR_GENERIC + fi + fi + else + virsh define ${OCF_RESKEY_config} + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}." + return $OCF_ERR_GENERIC + fi + fi + + virsh $VIRSH_OPTIONS start ${DOMAIN_NAME} + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}." + return $OCF_ERR_GENERIC + fi + fi + + while ! VirtualDomain_monitor; do + sleep 1 + done + + return $OCF_SUCCESS +} + +force_stop() +{ + local out ex translate + local status=0 + + ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." + out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1) + ex=$? 
+ translate=$(echo $out|tr 'A-Z' 'a-z') + echo >&2 "$translate" + case $ex$translate in + *"error:"*"domain is not running"*|*"error:"*"domain not found"*|\ + *"error:"*"failed to get domain"*) + : ;; # unexpected path to the intended outcome, all is well + [!0]*) + ocf_exit_reason "forced stop failed" + return $OCF_ERR_GENERIC ;; + 0*) + while [ $status != $OCF_NOT_RUNNING ]; do + VirtualDomain_status + status=$? + done ;; + esac + return $OCF_SUCCESS +} + +sync_config(){ + ocf_log info "Syncing $DOMAIN_NAME config file with csync2 -x ${OCF_RESKEY_config}" + if ! csync2 -x ${OCF_RESKEY_config}; then + ocf_log warn "Syncing ${OCF_RESKEY_config} failed."; + fi +} + +save_config(){ + CFGTMP=$(mktemp -t vmcfgsave.XXX) + virsh $VIRSH_OPTIONS dumpxml --inactive --security-info ${DOMAIN_NAME} > ${CFGTMP} + if [ -s ${CFGTMP} ]; then + if ! cmp -s ${CFGTMP} ${OCF_RESKEY_config}; then + if virt-xml-validate ${CFGTMP} domain 2>/dev/null ; then + ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes or sync_config_on_stop is on." + if cat ${CFGTMP} > ${OCF_RESKEY_config} ; then + ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}." + if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then + sync_config + fi + else + ocf_log warn "Moving ${CFGTMP} to ${OCF_RESKEY_config} failed." + fi + else + ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update." + fi + fi + else + ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update." + fi + rm -f ${CFGTMP} +} + +VirtualDomain_stop() { + local i + local status + local shutdown_timeout + local needshutdown=1 + + VirtualDomain_status + status=$? + + case $status in + $OCF_SUCCESS) + if ocf_is_true $OCF_RESKEY_force_stop; then + # if force stop, don't bother attempting graceful shutdown. + force_stop + return $? + fi + + ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." 
+ + if [ -n "$OCF_RESKEY_snapshot" ]; then + virsh save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" + if [ $? -eq 0 ]; then + needshutdown=0 + else + ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop" + fi + fi + + # save config if needed + if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then + save_config + fi + + # issue the shutdown if save state didn't shutdown for us + if [ $needshutdown -eq 1 ]; then + # Issue a graceful shutdown request + if [ -n "${OCF_RESKEY_CRM_shutdown_mode}" ]; then + shutdown_opts="--mode ${OCF_RESKEY_CRM_shutdown_mode}" + fi + virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} $shutdown_opts + fi + + # The "shutdown_timeout" we use here is the operation + # timeout specified in the CIB, minus 5 seconds + shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) + # Loop on status until we reach $shutdown_timeout + while [ $NOW -lt $shutdown_timeout ]; do + VirtualDomain_status + status=$? + case $status in + $OCF_NOT_RUNNING) + # This was a graceful shutdown. + return $OCF_SUCCESS + ;; + $OCF_SUCCESS) + # Domain is still running, keep + # waiting (until shutdown_timeout + # expires) + sleep 1 + ;; + *) + # Something went wrong. Bail out and + # resort to forced stop (destroy). + break; + esac + NOW=$(date +%s) + done + ;; + $OCF_NOT_RUNNING) + ocf_log info "Domain $DOMAIN_NAME already stopped." + return $OCF_SUCCESS + esac + + # OK. Now if the above graceful shutdown hasn't worked, kill + # off the domain with destroy. If that too does not work, + # have the LRM time us out. + force_stop +} + +mk_migrateuri() { + local target_node + local migrate_target + local hypervisor + + target_node="$OCF_RESKEY_CRM_meta_migrate_target" + + # A typical migration URI via a special migration network looks + # like "tcp://bar-mig:49152". The port would be randomly chosen + # by libvirt from the range 49152-49215 if omitted, at least since + # version 0.7.4 ... 
+ if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then + hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}" + # Hostname might be a FQDN + migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") + case $hypervisor in + qemu) + # For quiet ancient libvirt versions a migration port is needed + # and the URI must not contain the "//". Newer versions can handle + # the "bad" URI. + echo "tcp:${migrate_target}:${OCF_RESKEY_migrateport}" + ;; + xen) + echo "${migrate_target}" + ;; + *) + ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}." + ;; + esac + fi +} + +VirtualDomain_migrate_to() { + local rc + local target_node + local remoteuri + local transport_suffix + local migrateuri + local migrate_opts + local migrate_pid + + target_node="$OCF_RESKEY_CRM_meta_migrate_target" + + if VirtualDomain_status; then + # Find out the remote hypervisor to connect to. That is, turn + # something like "qemu://foo:9999/system" into + # "qemu+tcp://bar:9999/system" + + if [ -n "${OCF_RESKEY_remoteuri}" ]; then + remoteuri=`echo "${OCF_RESKEY_remoteuri}" | + sed "s/%n/$target_node/g"` + else + if [ -n "${OCF_RESKEY_migration_transport}" ]; then + transport_suffix="+${OCF_RESKEY_migration_transport}" + fi + + # append user defined suffix if virsh target should differ from cluster node name + if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then + # Hostname might be a FQDN + target_node=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") + fi + + # a remote user has been defined to connect to target_node + if echo ${OCF_RESKEY_migration_user} | grep -q "^[a-z][-a-z0-9]*$" ; then + target_node="${OCF_RESKEY_migration_user}@${target_node}" + fi + + # Scared of that sed expression? So am I. 
:-) + remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") + fi + + # User defined migrateuri or do we make one? + migrate_opts="$OCF_RESKEY_migrate_options" + + # migration_uri is directly set + if [ -n "${OCF_RESKEY_migrateuri}" ]; then + migrateuri=`echo "${OCF_RESKEY_migrateuri}" | + sed "s/%n/$target_node/g"` + + # extract migrationuri from options + elif echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then + migrateuri=`echo "$migrate_opts" | + sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"` + + # auto generate + else + migrateuri=`mk_migrateuri` + fi + + # remove --migrateuri from migration_opts + migrate_opts=`echo "$migrate_opts" | + sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/"` + + + # save config if needed + if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then + save_config + fi + + if ocf_is_true "${OCF_RESKEY_start_resources}"; then + start_resources $remoteuri + rc=$? + if [ $rc -eq $OCF_ERR_GENERIC ]; then + return $rc + fi + fi + + # Live migration speed limit + if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then + ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})." + virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed} + fi + + # OK, we know where to connect to. Now do the actual migration. + ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)." 
+ virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri & + + migrate_pid=${!} + + # Live migration downtime interval + # Note: You can set downtime only while live migration is in progress + if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then + sleep 2 + ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})." + virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime} + fi + + wait ${migrate_pid} + + rc=$? + if [ $rc -ne 0 ]; then + ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc" + return $OCF_ERR_GENERIC + else + ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded." + return $OCF_SUCCESS + fi + else + ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!" + return $OCF_ERR_GENERIC + fi +} + +VirtualDomain_migrate_from() { + # systemd drop-in to stop domain before libvirtd terminates services + # during shutdown/reboot + if systemd_is_running ; then + systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service" + systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target" + systemctl start virt-guest-shutdown.target + fi + + while ! VirtualDomain_monitor; do + sleep 1 + done + ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded." + # save config if needed + if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then + save_config + fi + return $OCF_SUCCESS +} + +VirtualDomain_monitor() { + # First, check the domain status. If that returns anything other + # than $OCF_SUCCESS, something is definitely wrong. + VirtualDomain_status + rc=$? + if [ ${rc} -eq ${OCF_SUCCESS} ]; then + # OK, the generic status check turned out fine. Now, if we + # have monitor scripts defined, run them one after another. 
+ for script in ${OCF_RESKEY_monitor_scripts}; do + script_output="$($script 2>&1)" + script_rc=$? + if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then + # A monitor script returned a non-success exit + # code. Stop iterating over the list of scripts, log a + # warning message, and propagate $OCF_ERR_GENERIC. + ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}" + rc=$OCF_ERR_GENERIC + break + else + ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}" + fi + done + fi + + update_emulator_cache + update_utilization + # Save configuration on monitor as well, so we will have a better chance of + # having fresh and up to date config files on all nodes. + if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then + save_config + fi + + return ${rc} +} + +VirtualDomain_validate_all() { + if ocf_is_true $OCF_RESKEY_force_stop && [ -n "$OCF_RESKEY_snapshot" ]; then + ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together." + return $OCF_ERR_CONFIGURED + fi + + # check if we can read the config file (otherwise we're unable to + # deduce $DOMAIN_NAME from it, see below) + if [ ! -r $OCF_RESKEY_config ]; then + if ocf_is_probe; then + ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe." + elif [ "$__OCF_ACTION" = "stop" ]; then + ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped." + else + ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable." + fi + return $OCF_ERR_INSTALLED + fi + + if [ -z $DOMAIN_NAME ]; then + ocf_exit_reason "Unable to determine domain name." + return $OCF_ERR_INSTALLED + fi + + # Check if csync2 is available when config tells us we might need it. + if ocf_is_true $OCF_RESKEY_sync_config_on_stop; then + check_binary csync2 + fi + + # Check if migration_speed is a decimal value + if ! 
ocf_is_decimal ${OCF_RESKEY_migration_speed}; then + ocf_exit_reason "migration_speed has to be a decimal value" + return $OCF_ERR_CONFIGURED + fi + + # Check if migration_downtime is a decimal value + if ! ocf_is_decimal ${OCF_RESKEY_migration_downtime}; then + ocf_exit_reason "migration_downtime has to be a decimal value" + return $OCF_ERR_CONFIGURED + fi + + if ocf_is_true "${OCF_RESKEY_stateless}" && [ -z "${OCF_RESKEY_backingfile}" ]; then + ocf_exit_reason "Stateless functionality can't be achieved without a backing file." + return $OCF_ERR_CONFIGURED + fi +} + +VirtualDomain_getconfig() { + # Grab the virsh uri default, but only if hypervisor isn't set + : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)} + + # Set options to be passed to virsh: + VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" + + # Retrieve the domain name from the xml file. + DOMAIN_NAME=`egrep '[[:space:]]*.*[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null | sed -e 's/[[:space:]]*\(.*\)<\/name>[[:space:]]*$/\1/'` + + EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state" +} + +OCF_REQUIRED_PARAMS="config" +OCF_REQUIRED_BINARIES="virsh sed" +ocf_rarun $* diff --git a/heartbeat/WAS b/heartbeat/WAS new file mode 100755 index 0000000..15b56e9 --- /dev/null +++ b/heartbeat/WAS @@ -0,0 +1,572 @@ +#!/bin/sh +# +# +# WAS +# +# Description: Manages a Websphere Application Server as an HA resource +# +# +# Author: Alan Robertson +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# Copyright: (C) 2002 - 2005 International Business Machines, Inc. +# +# +# An example usage in /etc/ha.d/haresources: +# node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml +# +# See usage() function below for more details... 
+# +# OCF parameters are as below: +# OCF_RESKEY_config +# (WAS-configuration file, used for the single server edition of WAS) +# OCF_RESKEY_port +# (WAS-(snoop)-port-number, used for the advanced edition of WAS) + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +WASDIR=/opt/WebSphere/AppServer +if + [ ! -d $WASDIR ] +then + WASDIR=/usr/WebSphere/AppServer +fi +STARTTIME=300 # 5 minutes +DEFAULT_WASPORTS="9080" +# +# +WASBIN=$WASDIR/bin +DEFAULT=$WASDIR/config/server-cfg.xml + +# +# Print usage message +# +usage() { + methods=`WAS_methods | grep -v methods` + methods=`echo $methods | tr ' ' '|'` + cat <<-END + usage: $0 ($methods) + + For the single server edition of WAS, you have to set the following + environment variable: + OCF_RESKEY_config + (WAS-configuration file) + + For the advanced edition of WAS, you have to set the following + environment variable: + OCF_RESKEY_port + (WAS-(snoop)-port-number) + + $0 manages a Websphere Application Server (WAS) as an HA resource + + The 'start' operation starts WAS. + The 'stop' operation stops WAS. + The 'status' operation reports whether WAS is running + The 'monitor' operation reports whether the WAS seems to be working + (httpd also needs to be working for this case) + The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid + The 'methods' operation reports on the methods $0 supports + + This is known to work with the Single Server edition of Websphere, + and is believed to work with the Advanced edition too. + Since the Advanced Edition has no configuration file (it's in the + database) you need to give a port number instead of a + configuration file for this config parameter. 
+ + The default configuration file for the single server edition is: + $DEFAULT + + The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS + + The start and stop operations must be run as root. + + The status operation will report a pid of "-" for the + WAS root process using unless it is run as root. + + If you don't have xmllint on your system, parsing of WAS + configuration files is very primitive. + In this case, the port specification we need from the XML + config file has to be on the same line as the + first part of the tag. + + We run servlet/snoop on the first transport port listed in + the config file for the "monitor" operation. + + END +} + +meta_data() { + cat < + + +1.0 + + +Resource script for WAS. It manages a Websphere Application Server (WAS) as +an HA resource. + +Manages a WebSphere Application Server instance + + + + +The WAS-configuration file. + +configuration file + + + + + +The WAS-(snoop)-port-number. + +port + + + + + + + + + + + + + + +END +} + +# +# Reformat the XML document in a sort of canonical form +# if we can. If we don't have xmllint, we just cat it out +# and hope for the best ;-) +# +xmlcat() { + if + [ "X$XMLcat" = X ] + then + XMLcat=`which xmllint 2>/dev/null` + if + [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] + then + XMLcat=cat + else + XMLcat="$XMLcat --recover --format" + fi + fi + for j in "$@" + do + ${XMLcat} "$j" + done +} + +# +#This is a bit skanky, but it works anyway... +# +# +# +# +# +# It's not really skanky if we can find xmllint on the system, because it +# reformats tags so they are all on one line, which is all we need... +# + +# +# Get the numbers of the ports WAS should be listening on... +# +# If we don't have xmllint around, then the applicationserver and the +# port= specification have to be on the same line in the XML config file. 
+# +GetWASPorts() { + case $1 in + [0-9]*) echo "$1" | tr ',' '\012';; + *) + xmlcat $1 | grep -i 'transports.*applicationserver:HTTPTransport' | + grep port= | + sed -e 's%.*port= *"* *%%' \ + -e 's%[^0-9][^0-9]*.*$%%' + # Delete up to port=, throw away optional quote and optional + # white space. + # Throw away everything after the first non-digit. + # This should leave us the port number all by itself... + esac +} + +# +# We assume that the first port listed in the +# is the one we should run servlet/snoop on. +# +GetWASSnoopPort() { + GetWASPorts "$@" | head -n1 +} + +# +# Return information on the processname/id for the WAS ports +# +# pid/java is the expected output. Several lines, one per port... +# +# +WASPortInfo() { + pat="" + once=yes + PortCount=0 + for j in $* + do + case $pat in + "") pat="$j";; + *) pat="$pat|$j";; + esac + PortCount=`expr $PortCount + 1` + done + netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' +} + +# +# Return the number of WAS ports which are open +# +CheckWASPortsInUse() { + count=`WASPortInfo "$@" | wc -l` + echo $count +} + +# +# Return the pid(s) of the processes that have WAS ports open +# +WASPIDs() { + WASPortInfo "$@" | sort -u | cut -f1 -d/ +} + +# +# The version of ps that returns all processes and their (long) args +# It's only used by WAS_procs, which isn't used for anything ;-) +# +ps_long() { + ps axww +} + + +# +# The total set of WAS processes (single server only) +# +WAS_procs() { + ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1 +} + + + +# +# methods: What methods/operations do we support? +# +WAS_methods() { + cat <<-! + start + stop + status + methods + validate-all + meta-data + usage + ! + if + have_binary $WGET + then + echo monitor + fi +} + +# +# Return WAS status (silently) +# +WAS_status() { + WASPorts=`GetWASPorts $1` + PortsInUse=`CheckWASPortsInUse $WASPorts` + case $PortsInUse in + 0) false;; + *) true;; + esac +} + +# +# Report on WAS status to stdout... 
+# +WAS_report_status() { + WASPorts=`GetWASPorts $1` + PortCount=`echo $WASPorts | wc -w` + PortCount=`echo $PortCount` + PortsInUse=`CheckWASPortsInUse $WASPorts` + case $PortsInUse in + 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; + *) + pids=`WASPIDs $WASPorts` + if + [ $PortsInUse -ge $PortCount ] + then + ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." + else + ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." + fi + return $OCF_SUCCESS;; + esac +} + +# +# Monitor WAS - does it really seem to be working? +# +# For this we invoke the snoop applet via wget. +# +# This is actually faster than WAS_status above... +# +WAS_monitor() { + trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0 + tmpfile=`maketempfile` || return 1 + SnoopPort=`GetWASSnoopPort $1` + output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/servlet/snoop 2>&1` + rc=$? + if + [ $rc -eq 0 ] + then + if + grep -i 'user-agent.*Wget' $tmpfile >/dev/null + then + : OK + else + ocf_log "err" "WAS: $1: no user-agent from snoop application" + rc=$OCF_ERR_GENERIC + fi + else + ocf_log "err" "WAS: $1: wget failure: $output" + rc=$OCF_ERR_GENERIC + fi + return $rc +} + +# +# Start WAS instance +# +WAS_start() { +# Launch Arguments: +# +# -configFile +# -nodeName +# -serverName +# -oltEnabled +# -oltHost +# -oltPort +# -debugEnabled +# -jdwpPort +# -debugSource +# -serverTrace +# -serverTraceFile +# -script [] +# -platform +# -noExecute +# -help + if + [ -x $WASBIN/startServer.sh ] + then + cmd="$WASBIN/startServer.sh -configFile $1" + else + cmd="$WASBIN/startupServer.sh" + fi + + if + ocf_run $cmd + then + if + WAS_wait_4_start $STARTTIME "$@" + then + #true + return $OCF_SUCCESS + else + ocf_log "err" "WAS server $1 did not start correctly" + return $OCF_ERR_GENERIC + fi + else + #false + return $OCF_ERR_GENERIC + fi +} + +# +# Wait for WAS to actually start up. 
+# +# It seems to take between 30 and 60 seconds for it to +# start up on a trivial WAS instance. +# +WAS_wait_4_start() { + max=$1 + retries=0 + shift + while + [ $retries -lt $max ] + do + if + WAS_status "$@" + then + return $OCF_SUCCESS + else + sleep 1 + fi + retries=`expr $retries + 1` + done + WAS_status "$@" +} + + +# +# Shut down WAS +# +WAS_stop() { + # They don't return good return codes... + # And, they seem to allow anyone to stop WAS (!) + if + [ -x $WASBIN/stopServer.sh ] + then + ocf_run $WASBIN/stopServer.sh -configFile $1 + else + WASPorts=`GetWASPorts $1` + kill `WASPIDs $WASPorts` + fi + if + WAS_status $1 + then + ocf_log "err" "WAS: $1 did not stop correctly" + #false + return $OCF_ERR_GENERIC + else + #true + return $OCF_SUCCESS + fi +} + +# +# Check if the port is valid +# +CheckPort() { + ocf_is_decimal "$1" && [ $1 -gt 0 ] +} + +WAS_validate_all() { + if [ -x $WASBIN/startServer.sh ]; then + # $arg should be config file + if [ ! -f "$arg" ]; then + ocf_log err "Configuration file [$arg] does not exist" + exit $OCF_ERR_ARGS + fi + + # $arg should specify a valid port number at the very least + local WASPorts=`GetWASPorts $arg` + if [ -z "$WASPorts" ]; then + ocf_log err "No port number specified in configuration file [$arg]" + exit $OCF_ERR_CONFIGURED + fi + + local port + local have_valid_port=false + for port in $WASPorts; do + if CheckPort $port; then + have_valid_port=true + break + fi + done + if [ "false" = "$have_valid_port" ]; then + ocf_log err "No valid port number specified in configuration file [$arg]" + exit $OCF_ERR_CONFIGURED + fi + + elif [ -x $WASBIN/startupServer.sh ]; then + # $arg should be port number + if CheckPort "$arg"; then + ocf_log err "Port number is required but [$arg] is not valid port number" + exit $OCF_ERR_ARGS + fi + else + # Do not know hot to validate_all + ocf_log warn "Do not know how to validate-all, assuming validation OK" + return $OCF_SUCCESS + fi +} +# +# 'main' starts here... 
+# + +if + ( [ $# -ne 1 ] ) +then + usage + exit $OCF_ERR_ARGS +fi + +# +# Supply default configuration parameter(s) +# + +if + ( [ -z $OCF_RESKEY_config ] && [ -z $OCF_RESKEY_port ] ) +then + if + [ -f $DEFAULT ] + then + arg=$DEFAULT + else + arg=$DEFAULT_WASPORTS + fi +elif + [ ! -z $OCF_RESKEY_config ] +then + arg=$OCF_RESKEY_config +else + arg=$OCF_RESKEY_port +fi + +if + [ ! -f $arg ] +then + case $arg in + [0-9]*) ;; # ignore port numbers... + *) ocf_log "err" "WAS configuration file $arg does not exist!" + usage + exit $OCF_ERR_ARGS;; + esac +fi + + +# What kind of method was invoked? +case "$1" in + + meta-data) meta_data + exit $OCF_SUCCESS;; + + start) WAS_start $arg + exit $?;; + + stop) WAS_stop $arg + exit $?;; + + status) WAS_report_status $arg + exit $?;; + + monitor) WAS_monitor $arg + exit $?;; + + validate-all) WAS_validate_all $arg + exit $?;; + + methods) WAS_methods + exit $?;; + + usage) usage + exit $OCF_SUCCESS;; + + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/heartbeat/WAS6 b/heartbeat/WAS6 new file mode 100755 index 0000000..9e18cd6 --- /dev/null +++ b/heartbeat/WAS6 @@ -0,0 +1,546 @@ +#!/bin/sh +# WAS6 +# +# Description: Manages a Websphere Application Server as an HA resource +# +# +# Author: Ru Xiang Min +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# Copyright: (C) 2006 International Business Machines China, Ltd., Inc. +# +# +# An example usage in /etc/ha.d/haresources: +# node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml +# +# See usage() function below for more details... +# +# OCF parameters are as below: +# OCF_RESKEY_profile +# (WAS profile name, used for the single server edition of WAS6) + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +WAS_DIR=/opt/IBM/WebSphere/AppServer +if + [ ! -d $WAS_DIR ] +then + WAS_DIR=/usr/IBM/WebSphere/AppServer +fi +STARTTIME=300 # 5 minutes +DEFAULT_WASPORTS="9080" +# +# +WAS_BIN=$WAS_DIR/bin +DEFAULT=default +# +# Print usage message +# +usage() { + methods=`WAS_methods | grep -v methods` + methods=`echo $methods | tr ' ' '|'` + cat <<-END + usage: $0 ($methods) + + For the single server edition of WAS6, you have to set the following + enviroment virable: + OCF_RESKEY_profile + (WAS profile name) + + + $0 manages a Websphere Application Server 6(WAS6) as an HA resource + + The 'start' operation starts WAS6. + The 'stop' operation stops WAS6. + The 'status' operation reports whether WAS6 is running + The 'monitor' operation reports whether the WAS6 seems to be working + (httpd also needs to be working for this case) + The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profileName ) is valid + The 'methods' operation reports on the methods $0 supports + + This is known to work with the Single Server edition of Websphere. + + The default profile name for the single server edition is: + $DEFAULT + + The start and stop operations must be run as root. + + The status operation will report a pid of "-" for the + WAS root process using unless it is run as root. + + If you don't have xmllint on your system, parsing of WAS + configuration files is very primitive. + + We run servlet/snoop on the seventh transport port listed in + the config file for the "monitor" operation. + + END +} + +meta_data() { + cat < + + +1.0 + + +Resource script for WAS6. It manages a Websphere Application Server (WAS6) as +an HA resource. + +Manages a WebSphere Application Server 6 instance + + + + +The WAS profile name. + +profile name + + + + + + + + + + + + + + +END +} + +# +# Reformat the XML document in a sort of canonical form +# if we can. 
If we don't have xmllint, we just cat it out +# and hope for the best ;-) +# +xmlcat() { + if + [ "X$XMLcat" = X ] + then + XMLcat=`which xmllint 2>/dev/null` + if + [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] + then + XMLcat=cat + else + XMLcat="$XMLcat --recover --format" + fi + fi + for j in "$@" + do + ${XMLcat} "$j" + done +} + +# +#This is a bit skanky, but it works anyway... +# +# It's not really skanky if we can find xmllint on the system, because it +# reformats tags so they are all on one line, which is all we we need... +# +# +# Get the numbers of the ports WAS should be listening on... +# +# If we don't have xmllint around, then the applicationserver and the +# port= specification have to be on the same line in the XML config file. +# +GetWASPorts() { + case $1 in + [0-9]*) echo "$1" | tr ',' '\012';; + *) + xmlcat ${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml | + grep port= | + sed -e 's%.*port= *"* *%%' \ + -e 's%[^0-9][^0-9]*.*$%%' + # Delete up to port=, throw away optional quote and optional + # white space. + # Throw away everything after the first non-digit. + # This should leave us the port number all by itself... + esac +} + +# +# We assume that the seventh port listed in the serverindex.xml +# is the one we should run servlet/snoop on. +# +GetWASSnoopPort() { + GetWASPorts "$@" | sed -n '7p' +} + +# +# Return information on the processname/id for the WAS ports +# +# pid/java is the expected output. Several lines, one per port... 
+# +# +WASPortInfo() { + pat="" + once=yes + PortCount=0 + for j in $* + do + case $pat in + "") pat="$j";; + *) pat="$pat|$j";; + esac + PortCount=`expr $PortCount + 1` + done + netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' +} + +# +# Return the number of WAS ports which are open +# +CheckWASPortsInUse() { + count=`WASPortInfo "$@" | wc -l` + echo $count +} + +# +# Return the pid(s) of the processes that have WAS ports open +# +WASPIDs() { + WASPortInfo "$@" | sort -u | cut -f1 -d/ +} + +# +# The version of ps that returns all processes and their (long) args +# It's only used by WAS_procs, which isn't used for anything ;-) +# +ps_long() { + ps axww +} + + +# +# The total set of WAS processes (single server only) +# +WAS_procs() { + ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1 +} + + + +# +# methods: What methods/operations do we support? +# +WAS_methods() { + cat <<-! + start + stop + status + methods + validate-all + meta-data + usage + ! + if + have_binary $WGET + then + echo " monitor" + fi +} + +# +# Return WAS status (silently) +# +WAS_status() { + WASPorts=`GetWASPorts $1` + PortsInUse=`CheckWASPortsInUse $WASPorts` + case $PortsInUse in + 0) false;; + *) true;; + esac +} + +# +# Report on WAS status to stdout... +# +WAS_report_status() { + WASPorts=`GetWASPorts $1` + PortCount=`echo $WASPorts | wc -w` + PortCount=`echo $PortCount` + PortsInUse=`CheckWASPortsInUse $WASPorts` + case $PortsInUse in + 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; + *) + pids=`WASPIDs $WASPorts` + if + [ $PortsInUse -ge $PortCount ] + then + ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." + else + ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." + fi + return $OCF_SUCCESS;; + esac +} + +# +# Monitor WAS - does it really seem to be working? +# +# For this we invoke the snoop applet via wget. +# +# This is actually faster than WAS_status above... 
+#
+# Arguments: $1 - passed to GetWASSnoopPort and used in the log messages.
+# Returns:   0 when wget succeeds and the fetched page contains a Wget
+#            user-agent line, $OCF_ERR_GENERIC otherwise.
+# Exits the script with status 1 when maketempfile fails.
+#
+WAS_monitor() {
+	# Trap 0 fires when the shell exits: remove the download then.
+	trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
+	tmpfile=`maketempfile` || exit 1
+	SnoopPort=`GetWASSnoopPort $1`
+	# wget's own messages are captured so they can be logged on failure.
+	output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/snoop 2>&1`
+	rc=$?
+	if
+		[ $rc -eq 0 ]
+	then
+		# The page is expected to echo our request headers back,
+		# including the Wget user-agent.
+		if
+			grep -i 'user-agent.*Wget' $tmpfile >/dev/null
+		then
+			: OK
+		else
+			ocf_log "err" "WAS: $1: no user-agent from snoop application"
+			rc=$OCF_ERR_GENERIC
+		fi
+	else
+		ocf_log "err" "WAS: $1: wget failure: $output"
+		rc=$OCF_ERR_GENERIC
+	fi
+	return $rc
+}
+
+#
+# Start WAS instance
+#
+WAS_start() {
+# Launch Arguments:
+# -nowait
+# -quiet
+# -logfile
+# -replacelog
+# -trace
+# -script [