diff options
Diffstat (limited to '')
-rwxr-xr-x | heartbeat/garbd | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/heartbeat/garbd b/heartbeat/garbd new file mode 100755 index 0000000..24a6e69 --- /dev/null +++ b/heartbeat/garbd @@ -0,0 +1,436 @@ +#!/bin/sh +# +# Copyright (c) 2015 Damien Ciabrini <dciabrin@redhat.com> +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +## +# README. +# +# Resource agent for garbd, the Galera arbitrator +# +# You can use this agent if you run an even number of galera nodes, +# and you want an additional node to avoid split-brain situations. +# +# garbd requires that a Galera cluster is running, so make sure to +# add a proper ordering constraint to the cluster, e.g.: +# +# pcs constraint order galera-master then garbd +# +# If you add garbd to the cluster while Galera is not running, you +# might want to disable it before setting up ordering constraint, e.g.: +# +# pcs resource create garbd garbd \ +# wsrep_cluster_address=gcomm://node1:4567,node2:4567 \ +# meta target-role=stopped +# +# Use location constraints to avoid running galera and garbd on +# the same node, e.g.: +# +# pcs constraint colocation add garbd with galera-master -INFINITY +# pcs constraint location garbd prefers node3=INFINITY +# +## + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### +# Set default paramenter values + +OCF_RESKEY_binary_default="/usr/sbin/garbd" +OCF_RESKEY_log_default="/var/log/garbd.log" +OCF_RESKEY_pid_default="/var/run/garbd.pid" +OCF_RESKEY_user_default="mysql" +if [ "X${HOSTOS}" = "XOpenBSD" ];then + OCF_RESKEY_group_default="_mysql" +else + OCF_RESKEY_group_default="mysql" +fi +OCF_RESKEY_options_default="" +OCF_RESKEY_wsrep_cluster_address_default="" +OCF_RESKEY_wsrep_cluster_name_default="" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} +: ${OCF_RESKEY_options=${OCF_RESKEY_options_default}} +: ${OCF_RESKEY_wsrep_cluster_address=${OCF_RESKEY_wsrep_cluster_address_default}} +: ${OCF_RESKEY_wsrep_cluster_name=${OCF_RESKEY_wsrep_cluster_name_default}} + +usage() { + cat <<UEND +usage: $0 (start|stop|validate-all|meta-data|status|monitor) + +$0 manages a Galera arbitrator. + +The 'start' operation starts the arbitrator. +The 'stop' operation stops the arbitrator. +The 'status' operation reports whether the arbitrator is running +The 'monitor' operation reports whether the arbitrator seems to be working +The 'validate-all' operation reports whether the parameters are valid + +UEND +} + +meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="garbd" version="1.0"> +<version>1.0</version> + +<longdesc lang="en"> +Resource script for managing Galera arbitrator. +</longdesc> +<shortdesc lang="en">Manages a galera arbitrator instance</shortdesc> +<parameters> + +<parameter name="binary" unique="0" required="0"> +<longdesc lang="en"> +Location of the Galera arbitrator binary +</longdesc> +<shortdesc lang="en">garbd server binary</shortdesc> +<content type="string" default="${OCF_RESKEY_binary_default}" /> +</parameter> + +<parameter name="user" unique="0" required="0"> +<longdesc lang="en"> +User running the garbd process +</longdesc> +<shortdesc lang="en">garbd user</shortdesc> +<content type="string" default="${OCF_RESKEY_user_default}" /> +</parameter> + +<parameter name="group" unique="0" required="0"> +<longdesc lang="en"> +Group running garbd (for logfile permissions) +</longdesc> +<shortdesc lang="en">garbd group</shortdesc> +<content type="string" default="${OCF_RESKEY_group_default}"/> +</parameter> + +<parameter name="log" unique="0" required="0"> +<longdesc lang="en"> +The logfile to be used for garbd. +</longdesc> +<shortdesc lang="en">Galera arbitrator log file</shortdesc> +<content type="string" default="${OCF_RESKEY_log_default}"/> +</parameter> + +<parameter name="pid" unique="0" required="0"> +<longdesc lang="en"> +The pidfile to be used for garbd. +</longdesc> +<shortdesc lang="en">Galera arbitrator pidfile</shortdesc> +<content type="string" default="${OCF_RESKEY_pid_default}"/> +</parameter> + +<parameter name="options" unique="0" required="0"> +<longdesc lang="en"> +Additional parameters which are passed to garbd on startup. +</longdesc> +<shortdesc lang="en">Additional parameters to pass to garbd</shortdesc> +<content type="string" default="${OCF_RESKEY_options_default}"/> +</parameter> + +<parameter name="wsrep_cluster_address" unique="0" required="1"> +<longdesc lang="en"> +The galera cluster address. This takes the form of: +gcomm://node:port,node:port,node:port + +Unlike Galera servers, port is mandatory for garbd. +</longdesc> +<shortdesc lang="en">Galera cluster address</shortdesc> +<content type="string" default="${OCF_RESKEY_wsrep_cluster_address_default}"/> +</parameter> + +<parameter name="wsrep_cluster_name" unique="0" required="1"> +<longdesc lang="en"> +The group name of the Galera cluster to connect to. +</longdesc> +<shortdesc lang="en">Galera cluster name</shortdesc> +<content type="string" default="${OCF_RESKEY_wsrep_cluster_name_default}"/> +</parameter> + +</parameters> + +<actions> +<action name="start" timeout="20s" /> +<action name="stop" timeout="20s" /> +<action name="monitor" depth="0" timeout="20s" interval="20s" /> +<action name="validate-all" timeout="5s" /> +<action name="meta-data" timeout="5s" /> +</actions> +</resource-agent> +END +} + + +garbd_start() +{ + local rc + local pid + local start_wait + local garbd_params + + garbd_status info + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_exit_reason "garbd started outside of the cluster's control" + return $OCF_ERR_GENERIC; + fi + + touch $OCF_RESKEY_log + chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log + chmod 0640 $OCF_RESKEY_log + [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log + + garbd_params="--address=${OCF_RESKEY_wsrep_cluster_address} \ + --group ${OCF_RESKEY_wsrep_cluster_name} \ + --log ${OCF_RESKEY_log}" + + if [ ! -z "${OCF_RESKEY_options}" ]; then + garbd_params="${garbd_params} --options=${OCF_RESKEY_options}" + fi + + # garbd has no parameter to run as a specific user, + # so we need to start it by our own means + pid=$(su - -s /bin/sh $OCF_RESKEY_user -c "${OCF_RESKEY_binary} ${garbd_params} >/dev/null 2>&1 & echo \$!") + + # garbd doesn't create a pidfile either, so we create our own + echo $pid > $OCF_RESKEY_pid + if [ $? -ne 0 ]; then + ocf_exit_reason "Cannot create pidfile for garbd at $OCF_RESKEY_pid (rc=$?), please check your installation" + return $OCF_ERR_GENERIC + fi + + # Spin waiting for garbd to connect to the cluster. + # Let the CRM/LRM time us out if required. + start_wait=1 + while [ $start_wait -eq 1 ]; do + garbd_monitor info + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_exit_reason "garbd failed to start (pid=$pid), check logs in ${OCF_RESKEY_log}" + return $OCF_ERR_GENERIC + elif [ $rc -eq $OCF_SUCCESS ]; then + start_wait=0 + fi + sleep 2 + done + + ocf_log info "garbd connected to cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" + return $OCF_SUCCESS +} + +garbd_status() +{ + local loglevel=$1 + local rc + ocf_pidfile_status $OCF_RESKEY_pid + rc=$? + + if [ $rc -eq 0 ]; then + return $OCF_SUCCESS + elif [ $rc -eq 2 ]; then + return $OCF_NOT_RUNNING + else + # clean up if pidfile is stale + if [ $rc -eq 1 ]; then + ocf_log $loglevel "garbd not running: removing old PID file" + rm -f $OCF_RESKEY_pid + fi + return $OCF_ERR_GENERIC + fi +} + +_port_by_pid() +{ + local pid + pid="$1" + if have_binary "netstat"; then + netstat -tnp 2>/dev/null | grep -s -q "ESTABLISHED.*${pid}/" + else + ss -Htnp 2>/dev/null | grep -s -q "^ESTAB.*pid=${pid}" + fi +} + +garbd_monitor() +{ + local rc + local pid + local loglevel=$1 + + # Set loglevel to info during probe + if ocf_is_probe; then + loglevel="info" + fi + + garbd_status $loglevel + rc=$? + + # probe just wants to know if garbd is running or not + if ocf_is_probe && [ $rc -ne $OCF_SUCCESS ]; then + rc=$OCF_NOT_RUNNING + fi + + # Consider garbd is working if it's connected to at least + # one node in the galera cluster. + # Note: a Galera node in Non-Primary state will be + # stopped by the galera RA. So we can assume that + # garbd will always be connected to the right partition + if [ $rc -eq $OCF_SUCCESS ]; then + pid=`cat $OCF_RESKEY_pid 2> /dev/null ` + _port_by_pid $pid + if [ $? -ne 0 ]; then + ocf_log $loglevel "garbd disconnected from cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" + rc=$OCF_ERR_GENERIC + fi + fi + + return $rc +} + +garbd_stop() +{ + local rc + local pid + + if [ ! -f $OCF_RESKEY_pid ]; then + ocf_log info "garbd is not running" + return $OCF_SUCCESS + fi + + pid=`cat $OCF_RESKEY_pid 2> /dev/null ` + + ocf_log info "stopping garbd" + + # make sure the process is stopped + ocf_stop_processes TERM 10 $pid + rc=$? + + if [ $rc -ne 0 ]; then + return $OCF_ERR_GENERIC + else + rm -f $OCF_RESKEY_pid + ocf_log info "garbd stopped" + return $OCF_SUCCESS + fi +} + +garbd_validate() +{ + if ! have_binary "$OCF_RESKEY_binary"; then + ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary" + return $OCF_ERR_INSTALLED; + fi + + if ! have_binary "netstat"; then + if ! have_binary "ss"; then + ocf_exit_reason "Setup problem: couldn't find command: netstat or ss" + return $OCF_ERR_INSTALLED; + fi + fi + + if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then + ocf_exit_reason "garbd must be configured with a wsrep_cluster_address value." + return $OCF_ERR_CONFIGURED + fi + + # unlike galera RA, ports must be set in cluster address for garbd + # https://github.com/codership/galera/issues/98 + for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do + echo $node | grep -s -q ':[1-9][0-9]*$' + if [ $? -ne 0 ]; then + ocf_exit_reason "wsrep_cluster_address must specify ports (gcomm://node1:port,node2:port)." + return $OCF_ERR_CONFIGURED + fi + done + + # Ensure that the encryption method is set if garbd is configured + # to use SSL. + echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_(key|cert)=' + if [ $? -eq 0 ]; then + echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_cipher=' + if [ $? -ne 0 ]; then + ocf_exit_reason "option socket.ssl_cipher must be set if SSL is enabled." + return $OCF_ERR_CONFIGURED + fi + fi + + if [ -z "$OCF_RESKEY_wsrep_cluster_name" ]; then + ocf_exit_reason "garbd must be configured with a wsrep_cluster_name value." + return $OCF_ERR_CONFIGURED + fi + + if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then + ocf_exit_reason "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + if ! getent group $OCF_RESKEY_group >/dev/null 2>&1; then + ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist" + return $OCF_ERR_INSTALLED + fi + + return $OCF_SUCCESS +} + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +garbd_validate +rc=$? + +# trap configuration errors early, but don't block stop in such cases +LSB_STATUS_STOPPED=3 +if [ $rc -ne 0 ]; then + case "$1" in + stop) exit $OCF_SUCCESS;; + status) exit $LSB_STATUS_STOPPED;; + *) exit $rc;; + esac +fi + +# What kind of method was invoked? +case "$1" in + start) garbd_start;; + stop) garbd_stop;; + status) garbd_status err;; + monitor) garbd_monitor err;; + promote) garbd_promote;; + demote) garbd_demote;; + validate-all) exit $OCF_SUCCESS;; + + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac |