summary refs log tree commit diff stats
path: root/heartbeat/pgsql
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-x heartbeat/pgsql 2263
1 files changed, 2263 insertions, 0 deletions
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
new file mode 100755
index 0000000..532063a
--- /dev/null
+++ b/heartbeat/pgsql
@@ -0,0 +1,2263 @@
+#!/bin/sh
+#
+# Description: Manages a PostgreSQL Server as an OCF High-Availability
+# resource
+#
+# Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA
+# Florian Haas (florian@linbit.com) -- makeover
+# Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication
+# David Corlette (dcorlette@netiq.com) -- add support for non-standard library locations and non-standard port
+#
+# Copyright: 2006-2012 Serge Dubrouski <sergeyfd@gmail.com>
+# and other Linux-HA contributors
+# License: GNU General Public License (GPL)
+#
+###############################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Use runuser if available for SELinux.
+if [ -x /sbin/runuser ]; then
+ SU=runuser
+else
+ SU=su
+fi
+
+#
+# Get PostgreSQL Configuration parameter
+#
+get_pgsql_param() {
+ local param_name
+
+ param_name=$1
+ perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) {
+ \$dir=\$1;
+ \$dir =~ s/\s*\#.*//;
+ \$dir =~ s/^'(\S*)'/\$1/;
+ print \$dir;}"
+
+ perl -ne "$perl_code" < $OCF_RESKEY_config
+}
+
+# Defaults
+OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl
+OCF_RESKEY_psql_default=/usr/bin/psql
+OCF_RESKEY_pgdata_default=/var/lib/pgsql/data
+OCF_RESKEY_pgdba_default=postgres
+OCF_RESKEY_pghost_default=""
+OCF_RESKEY_pgport_default=5432
+OCF_RESKEY_pglibs_default=/usr/lib
+OCF_RESKEY_start_opt_default=""
+OCF_RESKEY_ctl_opt_default=""
+OCF_RESKEY_pgdb_default=template1
+OCF_RESKEY_logfile_default=/dev/null
+OCF_RESKEY_socketdir_default=""
+OCF_RESKEY_stop_escalate_default=90
+OCF_RESKEY_monitor_user_default=""
+OCF_RESKEY_monitor_password_default=""
+OCF_RESKEY_monitor_sql_default="select now();"
+OCF_RESKEY_check_wal_receiver_default="false"
+# Defaults for replication
+OCF_RESKEY_rep_mode_default=none
+OCF_RESKEY_node_list_default=""
+OCF_RESKEY_restore_command_default=""
+OCF_RESKEY_archive_cleanup_command_default=""
+OCF_RESKEY_recovery_end_command_default=""
+OCF_RESKEY_master_ip_default=""
+OCF_RESKEY_repuser_default="postgres"
+OCF_RESKEY_primary_conninfo_opt_default=""
+OCF_RESKEY_restart_on_promote_default="false"
+OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp"
+OCF_RESKEY_xlog_check_count_default="3"
+OCF_RESKEY_crm_attr_timeout_default="5"
+OCF_RESKEY_stop_escalate_in_slave_default=90
+OCF_RESKEY_replication_slot_name_default=""
+
+: ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}}
+: ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}}
+: ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}}
+: ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}}
+: ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}}
+: ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}}
+: ${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}}
+: ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf}
+: ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}}
+: ${OCF_RESKEY_ctl_opt=${OCF_RESKEY_ctl_opt_default}}
+: ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}}
+: ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}}
+: ${OCF_RESKEY_socketdir=${OCF_RESKEY_socketdir_default}}
+: ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}}
+: ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}}
+: ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}}
+: ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}}
+: ${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}}
+
+# for replication
+: ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}}
+: ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}}
+: ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}}
+: ${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}}
+: ${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}}
+: ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}}
+: ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}}
+: ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}}
+: ${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}}
+: ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}}
+: ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}}
+: ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}}
+: ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}}
+: ${OCF_RESKEY_replication_slot_name=${OCF_RESKEY_replication_slot_name_default}}
+
+# usage: print a summary of the supported operations on stdout.
+# $0 is expanded inside the here-document, so the text shows the name the
+# script was invoked with.
+# Returns: $OCF_ERR_ARGS
+usage() {
+ cat <<EOF
+ usage: $0 start|stop|status|monitor|promote|demote|notify|meta-data|validate-all|methods
+
+ $0 manages a PostgreSQL Server as an HA resource.
+
+ The 'start' operation starts the PostgreSQL server.
+ The 'stop' operation stops the PostgreSQL server.
+ The 'status' operation reports whether the PostgreSQL is up.
+ The 'monitor' operation reports whether the PostgreSQL is running.
+ The 'promote' operation promotes the PostgreSQL server.
+ The 'demote' operation demotes the PostgreSQL server.
+ The 'validate-all' operation reports whether the parameters are valid.
+ The 'methods' operation reports on the methods $0 supports.
+EOF
+ return $OCF_ERR_ARGS
+}
+
+# meta_data: print the resource agent metadata (XML) on stdout.
+# The here-document is unquoted, so the default="..." attributes are filled in
+# from the OCF_RESKEY_*_default variables when the function runs. The default
+# shown for "config" is built from the effective OCF_RESKEY_pgdata instead.
+meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="pgsql" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource.
+</longdesc>
+<shortdesc lang="en">Manages a PostgreSQL database instance</shortdesc>
+
+<parameters>
+<parameter name="pgctl" unique="0" required="0">
+<longdesc lang="en">
+Path to pg_ctl command.
+</longdesc>
+<shortdesc lang="en">pgctl</shortdesc>
+<content type="string" default="${OCF_RESKEY_pgctl_default}" />
+</parameter>
+
+<parameter name="start_opt" unique="0" required="0">
+<longdesc lang="en">
+Start options (-o start_opt in pg_ctl). "-i -p 5432" for example.
+</longdesc>
+<shortdesc lang="en">start_opt</shortdesc>
+<content type="string" default="${OCF_RESKEY_start_opt_default}" />
+
+</parameter>
+<parameter name="ctl_opt" unique="0" required="0">
+<longdesc lang="en">
+Additional pg_ctl options (-w, -W etc..).
+</longdesc>
+<shortdesc lang="en">ctl_opt</shortdesc>
+<content type="string" default="${OCF_RESKEY_ctl_opt_default}" />
+</parameter>
+
+<parameter name="psql" unique="0" required="0">
+<longdesc lang="en">
+Path to psql command.
+</longdesc>
+<shortdesc lang="en">psql</shortdesc>
+<content type="string" default="${OCF_RESKEY_psql_default}" />
+</parameter>
+
+<parameter name="pgdata" unique="0" required="0">
+<longdesc lang="en">
+Path to PostgreSQL data directory.
+</longdesc>
+<shortdesc lang="en">pgdata</shortdesc>
+<content type="string" default="${OCF_RESKEY_pgdata_default}" />
+</parameter>
+
+<parameter name="pgdba" unique="0" required="0">
+<longdesc lang="en">
+User that owns PostgreSQL.
+</longdesc>
+<shortdesc lang="en">pgdba</shortdesc>
+<content type="string" default="${OCF_RESKEY_pgdba_default}" />
+</parameter>
+
+<parameter name="pghost" unique="0" required="0">
+<longdesc lang="en">
+Hostname/IP address where PostgreSQL is listening
+</longdesc>
+<shortdesc lang="en">pghost</shortdesc>
+<content type="string" default="${OCF_RESKEY_pghost_default}" />
+</parameter>
+
+<parameter name="pgport" unique="0" required="0">
+<longdesc lang="en">
+Port where PostgreSQL is listening
+</longdesc>
+<shortdesc lang="en">pgport</shortdesc>
+<content type="integer" default="${OCF_RESKEY_pgport_default}" />
+</parameter>
+
+<parameter name="pglibs" unique="0" required="0">
+<longdesc lang="en">
+Custom location of the Postgres libraries. If not set, the standard location
+will be used.
+</longdesc>
+<shortdesc lang="en">pglibs</shortdesc>
+<content type="string" default="${OCF_RESKEY_pglibs_default}" />
+</parameter>
+
+<parameter name="monitor_user" unique="0" required="0">
+<longdesc lang="en">
+PostgreSQL user that pgsql RA will user for monitor operations. If it's not set
+pgdba user will be used.
+</longdesc>
+<shortdesc lang="en">monitor_user</shortdesc>
+<content type="string" default="${OCF_RESKEY_monitor_user_default}" />
+</parameter>
+
+<parameter name="monitor_password" unique="0" required="0">
+<longdesc lang="en">
+Password for monitor user.
+</longdesc>
+<shortdesc lang="en">monitor_password</shortdesc>
+<content type="string" default="${OCF_RESKEY_monitor_password_default}" />
+</parameter>
+
+<parameter name="monitor_sql" unique="0" required="0">
+<longdesc lang="en">
+SQL script that will be used for monitor operations.
+</longdesc>
+<shortdesc lang="en">monitor_sql</shortdesc>
+<content type="string" default="${OCF_RESKEY_monitor_sql_default}" />
+</parameter>
+
+<parameter name="config" unique="0" required="0">
+<longdesc lang="en">
+Path to the PostgreSQL configuration file for the instance.
+</longdesc>
+<shortdesc lang="en">Configuration file</shortdesc>
+<content type="string" default="${OCF_RESKEY_pgdata}/postgresql.conf" />
+</parameter>
+
+<parameter name="pgdb" unique="0" required="0">
+<longdesc lang="en">
+Database that will be used for monitoring.
+</longdesc>
+<shortdesc lang="en">pgdb</shortdesc>
+<content type="string" default="${OCF_RESKEY_pgdb_default}" />
+</parameter>
+
+<parameter name="logfile" unique="0" required="0">
+<longdesc lang="en">
+Path to PostgreSQL server log output file.
+</longdesc>
+<shortdesc lang="en">logfile</shortdesc>
+<content type="string" default="${OCF_RESKEY_logfile_default}" />
+</parameter>
+
+<parameter name="socketdir" unique="0" required="0">
+<longdesc lang="en">
+Unix socket directory for PostgreSQL.
+
+If you use PostgreSQL 9.3 or higher and define unix_socket_directories in the postgresql.conf, then you must set socketdir to determine which directory is used for psql command.
+</longdesc>
+<shortdesc lang="en">socketdir</shortdesc>
+<content type="string" default="${OCF_RESKEY_socketdir_default}" />
+</parameter>
+
+<parameter name="stop_escalate" unique="0" required="0">
+<longdesc lang="en">
+Number of seconds to wait for stop (using -m fast) before resorting to -m immediate
+</longdesc>
+<shortdesc lang="en">stop escalation</shortdesc>
+<content type="integer" default="${OCF_RESKEY_stop_escalate_default}" />
+</parameter>
+
+<parameter name="rep_mode" unique="0" required="0">
+<longdesc lang="en">
+Replication mode may be set to "async" or "sync" or "slave".
+They require PostgreSQL 9.1 or later.
+Once set, "async" and "sync" require node_list, master_ip, and
+restore_command parameters,as well as configuring PostgreSQL
+for replication (in postgresql.conf and pg_hba.conf).
+
+"slave" means that RA only makes recovery.conf before starting
+to connect to primary which is running somewhere.
+It doesn't need master/slave setting.
+It requires master_ip restore_command parameters.
+</longdesc>
+<shortdesc lang="en">rep_mode</shortdesc>
+<content type="string" default="${OCF_RESKEY_rep_mode_default}" />
+</parameter>
+
+<parameter name="node_list" unique="0" required="0">
+<longdesc lang="en">
+All node names. Please separate each node name with a space.
+This is optional for replication. Defaults to all nodes in the cluster
+</longdesc>
+<shortdesc lang="en">node list</shortdesc>
+<content type="string" default="${OCF_RESKEY_node_list_default}" />
+</parameter>
+
+<parameter name="restore_command" unique="0" required="0">
+<longdesc lang="en">
+restore_command for recovery.conf.
+This is required for replication.
+</longdesc>
+<shortdesc lang="en">restore_command</shortdesc>
+<content type="string" default="${OCF_RESKEY_restore_command_default}" />
+</parameter>
+
+<parameter name="archive_cleanup_command" unique="0" required="0">
+<longdesc lang="en">
+archive_cleanup_command for recovery.conf.
+This is used for replication and is optional.
+</longdesc>
+<shortdesc lang="en">archive_cleanup_command</shortdesc>
+<content type="string" default="${OCF_RESKEY_archive_cleanup_command_default}" />
+</parameter>
+
+<parameter name="recovery_end_command" unique="0" required="0">
+<longdesc lang="en">
+recovery_end_command for recovery.conf.
+This is used for replication and is optional.
+</longdesc>
+<shortdesc lang="en">recovery_end_command</shortdesc>
+<content type="string" default="${OCF_RESKEY_recovery_end_command_default}" />
+</parameter>
+
+<parameter name="master_ip" unique="0" required="0">
+<longdesc lang="en">
+Master's floating IP address to be connected from hot standby.
+This parameter is used for "primary_conninfo" in recovery.conf.
+This is required for replication.
+</longdesc>
+<shortdesc lang="en">master ip</shortdesc>
+<content type="string" default="${OCF_RESKEY_master_ip_default}" />
+</parameter>
+
+<parameter name="repuser" unique="0" required="0">
+<longdesc lang="en">
+User used to connect to the master server.
+This parameter is used for "primary_conninfo" in recovery.conf.
+This is required for replication.
+</longdesc>
+<shortdesc lang="en">repuser</shortdesc>
+<content type="string" default="${OCF_RESKEY_repuser_default}" />
+</parameter>
+
+<parameter name="primary_conninfo_opt" unique="0" required="0">
+<longdesc lang="en">
+primary_conninfo options of recovery.conf except host, port, user and application_name.
+This is optional for replication.
+</longdesc>
+<shortdesc lang="en">primary_conninfo_opt</shortdesc>
+<content type="string" default="${OCF_RESKEY_primary_conninfo_opt_default}" />
+</parameter>
+
+<parameter name="restart_on_promote" unique="0" required="0">
+<longdesc lang="en">
+If this is true, RA deletes recovery.conf and restarts PostgreSQL
+on promote to keep Timeline ID. It probably makes fail-over slower.
+It's recommended to set on-fail of promote up as fence.
+This is optional for replication.
+</longdesc>
+<shortdesc lang="en">restart_on_promote</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_restart_on_promote_default}" />
+</parameter>
+
+<parameter name="replication_slot_name" unique="0" required="0">
+<longdesc lang="en">
+Set this option when using replication slots.
+Can only use lower case letters, numbers and underscore for replication_slot_name.
+
+The replication slots would be created for each node, with the name adding the node name as postfix.
+For example, replication_slot_name is "sample" and 2 slaves which are "node1" and "node2" connect to
+their slots, the slots names are "sample_node1" and "sample_node2".
+If the node name contains a upper case letter, hyphen and dot, those characters will be converted to a lower case letter or an underscore.
+For example, Node-1.example.com to node_1_example_com.
+
+pgsql RA doesn't monitor and delete the replication slot.
+When the slave node has been disconnected in failure or the like, execute one of the following manually.
+Otherwise it may eventually cause a disk full because the master node will continue to accumulate the unsent WAL.
+1. recover and reconnect the slave node to the master node as soon as possible.
+2. delete the slot on the master node by following psql command.
+$ select pg_drop_replication_slot('replication_slot_name');
+</longdesc>
+<shortdesc lang="en">replication_slot_name</shortdesc>
+<content type="string" default="${OCF_RESKEY_replication_slot_name_default}" />
+</parameter>
+
+<parameter name="tmpdir" unique="0" required="0">
+<longdesc lang="en">
+Path to temporary directory.
+This is optional for replication.
+</longdesc>
+<shortdesc lang="en">tmpdir</shortdesc>
+<content type="string" default="${OCF_RESKEY_tmpdir_default}" />
+</parameter>
+
+<parameter name="xlog_check_count" unique="0" required="0">
+<longdesc lang="en">
+Number of checks of xlog on monitor before promote.
+This is optional for replication.
+
+Note: For backward compatibility, the terms are unified with PostgreSQL 9.
+ If you are using PostgreSQL 10 or later, replace "xlog" with "wal".
+ Likewise, replacing "location" with "lsn".
+</longdesc>
+<shortdesc lang="en">xlog check count</shortdesc>
+<content type="integer" default="${OCF_RESKEY_xlog_check_count_default}" />
+</parameter>
+
+<parameter name="crm_attr_timeout" unique="0" required="0">
+<longdesc lang="en">
+The timeout of crm_attribute forever update command.
+Default value is 5 seconds.
+This is optional for replication.
+</longdesc>
+<shortdesc lang="en">The timeout of crm_attribute forever update command.</shortdesc>
+<content type="integer" default="${OCF_RESKEY_crm_attr_timeout_default}" />
+</parameter>
+
+<parameter name="stop_escalate_in_slave" unique="0" required="0">
+<longdesc lang="en">
+Number of seconds to wait for stop (using -m fast) before resorting to -m immediate
+in slave state.
+This is optional for replication.
+</longdesc>
+<shortdesc lang="en">stop escalation_in_slave</shortdesc>
+<content type="integer" default="${OCF_RESKEY_stop_escalate_in_slave_default}" />
+</parameter>
+
+<parameter name="check_wal_receiver" unique="0" required="0">
+<longdesc lang="en">
+If this is true, RA checks wal_receiver process on monitor
+and notifies its status using "(resource name)-receiver-status" attribute.
+It's useful for checking whether PostgreSQL (hot standby) connects to primary.
+The attribute shows status as "normal" or "normal (master)" or "ERROR".
+Note that if you configure PostgreSQL as master/slave resource, then
+wal receiver is not running in the master and the attribute shows status as
+"normal (master)" consistently because it is normal status.
+</longdesc>
+<shortdesc lang="en">check_wal_receiver</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_check_wal_receiver_default}" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="120s" />
+<action name="stop" timeout="120s" />
+<action name="status" timeout="60s" />
+<action name="monitor" depth="0" timeout="30s" interval="30s"/>
+<action name="monitor" depth="0" timeout="30s" interval="29s" role="Promoted" />
+<action name="promote" timeout="120s" />
+<action name="demote" timeout="120s" />
+<action name="notify" timeout="90s" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="5s" />
+<action name="methods" timeout="5s" />
+</actions>
+</resource-agent>
+EOF
+}
+
+
+#
+# Run the given command in the Resource owner environment...
+#
+runasowner() {
+ local quietrun=""
+ local loglevel="-err"
+ local var
+
+ for var in 1 2
+ do
+ case "$1" in
+ "-q")
+ quietrun="-q"
+ shift 1;;
+ "info"|"warn"|"err")
+ loglevel="-$1"
+ shift 1;;
+ *)
+ ;;
+ esac
+ done
+
+ ocf_run $quietrun $loglevel $SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
+}
+
+#
+# Shell escape
+#
+escape_string() {
+ echo "$*" | sed -e "s/'/'\\\\''/g"
+}
+
+
+#
+# methods: What methods/operations do we support?
+#
+# Prints one operation name per line on stdout, each preceded by one space.
+
+pgsql_methods() {
+    printf ' %s\n' \
+        start stop status monitor promote \
+        demote notify methods meta-data validate-all
+}
+
+
+# Execute SQL and return the result.
+#
+# Arguments: $1 - SQL text handed to "psql -Atc"
+# The statement is run through $SU as $OCF_RESKEY_pgdba from within
+# $OCF_RESKEY_pgdata, using $psql_options.
+# Outputs:   what the command printed on stdout
+# Returns:   exit status of the $SU command
+exec_sql() {
+    local sql output rc
+
+    sql="$1"
+    output=$($SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
+        $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
+        -Atc \"$sql\"")
+    rc=$?
+
+    # Unquoted expansion: word splitting folds newlines and runs of blanks in
+    # the result into single spaces before it is printed.
+    echo $output
+    return $rc
+}
+
+
+#pgsql_real_start: Starts PostgreSQL
+#
+# If the monitor already reports a running server, nothing is started:
+# $OCF_SUCCESS is returned outside replication mode, $OCF_ERR_GENERIC in
+# replication mode. Otherwise the server is launched with pg_ctl and the
+# function polls pgsql_real_monitor once per second, with no timeout of its
+# own, until it reports $OCF_SUCCESS or $OCF_RUNNING_MASTER; that status is
+# the return value.
+# Other returns:
+#   $OCF_ERR_PERM     check_log_file failed for $OCF_RESKEY_logfile
+#   $OCF_ERR_GENERIC  make_recovery_conf failed, pg_ctl start failed, or
+#                     delete_replication_slots returned $OCF_ERR_GENERIC
+pgsql_real_start() {
+ local pgctl_options
+ local postgres_options
+ local rc
+
+ # Nothing to do when the server is already up.
+ pgsql_real_monitor info
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS -o $rc -eq $OCF_RUNNING_MASTER ]; then
+ ocf_log info "PostgreSQL is already running. PID=`cat $PIDFILE`"
+ if is_replication; then
+ return $OCF_ERR_GENERIC
+ else
+ return $OCF_SUCCESS
+ fi
+ fi
+
+ # Remove postmaster.pid if it exists
+ rm -f $PIDFILE
+
+ # Remove backup_label if it exists
+ if [ -f $BACKUPLABEL ] && ! is_replication; then
+ ocf_log info "Removing $BACKUPLABEL. The previous backup might have failed."
+ rm -f $BACKUPLABEL
+ fi
+
+ # Check if we need to create a log file
+ if ! check_log_file $OCF_RESKEY_logfile
+ then
+ ocf_exit_reason "PostgreSQL can't write to the log file: $OCF_RESKEY_logfile"
+ return $OCF_ERR_PERM
+ fi
+
+ # Check socket directory
+ if [ -n "$OCF_RESKEY_socketdir" ]
+ then
+ check_socket_dir
+ fi
+
+ check_stat_temp_directory
+
+ # rep_mode=slave: always regenerate recovery.conf before starting.
+ if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
+ rm -f $RECOVERY_CONF
+ make_recovery_conf || return $OCF_ERR_GENERIC
+ fi
+
+ # Set options passed to pg_ctl
+ pgctl_options="$OCF_RESKEY_ctl_opt -D $OCF_RESKEY_pgdata -l $OCF_RESKEY_logfile"
+
+ # Set options passed to the PostgreSQL server process
+ postgres_options="-c config_file=${OCF_RESKEY_config}"
+
+ if [ -n "$OCF_RESKEY_pghost" ]; then
+ postgres_options="$postgres_options -h $OCF_RESKEY_pghost"
+ fi
+ if [ -n "$OCF_RESKEY_start_opt" ]; then
+ postgres_options="$postgres_options $OCF_RESKEY_start_opt"
+ fi
+
+ # Tack pass-through options onto pg_ctl options
+ pgctl_options="$pgctl_options -o '$postgres_options'"
+
+ # Invoke pg_ctl
+ runasowner "unset PGUSER; unset PGPASSWORD; $OCF_RESKEY_pgctl $pgctl_options -W start"
+
+ if [ $? -eq 0 ]; then
+ # Probably started.....
+ ocf_log info "PostgreSQL start command sent."
+ else
+ ocf_exit_reason "Can't start PostgreSQL."
+ return $OCF_ERR_GENERIC
+ fi
+
+ # pg_ctl was run with -W, so poll the monitor until the server answers.
+ # The loop has no exit of its own other than a successful monitor.
+ while :
+ do
+ pgsql_real_monitor warn
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS -o $rc -eq $OCF_RUNNING_MASTER ]; then
+ break;
+ fi
+ sleep 1
+ ocf_log debug "PostgreSQL still hasn't started yet. Waiting..."
+ done
+
+ # delete replication slots on all nodes. On master node will be created during promotion.
+ if use_replication_slot; then
+ delete_replication_slots
+ if [ $? -eq $OCF_ERR_GENERIC ]; then
+ ocf_exit_reason "PostgreSQL can't clean up replication_slot."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ ocf_log info "PostgreSQL is started."
+ # rc still holds the status of the last successful monitor call.
+ return $rc
+}
+
+# pgsql_replication_start: start PostgreSQL in replication mode.
+#
+# Resets this node's replication bookkeeping, regenerates recovery.conf and
+# then starts the server through pgsql_real_start.
+# Returns:
+#   $OCF_ERR_GENERIC     preparation failed, $PGSQL_LOCK exists, or
+#                        pgsql_real_start did not return $OCF_SUCCESS
+#   $OCF_ERR_CONFIGURED  the $CHECK_SYNCHRONOUS_STANDBY_NAMES_SQL query
+#                        returned a non-empty result
+#   $OCF_SUCCESS         started; status attribute set to "HS:alone"
+pgsql_replication_start() {
+ local rc
+ local synchronous_standby_names
+
+ # initializing for replication
+ change_pgsql_status "$NODENAME" "STOP"
+ delete_master_baseline
+ exec_with_retry 0 ocf_promotion_score -v $CAN_NOT_PROMOTE
+ rm -f ${XLOG_NOTE_FILE}.* $REP_MODE_CONF $RECOVERY_CONF
+ if ! make_recovery_conf || ! delete_xlog_location || ! set_async_mode_all; then
+ return $OCF_ERR_GENERIC
+ fi
+
+ # The lock file is created by pgsql_promote; refuse to start while it exists.
+ if [ -f $PGSQL_LOCK ]; then
+ ocf_exit_reason "My data may be inconsistent. You have to remove $PGSQL_LOCK file to force start."
+ return $OCF_ERR_GENERIC
+ fi
+
+ # start
+ pgsql_real_start
+ if [ $? -ne $OCF_SUCCESS ]; then
+ return $OCF_ERR_GENERIC
+ fi
+
+ synchronous_standby_names=$(exec_sql "${CHECK_SYNCHRONOUS_STANDBY_NAMES_SQL}")
+ if [ -n "${synchronous_standby_names}" ]; then
+ ocf_exit_reason "Invalid synchronous_standby_names is set in postgresql.conf."
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ change_pgsql_status "$NODENAME" "HS:alone"
+ return $OCF_SUCCESS
+}
+
+#pgsql_start: pgsql_real_start() wrapper for replication
+#
+# Dispatches to pgsql_replication_start in replication mode and to
+# pgsql_real_start otherwise; returns the status of whichever one ran.
+pgsql_start() {
+    if is_replication; then
+        pgsql_replication_start
+    else
+        pgsql_real_start
+    fi
+    return $?
+}
+
+#pgsql_promote: Promote PostgreSQL
+#
+# Returns:
+#   $OCF_ERR_CONFIGURED  not in a replication mode
+#   $OCF_ERR_GENERIC     the recovery status could not be queried, the
+#                        restart/promote failed, or create_replication_slots
+#                        returned $OCF_ERR_GENERIC
+#   $OCF_SUCCESS         already primary, or promoted
+pgsql_promote() {
+    local output
+    local target
+    local rc
+
+    if ! is_replication; then
+        ocf_exit_reason "Not in a replication mode."
+        return $OCF_ERR_CONFIGURED
+    fi
+
+    output=`exec_sql "${CHECK_MS_SQL}"`
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        # Hand over the real exit status and an explicit log level; "err" is
+        # the level pgsql_real_monitor uses by default for the same report.
+        report_psql_error $rc err "Can't get PostgreSQL recovery status on promote."
+        return $OCF_ERR_GENERIC
+    fi
+
+    if [ "$output" = "f" ]; then
+        ocf_log info "PostgreSQL is already Master. Don't execute promote."
+        return $OCF_SUCCESS
+    fi
+
+    rm -f ${XLOG_NOTE_FILE}.*
+
+    # Mark every other node as disconnected and not promotable.
+    for target in $NODE_LIST; do
+        [ "$target" = "$NODENAME" ] && continue
+        change_data_status "$target" "DISCONNECT"
+        change_master_score "$target" "$CAN_NOT_PROMOTE"
+    done
+
+    ocf_log info "Creating $PGSQL_LOCK."
+    touch $PGSQL_LOCK
+    show_master_baseline
+
+    if ocf_is_true ${OCF_RESKEY_restart_on_promote}; then
+        ocf_log info "Restarting PostgreSQL instead of promote."
+        #stop : this function returns $OCF_SUCCESS only.
+        pgsql_real_stop slave
+        # USE_STANDBY_SIGNAL is executed as a command, so it is expected to
+        # hold "true" or "false".
+        if "${USE_STANDBY_SIGNAL}"; then
+            rm -f ${OCF_RESKEY_pgdata}/standby.signal
+        else
+            rm -f $RECOVERY_CONF
+        fi
+        pgsql_real_start
+        rc=$?
+        if [ $rc -ne $OCF_RUNNING_MASTER ]; then
+            ocf_exit_reason "Can't start PostgreSQL as primary on promote."
+            if [ $rc -ne $OCF_SUCCESS ]; then
+                change_pgsql_status "$NODENAME" "STOP"
+            fi
+            return $OCF_ERR_GENERIC
+        fi
+    else
+        runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata -W promote"
+        if [ $? -eq 0 ]; then
+            ocf_log info "PostgreSQL promote command sent."
+        else
+            ocf_exit_reason "Can't promote PostgreSQL."
+            return $OCF_ERR_GENERIC
+        fi
+
+        # pg_ctl ran with -W: poll until the monitor sees a primary.
+        while :
+        do
+            pgsql_real_monitor warn
+            rc=$?
+            if [ $rc -eq $OCF_RUNNING_MASTER ]; then
+                break;
+            elif [ $rc -eq $OCF_ERR_GENERIC ]; then
+                ocf_exit_reason "Can't promote PostgreSQL."
+                return $rc
+            fi
+            sleep 1
+            ocf_log debug "PostgreSQL still hasn't promoted yet. Waiting..."
+        done
+        ocf_log info "PostgreSQL is promoted."
+    fi
+
+    # create replication slots on master after promotion
+    if use_replication_slot; then
+        create_replication_slots
+        if [ $? -eq $OCF_ERR_GENERIC ]; then
+            ocf_exit_reason "PostgreSQL can't create replication_slot."
+            return $OCF_ERR_GENERIC
+        fi
+    fi
+
+    change_data_status "$NODENAME" "LATEST"
+    exec_with_retry 0 ocf_promotion_score -v $PROMOTE_ME
+    change_pgsql_status "$NODENAME" "PRI"
+    return $OCF_SUCCESS
+}
+
+#pgsql_demote: Demote PostgreSQL
+pgsql_demote() {
+ local rc
+
+ if ! is_replication; then
+ ocf_exit_reason "Not in a replication mode."
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ exec_with_retry 0 ocf_promotion_score -v $CAN_NOT_PROMOTE
+ delete_master_baseline
+
+ if ! pgsql_status; then
+ ocf_log info "PostgreSQL is already stopped on demote."
+ else
+ ocf_log info "Stopping PostgreSQL on demote."
+ pgsql_real_stop master
+ rc=$?
+ if [ "$rc" -ne "$OCF_SUCCESS" ]; then
+ change_pgsql_status "$NODENAME" "UNKNOWN"
+ return $rc
+ fi
+ fi
+ change_pgsql_status "$NODENAME" "STOP"
+ return $OCF_SUCCESS
+}
+
+#pgsql_real_stop: Stop PostgreSQL
+#
+# Arguments: $1 - optional role of the instance being stopped:
+#                 "slave"  use stop_escalate_in_slave as escalation delay
+#                 "master" additionally remove $PGSQL_LOCK when
+#                          $OCF_RESKEY_CRM_meta_notify_slave_uname is " "
+# First tries "pg_ctl stop -m fast", escalates to "-m immediate" once the
+# escalation delay has passed, then waits until pgsql_real_monitor reports
+# $OCF_NOT_RUNNING.
+# Returns: $OCF_SUCCESS (also when the server was already stopped)
+pgsql_real_stop() {
+ local rc
+ local count
+ local stop_escalate
+
+ # Drop the wal receiver status attribute of this node.
+ if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -D
+ fi
+
+ if ! pgsql_status
+ then
+ #Already stopped
+ return $OCF_SUCCESS
+ fi
+
+ stop_escalate=$OCF_RESKEY_stop_escalate
+ if [ "$1" = "slave" ]; then
+ stop_escalate="$OCF_RESKEY_stop_escalate_in_slave"
+ fi
+ # adjust stop_escalate time when it is longer than the timeout
+ # (OCF_RESKEY_CRM_meta_timeout is divided by 1000 to compare it with the
+ # delay in seconds; the adjusted delay leaves 10 seconds of the timeout)
+ if [ -n "$OCF_RESKEY_CRM_meta_timeout" ] && \
+ [ "$stop_escalate" -ge $((OCF_RESKEY_CRM_meta_timeout/1000)) ]; then
+ stop_escalate=$(((OCF_RESKEY_CRM_meta_timeout/1000) - 10))
+ ocf_log info "stop_escalate(or stop_escalate_in_slave) time is adjusted to ${stop_escalate} based on the configured timeout."
+ fi
+
+ # Stop PostgreSQL, do not wait for clients to disconnect
+ # (skipped when the delay is zero or negative, which leads straight to the
+ # immediate shutdown below)
+ if [ $stop_escalate -gt 0 ]; then
+ runasowner "$OCF_RESKEY_pgctl -W -D $OCF_RESKEY_pgdata stop -m fast"
+ fi
+
+ # stop waiting
+ # One status check per second, for at most $stop_escalate seconds.
+ count=0
+ while [ $count -lt $stop_escalate ]
+ do
+ if ! pgsql_status
+ then
+ #PostgreSQL stopped
+ break;
+ fi
+ count=`expr $count + 1`
+ sleep 1
+ done
+
+ if pgsql_status
+ then
+ #PostgreSQL is still up. Use another shutdown mode.
+ ocf_log info "PostgreSQL failed to stop after ${stop_escalate}s using -m fast. Trying -m immediate..."
+ runasowner "$OCF_RESKEY_pgctl -W -D $OCF_RESKEY_pgdata stop -m immediate"
+ fi
+
+ # Wait, without a timeout of its own, until the monitor confirms the stop.
+ while :
+ do
+ pgsql_real_monitor
+ rc=$?
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
+ # An unnecessary debug log is prevented.
+ break;
+ fi
+ sleep 1
+ ocf_log debug "PostgreSQL still hasn't stopped yet. Waiting..."
+ done
+
+ # Remove postmaster.pid if it exists
+ rm -f $PIDFILE
+
+ # NOTE(review): the comparison is against a single space, presumably the
+ # value seen when there is no slave node - confirm against the cluster manager.
+ if [ "$1" = "master" -a "$OCF_RESKEY_CRM_meta_notify_slave_uname" = " " ]; then
+ ocf_log info "Removing $PGSQL_LOCK."
+ rm -f $PGSQL_LOCK
+ fi
+ return $OCF_SUCCESS
+}
+
+# pgsql_replication_stop: stop PostgreSQL in replication mode.
+#
+# Withdraws the promotion score, stops the server with pgsql_real_stop slave
+# and resets the replication bookkeeping of this node.
+# Returns: $OCF_SUCCESS, or the status of pgsql_real_stop when that differs
+#          from $OCF_SUCCESS (the status attribute is then set to "UNKNOWN").
+pgsql_replication_stop() {
+ local rc
+
+ exec_with_retry 5 ocf_promotion_score -v $CAN_NOT_PROMOTE
+ delete_xlog_location
+
+ if ! pgsql_status
+ then
+ ocf_log info "PostgreSQL is already stopped."
+ change_pgsql_status "$NODENAME" "STOP"
+ return $OCF_SUCCESS
+ fi
+
+ pgsql_real_stop slave
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ change_pgsql_status "$NODENAME" "UNKNOWN"
+ return $rc
+ fi
+
+ change_pgsql_status "$NODENAME" "STOP"
+ set_async_mode_all
+ delete_master_baseline
+ return $OCF_SUCCESS
+}
+
+#pgsql_stop: pgsql_real_stop() wrapper for replication
+#
+# Dispatches to pgsql_replication_stop in replication mode and to
+# pgsql_real_stop otherwise; returns the status of whichever one ran.
+pgsql_stop() {
+    if is_replication; then
+        pgsql_replication_stop
+    else
+        pgsql_real_stop
+    fi
+    return $?
+}
+
+#
+# pgsql_status: is PostgreSQL up?
+#
+
+pgsql_status() {
+ if [ -f $PIDFILE ]
+ then
+ PID=`head -n 1 $PIDFILE`
+ runasowner "kill -s 0 $PID >/dev/null 2>&1"
+ return $?
+ fi
+
+ # No PID file
+ false
+}
+
+pgsql_wal_receiver_status() {
+ local PID
+ local receiver_parent_pids
+ local pgsql_real_monitor_status=$1
+
+ PID=`head -n 1 $PIDFILE`
+ receiver_parent_pids=`ps -ef | tr -s " " | grep "[w]al\s*receiver" | cut -d " " -f 3`
+
+ if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal"
+ return 0
+ fi
+
+ if [ $pgsql_real_monitor_status -eq "$OCF_RUNNING_MASTER" ]; then
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal (master)"
+ return 0
+ fi
+
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR"
+ ocf_log warn "wal receiver process is not running"
+ return 1
+}
+
+#
+# pgsql_real_monitor
+#
+
+pgsql_real_monitor() {
+ local loglevel
+ local rc
+ local output
+
+ # Set the log level of the error message
+ loglevel=${1:-err}
+
+ if ! pgsql_status
+ then
+ ocf_log info "PostgreSQL is down"
+ return $OCF_NOT_RUNNING
+ fi
+
+ if is_replication; then
+ #Check replication state
+ output=`exec_sql "${CHECK_MS_SQL}"`
+ rc=$?
+
+ if [ $rc -ne 0 ]; then
+ report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status."
+ return $OCF_ERR_GENERIC
+ fi
+
+ case "$output" in
+ f) ocf_log debug "PostgreSQL is running as a primary."
+ if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then
+ if ocf_is_probe; then
+ # Set initial score for primary.
+ exec_with_retry 0 ocf_promotion_score -v $PROMOTE_ME
+ fi
+ return $OCF_RUNNING_MASTER
+ fi
+ ;;
+
+ t) ocf_log debug "PostgreSQL is running as a hot standby."
+ if ocf_is_probe; then
+ # Set initial score for hot standby.
+ exec_with_retry 0 ocf_promotion_score -v $CAN_NOT_PROMOTE
+ fi
+ return $OCF_SUCCESS;;
+
+ *) ocf_exit_reason "$CHECK_MS_SQL output is $output"
+ return $OCF_ERR_GENERIC;;
+ esac
+ fi
+
+ OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"`
+ runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \
+ -c '$OCF_RESKEY_monitor_sql'"
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ report_psql_error $rc $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running."
+ return $OCF_ERR_GENERIC
+ fi
+
+ if is_replication; then
+ return $OCF_RUNNING_MASTER
+ fi
+ return $OCF_SUCCESS
+}
+
+# pgsql_replication_monitor: replication specific part of the monitor.
+#
+# Arguments: $1 - status returned by pgsql_real_monitor
+# Returns:   $1 unchanged, except $OCF_ERR_GENERIC when control_slave_status
+#            fails on the primary.
+pgsql_replication_monitor() {
+    local rc
+    # Keep the attribute query result out of the global namespace.
+    local output
+
+    rc=$1
+    if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then
+        return $rc
+    fi
+    # If I am Master
+    if [ $rc -eq $OCF_RUNNING_MASTER ]; then
+        change_data_status "$NODENAME" "LATEST"
+        change_pgsql_status "$NODENAME" "PRI"
+        control_slave_status || return $OCF_ERR_GENERIC
+        # RE_CONTROL_SLAVE is not assigned "true" in this function, so it is
+        # presumably set by control_slave_status to request a second pass.
+        if [ "$RE_CONTROL_SLAVE" = "true" ]; then
+            sleep 2
+            ocf_log info "re-controlling slave status."
+            RE_CONTROL_SLAVE="none"
+            control_slave_status || return $OCF_ERR_GENERIC
+        fi
+        return $rc
+    fi
+
+    # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor,
+    # so I will get master node name using crm_mon -n
+    print_crm_mon | grep -q -i -E "<resource id=\"${RESOURCE_NAME}\" .* role=\"(Promoted|Master)\""
+    if [ $? -ne 0 ] ; then
+        # If I am Slave and Master is not exist
+        ocf_log info "Master does not exist."
+        change_pgsql_status "$NODENAME" "HS:alone"
+        have_master_right
+        if [ $? -eq 0 ]; then
+            rm -f ${XLOG_NOTE_FILE}.*
+        fi
+    else
+        output=`exec_with_retry 0 $CRM_ATTR_FOREVER -N "$NODENAME" \
+            -n "$PGSQL_DATA_STATUS_ATTR" -G -q`
+        if [ "$output" = "DISCONNECT" ]; then
+            change_pgsql_status "$NODENAME" "HS:alone"
+        fi
+    fi
+    return $rc
+}
+
+# pgsql_monitor: monitor entry point. Runs pgsql_real_monitor() and, when
+# replication is enabled, hands its result to pgsql_replication_monitor().
+# Returns the code of whichever of the two ran last.
+pgsql_monitor() {
+    local rc
+
+    pgsql_real_monitor
+    rc=$?
+
+    # Optional WAL receiver check; its own result does not affect the return code.
+    ocf_is_true ${OCF_RESKEY_check_wal_receiver} && pgsql_wal_receiver_status $rc
+
+    if is_replication; then
+        pgsql_replication_monitor $rc
+        return $?
+    fi
+    return $rc
+}
+
+# pgsql_post_demote: handle the "post demote" notification.
+# When another node was demoted and this node is not listed among the masters,
+# record the master baseline and mark this node as "HS:alone".
+# Sets the global DEMOTE_NODE (first demoted node name, lower-cased).
+# Always returns $OCF_SUCCESS.
+pgsql_post_demote() {
+    local master
+
+    DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
+    ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE"
+    if [ "$DEMOTE_NODE" != "$NODENAME" ]; then
+        # Compare node names word by word: a regex/substring match would treat
+        # e.g. "pg1" as present when only "pg10" is a master, and an un-silenced
+        # grep would also leak the matching line onto stdout.
+        for master in `echo $OCF_RESKEY_CRM_meta_notify_master_uname | tr '[A-Z]' '[a-z]'`; do
+            if [ "$master" = "$NODENAME" ]; then
+                return $OCF_SUCCESS
+            fi
+        done
+        show_master_baseline
+        change_pgsql_status "$NODENAME" "HS:alone"
+    fi
+    return $OCF_SUCCESS
+}
+
+# pgsql_pre_promote: handle the "pre promote" notification.
+# With three or more nodes in sync mode, a node that is NOT being promoted
+# compares its own master baseline with that of the node to be promoted. If its
+# own baseline is not the older one, it bans the resource from itself and fails.
+# Sets the global PROMOTE_NODE (first promoted node name, lower-cased).
+# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when local data is newer.
+pgsql_pre_promote() {
+    local master_baseline
+    local my_master_baseline
+    local cmp_location
+    local number_of_nodes
+
+    # If my data is newer than new master's one, I fail my resource.
+    PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
+        sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
+    number_of_nodes=`echo $NODE_LIST | wc -w`
+    if [ $number_of_nodes -ge 3 -a \
+         "$OCF_RESKEY_rep_mode" = "sync" -a \
+         "$PROMOTE_NODE" != "$NODENAME" ]; then
+        master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \
+            "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null`
+        if [ $? -eq 0 ]; then
+            my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \
+                "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null`
+            # get older location
+            # The attribute values are passed as printf arguments, never as the
+            # format string, so they are compared exactly as stored.
+            cmp_location=`printf '%s\n%s\n' "$master_baseline" "$my_master_baseline" |\
+                sort | head -1`
+            if [ "$cmp_location" != "$my_master_baseline" ]; then
+                # We used to set the failcount to INF for the resource here in
+                # order to move the master to the other node. However, setting
+                # the failcount should be done only by the CRM and so this use
+                # got deprecated in pacemaker version 1.1.17. Now we do the
+                # "ban resource from the node".
+                ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline"
+                exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q
+                return $OCF_ERR_GENERIC
+            fi
+        fi
+    fi
+    return $OCF_SUCCESS
+}
+
+# pgsql_notify: dispatch a notify action according to the notification type
+# (pre/post) and operation delivered by the cluster manager.
+# Does nothing unless replication is enabled. Sets the globals PROMOTE_NODE or
+# MASTER_NODE on the paths that need them.
+# Returns the handler's code for pre-promote and post-demote, $OCF_SUCCESS otherwise.
+pgsql_notify() {
+    local type="${OCF_RESKEY_CRM_meta_notify_type}"
+    local op="${OCF_RESKEY_CRM_meta_notify_operation}"
+    local rc
+
+    is_replication || return $OCF_SUCCESS
+
+    ocf_log debug "notify: ${type} for ${op}"
+    case "${type}:${op}" in
+        pre:promote)
+            pgsql_pre_promote
+            return $?
+            ;;
+        post:promote)
+            delete_xlog_location
+            PROMOTE_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
+                sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
+            # Only nodes that were not promoted drop their baseline.
+            [ "$PROMOTE_NODE" = "$NODENAME" ] || delete_master_baseline
+            ;;
+        post:demote)
+            pgsql_post_demote
+            return $?
+            ;;
+        post:start|post:stop)
+            MASTER_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_master_uname | \
+                sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
+            # Only the master re-evaluates the slaves.
+            [ "$NODENAME" != "$MASTER_NODE" ] || control_slave_status
+            ;;
+    esac
+    return $OCF_SUCCESS
+}
+
+# control_slave_status: run on the master to refresh the cluster attributes
+# of every other node in $NODE_LIST from the result of
+# $CHECK_REPLICATION_STATE_SQL.
+# For each node it updates the data status, the master score and (except for
+# disconnected nodes) the pgsql status, and switches sync/async mode as needed.
+# Returns 0 on success, 1 when the replication state query fails.
+control_slave_status() {
+ local rc
+ local data_status
+ local target
+ local all_data_status
+ local tmp_data_status
+ local number_of_nodes
+
+ all_data_status=`exec_sql "${CHECK_REPLICATION_STATE_SQL}"`
+ rc=$?
+
+ if [ $rc -eq 0 ]; then
+ if [ -n "$all_data_status" ]; then
+ # The unquoted echo already joins the result rows with single spaces.
+ all_data_status=`echo $all_data_status | sed "s/\n/ /g"`
+ fi
+ else
+ report_psql_error $rc err "Can't get PostgreSQL replication status."
+ return 1
+ fi
+
+ number_of_nodes=`echo $NODE_LIST | wc -w`
+ for target in $NODE_LIST; do
+ # Skip myself; only the other nodes are evaluated.
+ if [ "$target" = "$NODENAME" ]; then
+ continue
+ fi
+
+ # A node with no matching row in the query result counts as disconnected.
+ data_status="DISCONNECT"
+ if [ -n "$all_data_status" ]; then
+ # Each entry is "<first column>|<state>|<sync_state>"; pick the one
+ # whose first column is the target node name.
+ for tmp_data_status in $all_data_status; do
+ if ! echo $tmp_data_status | grep -q "^${target}|"; then
+ continue
+ fi
+ data_status=`echo $tmp_data_status | cut -d "|" -f 2,3`
+ ocf_log debug "node_name and data_status is $tmp_data_status"
+ break
+ done
+ fi
+
+ case "$data_status" in
+ "STREAMING|SYNC")
+ change_data_status "$target" "$data_status"
+ change_master_score "$target" "$CAN_PROMOTE"
+ change_pgsql_status "$target" "HS:sync"
+ ;;
+ "STREAMING|ASYNC")
+ change_data_status "$target" "$data_status"
+ if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
+ # In sync mode an async standby is not promotable yet; try to
+ # register it as the synchronous standby.
+ change_master_score "$target" "$CAN_NOT_PROMOTE"
+ set_sync_mode "$target"
+ else
+ if [ $number_of_nodes -le 2 ]; then
+ change_master_score "$target" "$CAN_PROMOTE"
+ else
+ # I can't determine which slave's data is newest in async mode.
+ change_master_score "$target" "$CAN_NOT_PROMOTE"
+ fi
+ fi
+ change_pgsql_status "$target" "HS:async"
+ ;;
+ "STREAMING|POTENTIAL")
+ change_data_status "$target" "$data_status"
+ change_master_score "$target" "$CAN_NOT_PROMOTE"
+ change_pgsql_status "$target" "HS:potential"
+ ;;
+ "DISCONNECT")
+ # Note: the pgsql status of a disconnected node is left untouched here.
+ change_data_status "$target" "$data_status"
+ change_master_score "$target" "$CAN_NOT_PROMOTE"
+ if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
+ set_async_mode "$target"
+ fi
+ ;;
+ *)
+ # Any other state: the node has a row in the result but is not streaming.
+ change_data_status "$target" "$data_status"
+ change_master_score "$target" "$CAN_NOT_PROMOTE"
+ if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
+ set_async_mode "$target"
+ fi
+ change_pgsql_status "$target" "HS:connected"
+ ;;
+ esac
+ done
+ return 0
+}
+
+# have_master_right: decide whether this node may become the master when no
+# master exists.
+# The xlog locations of all nodes are written to ${XLOG_NOTE_FILE}.<n>; once the
+# same snapshot has been observed $OCF_RESKEY_xlog_check_count times in a row
+# and this node holds the newest location, its promotion score is raised to
+# $PROMOTE_ME.
+# Returns 0 when this node has the master right, 1 otherwise.
+# Exits with $OCF_ERR_GENERIC when its own xlog location cannot be published.
+have_master_right() {
+ local old
+ local new
+ local output
+ local data_status
+ local node
+ local mylocation
+ local count
+ local newestXlog
+ local oldfile
+ local newfile
+
+ ocf_log debug "Checking if I have a master right."
+
+ # An empty data status passes the check; otherwise only the statuses
+ # listed below are accepted (STREAMING|ASYNC only outside sync mode).
+ data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \
+ "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null`
+ if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
+ if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
+ "$data_status" != "LATEST" ]; then
+ ocf_log warn "My data is out-of-date. status=$data_status"
+ return 1
+ fi
+ else
+ if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
+ "$data_status" != "STREAMING|ASYNC" -a \
+ "$data_status" != "LATEST" ]; then
+ ocf_log warn "My data is out-of-date. status=$data_status"
+ return 1
+ fi
+ fi
+ ocf_log info "My data status=$data_status."
+
+ # Publish my own xlog location so that the other nodes can compare.
+ show_xlog_location
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to show my xlog location."
+ exit $OCF_ERR_GENERIC
+ fi
+
+ # Find the highest consecutive note file index written so far
+ # (0 when ${XLOG_NOTE_FILE}.1 does not exist).
+ old=0
+ for count in `seq $OCF_RESKEY_xlog_check_count`; do
+ if [ -f ${XLOG_NOTE_FILE}.$count ]; then
+ old=$count
+ continue
+ fi
+ break
+ done
+ new=`expr $old + 1`
+
+ # get xlog locations of all nodes
+ for node in ${NODE_LIST}; do
+ output=`$CRM_ATTR_REBOOT -N "$node" -n \
+ "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null`
+ if [ $? -ne 0 ]; then
+ ocf_log warn "Can't get $node xlog location."
+ continue
+ else
+ ocf_log info "$node xlog location : $output"
+ echo "$node $output" >> ${XLOG_NOTE_FILE}.${new}
+ if [ "$node" = "$NODENAME" ]; then
+ mylocation=$output
+ fi
+ fi
+ done
+
+ # If the snapshot differs from the previous one, the locations are still
+ # changing: start counting again from index 0.
+ oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null`
+ newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null`
+ if [ "$oldfile" != "$newfile" ]; then
+ # reset counter
+ rm -f ${XLOG_NOTE_FILE}.*
+ printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
+ return 1
+ fi
+
+ # Enough identical snapshots collected: the node whose location sorts
+ # highest wins.
+ if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then
+ newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \
+ head -1 | cut -d " " -f 2`
+ if [ "$newestXlog" = "$mylocation" ]; then
+ ocf_log info "I have a master right."
+ exec_with_retry 5 ocf_promotion_score -v $PROMOTE_ME
+ return 0
+ fi
+ change_data_status "$NODENAME" "DISCONNECT"
+ ocf_log info "I don't have correct master data."
+ # reset counter
+ rm -f ${XLOG_NOTE_FILE}.*
+ printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
+ fi
+
+ return 1
+}
+
+# is_replication: succeed (0) when rep_mode is anything other than "none" or
+# "slave"; return 1 for those two values.
+is_replication() {
+    case "$OCF_RESKEY_rep_mode" in
+        none|slave) return 1 ;;
+    esac
+    return 0
+}
+
+# use_replication_slot: succeed (0) when a replication slot name is configured,
+# return 1 when it is empty.
+use_replication_slot() {
+    if [ -z "$OCF_RESKEY_replication_slot_name" ]; then
+        return 1
+    fi
+    return 0
+}
+
+# create_replication_slot_name: print the space-separated list of replication
+# slot names, one per node in $NODE_LIST other than this node.
+#
+# The Master node should have as many slots as there are Slaves, and each Slave
+# connects to its dedicated slot on the Master. To keep the slot names unique,
+# "_<node>" is appended to $OCF_RESKEY_replication_slot_name.
+# Prints an empty line when $NODE_LIST contains no node.
+create_replication_slot_name() {
+    local node_total=0
+    local peer
+    local slot
+    local slots=""
+
+    if [ -n "$NODE_LIST" ]; then
+        node_total=$(echo $NODE_LIST | wc -w)
+    fi
+
+    if [ $node_total -gt 0 ]; then
+        for peer in $NODE_LIST; do
+            [ "$peer" != "$NODENAME" ] || continue
+            # Upper case letters, "-" and "." are not allowed in a slot name:
+            # convert upper case to lower case, and "-" and "." to "_".
+            peer=$(echo "$peer" | tr 'A-Z.-' 'a-z__')
+            slot="$OCF_RESKEY_replication_slot_name"_"$peer"
+            slots="$slots $slot"
+        done
+    fi
+
+    echo $slots
+}
+
+# delete_replication_slot: drop the replication slot named $1.
+# The SQL output is left in the (non-local) variable "output" so that callers
+# can report it. Returns the status of exec_sql.
+delete_replication_slot(){
+    DELETE_REPLICATION_SLOT_sql="SELECT pg_drop_replication_slot('$1');"
+    output=$(exec_sql "$DELETE_REPLICATION_SLOT_sql")
+}
+
+# delete_replication_slots: drop every replication slot this node would own
+# (see create_replication_slot_name) that currently exists.
+# Returns 0 on success, $OCF_ERR_GENERIC as soon as one slot cannot be dropped.
+delete_replication_slots() {
+    local replication_slot_name_list
+    local replication_slot_name
+
+    replication_slot_name_list=`create_replication_slot_name`
+    ocf_log debug "replication slot names are $replication_slot_name_list."
+
+    for replication_slot_name in $replication_slot_name_list
+    do
+        # Quoted so that an empty answer (failed query) is simply "not 1"
+        # instead of a malformed test expression.
+        if [ "`check_replication_slot $replication_slot_name`" = "1" ]; then
+            delete_replication_slot $replication_slot_name
+            if [ $? -eq 0 ]; then
+                ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)."
+            else
+                # "output" is filled in by delete_replication_slot.
+                ocf_exit_reason "$output"
+                return $OCF_ERR_GENERIC
+            fi
+        fi
+    done
+
+    return 0
+}
+
+# create_replication_slots: (re)create one physical replication slot per other
+# node (see create_replication_slot_name). A slot that already exists is
+# dropped first so that it starts from a clean state.
+# Returns 0 on success, $OCF_ERR_GENERIC when a drop or create fails.
+create_replication_slots() {
+    local replication_slot_name
+    local replication_slot_name_list
+    local output
+    local rc
+    local CREATE_REPLICATION_SLOT_sql
+    # Declared here so that the assignment made inside delete_replication_slot
+    # stays scoped to this call.
+    local DELETE_REPLICATION_SLOT_sql
+
+    replication_slot_name_list=`create_replication_slot_name`
+    ocf_log debug "replication slot names are $replication_slot_name_list."
+
+    for replication_slot_name in $replication_slot_name_list
+    do
+        # If the same name slot is already exists, initialize(delete and create) the slot.
+        # Quoted so that an empty answer (failed query) is simply "not 1"
+        # instead of a malformed test expression.
+        if [ "`check_replication_slot $replication_slot_name`" = "1" ]; then
+            delete_replication_slot $replication_slot_name
+            if [ $? -eq 0 ]; then
+                ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)."
+            else
+                ocf_exit_reason "$output"
+                return $OCF_ERR_GENERIC
+            fi
+        fi
+
+        CREATE_REPLICATION_SLOT_sql="SELECT pg_create_physical_replication_slot('$replication_slot_name');"
+        output=`exec_sql "$CREATE_REPLICATION_SLOT_sql"`
+        rc=$?
+
+        if [ $rc -eq 0 ]; then
+            ocf_log info "PostgreSQL creates the replication slot($replication_slot_name)."
+        else
+            ocf_exit_reason "$output"
+            return $OCF_ERR_GENERIC
+        fi
+    done
+
+    return 0
+}
+
+# check_replication_slot: print how many rows of pg_replication_slots carry the
+# slot name $1, as reported by exec_sql.
+check_replication_slot(){
+    local replication_slot_name=$1
+    local output
+    local CHECK_REPLICATION_SLOT_sql
+
+    CHECK_REPLICATION_SLOT_sql="SELECT count(*) FROM pg_replication_slots WHERE slot_name = '$replication_slot_name'"
+    output=$(exec_sql "$CHECK_REPLICATION_SLOT_sql")
+    echo "$output"
+}
+
+# get_my_location: print this node's newest WAL position.
+# On postgreSQL 10 or later, "location" means "lsn".
+# The replay and receive positions returned by $CHECK_XLOG_LOC_SQL ("X/Y" each)
+# are normalised to 16 characters (both halves left-padded with zeros to 8) and
+# the larger of the two, by reverse sort order, is printed.
+# Returns 0 on success, 1 when the query fails.
+get_my_location() {
+    local rc
+    local output
+    local replay_loc
+    local receive_loc
+    local hi
+    local lo
+    local newer_location
+
+    output=$(exec_sql "$CHECK_XLOG_LOC_SQL")
+    rc=$?
+
+    if [ $rc -ne 0 ]; then
+        report_psql_error $rc err "Can't get my xlog location."
+        return 1
+    fi
+    replay_loc=$(echo $output | cut -d "|" -f 1)
+    receive_loc=$(echo $output | cut -d "|" -f 2)
+
+    # Replay position: pad each half to 8 columns, then turn the padding into zeros.
+    hi=$(echo "$replay_loc" | cut -d "/" -f 1)
+    lo=$(echo "$replay_loc" | cut -d "/" -f 2)
+    hi=$(printf "%08s\n" $hi | sed "s/ /0/g")
+    lo=$(printf "%08s\n" $lo | sed "s/ /0/g")
+    replay_loc="${hi}${lo}"
+
+    # Receive position: same normalisation.
+    hi=$(echo "$receive_loc" | cut -d "/" -f 1)
+    lo=$(echo "$receive_loc" | cut -d "/" -f 2)
+    hi=$(printf "%08s\n" $hi | sed "s/ /0/g")
+    lo=$(printf "%08s\n" $lo | sed "s/ /0/g")
+    receive_loc="${hi}${lo}"
+
+    newer_location=$(printf "$replay_loc\n$receive_loc" | sort -r | head -1)
+    echo "$newer_location"
+    return 0
+}
+
+# show_xlog_location: publish this node's newest WAL position as the
+# $PGSQL_XLOG_LOC_NAME node attribute.
+# On postgreSQL 10 or later, "xlog_location" means "wal_lsn".
+# Returns 1 when the position cannot be determined, otherwise the status of
+# the attribute update.
+show_xlog_location() {
+    local location
+
+    if ! location=$(get_my_location); then
+        return 1
+    fi
+    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" \
+        -n "$PGSQL_XLOG_LOC_NAME" -v "$location"
+}
+
+# delete_xlog_location: remove this node's $PGSQL_XLOG_LOC_NAME attribute,
+# trying up to 5 times.
+# On postgreSQL 10 or later, "xlog_location" means "wal_lsn".
+delete_xlog_location() {
+    exec_with_retry 5 $CRM_ATTR_REBOOT \
+        -N "$NODENAME" \
+        -n "$PGSQL_XLOG_LOC_NAME" \
+        -D
+}
+
+# show_master_baseline: log this node's newest WAL position and store it as the
+# $PGSQL_MASTER_BASELINE node attribute.
+# The result of get_my_location is not checked; whatever it printed is stored.
+show_master_baseline() {
+    local rc
+    local location
+
+    location=$(get_my_location)
+    ocf_log info "My master baseline : $location."
+    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" \
+        -n "$PGSQL_MASTER_BASELINE" -v "$location"
+}
+
+# delete_master_baseline: remove this node's $PGSQL_MASTER_BASELINE attribute,
+# trying up to 5 times.
+delete_master_baseline() {
+    exec_with_retry 5 $CRM_ATTR_REBOOT \
+        -N "$NODENAME" \
+        -n "$PGSQL_MASTER_BASELINE" \
+        -D
+}
+
+# set_async_mode_all: in sync mode, empty synchronous_standby_names in
+# $REP_MODE_CONF so that every node replicates asynchronously.
+# Does nothing (returns 0) unless rep_mode is "sync".
+# Returns 0 on success, 1 when the file cannot be written.
+set_async_mode_all() {
+    if [ "$OCF_RESKEY_rep_mode" != "sync" ]; then
+        return 0
+    fi
+
+    ocf_log info "Set all nodes into async mode."
+    if runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""; then
+        return 0
+    fi
+
+    ocf_exit_reason "Can't set all nodes into async mode."
+    return 1
+}
+
+# set_async_mode: stop treating node $1 as a synchronous standby.
+# If $1 is named in $REP_MODE_CONF, synchronous_standby_names is emptied and
+# the configuration is reloaded; otherwise nothing is changed.
+# Returns 0 when the node was already async, else the status of the reload.
+set_async_mode() {
+    if ! cat $REP_MODE_CONF | grep -q -E "(\"$1\")|([,' ]$1[,' ])"; then
+        ocf_log debug "$1 is already in async mode."
+        return 0
+    fi
+
+    ocf_log info "Setup $1 into async mode."
+    runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""
+    exec_with_retry 0 reload_conf
+}
+
+# set_sync_mode: register node $1 as the synchronous standby.
+# Nothing is changed when $REP_MODE_CONF already names a standby. Otherwise the
+# file is rewritten, RE_CONTROL_SLAVE is switched from "false" to "true" and
+# the configuration is reloaded.
+set_sync_mode() {
+    local sync_node_in_conf
+
+    sync_node_in_conf=$(cat $REP_MODE_CONF | cut -d "'" -f 2)
+    if [ -z "$sync_node_in_conf" ]; then
+        ocf_log info "Setup $1 into sync mode."
+        runasowner -q err "echo \"synchronous_standby_names = '\\\"$1\\\"'\" > \"$REP_MODE_CONF\""
+        if [ "$RE_CONTROL_SLAVE" = "false" ]; then
+            RE_CONTROL_SLAVE="true"
+        fi
+        exec_with_retry 0 reload_conf
+    else
+        ocf_log debug "$sync_node_in_conf is already sync mode."
+    fi
+}
+
+# reload_conf: ask PostgreSQL to re-read its configuration via "pg_ctl reload",
+# run as the database owner.
+# Returns 0 on success, 1 on failure.
+reload_conf() {
+    # Invoke pg_ctl
+    if ! runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload"; then
+        ocf_exit_reason "Can't reload configuration file."
+        return 1
+    fi
+
+    ocf_log info "Reload configuration file."
+    return 0
+}
+
+# user_recovery_conf: print the optional recovery settings to stdout.
+# archive_cleanup_command and recovery_end_command are emitted only when the
+# user defined them; primary_slot_name only when replication slots are in use.
+user_recovery_conf() {
+    local nodename_tmp
+
+    [ -z "$OCF_RESKEY_archive_cleanup_command" ] ||
+        echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'"
+
+    [ -z "$OCF_RESKEY_recovery_end_command" ] ||
+        echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'"
+
+    if use_replication_slot; then
+        # Same name mangling as create_replication_slot_name: lower case,
+        # "." and "-" become "_".
+        nodename_tmp=$(echo "$NODENAME" | tr 'A-Z.-' 'a-z__')
+        echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}_$nodename_tmp'"
+    fi
+}
+
+# make_recovery_conf: (re)generate $RECOVERY_CONF for a standby.
+# Writes primary_conninfo, restore_command and recovery_target_timeline, then
+# either creates standby.signal (when $USE_STANDBY_SIGNAL is "true") or appends
+# standby_mode = 'on', and finally appends the output of user_recovery_conf.
+# Returns 0 on success, 1 when a file cannot be created as the database owner.
+make_recovery_conf() {
+    if ! runasowner "touch $RECOVERY_CONF"; then
+        ocf_exit_reason "Can't create recovery.conf."
+        return 1
+    fi
+
+    # Overwrite whatever the file contained before.
+    printf '%s\n' \
+        "primary_conninfo = 'host=${OCF_RESKEY_master_ip} port=${OCF_RESKEY_pgport} user=${OCF_RESKEY_repuser} application_name=${NODENAME} ${OCF_RESKEY_primary_conninfo_opt}'" \
+        "restore_command = '${OCF_RESKEY_restore_command}'" \
+        "recovery_target_timeline = 'latest'" > $RECOVERY_CONF
+
+    # USE_STANDBY_SIGNAL holds the command name "true" or "false".
+    if "${USE_STANDBY_SIGNAL}"; then
+        # create a standby.signal to start standby server.
+        if ! runasowner "touch ${OCF_RESKEY_pgdata}/standby.signal"; then
+            ocf_exit_reason "Can't create ${OCF_RESKEY_pgdata}/standby.signal."
+            return 1
+        fi
+    else
+        echo "standby_mode = 'on'" >> $RECOVERY_CONF
+    fi
+
+    user_recovery_conf >> $RECOVERY_CONF
+    ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}"
+    return 0
+}
+
+# change pgsql-status.
+# arg1:node, arg2: value
+# Updates the $PGSQL_STATUS_ATTR attribute of an online node when it differs
+# from arg2. Always returns 0.
+change_pgsql_status() {
+    local output
+
+    is_node_online $1 || return 0
+
+    output=$($CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null)
+    if [ "$output" = "$2" ]; then
+        return 0
+    fi
+
+    # If slave's disk is broken, RA cannot read PID file
+    # and misjudges the PostgreSQL as down while it is running.
+    # It causes overwriting of pgsql-status by Master because replication is still connected.
+    # Therefore STOP/UNKNOWN may only be overwritten by the node itself.
+    case "$output" in
+        STOP|UNKNOWN)
+            if [ "$1" != "$NODENAME" ]; then
+                ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited."
+                return 0
+            fi
+            ;;
+    esac
+
+    ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2."
+    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2"
+    return 0
+}
+
+# change pgsql-data-status.
+# arg1:node, arg2: value
+# Keeps rewriting the $PGSQL_DATA_STATUS_ATTR attribute of a known node until a
+# read-back returns arg2. Always returns 0.
+change_data_status() {
+    local output
+
+    node_exist $1 || return 0
+
+    while :
+    do
+        output=$($CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null)
+        # Stop as soon as the stored value is the requested one.
+        if [ "$output" = "$2" ]; then
+            break
+        fi
+        ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2."
+        exec_with_retry 0 exec_with_timeout 0 "$CRM_ATTR_FOREVER" -N $1 -n $PGSQL_DATA_STATUS_ATTR -v "$2"
+    done
+    return 0
+}
+
+# set master-score
+# arg1:node, arg2: score, arg3: resource
+# Rewrites the "master-<resource>" attribute of the node only when it already
+# has a value that differs from arg2. Always returns 0.
+set_master_score() {
+    local current_score
+
+    current_score=$($CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null)
+    if [ -z "$current_score" ] || [ "$current_score" = "$2" ]; then
+        return 0
+    fi
+
+    ocf_log info "Changing $3 master score on $1 : $current_score->$2."
+    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2"
+    return 0
+}
+
+# change master-score
+# arg1:node, arg2: score
+# Applies the score to an online node for every instance name the resource may
+# have. Returns 0, or 1 when set_master_score fails.
+change_master_score() {
+    local instance
+
+    is_node_online $1 || return 0
+
+    if ! echo $OCF_RESOURCE_INSTANCE | grep -q ":"; then
+        # If globally-unique=false and Pacemaker version is 1.1.8 or higher
+        # Master/Slave resource has no instance number
+        set_master_score $1 $2 ${RESOURCE_NAME} || return 1
+        return 0
+    fi
+
+    # If Pacemaker version is 1.0.x
+    # Walk over every clone instance number except my own.
+    instance=0
+    until [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; do
+        if [ "${RESOURCE_NAME}:${instance}" != "$OCF_RESOURCE_INSTANCE" ]; then
+            set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1
+        fi
+        instance=`expr $instance + 1`
+    done
+    return 0
+}
+
+# report_psql_error: log a failed psql invocation.
+# arg1: psql exit code, arg2: log level (default "err"), arg3: message
+# The message is logged at the given level; exit codes 1 and 3 additionally set
+# an exit reason, exit code 2 is only logged.
+report_psql_error()
+{
+    local rc
+    local loglevel
+    local message
+
+    rc=$1
+    loglevel=${2:-err}
+    message="$3"
+
+    ocf_log $loglevel "$message rc=$rc"
+    case "$rc" in
+        1)
+            ocf_exit_reason "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command."
+            ;;
+        2)
+            ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command."
+            ;;
+        3)
+            ocf_exit_reason "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command."
+            ;;
+    esac
+}
+
+#
+# timeout management function
+# arg1 : timeout in seconds, >= 0 (if arg1 is 0, OCF_RESKEY_crm_attr_timeout is used.)
+# arg2 : command
+# arg3 : command's args
+# Runs the command in the background and polls it once per second. When the
+# countdown reaches zero the command is killed with signal 9 and 1 is returned;
+# otherwise the command's own exit status is returned.
+exec_with_timeout() {
+    local func_pid
+    local count=$OCF_RESKEY_crm_attr_timeout
+    local rc
+
+    if [ "$1" -ne 0 ]; then
+        count=$1
+    fi
+    shift
+
+    # Deliberately $* and not "$@": callers pass a command together with its
+    # options as one word (e.g. "$CRM_ATTR_FOREVER"), which must be re-split.
+    $* &
+    func_pid=$!
+    # Give short-lived commands a moment to finish before the first check.
+    sleep .1
+
+    while kill -s 0 $func_pid >/dev/null 2>&1
+    do
+        sleep 1
+        count=$(expr $count - 1)
+        if [ $count -le 0 ]; then
+            ocf_exit_reason "\"$*\" (pid=$func_pid) timed out."
+            kill -s 9 $func_pid >/dev/null 2>&1
+            return 1
+        fi
+        ocf_log info "Waiting($count). \"$*\" (pid=$func_pid)."
+    done
+    # Collect the exit status of the finished command.
+    wait $func_pid
+}
+
+# retry command when command doesn't return 0
+# arg1 : count >= 0 (if arg1 is 0, the command is retried up to 86400 times,
+#        one second apart, i.e. for about one day)
+# arg2..argN : command and args
+# On success the command's stdout is printed verbatim (without the trailing
+# newlines removed by command substitution) and 0 is returned. After the last
+# failed attempt the command's exit status is returned.
+exec_with_retry() {
+    local count="86400"
+    local output
+    local rc
+
+    if [ "$1" -ne 0 ]; then
+        count=$1
+    fi
+    shift
+
+    while [ $count -gt 0 ]; do
+        # Deliberately $* and not "$@": callers pass a command together with
+        # its options as one word (e.g. "$CRM_ATTR_REBOOT"), which must be re-split.
+        output=`$*`
+        rc=$?
+        if [ $rc -eq 0 ]; then
+            # The output is data, not a format string: "%" and "\" in it must
+            # reach the caller unchanged.
+            printf '%s' "${output}"
+            return 0
+        fi
+        ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
+        count=`expr $count - 1`
+        sleep 1
+    done
+
+    ocf_exit_reason "giving up executing \"$*\""
+    return $rc
+}
+
+# is_node_online: succeed when the crm_mon XML lists node $1 with online="true".
+# The node name is matched case-insensitively.
+is_node_online() {
+    print_crm_mon |
+        grep -q -i "<node name=\"$1\" .* online=\"true\""
+}
+
+# node_exist: succeed when the crm_mon XML lists node $1 at all, whatever its
+# online state. The node name is matched case-insensitively.
+node_exist() {
+    print_crm_mon |
+        grep -q -i "<node name=\"$1\" .* online"
+}
+
+# check_binary2: verify that command $1 is available.
+# Returns 0 when have_binary finds it; otherwise sets an exit reason and
+# returns 1 (it does not exit).
+check_binary2() {
+    have_binary "$1" && return 0
+
+    ocf_exit_reason "Setup problem: couldn't find command: $1"
+    return 1
+}
+
+# check_config: check that configuration file $1 exists as a regular file.
+# Returns 0 when it does, 1 when it is missing during a probe (logged only),
+# 2 when it is missing otherwise (exit reason set).
+check_config() {
+    local rc=0
+
+    [ -f "$1" ] && return $rc
+
+    if ocf_is_probe; then
+        ocf_log info "Unable to read $1 during probe."
+        rc=1
+    else
+        ocf_exit_reason "Configuration file $1 doesn't exist"
+        rc=2
+    fi
+
+    return $rc
+}
+
+# validate_ocf_check_level_10: full validation of the environment and of the
+# resource parameters. Besides checking, it also
+#  - derives the socket directory from postgresql.conf where possible,
+#  - defines the replication related globals (attribute names, SQL statements,
+#    scores, file locations under $OCF_RESKEY_tmpdir),
+#  - adds or removes "include" directives in $OCF_RESKEY_config,
+#  - creates $OCF_RESKEY_tmpdir when replication is enabled.
+# Returns $OCF_SUCCESS or one of $OCF_ERR_INSTALLED, $OCF_ERR_CONFIGURED,
+# $OCF_ERR_PERM.
+# NOTE(review): judging by how its result is used below, ocf_version_cmp seems
+# to return 0 for "older", 1 for "equal", 2 for "newer" and 3 on error -
+# confirm against ocf-shellfuncs.
+validate_ocf_check_level_10() {
+ local version
+ local check_config_rc
+ local rep_mode_string
+ local recovery_conf_string
+ local socket_directories
+ local rc
+
+ version=`cat $OCF_RESKEY_pgdata/PG_VERSION`
+
+ if ! check_binary2 "$OCF_RESKEY_pgctl" ||
+ ! check_binary2 "$OCF_RESKEY_psql"; then
+ return $OCF_ERR_INSTALLED
+ fi
+
+ # check_config: 0 = file present, 1 = missing during probe, 2 = missing.
+ check_config "$OCF_RESKEY_config"
+ check_config_rc=$?
+ [ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED
+ if [ $check_config_rc -eq 0 ]; then
+ ocf_version_cmp "$version" "9.3"
+ if [ $? -eq 0 ]; then
+ # Older than 9.3: a single unix_socket_directory; used as the default
+ # only when socketdir is unset.
+ : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`}
+ else
+ # unix_socket_directories is used by PostgreSQL 9.3 or higher.
+ socket_directories=`get_pgsql_param unix_socket_directories`
+ if [ -n "$socket_directories" ]; then
+ # unix_socket_directories may have multiple socket directories and the pgsql RA can not know which directory is used for psql command.
+ # Therefore, the user must set OCF_RESKEY_socketdir explicitly.
+ if [ -z "$OCF_RESKEY_socketdir" ]; then
+ ocf_exit_reason "In PostgreSQL 9.3 or higher, socketdir can't be empty if you define unix_socket_directories in the postgresql.conf."
+ return $OCF_ERR_CONFIGURED
+ fi
+ fi
+ fi
+ fi
+
+ if ocf_is_probe; then
+ ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
+ else
+ if ! runasowner "test -w $OCF_RESKEY_pgdata"; then
+ ocf_exit_reason "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba"
+ return $OCF_ERR_PERM;
+ fi
+ fi
+
+ # Checks shared by every replication flavour (sync, async and slave).
+ if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
+ # The smaller of "$version" and "9.1" must be 9.1.
+ if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then
+ ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher."
+ return $OCF_ERR_INSTALLED
+ fi
+ ocf_version_cmp "$version" "12"
+ rc=$?
+ if [ $rc -eq 1 ]||[ $rc -eq 2 ]; then
+ # change the standby method for PostgreSQL 12 or later.
+ USE_STANDBY_SIGNAL=true
+ # change the path to recovery.conf because it causes a PostgreSQL start error.
+ RECOVERY_CONF=${OCF_RESKEY_tmpdir}/recovery.conf
+ if [ $check_config_rc -eq 0 ]; then
+ # adding recovery parameters to postgresql.conf.
+ recovery_conf_string="include '$RECOVERY_CONF' # added by pgsql RA"
+ if ! grep -q "^[[:space:]]*$recovery_conf_string" $OCF_RESKEY_config; then
+ ocf_log info "adding include directive $recovery_conf_string into $OCF_RESKEY_config"
+ echo "$recovery_conf_string" >> $OCF_RESKEY_config
+ fi
+ fi
+ fi
+ if [ ! -n "$OCF_RESKEY_master_ip" ]; then
+ ocf_exit_reason "master_ip can't be empty."
+ return $OCF_ERR_CONFIGURED
+ fi
+ fi
+
+ # Master/Slave replication (rep_mode sync or async): define the globals
+ # the rest of the agent works with.
+ if is_replication; then
+ REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf
+ PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock
+ XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note
+
+ CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
+ CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
+ CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource"
+
+ CAN_NOT_PROMOTE="-INFINITY"
+ CAN_PROMOTE="100"
+ PROMOTE_ME="1000"
+
+ CHECK_MS_SQL="select pg_is_in_recovery()"
+ CHECK_SYNCHRONOUS_STANDBY_NAMES_SQL="show synchronous_standby_names"
+ # The WAL position functions were renamed in PostgreSQL 10.
+ ocf_version_cmp "$version" "10"
+ rc=$?
+ if [ $rc -eq 1 ]||[ $rc -eq 2 ]; then
+ CHECK_XLOG_LOC_SQL="select pg_last_wal_replay_lsn(),pg_last_wal_receive_lsn()"
+ else
+ CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()"
+ fi
+ CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication"
+
+ PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status"
+ PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status"
+ PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc"
+ PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline"
+
+ # Node names are compared in lower case throughout the agent.
+ NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'`
+ RE_CONTROL_SLAVE="false"
+
+ if ! ocf_is_ms; then
+ ocf_exit_reason "Replication(rep_mode=async or sync) requires Master/Slave configuration."
+ return $OCF_ERR_CONFIGURED
+ fi
+ if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then
+ ocf_exit_reason "Invalid rep_mode : $OCF_RESKEY_rep_mode"
+ return $OCF_ERR_CONFIGURED
+ fi
+ if [ ! -n "$NODE_LIST" ]; then
+ ocf_exit_reason "node_list can't be empty."
+ return $OCF_ERR_CONFIGURED
+ fi
+ if [ $check_config_rc -eq 0 ]; then
+ # sync mode needs rep_mode.conf included from postgresql.conf;
+ # any other mode removes a previously added include line.
+ rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA"
+ if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
+ if ! grep -q "^[[:space:]]*$rep_mode_string" $OCF_RESKEY_config; then
+ ocf_log info "adding include directive into $OCF_RESKEY_config"
+ echo "$rep_mode_string" >> $OCF_RESKEY_config
+ fi
+ else
+ if grep -q "$rep_mode_string" $OCF_RESKEY_config; then
+ ocf_log info "deleting include directive from $OCF_RESKEY_config"
+ # Escape "/" so that the string can be used as a sed address.
+ rep_mode_string=`echo $rep_mode_string | sed -e 's|/|\\\\/|g'`
+ sed -i "/$rep_mode_string/d" $OCF_RESKEY_config
+ fi
+ fi
+ fi
+ if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then
+ ocf_exit_reason "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba"
+ return $OCF_ERR_PERM
+ fi
+ fi
+
+ if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
+ if ocf_is_ms; then
+ ocf_exit_reason "Replication(rep_mode=slave) does not support Master/Slave configuration."
+ return $OCF_ERR_CONFIGURED
+ fi
+ fi
+
+ if use_replication_slot; then
+ ocf_version_cmp "$version" "9.4"
+ rc=$?
+ if [ $rc -eq 0 ]||[ $rc -eq 3 ]; then
+ ocf_exit_reason "Replication slot needs PostgreSQL 9.4 or higher."
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ # Reject any character outside of a-z, 0-9 and "_".
+ echo "$OCF_RESKEY_replication_slot_name" | grep -q -e '[^a-z0-9_]'
+ if [ $? -eq 0 ]; then
+ ocf_exit_reason "Invalid replication_slot_name($OCF_RESKEY_replication_slot_name). only use lower case letters, numbers, and the underscore character."
+ return $OCF_ERR_CONFIGURED
+ fi
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# Validate most critical parameters
+# Checks that the database owner exists and that monitor_user and
+# monitor_password are either both set or both empty. When OCF_CHECK_LEVEL is
+# 10 the full validate_ocf_check_level_10 is run as well, and a failure there
+# terminates the agent with that code.
+# Returns $OCF_SUCCESS, $OCF_ERR_INSTALLED or $OCF_ERR_CONFIGURED.
+pgsql_validate_all() {
+    local rc
+
+    if ! getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1; then
+        ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
+        return $OCF_ERR_INSTALLED;
+    fi
+
+    if [ -n "$OCF_RESKEY_monitor_user" ] && [ -z "$OCF_RESKEY_monitor_password" ]; then
+        ocf_exit_reason "monitor password can't be empty"
+        return $OCF_ERR_CONFIGURED
+    fi
+
+    if [ -z "$OCF_RESKEY_monitor_user" ] && [ -n "$OCF_RESKEY_monitor_password" ]; then
+        ocf_exit_reason "monitor_user has to be set if monitor_password is set"
+        return $OCF_ERR_CONFIGURED
+    fi
+
+    # An unset or empty OCF_CHECK_LEVEL counts as level 0, so the numeric test
+    # never sees an empty operand.
+    if [ "${OCF_CHECK_LEVEL:-0}" -eq 10 ]; then
+        validate_ocf_check_level_10
+        rc=$?
+        [ $rc -ne "$OCF_SUCCESS" ] && exit $rc
+    fi
+
+    return $OCF_SUCCESS
+}
+
+
+#
+# Check if we need to create a log file
+#
+# arg1: path of the log file. A missing file is created and handed over to
+# $OCF_RESKEY_pgdba and that user's primary group.
+# Returns 0 when $OCF_RESKEY_pgdba can write to the file, 1 otherwise.
+
+check_log_file() {
+    if [ ! -e "$1" ]; then
+        touch $1 > /dev/null 2>&1
+        chown $OCF_RESKEY_pgdba:$(getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4) $1
+    fi
+
+    #Check if $OCF_RESKEY_pgdba can write to the log file
+    runasowner "test -w $1" || return 1
+
+    return 0
+}
+
+#
+# Check if we need to create stats temp directory in tmpfs
+#
+# Reads stats_temp_directory from the PostgreSQL configuration. A relative path
+# is taken relative to $OCF_RESKEY_pgdata. A missing directory is created, given
+# to $OCF_RESKEY_pgdba and set to mode 700; any failure on the way exits with
+# $OCF_ERR_PERM.
+
+check_stat_temp_directory() {
+    local stats_temp
+
+    stats_temp=$(get_pgsql_param stats_temp_directory)
+
+    # Nothing configured: nothing to do.
+    [ -n "$stats_temp" ] || return
+
+    # No leading "/" means a path relative to the data directory.
+    case "$stats_temp" in
+        /*) ;;
+        *) stats_temp="$OCF_RESKEY_pgdata/$stats_temp" ;;
+    esac
+
+    [ ! -d "$stats_temp" ] || return
+
+    mkdir -p "$stats_temp" || {
+        ocf_exit_reason "Can't create directory $stats_temp"
+        exit $OCF_ERR_PERM
+    }
+
+    chown $OCF_RESKEY_pgdba: "$stats_temp" || {
+        ocf_exit_reason "Can't change ownership for $stats_temp"
+        exit $OCF_ERR_PERM
+    }
+
+    chmod 700 "$stats_temp" || {
+        ocf_exit_reason "Can't change permissions for $stats_temp"
+        exit $OCF_ERR_PERM
+    }
+}
+
+#
+# Check socket directory
+#
+# A missing $OCF_RESKEY_socketdir is created, given to $OCF_RESKEY_pgdba and
+# that user's primary group, and set to mode 2775. An existing directory is
+# probed by creating (and removing) a test file as $OCF_RESKEY_pgdba.
+# Any failure exits with $OCF_ERR_PERM.
+check_socket_dir() {
+    if [ -d "$OCF_RESKEY_socketdir" ]; then
+        # Directory exists: make sure the database owner can create files in it.
+        if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then
+            ocf_exit_reason "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir"
+            exit $OCF_ERR_PERM
+        fi
+        rm $OCF_RESKEY_socketdir/test.$$
+        return
+    fi
+
+    mkdir "$OCF_RESKEY_socketdir" || {
+        ocf_exit_reason "Can't create directory $OCF_RESKEY_socketdir"
+        exit $OCF_ERR_PERM
+    }
+
+    chown $OCF_RESKEY_pgdba:$(getent passwd \
+        $OCF_RESKEY_pgdba | cut -d ":" -f 4) "$OCF_RESKEY_socketdir" || {
+        ocf_exit_reason "Can't change ownership for $OCF_RESKEY_socketdir"
+        exit $OCF_ERR_PERM
+    }
+
+    chmod 2775 "$OCF_RESKEY_socketdir" || {
+        ocf_exit_reason "Can't change permissions for $OCF_RESKEY_socketdir"
+        exit $OCF_ERR_PERM
+    }
+}
+
+# print_crm_mon: print the cluster status as XML, followed by a newline.
+# The output of "crm_mon -1" is fetched once and kept in CRM_MON_OUTPUT; later
+# calls in the same shell reuse it. The XML option is chosen from
+# $OCF_RESKEY_crm_feature_set: "--output-as=xml" when the feature set is unset
+# or compares as newer than 3.1.0, otherwise "--as-xml". When the feature set
+# compares as equal to 3.2.0, "--output-as=xml" is tried first and "--as-xml"
+# is used if it fails.
+print_crm_mon() {
+    if [ -z "$CRM_MON_OUTPUT" ]; then
+        ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.1.0"
+        res=$?
+        if [ -z "$OCF_RESKEY_crm_feature_set" ] || [ $res -eq 2 ]; then
+            XMLOPT="--output-as=xml"
+            ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.2.0"
+            if [ $? -eq 1 ]; then
+                crm_mon -1 $XMLOPT >/dev/null 2>&1
+                if [ $? -ne 0 ]; then
+                    XMLOPT="--as-xml"
+                fi
+            fi
+        else
+            XMLOPT="--as-xml"
+        fi
+        CRM_MON_OUTPUT=`exec_with_retry 0 crm_mon -1 $XMLOPT`
+    fi
+    # The XML is data, not a format string: "%" and "\" in it must be printed
+    # unchanged.
+    printf '%s\n' "${CRM_MON_OUTPUT}"
+}
+
+#
+# 'main' starts here...
+#
+
+
+# Exactly one argument, the action name, is expected.
+if [ $# -ne 1 ]
+then
+ usage
+ exit $OCF_ERR_GENERIC
+fi
+
+# Globals derived from the resource parameters.
+PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
+BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label
+# Resource name without a clone instance suffix (":<n>").
+RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1`
+PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status"
+# Defaults; validate_ocf_check_level_10 overrides both for PostgreSQL 12 or later.
+RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf
+NODENAME=$(ocf_local_nodename | tr '[A-Z]' '[a-z]')
+USE_STANDBY_SIGNAL=false
+
+# Actions that must work without any validation.
+case "$1" in
+ methods) pgsql_methods
+ exit $?;;
+
+ meta-data) meta_data
+ exit $OCF_SUCCESS;;
+esac
+
+# Every action except validate-all forces the full (level 10) validation.
+[ "$__OCF_ACTION" != "validate-all" ] && OCF_CHECK_LEVEL=10
+pgsql_validate_all
+rc=$?
+
+[ "$1" = "validate-all" ] && exit $rc
+
+# Validation failed: stop still reports success and monitor/status report
+# "not running"; every other action fails with the validation code.
+if [ $rc -ne 0 ]
+then
+ case "$1" in
+ stop) if is_replication; then
+ change_pgsql_status "$NODENAME" "UNKNOWN"
+ fi
+ exit $OCF_SUCCESS;;
+ monitor) exit $OCF_NOT_RUNNING;;
+ status) exit $OCF_NOT_RUNNING;;
+ *) exit $rc;;
+ esac
+fi
+
+US=`id -u -n`
+
+if [ $US != root -a $US != $OCF_RESKEY_pgdba ]
+then
+ ocf_exit_reason "$0 must be run as root or $OCF_RESKEY_pgdba"
+ exit $OCF_ERR_GENERIC
+fi
+
+# make psql command options
+# With a monitor user the credentials are passed through the PGUSER and
+# PGPASSWORD environment variables; otherwise -U selects the database owner.
+if [ -n "$OCF_RESKEY_monitor_user" ]; then
+ PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
+ PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
+ psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
+else
+ psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
+fi
+
+# An explicit host wins over the socket directory.
+if [ -n "$OCF_RESKEY_pghost" ]; then
+ psql_options="$psql_options -h $OCF_RESKEY_pghost"
+else
+ if [ -n "$OCF_RESKEY_socketdir" ]; then
+ psql_options="$psql_options -h $OCF_RESKEY_socketdir"
+ fi
+fi
+
+if [ -n "$OCF_RESKEY_pgport" ]; then
+ export PGPORT=$OCF_RESKEY_pgport
+fi
+
+# Append the PostgreSQL library directory to the library search path.
+if [ -n "$OCF_RESKEY_pglibs" ]; then
+ if [ -n "$LD_LIBRARY_PATH" ]; then
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$OCF_RESKEY_pglibs
+ else
+ export LD_LIBRARY_PATH=$OCF_RESKEY_pglibs
+ fi
+fi
+
+
+# What kind of method was invoked?
+case "$1" in
+ status) if pgsql_status
+ then
+ ocf_log info "PostgreSQL is up"
+ exit $OCF_SUCCESS
+ else
+ ocf_log info "PostgreSQL is down"
+ exit $OCF_NOT_RUNNING
+ fi;;
+
+ monitor) pgsql_monitor
+ exit $?;;
+
+ start) pgsql_start
+ exit $?;;
+
+ promote) pgsql_promote
+ exit $?;;
+
+ demote) pgsql_demote
+ exit $?;;
+
+ notify) pgsql_notify
+ exit $?;;
+
+ stop) pgsql_stop
+ exit $?;;
+ *)
+ exit $OCF_ERR_UNIMPLEMENTED;;
+esac