diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/Makefile.am | 27 | ||||
-rw-r--r-- | doc/README.webapps | 143 | ||||
-rw-r--r-- | doc/dev-guides/ra-dev-guide-docinfo.xml | 89 | ||||
-rw-r--r-- | doc/dev-guides/ra-dev-guide.asc | 2072 | ||||
-rw-r--r-- | doc/dev-guides/writing-python-agents.md | 90 | ||||
-rw-r--r-- | doc/man/Makefile.am | 263 | ||||
-rwxr-xr-x | doc/man/mkappendix.sh | 18 | ||||
-rw-r--r-- | doc/man/ra2refentry.xsl | 649 | ||||
-rwxr-xr-x | doc/man/ralist.sh | 9 |
9 files changed, 3360 insertions, 0 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..dd9644b --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,27 @@ +# +# doc: Linux-HA resource agents +# +# Copyright (C) 2009 Florian Haas +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + +SUBDIRS = man + +MAINTAINERCLEANFILES = Makefile.in + +EXTRA_DIST = $(doc_DATA) + +doc_DATA = README.webapps diff --git a/doc/README.webapps b/doc/README.webapps new file mode 100644 index 0000000..bd9f534 --- /dev/null +++ b/doc/README.webapps @@ -0,0 +1,143 @@ +Monitoring web applications with the Apache RA + +One of the typical uses of apache is as an interface to the one or +the other kind of web application. It could be expressed thus in +terms of a resource group: + + IP address + apache + web_app + +where web_app is a JSP application (tomcat, geronimo) or similar. + +Rumour has it that the web applications suffer from occasional +instability which may make them an administration nightmare. But +the typical remedy is simply an application restart. + +How do we increase availability in this situation? + +The web applications are most commonly represented as one or more +processes in a UNIX environment. The aforementioned instability +is most commonly not reflected in the process state. Hence, +checking the process status makes us no wiser. 
What could help, +though, is probing the application just as our unhappy user +does---through the web interface. We can ask the application +developers to provide a URL which should exercise the application +and then provide predictable output. + +Now, given our generic resource group and the failed web +application, which we established using a http client, we have +the following situation: + + IP address + apache FAILED + web_app + +Some might argue that it's not apache that is the culprit or has +failed, but this nevertheless should serve our purpose well. The +cluster will stop web_app and apache and then start them, either +on the same node or elsewhere. There's an extra apache restart +which was not needed, but then again it cannot really hurt. + +What to monitor? + +Choose carefully the URL to monitor. It should probe exactly what +is further up in the resource group, no more and no less. In +other words, if you have a database backend running elsewhere, it +would be of no use to specify a URL which depends on the +database. You should monitor only what is within reach. + +Configuration and usage + +It is possible to configure the monitoring either through CIB or +using an extra configuration file. If your monitoring spec +consists only of a URL and a regular expression to be matched in +the output, then something like this should suffice: + +primitive apache_a1 ocf:heartbeat:apache \ + params configfile="/apps/a1.conf" \ + op monitor interval=120s timeout=60s start-delay=120s \ + OCF_CHECK_LEVEL=10 testurl="/webapp1_mon" testregex="This application is alive" + +The testurl parameter is where we connect and the testregex is +what we should look for. The OCF_CHECK_LEVEL must be set to "10". +Note that testurl specifies a URL which is relative to where the +apache listens for connections. Obviously, this should be +preferred to specifying the full URL. 
+ +It is important to set start-delay to a value larger than the +time needed to start the web application (the next resource). If +we don't, then the first monitor operation is likely to fail. + +In case you need more complex configuration, it can be set +in an extra configuration file: + +primitive apache_a1 ocf:heartbeat:apache \ + params configfile="/apps/a1.conf" testconffile="/apps/webmon.cf" \ + op monitor ... OCF_CHECK_LEVEL=10 + +/etc/apache2/webmon.cf: + +test webapp1 +url /webapp1_mon +match This application is alive +client curl +end + +This test configuration is equivalent to the first one, it's just +that in the latter we want to use curl(1) as an http client +instead of wget(1). + +Another example: + +test webapp1 +url /webapp1_mon +match This application is alive +client curl +client_opts --header 'Host: www.webapp1.megacorp.com' +end + +Here we use the curl's --header option to specify the virtual +host we want to talk to. + +It is also possible to set the credentials using the "user" and +"password" keywords. + +The configuration file may contain more than one test definition +which is handy in case one should monitor more than one web +application. In that case you should specify the test name in the +CIB: + +primitive apache_common ocf:heartbeat:apache \ + params configfile="/apps/httpd.conf" testconffile="/apps/webmon.cf" \ + op monitor ... OCF_CHECK_LEVEL=10 testname="a1" \ + op monitor ... OCF_CHECK_LEVEL=10 testname="b1" + +The apache OCF RA supports wget(1) (the default) and curl(1) http +clients. If neither will do, then you can specify your own using +the client and client_opts keywords. Your client must allow URL +as the last parameter and it must dump output from the web server +to stdout. + +All configuration file keywords: + +test The name of the test. +url The url to test. If it doesn't start with http, it's + considered to be relative to the apache Listen directive. +match The regular expression to match. 
+user Username to authenticate with. +password Password to authenticate with. +client The http client. +client_opts Options for the http client. +end Marks the end of the test definition. +# Comment. May be used only at the start of line. + +Notes + +We could support more depth levels, but it is not clear if +anybody really needs that. Different check levels could be +defined as different monitor operations. + +In case you are using the external configuration file, don't +forget to replicate it to all cluster members and to keep it +synchronized. diff --git a/doc/dev-guides/ra-dev-guide-docinfo.xml b/doc/dev-guides/ra-dev-guide-docinfo.xml new file mode 100644 index 0000000..a003fc3 --- /dev/null +++ b/doc/dev-guides/ra-dev-guide-docinfo.xml @@ -0,0 +1,89 @@ +<author> + <firstname>Florian</firstname> + <surname>Haas</surname> + <affiliation> + <orgname>hastexo</orgname> + </affiliation> + <email>florian.haas@hastexo.com</email> +</author> +<collab> + <firstname>John</firstname> + <surname>Shi</surname> + <affiliation> + <orgname>SUSE</orgname> + </affiliation> + <contrib>Original ocft README</contrib> + <email>jshi@suse.com</email> +</collab> +<collab> + <firstname>Dejan</firstname> + <surname>Muhamedagic</surname> + <affiliation> + <orgname>SUSE</orgname> + </affiliation> + <contrib>ocft documentation rewrite</contrib> + <email>dmuhamedagic@suse.com</email> +</collab> +<copyright> + <year>2010</year> + <year>2011</year> + <holder> + <ulink url="http://www.linbit.com">LINBIT HA-Solutions GmbH</ulink> + </holder> +</copyright> +<copyright> + <year>2011</year> + <holder> + <ulink url="http://www.novell.com">Novell, Inc.</ulink> + </holder> +</copyright> +<copyright> + <year>2011</year> + <holder> + <ulink url="http://www.suse.com">SUSE Linux GmbH</ulink> + </holder> +</copyright> +<copyright> + <year>2011</year> + <holder> + <ulink url="http://www.hastexo.com">hastexo Professional Services GmbH</ulink> + </holder> +</copyright> +<legalnotice> + <title>License 
information</title> + <para>The text of and illustrations in this document are licensed + under a Creative Commons Attribution–Share Alike 3.0 Unported + license ("CC-BY-SA").</para> + <itemizedlist> + <listitem> + <para>A summary of CC-BY-SA is available at <ulink + url="http://creativecommons.org/licenses/by-sa/3.0/"/>.</para> + </listitem> + <listitem> + <para>The full license text is available at <ulink + url="http://creativecommons.org/licenses/by-sa/3.0/legalcode"/>.</para> + </listitem> + <listitem> + <para>In accordance with CC-BY-SA, if you distribute this document + or an adaptation of it, you must provide the URL for the original + version.</para> + </listitem> + </itemizedlist> +</legalnotice> +<revhistory> + <revision> + <revnumber>1.0.2</revnumber> + <date>November 18, 2011</date> + <authorinitials>FGH</authorinitials> + </revision> + <revision> + <revnumber>1.0.1</revnumber> + <date>January 3, 2011</date> + <authorinitials>FGH</authorinitials> + </revision> + <revision> + <revnumber>1.0.0</revnumber> + <date>December 13, 2010</date> + <authorinitials>FGH</authorinitials> + </revision> +</revhistory> diff --git a/doc/dev-guides/ra-dev-guide.asc b/doc/dev-guides/ra-dev-guide.asc new file mode 100644 index 0000000..7a788b6 --- /dev/null +++ b/doc/dev-guides/ra-dev-guide.asc @@ -0,0 +1,2072 @@ += The OCF Resource Agent Developer's Guide + +== Introduction + +This document is to serve as a guide and reference for all developers, +maintainers, and contributors working on OCF (Open Cluster Framework) +compliant cluster resource agents. It explains the anatomy and general +functionality of a resource agent, illustrates the resource agent API, +and provides valuable hints and tips to resource agent authors. + +=== What is a resource agent? + +A resource agent is an executable that manages a cluster resource. No +formal definition of a cluster resource exists, other than "anything a +cluster manages is a resource." 
Cluster resources can be as diverse as +IP addresses, file systems, database services, and entire virtual +machines -- to name just a few examples. + +=== Who or what uses a resource agent? + +Any Open Cluster Framework (OCF) compliant cluster management +application is capable of managing resources using the resource agents +described in this document. At the time of writing, two OCF compliant +cluster management applications exist for the Linux platform: + +* _Pacemaker_, a cluster manager supporting both the Corosync and + Heartbeat cluster messaging frameworks. Pacemaker evolved out of the + Linux-HA project. +* _RGmanager_, the cluster manager bundled in Red Hat Cluster + Suite. It supports the Corosync cluster messaging framework + exclusively. + +=== Which language is a resource agent written in? + +An OCF compliant resource agent can be implemented in _any_ +programming language. The API is not language specific. However, most +resource agents are implemented as shell scripts, which is why this +guide primarily uses example code written in shell language. + +=== Is there a naming convention? + +Yes! We have agreed to the following convention for resource agent +names: Please name resource agents using lower case letters, with +words separated by dashes (+example-agent-name+). + +Existing agents may or may not follow this convention, but it is the +intention to make sure future agents follow this rule. + +== API definitions + +=== Environment variables + +A resource agent receives all configuration information about the +resource it manages via environment variables. The names of these +environment variables are always the name of the resource parameter, +prefixed with +OCF_RESKEY_+. For example, if the resource has an +ip+ +parameter set to +192.168.1.1+, then the resource agent will have +access to an environment variable +OCF_RESKEY_ip+ holding that value. 
+ +For any resource parameter that is not required to be set by the user +-- that is, its parameter definition in the resource agent metadata +does not specify +required="true"+ -- then the resource agent must + +* Provide a reasonable default. This should be advertised in the + metadata. By convention, the resource agent uses a variable named + +OCF_RESKEY_<parametername>_default+ that holds this default. +* Alternatively, cater correctly for the value being empty. + +In addition, the cluster manager may also support _meta_ resource +parameters. These do not apply directly to the resource configuration, +but rather specify _how_ the cluster resource manager is expected to manage +the resource. For example, the Pacemaker cluster manager uses the ++target-role+ meta parameter to specify whether the resource should be +started or stopped. + +Meta parameters are passed into the resource agent in the ++OCF_RESKEY_CRM_meta_+ namespace, with any hyphens converted to +underscores. Thus, the +target-role+ attribute maps to an environment +variable named +OCF_RESKEY_CRM_meta_target_role+. + +The <<_script_variables>> section contains other system environment +variables. + +=== Actions + +Any resource agent must support one command-line argument which +specifies the action the resource agent is about to execute. The +following actions must be supported by any resource agent: + +* +start+ -- starts the resource. +* +stop+ -- shuts down the resource. +* +monitor+ -- queries the resource for its state. +* +meta-data+ -- dumps the resource agent metadata. + +In addition, resource agents may optionally support the following +actions: + +* +promote+ -- turns a resource into the +Master+ role (Master/Slave + resources only). +* +demote+ -- turns a resource into the +Slave+ role (Master/Slave + resources only). +* +migrate_to+ and +migrate_from+ -- implement live migration of + resources. +* +validate-all+ -- validates a resource's configuration. 
+* +usage+ or +help+ -- displays a usage message when the resource + agent is invoked from the command line, rather than by the cluster + manager. +* +notify+ -- inform resource about changes in state of other clones. +* +status+ -- historical (deprecated) synonym for +monitor+. + +=== Timeouts + +Action timeouts are enforced outside the resource agent proper. It is +the cluster manager's responsibility to monitor how long a resource +agent action has been running, and terminate it if it does not meet +its completion deadline. Thus, resource agents need not themselves +check for any timeout expiry. + +Resource agents can, however, _advise_ the user of sensible timeout +values (which, when correctly set, will be duly enforced by the +cluster manager). See <<_metadata,the following section>> for details +on how a resource agent advertises its suggested timeouts. + +=== Metadata + +Every resource agent must describe its own purpose and supported +parameters in a set of XML metadata. This metadata is used by cluster +management applications for on-line help, and resource agent man pages +are generated from it as well. The following is a fictitious set of +metadata from an imaginary resource agent: + +[source,xml] +-------------------------------------------------------------------------- +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="foobar" version="0.1"> + <version>1.0</version> + <longdesc lang="en"> +This is a fictitious example resource agent written for the +OCF Resource Agent Developers Guide. 
+ </longdesc> + <shortdesc lang="en">Example resource agent + for budding OCF RA developers</shortdesc> + <parameters> + <parameter name="eggs" unique="0" required="1"> + <longdesc lang="en"> + Number of eggs, an example numeric parameter + </longdesc> + <shortdesc lang="en">Number of eggs</shortdesc> + <content type="integer"/> + </parameter> + <parameter name="superfrobnicate" unique="0" required="0"> + <longdesc lang="en"> + Enable superfrobnication, an example boolean parameter + </longdesc> + <shortdesc lang="en">Enable superfrobnication</shortdesc> + <content type="boolean" default="false"/> + </parameter> + <parameter name="datadir" unique="0" required="1"> + <longdesc lang="en"> + Data directory, an example string parameter + </longdesc> + <shortdesc lang="en">Data directory</shortdesc> + <content type="string"/> + </parameter> + </parameters> + <actions> + <action name="start" timeout="20" /> + <action name="stop" timeout="20" /> + <action name="monitor" timeout="20" + interval="10" depth="0" /> + <action name="notify" timeout="20" /> + <action name="reload" timeout="20" /> + <action name="migrate_to" timeout="20" /> + <action name="migrate_from" timeout="20" /> + <action name="meta-data" timeout="5" /> + <action name="validate-all" timeout="20" /> + </actions> +</resource-agent> +-------------------------------------------------------------------------- + +The +resource-agent+ element, of which there must only be one per +resource agent, defines the resource agent +name+ and +version+. The ++version+ element specifies the OCF version standard the metadata complies +with. + +The +longdesc+ and +shortdesc+ elements in +resource-agent+ provide a +long and short description of the resource agent's +functionality. While +shortdesc+ is a one-line description of what +the resource agent does and is usually used in terse listings, ++longdesc+ should give a full-blown description of the resource agent +in as much detail as possible. 
+ +The +parameters+ element describes the resource agent parameters, and +should hold any number of +parameter+ children -- one for each +parameter that the resource agent supports. + +Every +parameter+ should, like the +resource-agent+ as a whole, come +with a +shortdesc+ and a +longdesc+, and also a +content+ child that +describes the parameter's expected content. + +On the +content+ element, there may be four different attributes: + +* +type+ describes the parameter type (+string+, +integer+, or + +boolean+). If unset, +type+ defaults to +string+. + +* +required+ indicates whether setting the parameter is mandatory + (+required="true"+) or optional (+required="false"+). + +* For optional parameters, it is customary to provide a sensible + default via the +default+ attribute. + +* Finally, the +unique+ attribute (allowed values: +true+ or +false+) + indicates that a specific value must be unique across the cluster, + for this parameter of this particular resource type. For example, a + highly available floating IP address is declared +unique+ -- as that + one IP address should run only once throughout the cluster, avoiding + duplicates. + +The +actions+ list defines the actions that the resource agent +advertises as supported. + +Every +action+ should list its own +timeout+ value. This is a +hint to the user what _minimal_ timeout should be configured for the +action. This is meant to cater for the fact that some resources are +quick to start and stop (IP addresses or filesystems, for example), +some may take several minutes to do so (such as databases). + +In addition, recurring actions (such as +monitor+) should also specify +a recommended minimum +interval+, which is the time between two +consecutive invocations of the same action. Like +timeout+, this value +does not constitute a default -- it is merely a hint for the user +which action interval to configure, at minimum. 
+ +== Return codes + +For any invocation, resource agents must exit with a defined return +code that informs the caller of the outcome of the invoked +action. The return codes are explained in detail in the following +subsections. + +=== +OCF_SUCCESS+ (0) + +The action completed successfully. This is the expected return code +for any successful +start+, +stop+, +promote+, +demote+, ++migrate_from+, +migrate_to+, +meta-data+, +help+, and +usage+ action. + +For +monitor+ (and its deprecated alias, +status+), however, a +modified convention applies: + +* For primitive (stateless) resources, +OCF_SUCCESS+ from +monitor+ + means that the resource is running. Non-running and gracefully + shut-down resources must instead return +OCF_NOT_RUNNING+. + +* For master/slave (stateful) resources, +OCF_SUCCESS+ from +monitor+ + means that the resource is running _in Slave mode_. Resources + running in Master mode must instead return +OCF_RUNNING_MASTER+, and + gracefully shut-down resources must instead return + +OCF_NOT_RUNNING+. + +=== +OCF_ERR_GENERIC+ (1) + +The action returned a generic error. A resource agent should use this +exit code only when none of the more specific error codes, defined +below, accurately describes the problem. + +The cluster resource manager interprets this exit code as a _soft_ +error. This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with ++OCF_ERR_GENERIC+ in-place -- usually by restarting the resource on +the same node. + +=== +OCF_ERR_ARGS+ (2) + +The resource’s configuration is not valid on this machine. E.g. it +refers to a location not found on the node. + +NOTE: The resource agent should not return this error when instructed +to perform an action that it does not support. Instead, under those +circumstances, it should return +OCF_ERR_UNIMPLEMENTED+. 
+ +=== +OCF_ERR_UNIMPLEMENTED+ (3) + +The resource agent was instructed to execute an action that the agent +does not implement. + +Not all resource agent actions are mandatory. +promote+, +demote+, ++migrate_to+, +migrate_from+, and +notify+, are all optional actions +which the resource agent may or may not implement. When a non-stateful +resource agent is misconfigured as a master/slave resource, for +example, then the resource agent should alert the user about this +misconfiguration by returning +OCF_ERR_UNIMPLEMENTED+ on the +promote+ +and +demote+ actions. + +=== +OCF_ERR_PERM+ (4) + +The action failed due to insufficient permissions. This may be due to +the agent not being able to open a certain file, to listen on a +specific socket, to write to a directory, or similar. + +The cluster resource manager interprets this exit code as a _hard_ +error. This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with +this error by restarting the resource on a different node (where the +permission problem may not exist). + +=== +OCF_ERR_INSTALLED+ (5) + +The action failed because a required component is missing on the node +where the action was executed. This may be due to a required binary +not being executable, or a vital configuration file being unreadable. + +The cluster resource manager interprets this exit code as a _hard_ +error. This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with +this error by restarting the resource on a different node (where the +required files or binaries may be present). + +=== +OCF_ERR_CONFIGURED+ (6) + +The action failed because the user misconfigured the resource. For +example, the user may have configured an alphanumeric string for a +parameter that really should be an integer. + +The cluster resource manager interprets this exit code as a _fatal_ +error. 
Since this is a configuration error that is present +cluster-wide, it would make no sense to recover such a resource on a +different node, let alone in-place. When a resource fails with this +error, the cluster manager will attempt to shut down the resource, and +wait for administrator intervention. + +=== +OCF_NOT_RUNNING+ (7) + +The resource was found not to be running. This is an exit code that +may be returned by the +monitor+ action exclusively. Note that this +implies that the resource has either _gracefully_ shut down, or has +never been started. + +If the resource is not running due to an error condition, the ++monitor+ action should instead return one of the +OCF_ERR_+ exit +codes or +OCF_FAILED_MASTER+. + +=== +OCF_RUNNING_MASTER+ (8) + +The resource was found to be running in the +Master+ role. This +applies only to stateful (Master/Slave) resources, and only to +their +monitor+ action. + +Note that there is no specific exit code for "running in slave +mode". This is because there is no functional distinction between a +primitive resource running normally, and a stateful resource running +as a slave. The +monitor+ action of a stateful resource running +normally in the +Slave+ role should simply return +OCF_SUCCESS+. + +=== +OCF_FAILED_MASTER+ (9) + +The resource was found to have failed in the +Master+ role. This +applies only to stateful (Master/Slave) resources, and only to their ++monitor+ action. + +The cluster resource manager interprets this exit code as a _soft_ +error. This means that unless specifically configured otherwise, the +resource manager will attempt to recover a resource which failed with ++$OCF_FAILED_MASTER+ in-place -- usually by demoting, stopping, +starting and then promoting the resource on the same node. + + +== Resource agent structure + +A typical (shell-based) resource agent contains standard structural +items, in the order as listed in this section. 
It describes the +expected behavior of a resource agent with respect to the various +actions it supports, using a fictitious resource agent named +foobar+ +as an example. + +=== Resource agent interpreter + +Any resource agent implemented as a script must specify its +interpreter using standard "shebang" (+#!+) header syntax. + +[source,bash] +-------------------------------------------------------------------------- +#!/bin/sh +-------------------------------------------------------------------------- + +If a resource agent is written in shell, specifying the generic shell +interpreter (+#!/bin/sh+) is generally preferred, though not +required. Resource agents declared as +/bin/sh+ compatible must not +use constructs native to a specific shell (such as, for example, ++${!variable}+ syntax native to +bash+). It is advisable to +occasionally run such resource agents through a sanitization utility +such as +checkbashisms+. + +It is considered a regression to introduce a patch that will make a +previously +sh+ compatible resource agent suitable only for +bash+, ++ksh+, or any other non-generic shell. It is, however, perfectly +acceptable for a new resource agent to explicitly define a specific +shell, such as +/bin/bash+, as its interpreter. + +=== Author and license information + +The resource agent should contain a comment listing the resource agent +author(s) and/or copyright holder(s), and stating the license that +applies to the resource agent: + +[source,bash] +-------------------------------------------------------------------------- +# +# Resource Agent for managing foobar resources. +# +# License: GNU General Public License (GPL) +# (c) 2008-2010 John Doe, Jane Roe, +# and Linux-HA contributors +-------------------------------------------------------------------------- + +When a resource agent refers to a license for which multiple versions +exist, it is assumed that the current version applies. 
+ +=== Initialization + +Any shell resource agent should source the +ocf-shellfuncs+ function +library. With the syntax below, this is done in terms of ++$OCF_FUNCTIONS_DIR+, which -- for testing purposes, and also for +generating documentation -- may be overridden from the command line. + +[source,bash] +-------------------------------------------------------------------------- +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +-------------------------------------------------------------------------- + +=== Functions implementing resource agent actions + +What follows next are the functions implementing the resource agent's +advertised actions. The individual actions are described in detail in +<<_resource_agent_actions>>. + +=== Execution block + +This is the part of the resource agent that actually executes when the +resource agent is invoked. It typically follows a fairly standard +structure: + +[source,bash] +-------------------------------------------------------------------------- +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in +meta-data) foobar_meta_data + exit $OCF_SUCCESS + ;; +usage|help) foobar_usage + exit $OCF_SUCCESS + ;; +esac + +# Anything other than meta-data and usage must pass validation +foobar_validate_all || exit $? + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start) foobar_start;; +stop) foobar_stop;; +status|monitor) foobar_monitor;; +promote) foobar_promote;; +demote) foobar_demote;; +notify) foobar_notify;; +reload) ocf_log info "Reloading..." + foobar_start + ;; +validate-all) ;; +*) foobar_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? 
+ +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" +exit $rc +-------------------------------------------------------------------------- + + +== Resource agent actions + +Each action is typically implemented in a separate function or method +in the resource agent. By convention, these are usually named ++<agent>_<action>+, so the function implementing the +start+ action in ++foobar+ would be named +foobar_start()+. + +As a general rule, whenever the resource agent encounters an error +that it is not able to recover, it is permitted to immediately exit, +throw an exception, or otherwise cease execution. Examples for this +include configuration issues, missing binaries, permission problems, +etc. It is not necessary to pass these errors up the call stack. + +It is the cluster manager's responsibility to initiate the appropriate +recovery action based on the user's configuration. The resource agent +should not guess at said configuration. + +=== +start+ action + +When invoked with the +start+ action, the resource agent must start +the resource if it is not yet running. This means that the agent must +verify the resource's configuration, query its state, and then start +it only if it is not running. A common way of doing this would be to +invoke the +validate_all+ and +monitor+ function first, as in the +following example: + +[source,bash] +-------------------------------------------------------------------------- +foobar_start() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # if resource is already running, bail out early + if foobar_monitor; then + ocf_log info "Resource is already running" + return $OCF_SUCCESS + fi + + # actually start up the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ... + + # After the resource has been started, check whether it started up + # correctly. 
If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # start up within the defined timeout, the cluster manager will + # consider the start action failed + while ! foobar_monitor; do + ocf_log debug "Resource has not started yet, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +=== +stop+ action + +When invoked with the +stop+ action, the resource agent must stop the +resource, if it is running. This means that the agent must verify the +resource configuration, query its state, and then stop it only if it +is currently running. A common way of doing this would be to invoke +the +validate_all+ and +monitor+ function first. It is important to +understand that +stop+ is a force operation -- the resource agent must +do everything in its power to shut down the resource, short of +rebooting the node or shutting it off. Consider the following example: + +[source,bash] +-------------------------------------------------------------------------- +foobar_stop() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + foobar_monitor + rc=$? + case "$rc" in + "$OCF_SUCCESS") + # Currently running. Normal, expected behavior. + ocf_log debug "Resource is currently running" + ;; + "$OCF_RUNNING_MASTER") + # Running as a Master. Need to demote before stopping. + ocf_log info "Resource is currently running as Master" + foobar_demote || \ + ocf_log warn "Demote failed, trying to stop anyway" + ;; + "$OCF_NOT_RUNNING") + # Currently not running. Nothing to do. + ocf_log info "Resource is already stopped" + return $OCF_SUCCESS + ;; + esac + + # actually shut down the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ... 
+ + # After the resource has been stopped, check whether it shut down + # correctly. If the resource stops asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # shut down within the defined timeout, the cluster manager will + # consider the stop action failed + while foobar_monitor; do + ocf_log debug "Resource has not stopped yet, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS + +} +-------------------------------------------------------------------------- + +NOTE: The expected exit code for a successful stop operation is ++$OCF_SUCCESS+, _not_ +$OCF_NOT_RUNNING+. + +IMPORTANT: A failed stop operation is a potentially dangerous +situation which the cluster manager will almost invariably try to +resolve by means of node fencing. In other words, the cluster manager +will forcibly evict from the cluster a node on which a stop operation +has failed. While this measure serves ultimately to protect data, it +does cause disruption to applications and their users. Thus, a +resource agent should make sure that it exits with an error only if +all avenues for proper resource shutdown have been exhausted. + +=== +monitor+ action + +The +monitor+ action queries the current status of a resource. It must +discern between three different states: + +* resource is currently running (return +$OCF_SUCCESS+); +* resource has stopped gracefully (return +$OCF_NOT_RUNNING+); +* resource has run into a problem and must be considered failed + (return the appropriate +$OCF_ERR_+ code to indicate the nature of the + problem). + + +[source,bash] +-------------------------------------------------------------------------- +foobar_monitor() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? 
+ + ocf_run frobnicate --test + + # This example assumes the following exit code convention + # for frobnicate: + # 0: running, and fully caught up with master + # 1: gracefully stopped + # any other: error + case "$?" in + 0) + rc=$OCF_SUCCESS + ocf_log debug "Resource is running" + ;; + 1) + rc=$OCF_NOT_RUNNING + ocf_log debug "Resource is not running" + ;; + *) + ocf_log err "Resource has failed" + exit $OCF_ERR_GENERIC + esac + + return $rc +} +-------------------------------------------------------------------------- + +Stateful (master/slave) resource agents may use a more elaborate +monitoring scheme where they can provide "hints" to the cluster +manager identifying which instance is best suited to assume the ++Master+ role. <<_specifying_a_master_preference>> explains the +details. + +NOTE: The cluster manager may invoke the +monitor+ action for a +_probe_, which is a test whether the resource is currently +running. Normally, the monitor operation would behave exactly the same +during a probe and a "real" monitor action. If a specific resource +does require special treatment for probes, however, the +ocf_is_probe+ +convenience function is available in the OCF shell functions library +for that purpose. + +=== +validate-all+ action + +The +validate-all+ action tests for correct resource agent +configuration and a working environment. +validate-all+ should exit +with one of the following return codes: + +* +$OCF_SUCCESS+ -- all is well, the configuration is valid and + usable. +* +$OCF_ERR_CONFIGURED+ -- the user has misconfigured the resource. +* +$OCF_ERR_INSTALLED+ -- the resource has possibly been configured + correctly, but a vital component is missing on the node where + +validate-all+ is being executed. +* +$OCF_ERR_PERM+ -- the resource is configured correctly and is not + missing any required components, but is suffering from a permission + issue (such as not being able to create a necessary file). 
+ ++validate-all+ is usually wrapped in a function that is not only +called when explicitly invoking the corresponding action, but also -- +as a sanity check -- from just about any other function. Therefore, +the resource agent author must keep in mind that the function may be +invoked during the +start+, +stop+, and +monitor+ operations, and also +during probes. + +Probes pose a separate challenge for validation. During a probe (when +the cluster manager may expect the resource _not_ to be running on the +node where the probe is executed), some required components may be +_expected_ to not be available on the affected node. For example, this +includes any shared data on storage devices not available for reading +during the probe. The +validate-all+ function may thus need to treat +probes specially, using the +ocf_is_probe+ convenience function: + +[source,bash] +-------------------------------------------------------------------------- +foobar_validate_all() { + # Test for configuration errors first + if ! ocf_is_decimal $OCF_RESKEY_eggs; then + ocf_log err "eggs is not numeric!" + exit $OCF_ERR_CONFIGURED + fi + + # Test for required binaries + check_binary frobnicate + + # Check for data directory (this may be on shared storage, so + # disable this test during probes) + if ! ocf_is_probe; then + if ! [ -d $OCF_RESKEY_datadir ]; then + ocf_log err "$OCF_RESKEY_datadir does not exist or is not a directory!" + exit $OCF_ERR_INSTALLED + fi + fi + + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +=== +meta-data+ action + +The +meta-data+ action dumps the resource agent metadata to standard +output. The output must follow the metadata format as specified in +<<_metadata>>. 
+ +[source,bash] +-------------------------------------------------------------------------- +foobar_meta_data() { + cat <<EOF +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="foobar" version="0.1"> + <version>1.0</version> + <longdesc lang="en"> +... +EOF +} +-------------------------------------------------------------------------- + +=== +promote+ action + +The +promote+ action is optional. It must only be supported by +_stateful_ resource agents, which means agents that discern between +two distinct _roles_: +Master+ and +Slave+. +Slave+ is functionally +identical to the +Started+ state in a stateless resource agent. Thus, +while a regular (stateless) resource agent only needs to implement ++start+ and +stop+, a stateful resource agent must also support the ++promote+ action to be able to make a transition between the +Started+ +(+Slave+) and +Master+ roles. + +[source,bash] +-------------------------------------------------------------------------- +foobar_promote() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # test the resource's current state + foobar_monitor + rc=$? + case "$rc" in + "$OCF_SUCCESS") + # Running as slave. Normal, expected behavior. + ocf_log debug "Resource is currently running as Slave" + ;; + "$OCF_RUNNING_MASTER") + # Already a master. Unexpected, but not a problem. + ocf_log info "Resource is already running as Master" + return $OCF_SUCCESS + ;; + "$OCF_NOT_RUNNING") + # Currently not running. Need to start before promoting. + ocf_log info "Resource is currently not running" + foobar_start + ;; + *) + # Failed resource. Let the cluster manager recover.
+ ocf_log err "Unexpected error, cannot promote" + exit $rc + ;; + esac + + # actually promote the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --master-mode || exit $OCF_ERR_GENERIC + + # After the resource has been promoted, check whether the + # promotion worked. If the resource promotion is asynchronous, the + # agent may spin on the monitor function here -- if the resource + # does not assume the Master role within the defined timeout, the + # cluster manager will consider the promote action failed. + while true; do + foobar_monitor + if [ $? -eq $OCF_RUNNING_MASTER ]; then + ocf_log debug "Resource promoted" + break + else + ocf_log debug "Resource still awaiting promotion" + sleep 1 + fi + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +=== +demote+ action + +The +demote+ action is optional. It must only be supported by +_stateful_ resource agents, which means agents that discern between +two distinct _roles_: +Master+ and +Slave+. +Slave+ is functionally +identical to the +Started+ state in a stateless resource agent. Thus, +while a regular (stateless) resource agent only needs to implement ++start+ and +stop+, a stateful resource agent must also support the ++demote+ action to be able to make a transition between the +Master+ +and +Started+ (+Slave+) roles. + +[source,bash] +-------------------------------------------------------------------------- +foobar_demote() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # test the resource's current state + foobar_monitor + rc=$? + case "$rc" in + "$OCF_RUNNING_MASTER") + # Running as master. Normal, expected behavior. + ocf_log debug "Resource is currently running as Master" + ;; + "$OCF_SUCCESS") + # Already running as slave. Nothing to do.
+ ocf_log debug "Resource is currently running as Slave" + return $OCF_SUCCESS + ;; + "$OCF_NOT_RUNNING") + # Currently not running. Getting a demote action + # in this state is unexpected. Exit with an error + # and let the cluster manager recover. + ocf_log err "Resource is currently not running" + exit $OCF_ERR_GENERIC + ;; + *) + # Failed resource. Let the cluster manager recover. + ocf_log err "Unexpected error, cannot demote" + exit $rc + ;; + esac + + # actually demote the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --unset-master-mode || exit $OCF_ERR_GENERIC + + # After the resource has been demoted, check whether the + # demotion worked. If the resource demotion is asynchronous, the + # agent may spin on the monitor function here -- if the resource + # does not assume the Slave role within the defined timeout, the + # cluster manager will consider the demote action failed. + while true; do + foobar_monitor + if [ $? -eq $OCF_RUNNING_MASTER ]; then + ocf_log debug "Resource still demoting" + sleep 1 + else + ocf_log debug "Resource demoted" + break + fi + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +=== +migrate_to+ action + +The +migrate_to+ action can serve one of two purposes: + +* Initiate a native _push_ type migration for the resource. In other + words, instruct the resource to move _to_ a specific node from the + node it is currently running on. The resource agent knows about its + destination node via the +$OCF_RESKEY_CRM_meta_migrate_target+ environment + variable. + +* Freeze the resource in a _freeze/thaw_ (also known as + _suspend/resume_) type migration. In this mode, the resource does + not need any information about its destination node at this point. 
+ +The example below illustrates a push type migration: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_to() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # if resource is not running, bail out early + if ! foobar_monitor; then + ocf_log err "Resource is not running" + exit $OCF_ERR_GENERIC + fi + + # actually migrate the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --migrate \ + --dest=$OCF_RESKEY_CRM_meta_migrate_target \ + || exit $OCF_ERR_GENERIC + ... + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +In contrast, a freeze/thaw type migration may implement its freeze +operation like this: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_to() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # if resource is not running, bail out early + if ! foobar_monitor; then + ocf_log err "Resource is not running" + exit $OCF_ERR_GENERIC + fi + + # actually freeze the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --freeze || exit $OCF_ERR_GENERIC + ... + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +=== +migrate_from+ action + +The +migrate_from+ action can serve one of two purposes: + +* Complete a native _push_ type migration for the resource. In other + words, check whether the migration has succeeded properly, and the + resource is running on the local node.
The resource agent knows + about the migration source via the + +$OCF_RESKEY_CRM_meta_migrate_source+ environment variable. + +* Thaw the resource in a _freeze/thaw_ (also known as + _suspend/resume_) type migration. In this mode, the resource usually + does not need any information about its source node at this point. + +The example below illustrates a push type migration: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_from() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # After the resource has been migrated, check whether it resumed + # correctly. If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # run within the defined timeout, the cluster manager will + # consider the migrate_from action failed + while ! foobar_monitor; do + ocf_log debug "Resource has not yet migrated, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +In contrast, a freeze/thaw type migration may implement its thaw +operation like this: + +[source,bash] +-------------------------------------------------------------------------- +foobar_migrate_from() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # actually thaw the resource here (make sure to immediately + # exit with an $OCF_ERR_ error code if anything goes seriously + # wrong) + ocf_run frobnicate --thaw || exit $OCF_ERR_GENERIC + + # After the resource has been migrated, check whether it resumed + # correctly. If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # run within the defined timeout, the cluster manager will + # consider the migrate_from action failed + while !
foobar_monitor; do + ocf_log debug "Resource has not yet migrated, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +=== +notify+ action + +With notifications, instances of clones (and of master/slave +resources, which are an extended kind of clones) can inform each other +about their state. When notifications are enabled, certain actions on +any instance of a clone carry a +pre+ and +post+ notification. + +List of actions that trigger notifications: + +* start +* stop +* promote +* demote + +The cluster manager invokes the +notify+ operation on _all_ clone +instances. For +notify+ operations, additional environment variables +are passed into the resource agent during execution: + +* +$OCF_RESKEY_CRM_meta_notify_type+ -- the notification type (+pre+ + or +post+) + +* +$OCF_RESKEY_CRM_meta_notify_operation+ -- the operation (action) + that the notification is about (+start+, +stop+, +promote+, +demote+ + etc.) + +* +$OCF_RESKEY_CRM_meta_notify_start_uname+ -- node name of the node + where the resource is being started (+start+ notifications only) + +* +$OCF_RESKEY_CRM_meta_notify_stop_uname+ -- node name of the node + where the resource is being stopped (+stop+ notifications only) + +* +$OCF_RESKEY_CRM_meta_notify_master_uname+ -- node name of the node + where the resource currently _is in_ the Master role + +* +$OCF_RESKEY_CRM_meta_notify_promote_uname+ -- node name of the node + where the resource currently _is being promoted to_ the Master role + (+promote+ notifications only) + +* +$OCF_RESKEY_CRM_meta_notify_demote_uname+ -- node name of the node + where the resource currently _is being demoted to_ the Slave role + (+demote+ notifications only) + +Notifications come in particularly handy for master/slave resources +using a "pull" scheme, where the master is a publisher and the slave a +subscriber.
Since the master is obviously only available as such when +a promotion has occurred, the slaves can use a "pre-promote" +notification to configure themselves to subscribe to the right +publisher. + +Likewise, the subscribers may want to unsubscribe from the publisher +after it has relinquished its master status, and a "post-demote" +notification can be used for that purpose. + +Consider the example below to illustrate the concept. + +[source,bash] +-------------------------------------------------------------------------- +foobar_notify() { + local type_op + type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" + + ocf_log debug "Received $type_op notification." + case "$type_op" in + 'pre-promote') + ocf_run frobnicate --slave-mode \ + --master=$OCF_RESKEY_CRM_meta_notify_promote_uname \ + || exit $OCF_ERR_GENERIC + ;; + 'post-demote') + ocf_run frobnicate --unset-slave-mode || exit $OCF_ERR_GENERIC + ;; + esac + + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + +NOTE: A master/slave resource agent may support a _multi-master_ +configuration, where there is possibly more than one master at any +given time. If that is the case, then the ++$OCF_RESKEY_CRM_meta_notify_*_uname+ variables may each contain a +space-separated list of hostnames, rather than a single host name as +shown in the example. Under those circumstances the resource agent +would have to properly iterate over this list. + +== Script variables + +This section outlines variables typically available to resource agents, +primarily for convenience purposes. For additional variables +available while the agent is being executed, refer to +<<_environment_variables>> and <<_return_codes>>. + +=== +$OCF_RA_VERSION_MAJOR+ + +The major version number of the resource agent API that the cluster +manager is currently using.
+ +=== +$OCF_RA_VERSION_MINOR+ + +The minor version number of the resource agent API that the cluster +manager is currently using. + +=== +$OCF_ROOT+ + +The root of the OCF resource agent hierarchy. This should never be +changed by a resource agent. This is usually +/usr/lib/ocf+. + +=== +$OCF_FUNCTIONS_DIR+ + +The directory where the resource agent shell function library, ++ocf-shellfuncs+, resides. This is usually defined in terms of ++$OCF_ROOT+ and should never be changed by a resource agent. This +variable may, however, be overridden from the command line while +testing a new or modified resource agent. + +=== +$OCF_EXIT_REASON_PREFIX+ + +Used as a prefix when printing error messages from the resource agent. +Script functions use this automatically so no explicit use is required +for shell based scripts. + +=== +$OCF_RESOURCE_INSTANCE+ + +The resource instance name. For primitive (non-clone, non-stateful) +resources, this is simply the resource name. For clones and stateful +resources, this is the primitive name, followed by a colon and the +clone instance number (such as +p_foobar:0+). + +=== +$OCF_RESOURCE_TYPE+ + +The resource type of the current resource, e.g. IPaddr2. + +=== +$OCF_RESOURCE_PROVIDER+ + +The resource provider, e.g. heartbeat. This may not be in all cluster +managers of Resource Agent API version 1.0. + +=== +$__OCF_ACTION+ + +The currently invoked action. This is exactly the first command-line +argument that the cluster manager specifies when it invokes the +resource agent. + +=== +$__SCRIPT_NAME+ + +The name of the resource agent. This is exactly the base name of the +resource agent script, with leading directory names removed. + +=== +$HA_RSCTMP+ + +A temporary directory for use by resource agents. The system startup +sequence (on any LSB compliant Linux distribution) guarantees that +this directory is emptied on system startup, so this directory will +not contain any stale data after a node reboot.
+ +== Convenience functions + +=== Logging: +ocf_log+ + +Resource agents should use the +ocf_log+ function for logging +purposes. This convenient logging wrapper is invoked as follows: + +[source,bash] +-------------------------------------------------------------------------- +ocf_log <severity> "Log message" +-------------------------------------------------------------------------- + +It supports the following severity levels: + +* +debug+ -- for debugging messages. Most logging configurations + suppress this level by default. +* +info+ -- for informational messages about the agent's behavior or + status. +* +warn+ -- for warnings. This is for any messages which reflect + unexpected behavior that does _not_ constitute an unrecoverable + error. +* +err+ -- for errors. As a general rule, this logging level should + only be used immediately prior to an +exit+ with the appropriate + error code. +* +crit+ -- for critical errors. As with +err+, this logging level + should not be used unless the resource agent also exits with an + error code. Very rarely used. + +=== Testing for binaries: +have_binary+ and +check_binary+ + +A resource agent may need to test for the availability of a specific +executable. The +have_binary+ convenience function comes in handy +here: + +[source,bash] +-------------------------------------------------------------------------- +if ! have_binary frobnicate; then + ocf_log warn "Missing frobnicate binary, frobnication disabled!"
+fi +-------------------------------------------------------------------------- + +If a missing binary is a fatal problem for the resource, then the ++check_binary+ function should be used: + +[source,bash] +-------------------------------------------------------------------------- +check_binary frobnicate +-------------------------------------------------------------------------- + +Using +check_binary+ is a shorthand method for testing for the +existence (and executability) of the specified binary, and exiting +with +$OCF_ERR_INSTALLED+ if it cannot be found or executed. + +NOTE: Both +have_binary+ and +check_binary+ honor +$PATH+ when the +binary to test for is not specified as a full path. It is usually wise +to _not_ test for a full path, as binary installation paths may vary +by distribution or user policy. + +=== Executing commands and capturing their output: +ocf_run+ + +Whenever a resource agent needs to execute a command and capture its +output, it should use the +ocf_run+ convenience function, invoked as +in this example: + +[source,bash] +-------------------------------------------------------------------------- +ocf_run frobnicate --spam=eggs || exit $OCF_ERR_GENERIC +-------------------------------------------------------------------------- + +With the command specified above, the resource agent will invoke ++frobnicate --spam=eggs+ and capture its output and +exit code. If the exit code is nonzero (indicating an error), ++ocf_run+ logs the command output with the +err+ logging severity, and +the resource agent subsequently exits. If the exit code is zero +(indicating success), any command output will be logged with the +info+ +logging severity. + +If the resource agent wishes to ignore the output of a successful +command execution, it can use the +-q+ flag with +ocf_run+. In the +example below, +ocf_run+ will only log output if the command exit code +is nonzero.
+ +[source,bash] +-------------------------------------------------------------------------- +ocf_run -q frobnicate --spam=eggs || exit $OCF_ERR_GENERIC +-------------------------------------------------------------------------- + +Finally, if the resource agent wants to log the output of a command +with a nonzero exit code with a severity _other_ than error, it may do +so by adding the +-info+ or +-warn+ option to +ocf_run+: + +[source,bash] +-------------------------------------------------------------------------- +ocf_run -warn frobnicate --spam=eggs +-------------------------------------------------------------------------- + +=== Locks: +ocf_take_lock+ and +ocf_release_lock_on_exit+ + +Occasionally, there may be different resources of the same type in a +cluster configuration that should not execute actions in +parallel. When a resource agent needs to guard against parallel +execution on the same machine, it can use the +ocf_take_lock+ and ++ocf_release_lock_on_exit+ convenience functions: + +[source,bash] +-------------------------------------------------------------------------- +LOCKFILE=${HA_RSCTMP}/foobar +ocf_release_lock_on_exit $LOCKFILE + +foobar_start() { + ... + ocf_take_lock $LOCKFILE + ... +} +-------------------------------------------------------------------------- + ++ocf_take_lock+ attempts to acquire the designated +$LOCKFILE+. When +it is unavailable, it sleeps a random amount of time between 0 and 1 +seconds, and retries. +ocf_release_lock_on_exit+ releases the lock +file when the agent exits (for any reason). + +=== Testing for numerical values: +ocf_is_decimal+ + +Specifically for parameter validation, it can be helpful to test +whether a given value is numeric. The +ocf_is_decimal+ function exists +for that purpose: +-------------------------------------------------------------------------- +foobar_validate_all() { + if ! ocf_is_decimal $OCF_RESKEY_eggs; then + ocf_log err "eggs is not numeric!" + exit $OCF_ERR_CONFIGURED + fi + ... 
+} +-------------------------------------------------------------------------- + +=== Testing for boolean values: +ocf_is_true+ + +When a resource agent defines a boolean parameter, the value +for this parameter may be specified by the user as +0+/+1+, ++true+/+false+, or +on+/+off+. Since it is tedious to test for all +these values from within the resource agent, the agent should instead +use the +ocf_is_true+ convenience function: + +[source,bash] +-------------------------------------------------------------------------- +if ocf_is_true $OCF_RESKEY_superfrobnicate; then + ocf_run frobnicate --super +fi +-------------------------------------------------------------------------- + +NOTE: If +ocf_is_true+ is used against an empty or non-existent +variable, it always returns an exit code of +1+, which is equivalent +to +false+. + +=== Version comparison: +ocf_version_cmp+ + +A resource agent may want to check the version of software +installed. +ocf_version_cmp+ takes care of all the necessary +details. + +The return codes are + +* +0+ -- the first version is smaller (earlier) than the second +* +1+ -- the two versions are equal +* +2+ -- the first version is greater (later) than the second +* +3+ -- one of the arguments is not recognized as a version string + +The versions are allowed to contain digits, dots, and dashes. + +[source,bash] +-------------------------------------------------------------------------- +local v=`gooey --version` +ocf_version_cmp "$v" 12.0.8-1 +case $?
in + 0) ocf_log err "we do not support version $v, it is too old" + exit $OCF_ERR_INSTALLED + ;; + [12]) ;; # we can work with versions >= 12.0.8-1 + 3) ocf_log err "gooey produced version <$v>, too funky for me" + exit $OCF_ERR_INSTALLED + ;; +esac +-------------------------------------------------------------------------- + +=== Pseudo resources: +ha_pseudo_resource+ + +"Pseudo resources" are those where the resource agent in fact does not +actually start or stop something akin to a runnable process, but +merely executes a single action and then needs some form of tracing +whether that action has been executed or not. The +portblock+ resource +agent is an example of this. + +Resource agents for pseudo resources can use a convenience function, ++ha_pseudo_resource+, which makes use of _tracking files_ to keep tabs +on the status of a resource. If +foobar+ was designed to manage a +pseudo resource, then its +start+ action could look like this: + +[source,bash] +-------------------------------------------------------------------------- +foobar_start() { + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + # if resource is already running, bail out early + if foobar_monitor; then + ocf_log info "Resource is already running" + return $OCF_SUCCESS + fi + + # start the pseudo resource + ha_pseudo_resource ${OCF_RESOURCE_INSTANCE} start + + # After the resource has been started, check whether it started up + # correctly. If the resource starts asynchronously, the agent may + # spin on the monitor function here -- if the resource does not + # start up within the defined timeout, the cluster manager will + # consider the start action failed + while ! 
foobar_monitor; do + ocf_log debug "Resource has not started yet, waiting" + sleep 1 + done + + # only return $OCF_SUCCESS if _everything_ succeeded as expected + return $OCF_SUCCESS +} +-------------------------------------------------------------------------- + + +== Conventions + +This section contains a collection of conventions that have emerged in +the resource agent repositories over the years. Following these +conventions is by no means mandatory for resource agent authors, but +it is a good idea based on the +http://en.wikipedia.org/wiki/Principle_of_least_surprise[Principle of +Least Surprise] -- resource agents following these conventions will be +easier to understand, review, and use than those that do not. + +=== Well-known parameter names + +Several parameter names are supported by a number of resource +agents. For new resource agents, following these examples is generally +a good idea: + +* +binary+ -- the name of a binary that principally manages the + resource, such as a server daemon +* +config+ -- the full path to a configuration file +* +pid+ -- the full path to a file holding a process ID (PID) +* +log+ -- the full path to a log file +* +socket+ -- the full path to a UNIX socket that the resource manages +* +ip+ -- an IP address that a daemon binds to +* +port+ -- a TCP or UDP port that a daemon binds to + +Needless to say, resource agents should only implement any of these +parameters if they are sensible to use in the agent's context. 
+ +=== Parameter defaults + +Defaults for resource agent parameters should be set by initializing +variables with the suffix +_default+: + +[source,bash] +-------------------------------------------------------------------------- +# Defaults +OCF_RESKEY_superfrobnicate_default=0 + +: ${OCF_RESKEY_superfrobnicate=${OCF_RESKEY_superfrobnicate_default}} +-------------------------------------------------------------------------- + +NOTE: The resource agent should make sure that it sets a default for +any parameter not marked as +required+ in the metadata. + + +=== Honoring +PATH+ for binaries + +When a resource agent supports a parameter designed to hold the name +of a binary (such as a daemon, or a client utility for querying +status), then that parameter should honor the +PATH+ environment +variable. Do not supply full paths. Thus, the following approach: + +[source,bash] +-------------------------------------------------------------------------- +# Good example -- do it this way +OCF_RESKEY_frobnicate_default="frobnicate" +: ${OCF_RESKEY_frobnicate="${OCF_RESKEY_frobnicate_default}"} +-------------------------------------------------------------------------- + +is much preferred over specifying a full path, as shown here: + +[source,bash] +-------------------------------------------------------------------------- +# Bad example -- avoid if you can +OCF_RESKEY_frobnicate_default="/usr/local/sbin/frobnicate" +: ${OCF_RESKEY_frobnicate="${OCF_RESKEY_frobnicate_default}"} +-------------------------------------------------------------------------- + +This rule holds for defaults, as well. + + + +== Special considerations + +=== Licensing + +Whenever possible, resource agent contributors are _encouraged_ to use +the GNU General Public License (GPL), version 2 and later, for any new +resource agents. 
The shell functions library does not strictly mandate +this, however, as it is licensed under the GNU Lesser General Public +License (LGPL), version 2.1 and later (so it can be used by non-GPL +agents). + +The resource agent _must_ explicitly state its own license in the +agent source code. + + +=== Locale settings + +When sourcing +ocf-shellfuncs+ as explained in <<_initialization>>, +any resource agent automatically sets +LANG+ and +LC_ALL+ to the +C+ +locale. Resource agents can thus expect to always operate in the +C+ +locale, and need not reset +LANG+ or any of the +LC_+ environment +variables themselves. + + +=== Testing for running processes + +For testing whether a particular process (with a known process ID) is +currently running, a frequently found method is to send it a +0+ +signal and catch errors, similar to this example: + +[source,bash] +-------------------------------------------------------------------------- +if kill -s 0 `cat $daemon_pid_file`; then + ocf_log debug "Process is currently running" +else + ocf_log warn "Process is dead, removing pid file" + rm -f $daemon_pid_file +fi +-------------------------------------------------------------------------- + +IMPORTANT: An approach far superior to this example is to instead test +the _functionality_ of the daemon by connecting to it with a client +process, as shown in the example in +<<_literal_monitor_literal_action>>. + + +=== Specifying a master preference + +Stateful (master/slave) resources must set their own _master +preference_ -- they can thus provide hints to the cluster manager +which is the best instance to promote to the +Master+ role. + +IMPORTANT: It is acceptable for multiple instances to have identical +positive master preferences. In that case, the cluster resource +manager will automatically select a resource agent to +promote. However, if _all_ instances have the (default) master score +of zero, the cluster manager will not promote any instance at +all. 
Thus, it is crucial that at least one instance has a positive +master score. + +For this purpose, +crm_master+ comes in handy. This convenience +wrapper around the +crm_attribute+ sets a node attribute named ++master-<<_literal_ocf_resource_instance_literal,$OCF_RESOURCE_INSTANCE>>+ +for the node it is being executed on, and fills this attribute with +the specified value. The cluster manager is then expected to translate +this into a promotion score for the corresponding instance, and base +its promotion preference on that score. + +Stateful resource agents typically execute +crm_master+ during the +<<_literal_monitor_literal_action,+monitor+>> and/or +<<_literal_notify_literal_action,+notify+>> action. + +The following example assumes that the +foobar+ resource agent can +test the application's status by executing a binary that returns +certain exit codes based on whether + +* the resource is either in the master role, or is a slave that is + fully caught up with the master (at any rate, it has current data), + or +* the resource is in the slave role, but through some form of + asynchronous replication has "fallen behind" the master, or +* the resource has gracefully stopped, or +* the resource has unexpectedly failed. + +[source,bash] +-------------------------------------------------------------------------- +foobar_monitor() { + local rc + + # exit immediately if configuration is not valid + foobar_validate_all || exit $? + + ocf_run frobnicate --test + + # This example assumes the following exit code convention + # for frobnicate: + # 0: running, and fully caught up with master + # 1: gracefully stopped + # 2: running, but lagging behind master + # any other: error + case "$?" in + 0) + rc=$OCF_SUCCESS + ocf_log debug "Resource is running" + # Set a high master preference. The current master + # will always get this, plus 1. Any current slaves + # will get a high preference so that if the master + # fails, they are next in line to take over. 
+ crm_master -l reboot -v 100 + ;; + 1) + rc=$OCF_NOT_RUNNING + ocf_log debug "Resource is not running" + # Remove the master preference for this node + crm_master -l reboot -D + ;; + 2) + rc=$OCF_SUCCESS + ocf_log debug "Resource is lagging behind master" + # Set a low master preference: if the master fails + # right now, and there is another slave that does + # not lag behind the master, its higher master + # preference will win and that slave will become + # the new master + crm_master -l reboot -v 5 + ;; + *) + ocf_log err "Resource has failed" + exit $OCF_ERR_GENERIC + esac + + return $rc +} +-------------------------------------------------------------------------- + + +== Testing resource agents + +This section discusses automated testing for resource agents. Testing +is a vital aspect of development; it is crucial both for creating new +resource agents, and for modifying existing ones. + + +=== Testing with +ocf-tester+ + +The resource agents repository (and hence, any installed resource +agents package) contains a utility named +ocf-tester+. This shell +script allows you to conveniently and easily test the functionality of +your resource agent. + ++ocf-tester+ is commonly invoked, as +root+, like this: + +-------------------------------------------------------------------------- +ocf-tester -n <name> [-o <param>=<value> ... ] <resource agent> +-------------------------------------------------------------------------- + +* +<name>+ is an arbitrary resource name. + +* You may set any number of +<param>=<value>+ with the +-o+ option, + corresponding to any resource parameters you wish to set for + testing. + +* +<resource agent>+ is the full path to your resource agent. + +When invoked, +ocf-tester+ executes all mandatory actions and enforces +action behavior as explained in <<_resource_agent_actions>>. + +It also tests for optional actions. 
Optional actions must behave as +expected when advertised, but do not cause +ocf-tester+ to flag an +error if not implemented. + +IMPORTANT: +ocf-tester+ does not initiate "dry runs" of actions, nor +does it create resource dummies of any kind. Instead, it exercises the +actual resource agent as-is, whether that may include opening and +closing databases, mounting file systems, starting or stopping virtual +machines, etc. Use with care. + +For example, you could run +ocf-tester+ on the +foobar+ resource agent +as follows: + +-------------------------------------------------------------------------- +# ocf-tester -n foobartest \ + -o superfrobnicate=true \ + -o datadir=/tmp \ + /home/johndoe/ra-dev/foobar +Beginning tests for /home/johndoe/ra-dev/foobar... +* Your agent does not support the notify action (optional) +* Your agent does not support the reload action (optional) +/home/johndoe/ra-dev/foobar passed all tests +-------------------------------------------------------------------------- + +If the resource agent exhibits some difficult to grasp behaviour, +which is typically the case with just developed software, there +are +-v+ and +-d+ options to dump more output. If that does not +help, instruct +ocf-tester+ to trace the resource agent with ++-X+ (make sure to redirect output to a file, unless you are a +really fast reader). + +=== Testing with +ocft+ + ++ocft+ is a testing tool for resource agents. The main difference +to +ocf-tester+ is that +ocft+ can automate creating complex +testing environments. That includes package installation and +arbitrary shell scripting. 
+ +==== +ocft+ components + ++ocft+ consists of the following components: + +* A test case generator (+/usr/sbin/ocft+) -- generates shell + scripts from test case configuration files + +* Configuration files (+/usr/share/resource-agents/ocft/configs/+) -- + a configuration file contains environment setup and test cases + for one resource agent + +* The testing scripts are stored in +/var/lib/resource-agents/ocft/cases/+, + but normally there is no need to inspect them + +==== Customizing the testing environment + ++ocft+ modifies the runtime environment of the resource agent +either by changing environment variables (through the interface +defined by OCF) or by running ad-hoc shell scripts which can for +instance change permissions of a file or unmount a file system. + +==== How to test + +You need to know the software (resource) you want to test. Draw a +sketch of all interesting scenarios, with all expected and +unexpected conditions and how the resource agent should react to +them. Then you need to encode these conditions and the expected +outcomes as +ocft+ test cases. Running ocft is then simple: + +--------------------------------------- +# ocft make <RA> +# ocft test <RA> +--------------------------------------- + +The first subcommand generates the scripts for your test cases +whereas the second runs them and checks the outcome. + +==== +ocft+ configuration file syntax + +There are four top level options each of which can contain +one or more sub-options. + +===== +CONFIG+ (top level option) + +This option is global and influences every test case. + + ** +AgentRoot+ (sub-option) +--------------------------------------- +AgentRoot /usr/lib/ocf/resource.d/xxx +--------------------------------------- + +Normally, we assume that the resource agent lives under the ++heartbeat+ provider. Use `AgentRoot` to test agent which is +distributed by another vendor. 
+ + ** +InstallPackage+ (sub-option) +--------------------------------------- +InstallPackage package [package2 [...]] +--------------------------------------- + +Install packages necessary for testing. The installation is +skipped if the packages have already been installed. + + ** +HangTimeout+ (sub-option) +--------------------------------------- +HangTimeout secs +--------------------------------------- + +The maximum time allowed for a single RA action. If this timer +expires, the action is considered as failed. + +===== +SETUP-AGENT+ (top level option) +--------------------------------------- +SETUP-AGENT + bash commands +--------------------------------------- + +If the RA needs to be initialized before testing, you can put +bash code here for that purpose. The initialization is done only +once. If you need to reinitialize then delete the ++/tmp/.[AGENT_NAME]_set+ stamp file. + +===== +CASE+ (top level option) +--------------------------------------- +CASE "description" +--------------------------------------- + +This is the main building block of the test suite. Each test +case is to be described in one +CASE+ top level option. + +One case consists of several suboptions typically followed by the ++RunAgent+ suboption. + + ** +Var+ (sub-option) +--------------------------------------- +Var VARIABLE=value +--------------------------------------- + +Sets an environment variable of the resource agent. These variables +are usually of the form OCF_RESKEY_xxx. Note that there must be no +blanks on either side of "=". + + ** +Unvar+ (sub-option) +--------------------------------------- +Unvar VARIABLE [VARIABLE2 [...]] +--------------------------------------- + +Remove the environment variable. + + ** +Include+ (sub-option) +--------------------------------------- +Include macro_name +--------------------------------------- + +Include statements in 'macro_name'. See below for description of ++CASE-BLOCK+. 
+ +** +Bash+ (sub-option) +--------------------------------------- +Bash bash_codes +--------------------------------------- + +This option is to set up the environment of OS, where you can insert +BASH code to customize the system arbitrarily. Note, do not cause +unrecoverable consequences to the system. + +** +BashAtExit+ (sub-option) +--------------------------------------- +BashAtExit bash_codes +--------------------------------------- + +This option is to recover the OS environment in order to run another +test case correctly. Of course you can use 'Bash' option to recover +it. However, if mistakes occur in the process, the script will quit +directly instead of running your recovery codes. If it happens, you +ought to use BashAtExit which can restore the system environment +before you quit. + +** +RunAgent+ (sub-option) +--------------------------------------- +RunAgent cmd [ret_value] +--------------------------------------- + +This option is to run resource agent. "cmd" is the parameter of the +resource agent, such as "start, status, stop ...". The second +parameter is optional. It will compare the actual returned value with +the expected value when the script has run the resource agent. If +they differ, a bug has been found. + +It is also possible to execute a suboption on a remote host +instead of locally. The protocol used is ssh and the command is +run in the background. Just add the +@<ipaddr>+ suffix to the +suboption name. For instance: + +--------------------------------------- +Bash@192.168.1.100 date +--------------------------------------- + +would run the date program. Remote commands are run in +background. + +NB: Not clear how can ssh be automated as we don't know in +advance the environment. Perhaps use "well-known" host names such +as "node2"? Also, if the command runs in the background, it's not +clear how is the exit code checked. Finally, does Var@node make +sense? Or is the current environment somehow copied over? We +probably need an example here. 
+ +Need examples in general. + +===== +CASE-BLOCK+ (top level option) +--------------------------------------- +CASE-BLOCK macro_name +--------------------------------------- + +The +CASE-BLOCK+ option defines a macro which can be +Include+d +in any +CASE+. All +CASE+ suboptions are valid in +CASE-BLOCK+. + + +== Installing and packaging resource agents + +This section discusses what to do with your resource agent once it is +done and tested -- where to install it, and how to include it in either +your own application package or in the Linux-HA resource agents +repository. + +=== Installing resource agents + +If you choose to include your resource agent in your own project, make +sure it installs into the correct location. Resource agents should +install into the +/usr/lib/ocf/resource.d/<provider>+ directory, where ++<provider>+ is the name of your project or any other name you wish to +identify the resource agent with. + +For example, if your +foobar+ resource agent is being packaged as part +of a project named +fortytwo+, then the correct full path to your +resource agent would be ++/usr/lib/ocf/resource.d/fortytwo/foobar+. Make sure your resource +agent installs with +0755+ (+-rwxr-xr-x+) permission bits. + +When installed this way, OCF-compliant cluster resource managers will +be able to properly identify, parse, and execute your resource +agent. The Pacemaker cluster manager, for example, would map the +above-mentioned installation path to the +ocf:fortytwo:foobar+ +resource type identifier. + +=== Packaging resource agents + +When you package resource agents as part of your own project, you +should apply the considerations outlined in this section. + +NOTE: If you instead prefer to submit your resource agent to the +Linux-HA resource agents repository, see +<<_submitting_resource_agents>> for information on doing so. 
+ +==== RPM packaging + +It is recommended to put your OCF resource agent(s) in an RPM +sub-package, with the name +<toppackage>-resource-agents+. Ensure that +the package owns its provider directory, and depends on the upstream ++resource-agents+ package which lays out the directory hierarchy and +provides convenience shell functions. An example RPM spec snippet is +given below: + +-------------------------------------------------------------------------- +%package resource-agents +Summary: OCF resource agent for Foobar +Group: System Environment/Base +Requires: %{name} = %{version}-%{release}, resource-agents + +%description resource-agents +This package contains the OCF-compliant resource agents for Foobar. + +%files resource-agents +%defattr(755,root,root,-) +%dir %{_prefix}/lib/ocf/resource.d/fortytwo +%{_prefix}/lib/ocf/resource.d/fortytwo/foobar +-------------------------------------------------------------------------- + +NOTE: If an RPM spec file contains a +%package+ declaration, then RPM +considers this a sub-package which inherits top-level fields such as ++Name+, +Version+, +License+, etc. Sub-packages have the top-level +package name automatically prepended to their own name. Thus the snippet +above would create a sub-package named +foobar-resource-agents+ +(presuming the package +Name+ is +foobar+). + +==== Debian packaging + +For Debian packages, like for <<_rpm_packaging,RPMs>>, it is +recommended to create a separate package holding your resource agents, +which then should depend on the +cluster-agents+ package. + +NOTE: This section assumes that you are packaging with +debhelper+. 
+ +An example +debian/control+ snippet is given below: + +-------------------------------------------------------------------------- +Package: foobar-cluster-agents +Priority: extra +Architecture: all +Depends: cluster-agents +Description: OCF-compliant resource agents for Foobar +-------------------------------------------------------------------------- + +You will also create a separate +.install+ file. Sticking with the +example of installing the +foobar+ resource agent as a sub-package of ++fortytwo+, the +debian/fortytwo-cluster-agents.install+ file could +consist of the following content: + +-------------------------------------------------------------------------- +usr/lib/ocf/resource.d/fortytwo/foobar +-------------------------------------------------------------------------- + +=== Submitting resource agents + +If you choose not to bundle your resource agent with your own package, +but instead wish to submit it to the upstream resource agent +repository hosted on +https://github.com/ClusterLabs/resource-agents[the ClusterLabs +repository on GitHub], please follow the steps outlined in this section. + +Create a fork of the +https://github.com/ClusterLabs/resource-agents[upstream repository] and +clone it with the following commands: + +-------------------------------------------------------------------------- +git clone git://github.com/<your-username>/resource-agents +git remote add upstream git@github.com:ClusterLabs/resource-agents.git +git checkout -b <new-branch> +-------------------------------------------------------------------------- + +Then, copy your resource agent into the +heartbeat+ subdirectory: +-------------------------------------------------------------------------- +cd resource-agents/heartbeat +cp /path/to/your/local/copy/of/foobar . +chmod 0755 foobar +cd .. 
+-------------------------------------------------------------------------- + +Next, modify the +Makefile.am+ file in +resource-agents/heartbeat+ and +add your new resource agent to the +ocf_SCRIPTS+ list. This will make +sure the agent is properly installed. + +Lastly, open +Makefile.am+ in +resource-agents/doc/man+ and add ++ocf_heartbeat_<name>.7+ to the +man_MANS+ variable. This will +automatically generate a resource agent manual page from its metadata, +and then install that man page into the correct location. + +Now, add your new resource agent, and the two modifications to the +Makefiles, to your changeset: + +-------------------------------------------------------------------------- +git add heartbeat/foobar +git add heartbeat/Makefile.am +git add doc/man/Makefile.am +git commit +-------------------------------------------------------------------------- + +In your commit message, be sure to include a meaningful description, +for example: +-------------------------------------------------------------------------- +High: foobar: new resource agent + +This new resource agent adds functionality to manage a foobar service. +It supports being configured as a primitive or as a master/slave set, +and also optionally supports superfrobnication. +-------------------------------------------------------------------------- + +Now push the patch set to GitHub: +-------------------------------------------------------------------------- +git push +-------------------------------------------------------------------------- + +Create a Pull Request (PR) on GitHub that will be reviewed by the +upstream developers. + +Once your new resource agent has been accepted for merging, one of the +upstream developers will merge the Pull Request into the upstream +repository. At that point, you can update your main branch from +upstream, and remove your own branch. 
+ +-------------------------------------------------------------------------- +git checkout main +git fetch upstream +git merge upstream/main +git branch -D <branch> +-------------------------------------------------------------------------- + +=== Maintaining resource agents + +If you maintain a specific resource agent, or you are making repeated +contributions to the codebase, it's usually a good idea to maintain +your own _fork_ of the +ClusterLabs/resource-agents+ repository on +GitHub. + +To do so, + +* https://github.com/signup[Create a GitHub account] if you do not + have one already. +* http://help.github.com/fork-a-repo/[Fork] the + https://github.com/ClusterLabs/resource-agents[+resource-agents+ + repository]. +* Clone your personal fork into a local working copy. + +As you work on resource agents, *please* commit early, and commit +often. You can always fold commits later with +git rebase -i+. + +Once you have made a number of changes that you would like others to +review, push them to your GitHub fork and send a post to the ++linux-ha-dev+ mailing list pointing people to it. + +After the review is done, fix up your tree with any requested changes, +and then issue a pull request. There are two ways of doing so: + +* You can use the +git request-pull+ utility to get a pre-populated + email skeleton summarizing your changesets. Add any information you + see fit, and send it to the list. It is a good idea to prefix your + email subject with +[GIT PULL]+ so upstream maintainers can pick the + message out easily. + +* You can also issue a pull request directly on GitHub. GitHub + automatically notifies upstream maintainers about new pull requests + by email. Please refer to + http://help.github.com/send-pull-requests/[github:help] for details + on initiating pull requests. 
diff --git a/doc/dev-guides/writing-python-agents.md b/doc/dev-guides/writing-python-agents.md new file mode 100644 index 0000000..f26313f --- /dev/null +++ b/doc/dev-guides/writing-python-agents.md @@ -0,0 +1,90 @@ +# Resource Agent guide for Python + +## Introduction + +A simple library for authoring resource agents in Python is +provided in the `ocf.py` library. + +Agents written in Python should be ideally compatible both with Python +2.7+ and Python 3.3+. + +The library provides various helper constants and functions, a logging +implementation as well as a run loop and metadata generation facility. + +## Constants + +The following OCF constants are provided: + +* `OCF_SUCCESS` +* `OCF_ERR_GENERIC` +* `OCF_ERR_ARGS` +* `OCF_ERR_UNIMPLEMENTED` +* `OCF_ERR_PERM` +* `OCF_ERR_INSTALLED` +* `OCF_ERR_CONFIGURED` +* `OCF_NOT_RUNNING` +* `OCF_RUNNING_MASTER` +* `OCF_FAILED_MASTER` +* `OCF_RESOURCE_INSTANCE` +* `HA_DEBUG` +* `HA_DATEFMT` +* `HA_LOGFACILITY` +* `HA_LOGFILE` +* `HA_DEBUGLOG` +* `OCF_ACTION` -- Set to `$__OCF_ACTION` if set, or to the first command line argument. + +## Logger + +The `logger` variable holds a Python standard log object with its +formatter set to follow the OCF standard logging format. + +Example: + +``` python + +from ocf import logger + +logger.error("Something went terribly wrong.") + +``` + +## Helper functions + +* `ocf_exit_reason`: Prints the exit error string to stderr. +* `have_binary`: Returns True if the given binary is available. +* `is_true`: Converts an OCF truth value to a Python boolean. +* `get_parameter`: Looks up the matching `OCF_RESKEY_` environment variable. +* `Agent`: Class which helps to generate the XML metadata. +* `run`: OCF run loop implementation. 
+ +## Run loop and metadata example + +``` python +import os +import sys + +OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")) +sys.path.append(OCF_FUNCTIONS_DIR) +import ocf + +def start_action(argument): + print("The start action receives the argument as a parameter: {}".format(argument)) + + +def main(): + agent = ocf.Agent("example-agent", + shortdesc="This is an example agent", + longdesc="An example of how to " + + "write an agent in Python using the ocf " + + "Python library.") + agent.add_parameter("argument", + shortdesc="Example argument", + longdesc="This argument is just an example.", + content_type="string", + default="foobar") + agent.add_action("start", timeout=60, handler=start_action) + agent.run() + +if __name__ == "__main__": + main() +``` diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am new file mode 100644 index 0000000..0584238 --- /dev/null +++ b/doc/man/Makefile.am @@ -0,0 +1,263 @@ +# +# doc: Linux-HA resource agents +# +# Copyright (C) 2009 Florian Haas +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# + + +MAINTAINERCLEANFILES = Makefile.in + +EXTRA_DIST = $(doc_DATA) $(REFENTRY_STYLESHEET) \ + mkappendix.sh ralist.sh + +CLEANFILES = $(man_MANS) $(xmlfiles) metadata-*.xml + +STYLESHEET_PREFIX ?= http://docbook.sourceforge.net/release/xsl/current +MANPAGES_STYLESHEET ?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl +HTML_STYLESHEET ?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl +FO_STYLESHEET ?= $(STYLESHEET_PREFIX)/fo/docbook.xsl +REFENTRY_STYLESHEET ?= ra2refentry.xsl + +XSLTPROC_OPTIONS ?= --xinclude +XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS) +XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) +XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) + +radir = $(abs_top_builddir)/heartbeat + +# required for out-of-tree build +symlinkstargets = \ + ocf-distro ocf.py ocf-rarun ocf-returncodes \ + findif.sh apache-conf.sh http-mon.sh mysql-common.sh \ + nfsserver-redhat.sh openstack-common.sh ora-common.sh + +preptree: + for i in $(symlinkstargets); do \ + if [ ! -f $(radir)/$$i ]; then \ + rm -rf $(radir)/$$i; \ + ln -sf $(abs_top_srcdir)/heartbeat/$$i $(radir)/$$i; \ + fi; \ + done + +$(radir)/%: $(abs_top_srcdir)/heartbeat/% + if [ ! -f $@ ]; then \ + ln -sf $< $@; \ + fi + +# OCF_ROOT=. is necessary due to a sanity check in ocf-shellfuncs +# (which tests whether $OCF_ROOT points to a directory) +metadata-%.xml: $(radir)/% preptree + OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ + +metadata-IPv6addr.xml: $(radir)/IPv6addr + OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ + +clean-local: + find $(radir) -type l -exec rm -rf {} \; + +# Please note: we can't name the man pages +# ocf:heartbeat:<name>. Believe me, I've tried. It looks like it +# works, but then it doesn't. While make can deal correctly with +# colons in target names (when properly escaped), it royally messes up +# when it deals with _dependencies_ that contain colons. See Bug +# 12126 on savannah.gnu.org. 
But, maybe it gets fixed soon, it was +# first reported in 1995 and added to Savannah in in 2005... +if BUILD_DOC +man_MANS = ocf_heartbeat_AoEtarget.7 \ + ocf_heartbeat_AudibleAlarm.7 \ + ocf_heartbeat_ClusterMon.7 \ + ocf_heartbeat_CTDB.7 \ + ocf_heartbeat_Delay.7 \ + ocf_heartbeat_Dummy.7 \ + ocf_heartbeat_EvmsSCC.7 \ + ocf_heartbeat_Evmsd.7 \ + ocf_heartbeat_Filesystem.7 \ + ocf_heartbeat_ICP.7 \ + ocf_heartbeat_IPaddr.7 \ + ocf_heartbeat_IPaddr2.7 \ + ocf_heartbeat_IPsrcaddr.7 \ + ocf_heartbeat_LVM.7 \ + ocf_heartbeat_LVM-activate.7 \ + ocf_heartbeat_LinuxSCSI.7 \ + ocf_heartbeat_MailTo.7 \ + ocf_heartbeat_ManageRAID.7 \ + ocf_heartbeat_ManageVE.7 \ + ocf_heartbeat_NodeUtilization.7 \ + ocf_heartbeat_Pure-FTPd.7 \ + ocf_heartbeat_Raid1.7 \ + ocf_heartbeat_Route.7 \ + ocf_heartbeat_SAPDatabase.7 \ + ocf_heartbeat_SAPInstance.7 \ + ocf_heartbeat_SendArp.7 \ + ocf_heartbeat_ServeRAID.7 \ + ocf_heartbeat_SphinxSearchDaemon.7 \ + ocf_heartbeat_Squid.7 \ + ocf_heartbeat_Stateful.7 \ + ocf_heartbeat_SysInfo.7 \ + ocf_heartbeat_VIPArip.7 \ + ocf_heartbeat_VirtualDomain.7 \ + ocf_heartbeat_WAS.7 \ + ocf_heartbeat_WAS6.7 \ + ocf_heartbeat_WinPopup.7 \ + ocf_heartbeat_Xen.7 \ + ocf_heartbeat_Xinetd.7 \ + ocf_heartbeat_ZFS.7 \ + ocf_heartbeat_aliyun-vpc-move-ip.7 \ + ocf_heartbeat_anything.7 \ + ocf_heartbeat_apache.7 \ + ocf_heartbeat_asterisk.7 \ + ocf_heartbeat_aws-vpc-move-ip.7 \ + ocf_heartbeat_aws-vpc-route53.7 \ + ocf_heartbeat_awseip.7 \ + ocf_heartbeat_awsvip.7 \ + ocf_heartbeat_azure-lb.7 \ + ocf_heartbeat_clvm.7 \ + ocf_heartbeat_conntrackd.7 \ + ocf_heartbeat_corosync-qnetd.7 \ + ocf_heartbeat_crypt.7 \ + ocf_heartbeat_db2.7 \ + ocf_heartbeat_dhcpd.7 \ + ocf_heartbeat_docker.7 \ + ocf_heartbeat_docker-compose.7 \ + ocf_heartbeat_dovecot.7 \ + ocf_heartbeat_dnsupdate.7 \ + ocf_heartbeat_dummypy.7 \ + ocf_heartbeat_eDir88.7 \ + ocf_heartbeat_ethmonitor.7 \ + ocf_heartbeat_exportfs.7 \ + ocf_heartbeat_fio.7 \ + ocf_heartbeat_galera.7 \ + ocf_heartbeat_garbd.7 \ + 
ocf_heartbeat_gcp-ilb.7 \ + ocf_heartbeat_gcp-vpc-move-ip.7 \ + ocf_heartbeat_iSCSILogicalUnit.7 \ + ocf_heartbeat_iSCSITarget.7 \ + ocf_heartbeat_iface-bridge.7 \ + ocf_heartbeat_iface-macvlan.7 \ + ocf_heartbeat_iface-vlan.7 \ + ocf_heartbeat_ipsec.7 \ + ocf_heartbeat_ids.7 \ + ocf_heartbeat_iscsi.7 \ + ocf_heartbeat_jboss.7 \ + ocf_heartbeat_jira.7 \ + ocf_heartbeat_kamailio.7 \ + ocf_heartbeat_lvmlockd.7 \ + ocf_heartbeat_lxc.7 \ + ocf_heartbeat_lxd-info.7 \ + ocf_heartbeat_machine-info.7 \ + ocf_heartbeat_mariadb.7 \ + ocf_heartbeat_mdraid.7 \ + ocf_heartbeat_minio.7 \ + ocf_heartbeat_mpathpersist.7 \ + ocf_heartbeat_mysql.7 \ + ocf_heartbeat_mysql-proxy.7 \ + ocf_heartbeat_nagios.7 \ + ocf_heartbeat_named.7 \ + ocf_heartbeat_nfsnotify.7 \ + ocf_heartbeat_nfsserver.7 \ + ocf_heartbeat_nginx.7 \ + ocf_heartbeat_nvmet-subsystem.7 \ + ocf_heartbeat_nvmet-namespace.7 \ + ocf_heartbeat_nvmet-port.7 \ + ocf_heartbeat_openstack-info.7 \ + ocf_heartbeat_ocivip.7 \ + ocf_heartbeat_openstack-cinder-volume.7 \ + ocf_heartbeat_openstack-floating-ip.7 \ + ocf_heartbeat_openstack-virtual-ip.7 \ + ocf_heartbeat_oraasm.7 \ + ocf_heartbeat_oracle.7 \ + ocf_heartbeat_oralsnr.7 \ + ocf_heartbeat_ovsmonitor.7 \ + ocf_heartbeat_pgagent.7 \ + ocf_heartbeat_pgsql.7 \ + ocf_heartbeat_pingd.7 \ + ocf_heartbeat_podman.7 \ + ocf_heartbeat_portblock.7 \ + ocf_heartbeat_postfix.7 \ + ocf_heartbeat_pound.7 \ + ocf_heartbeat_proftpd.7 \ + ocf_heartbeat_rabbitmq-cluster.7 \ + ocf_heartbeat_rabbitmq-server-ha.7 \ + ocf_heartbeat_redis.7 \ + ocf_heartbeat_rkt.7 \ + ocf_heartbeat_rsyncd.7 \ + ocf_heartbeat_rsyslog.7 \ + ocf_heartbeat_scsi2reservation.7 \ + ocf_heartbeat_sfex.7 \ + ocf_heartbeat_slapd.7 \ + ocf_heartbeat_smb-share.7 \ + ocf_heartbeat_sybaseASE.7 \ + ocf_heartbeat_sg_persist.7 \ + ocf_heartbeat_storage-mon.7 \ + ocf_heartbeat_symlink.7 \ + ocf_heartbeat_syslog-ng.7 \ + ocf_heartbeat_tomcat.7 \ + ocf_heartbeat_varnish.7 \ + ocf_heartbeat_vdo-vol.7 \ + ocf_heartbeat_vmware.7 \ + 
ocf_heartbeat_vsftpd.7 \ + ocf_heartbeat_zabbixserver.7 + +if USE_IPV6ADDR_AGENT +man_MANS += ocf_heartbeat_IPv6addr.7 +endif + +if BUILD_AZURE_EVENTS +man_MANS += ocf_heartbeat_azure-events.7 +endif + +if BUILD_AZURE_EVENTS_AZ +man_MANS += ocf_heartbeat_azure-events-az.7 +endif + +if BUILD_GCP_PD_MOVE +man_MANS += ocf_heartbeat_gcp-pd-move.7 +endif + +if BUILD_GCP_VPC_MOVE_ROUTE +man_MANS += ocf_heartbeat_gcp-vpc-move-route.7 +endif + +if BUILD_GCP_VPC_MOVE_VIP +man_MANS += ocf_heartbeat_gcp-vpc-move-vip.7 +endif + +xmlfiles = $(man_MANS:.7=.xml) + +%.1 %.5 %.7 %.8: %.xml + $(XSLTPROC) \ + $(XSLTPROC_MANPAGES_OPTIONS) \ + $(MANPAGES_STYLESHEET) $< + +ocf_heartbeat_%.xml: metadata-%.xml $(srcdir)/$(REFENTRY_STYLESHEET) + $(XSLTPROC) --novalid \ + --stringparam package $(PACKAGE_NAME) \ + --stringparam version $(VERSION) \ + --output $@ \ + $(srcdir)/$(REFENTRY_STYLESHEET) $< + +ocf_resource_agents.xml: $(xmlfiles) mkappendix.sh + ./mkappendix.sh $(xmlfiles) > $@ + +%.html: %.xml + $(XSLTPROC) \ + $(XSLTPROC_HTML_OPTIONS) \ + --output $@ \ + $(HTML_STYLESHEET) $< + +xml: ocf_resource_agents.xml +endif diff --git a/doc/man/mkappendix.sh b/doc/man/mkappendix.sh new file mode 100755 index 0000000..8f3ed3d --- /dev/null +++ b/doc/man/mkappendix.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +cat <<EOF +<?xml version='1.0' encoding='utf-8' ?> +<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"> +<appendix id="ap-ra-man-pages"> + <title>Resource agent manual pages</title> +EOF + +for manpage in $(printf "%s\n" "$@" | sort -f); do + cat <<EOF + <xi:include href="./$manpage" xmlns:xi="http://www.w3.org/2001/XInclude"/> +EOF +done + +cat <<EOF +</appendix> +EOF diff --git a/doc/man/ra2refentry.xsl b/doc/man/ra2refentry.xsl new file mode 100644 index 0000000..f8e1232 --- /dev/null +++ b/doc/man/ra2refentry.xsl @@ -0,0 +1,649 @@ +<?xml version="1.0" encoding="UTF-8"?> +<xsl:stylesheet 
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + + <xsl:output indent="yes" + doctype-public="-//OASIS//DTD DocBook XML V4.4//EN" + doctype-system="http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"/> + + <!--<xsl:strip-space elements="longdesc shortdesc"/>--> + + <!-- Package name. --> + <xsl:param name="package">resource-agents</xsl:param> + + <!-- Package version number. Must be passed in. --> + <xsl:param name="version"/> + + <!-- RA class --> + <xsl:param name="class">ocf</xsl:param> + + <!-- RA provider --> + <xsl:param name="provider">heartbeat</xsl:param> + + <!-- Man volume number --> + <xsl:param name="manvolum">7</xsl:param> + + <!-- --> + <xsl:param name="variable.prefix"/> + + <!-- Separator between different action/@name --> + <xsl:param name="separator"> | </xsl:param> + + <xsl:variable name="manpagetitleprefix"><xsl:value-of select="$class"/>_<xsl:value-of select="$provider"/>_</xsl:variable> + + <xsl:template match="/"> + <refentry> + <xsl:apply-templates mode="root"/> + </refentry> + </xsl:template> + + <xsl:template match="resource-agent" mode="root"> + <xsl:param name="this" select="self::resource-agent"/> + <xsl:attribute name="id"> + <xsl:text>re-ra-</xsl:text> + <xsl:value-of select="@name"/> + </xsl:attribute> + <xsl:apply-templates select="$this" mode="refentryinfo"/> + <xsl:apply-templates select="$this" mode="refmeta"/> + <xsl:apply-templates select="$this" mode="refnamediv"/> + <xsl:apply-templates select="$this" mode="synopsis"/> + <xsl:apply-templates select="$this" mode="description"/> + <xsl:apply-templates select="$this" mode="parameters"/> + <xsl:apply-templates select="$this" mode="actions"/> + <xsl:apply-templates select="$this" mode="examplecrmsh"/> + <xsl:apply-templates select="$this" mode="examplepcs"/> + <xsl:apply-templates select="$this" mode="seealso"/> + </xsl:template> + + + <!-- Empty Templates --> + <xsl:template match="node()" mode="root"/> + <xsl:template match="*" mode="refmeta"/> + 
<xsl:template match="*" mode="refnamediv"/> + + <xsl:template match="*" mode="synopsis"/> + <xsl:template match="*" mode="description"/> + <xsl:template match="*" mode="parameters"/> + + <!-- Mode refentryinfo --> + <xsl:template match="resource-agent" mode="refentryinfo"> + <refentryinfo> + <productname><xsl:value-of select="$package"/></productname> + <productnumber><xsl:value-of select="$version"/></productnumber> + <corpauthor>ClusterLabs contributors (see the resource agent source for information about individual authors)</corpauthor> + </refentryinfo> + </xsl:template> + + <!-- Mode refmeta --> + <xsl:template match="resource-agent" mode="refmeta"> + <refmeta> + <refentrytitle><xsl:value-of select="$manpagetitleprefix"/><xsl:value-of select="@name"/></refentrytitle> + <manvolnum><xsl:value-of select="$manvolum"/></manvolnum> + <refmiscinfo class="manual">OCF resource agents</refmiscinfo> + </refmeta> + </xsl:template> + + <!-- Mode refnamediv --> + <xsl:template match="resource-agent" mode="refnamediv"> + <refnamediv> + <refname><xsl:value-of select="$manpagetitleprefix"/><xsl:value-of select="@name"/></refname> + <refpurpose><xsl:apply-templates select="shortdesc"/></refpurpose> + </refnamediv> + </xsl:template> + + + <!-- Mode synopsis --> + <xsl:template match="resource-agent" mode="synopsis"> + <refsynopsisdiv> + <cmdsynopsis sepchar=" "> + <command moreinfo="none"> + <xsl:value-of select="@name"/> + </command> + <xsl:apply-templates select="actions" mode="synopsis"/> + </cmdsynopsis> + </refsynopsisdiv> + </xsl:template> + + <xsl:template match="actions" mode="synopsis"> + <group choice="opt" rep="norepeat"> + <xsl:apply-templates select="action[@name = 'start'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'stop'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'status'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'monitor'][1]" mode="synopsis"/> + <xsl:apply-templates 
select="action[@name = 'migrate_to'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'migrate_from'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'promote'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'demote'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'meta-data'][1]" mode="synopsis"/> + <xsl:apply-templates select="action[@name = 'validate-all'][1]" mode="synopsis"/> + </group> + </xsl:template> + + <xsl:template match="action" mode="synopsis"> + <arg choice="plain" rep="norepeat"> + <xsl:value-of select="@name"/> + </arg> + </xsl:template> + + + <!-- Mode Description --> + + <!-- break string into <para> elements on linefeeds --> + <!-- would be so much easier with replace(...) --> + +<xsl:template name="break_into_para"> + <xsl:param name="string" /> + + <xsl:choose> + <xsl:when test="starts-with($string, '
') or starts-with($string, ' ')" > + <!-- Trim leading newlines and other whitespace, then recurse on the remainder. + A string that is empty after normalize-space() produces no output. --> + <xsl:variable name="normalized" select="normalize-space($string)" /> + <xsl:variable name="nlen" select="string-length($normalized)" /> + <xsl:if test="$nlen > 0" > + <xsl:variable name="leading" select="string-length(substring-before($string, substring($normalized, 1, 1)))" /> + <xsl:call-template name="break_into_para"> + <xsl:with-param name="string" select="substring($string, $leading + 1)" /> + </xsl:call-template> + </xsl:if> + </xsl:when> + <xsl:otherwise> + + <xsl:variable name="lf" select="'

'" /> + <xsl:variable name="lf_dash" select="'
-'" /> + <xsl:choose> + <xsl:when test="contains($string, $lf)"> + <xsl:variable name="first" select="substring-before($string, $lf)" /> + <!-- recursively call on remaining string --> + <xsl:call-template name="break_into_para"> + <xsl:with-param name="string" select="$first"/> + </xsl:call-template> + <xsl:call-template name="break_into_para"> + <xsl:with-param name="string" select="substring-after($string, $lf)" /> + </xsl:call-template> + </xsl:when> + <xsl:when test="contains($string, $lf_dash)"> + <xsl:variable name="first" select="substring-before($string, $lf_dash)" /> + <!-- recursively call on remaining string --> + <xsl:call-template name="break_into_para"> + <xsl:with-param name="string" select="$first"/> + </xsl:call-template> + <xsl:call-template name="break_into_para"> + <xsl:with-param name="string" select="concat('-',substring-after($string, $lf_dash))" /> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <para> + <xsl:value-of select="'
'"/> + <xsl:value-of select="$string"/> + <xsl:value-of select="'
'"/> + </para> + <xsl:value-of select="'
'"/> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> +</xsl:template> + + + <xsl:template match="resource-agent" mode="description"> + <refsection> + <title>Description</title> + <xsl:apply-templates mode="description"/> + </refsection> + </xsl:template> + + <xsl:template match="text()" mode="longdesc"> + <xsl:call-template name="break_into_para"> + <xsl:with-param name="string" select="." /> + </xsl:call-template> + </xsl:template> + + <xsl:template match="longdesc" mode="description"> + <xsl:apply-templates mode="longdesc"/> + </xsl:template> + + <xsl:template match="actions" mode="description"> + <xsl:if test="action[@name = 'migrate_from' or @name = 'migrate_to']"> + <para>This resource agent may be configured for <emphasis>native + migration</emphasis> if available in the cluster manager. For + Pacemaker, the + <parameter>allow-migrate="true"</parameter> meta + attribute enables native migration.</para> + </xsl:if> + <xsl:apply-templates mode="longdesc"/> + </xsl:template> + + <!-- Mode Parameters --> + <xsl:template match="resource-agent" mode="parameters"> + <refsection> + <title>Supported Parameters</title> + <xsl:choose> + <xsl:when test="parameters"> + <xsl:apply-templates mode="parameters"/> + </xsl:when> + <xsl:otherwise> + <para> + <xsl:text>This resource agent does not support any parameters.</xsl:text> + </para> + </xsl:otherwise> + </xsl:choose> + </refsection> + </xsl:template> + + <xsl:template match="resource-agent/shortdesc|resource-agent/longdesc" mode="parameters"/> + + <xsl:template match="parameters" mode="parameters"> + <variablelist> + <xsl:apply-templates mode="parameters"/> + </variablelist> + </xsl:template> + + + <xsl:template match="parameter" mode="parameters"> + <varlistentry> + <term> + <option><xsl:value-of select="concat($variable.prefix, @name)"/></option> + </term> + <listitem> + <xsl:apply-templates select="longdesc" mode="parameters"/> + <para> + <xsl:apply-templates select="content" mode="parameters"/> 
+ </para> + </listitem> + </varlistentry> + </xsl:template> + + <xsl:template match="longdesc" mode="parameters"> + <xsl:apply-templates select="node()" mode="longdesc"/> + </xsl:template> + + <xsl:template match="shortdesc" mode="parameters"> + <xsl:apply-templates select="text()" mode="parameters"/> + </xsl:template> + + <!-- Renders the parenthesised summary for a parameter's content element: + " (unique, required|optional, TYPE, default VALUE|no default)". + Nothing is emitted when both @type and @default are empty or absent. --> + <xsl:template match="content" mode="parameters"> + <xsl:if test="@type != '' or @default != ''"> + <xsl:text> (</xsl:text> + <xsl:if test="../@unique = 1"> + <xsl:text>unique, </xsl:text> + </xsl:if> + <xsl:choose> + <xsl:when test="../@required = 1"> + <xsl:text>required</xsl:text> + </xsl:when> + <xsl:otherwise> + <xsl:text>optional</xsl:text> + </xsl:otherwise> + </xsl:choose> + <xsl:text>, </xsl:text> + <!-- The type is printed exactly once. --> + <xsl:if test="@type != ''"> + <xsl:value-of select="@type"/> + <xsl:text>, </xsl:text> + </xsl:if> + <xsl:choose> + <xsl:when test="@default != ''"> + <xsl:text>default </xsl:text> + <!-- String defaults are wrapped in double quotes. --> + <xsl:if test="@type = 'string'"> + <xsl:text>"</xsl:text> + </xsl:if> + <code> + <xsl:value-of select="@default"/> + </code> + <xsl:if test="@type = 'string'"> + <xsl:text>"</xsl:text> + </xsl:if> + </xsl:when> + <!-- A boolean with an explicitly empty default attribute is shown as false. --> + <xsl:when test="@type='boolean' and @default = ''"> + <xsl:text>default </xsl:text> + <code>false</code> + </xsl:when> + <xsl:otherwise> + <xsl:text>no default</xsl:text> + </xsl:otherwise> + </xsl:choose> + <xsl:text>)</xsl:text> + </xsl:if> + </xsl:template> + + + <!-- Mode Actions --> + <xsl:template match="resource-agent" mode="actions"> + <refsection> + <title>Supported Actions</title> + <xsl:choose> + <xsl:when test="actions"> + <xsl:apply-templates select="actions" mode="actions"/> + </xsl:when> + <xsl:otherwise> + <!-- This should actually never happen. Every RA must + advertise the actions it supports. 
--> + <para> + <xsl:text>This resource agent does not advertise any supported actions.</xsl:text> + </para> + </xsl:otherwise> + </xsl:choose> + </refsection> + </xsl:template> + + <xsl:template match="actions" mode="actions"> + <para>This resource agent supports the following actions (operations):</para> + <variablelist> + <xsl:apply-templates select="action" mode="actions"/> + </variablelist> + </xsl:template> + + <xsl:template match="action" mode="actions"> + <varlistentry> + <term> + <option> + <xsl:value-of select="@name"/> + <xsl:if test="@role != ''"> + <xsl:text> (</xsl:text> + <xsl:value-of select="@role"/> + <xsl:text> role)</xsl:text> + </xsl:if> + </option> + </term> + <listitem> + <para> + <xsl:choose> + <xsl:when test="@name = 'start'"> + <xsl:text>Starts the resource.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'stop'"> + <xsl:text>Stops the resource.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'status'"> + <xsl:text>Performs a status check.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'monitor'"> + <xsl:text>Performs a detailed status check.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'promote'"> + <xsl:text>Promotes the resource to the Master role.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'demote'"> + <xsl:text>Demotes the resource to the Slave role.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'migrate_from'"> + <xsl:text>Executes steps necessary for migrating the + resource </xsl:text> + <emphasis>away from</emphasis> + <xsl:text> the node.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'migrate_to'"> + <xsl:text>Executes steps necessary for migrating the + resource </xsl:text> + <emphasis>to</emphasis> + <xsl:text> the node.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'validate-all'"> + <xsl:text>Performs a validation of the resource configuration.</xsl:text> + </xsl:when> + <xsl:when test="@name = 'meta-data'"> + <xsl:text>Retrieves resource agent metadata (internal use only).</xsl:text> + 
</xsl:when> + </xsl:choose> + <xsl:if test="@timeout != ''"> + <xsl:text> Suggested minimum timeout: </xsl:text> + <xsl:value-of select="@timeout"/> + <xsl:text>.</xsl:text> + </xsl:if> + <xsl:if test="@interval != ''"> + <xsl:text> Suggested interval: </xsl:text> + <xsl:value-of select="@interval"/> + <xsl:text>.</xsl:text> + </xsl:if> + </para> + </listitem> + </varlistentry> + </xsl:template> + + + <!-- Mode Example CRM Shell--> + <xsl:template match="resource-agent" mode="examplecrmsh"> + <refsection> + <title>Example CRM Shell</title> + <para> + <xsl:text>The following is an example configuration for a </xsl:text> + <xsl:value-of select="@name"/> + <xsl:text> resource using the </xsl:text> + <citerefentry><refentrytitle>crm</refentrytitle><manvolnum>8</manvolnum></citerefentry> + <xsl:text> shell:</xsl:text> + </para> + <programlisting> + <xsl:text>primitive p_</xsl:text> + <xsl:value-of select="@name"/> + <xsl:text> </xsl:text> + <xsl:value-of select="$class"/> + <xsl:text>:</xsl:text> + <xsl:value-of select="$provider"/> + <xsl:text>:</xsl:text> + <xsl:choose> + <xsl:when test="parameters/parameter[@required = 1]"> + <xsl:value-of select="@name"/> + <xsl:text> \ + params \ +</xsl:text> + <xsl:apply-templates select="parameters" mode="examplecrmsh"/> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="@name"/><xsl:text> \</xsl:text> + </xsl:otherwise> + </xsl:choose> + <!-- Insert a suggested allow-migrate meta attribute if the + resource agent supports migration --> + <xsl:if test="actions/action[@name = 'migrate_from' or @name = 'migrate_to']"> + <xsl:text> + meta allow-migrate="true" \</xsl:text> + </xsl:if> + <xsl:apply-templates select="actions" mode="examplecrmsh"/> + </programlisting> + <!-- Insert a master/slave set definition if the resource + agent supports promotion and demotion --> + <xsl:if test="actions/action/@name = 'promote' and actions/action/@name = 'demote'"> + <programlisting> + <xsl:text>ms ms_</xsl:text> + <xsl:value-of 
select="@name"/> + <xsl:text> p_</xsl:text> + <xsl:value-of select="@name"/> + <xsl:text> \ + meta notify="true" interleave="true"</xsl:text> + </programlisting> + </xsl:if> + </refsection> + </xsl:template> + + <xsl:template match="parameters" mode="examplecrmsh"> + <xsl:apply-templates select="parameter[@required = 1]" mode="examplecrmsh"/> + </xsl:template> + + <xsl:template match="parameter" mode="examplecrmsh"> + <xsl:text> </xsl:text> + <xsl:value-of select="@name"/> + <xsl:text>=</xsl:text> + <xsl:apply-templates select="content" mode="examplecrmsh"/> + <xsl:text> \</xsl:text> + <xsl:if test="following-sibling::parameter/@required = 1"> + <xsl:text> +</xsl:text> + </xsl:if> + </xsl:template> + + <xsl:template match="content" mode="examplecrmsh"> + <xsl:choose> + <xsl:when test="@default != ''"> + <xsl:text>"</xsl:text> + <xsl:value-of select="@default"/> + <xsl:text>"</xsl:text> + </xsl:when> + <xsl:otherwise> + <replaceable><xsl:value-of select="@type"/></replaceable> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template match="actions" mode="examplecrmsh"> + <!-- In the CRM shell example, show only the monitor action --> + <xsl:apply-templates select="action[@name = 'monitor']" mode="examplecrmsh"/> + </xsl:template> + + <xsl:template match="action" mode="examplecrmsh"> + <xsl:text> + op </xsl:text> + <xsl:value-of select="@name"/> + <xsl:text> </xsl:text> + <xsl:apply-templates select="@*" mode="examplecrmsh"/> + <xsl:if test="following-sibling::action/@name = 'monitor'"> + <xsl:text>\</xsl:text> + </xsl:if> + </xsl:template> + + <xsl:template match="action/@*" mode="examplecrmsh"> + <xsl:choose> + <xsl:when test="name() = 'name'"><!-- suppress --></xsl:when> + <xsl:otherwise> + <xsl:value-of select="name()"/> + <xsl:text>="</xsl:text> + <xsl:value-of select="current()"/> + <xsl:text>" </xsl:text> + </xsl:otherwise> + </xsl:choose> + <xsl:if test="following-sibling::*"> + <xsl:text> </xsl:text> + </xsl:if> + </xsl:template> + + 
<xsl:template match="longdesc" mode="examplecrmsh"/> + + <xsl:template match="shortdesc" mode="examplecrmsh"/> + + <!-- Mode Example PCS--> + <xsl:template match="resource-agent" mode="examplepcs"> + <refsection> + <title>Example PCS</title> + <para> + <xsl:text>The following is an example configuration for a </xsl:text> + <xsl:value-of select="@name"/> + <xsl:text> resource using </xsl:text> + <citerefentry><refentrytitle>pcs</refentrytitle><manvolnum>8</manvolnum></citerefentry> + </para> + <programlisting> + <xsl:text>pcs resource create p_</xsl:text> + <xsl:value-of select="@name"/> + <xsl:text> </xsl:text> + <xsl:value-of select="$class"/> + <xsl:text>:</xsl:text> + <xsl:value-of select="$provider"/> + <xsl:text>:</xsl:text> + <xsl:choose> + <xsl:when test="parameters/parameter[@required = 1]"> + <xsl:value-of select="@name"/> + <xsl:text> \ +</xsl:text> + <xsl:apply-templates select="parameters" mode="examplepcs"/> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="@name"/><xsl:text> \</xsl:text> + </xsl:otherwise> + </xsl:choose> + <xsl:apply-templates select="actions" mode="examplepcs"/> + + <!-- Insert a master/slave set definition if the resource + agent supports promotion and demotion --> + <xsl:if test="actions/action/@name = 'promote' and actions/action/@name = 'demote'"> + <xsl:text>promotable</xsl:text> + </xsl:if> + </programlisting> + + </refsection> + </xsl:template> + + <xsl:template match="parameters" mode="examplepcs"> + <xsl:apply-templates select="parameter[@required = 1]" mode="examplepcs"/> + </xsl:template> + + <xsl:template match="parameter" mode="examplepcs"> + <xsl:text> </xsl:text> + <xsl:value-of select="@name"/> + <xsl:text>=</xsl:text> + <xsl:apply-templates select="content" mode="examplepcs"/> + <xsl:text> \</xsl:text> + <xsl:if test="following-sibling::parameter/@required = 1"> + <xsl:text> +</xsl:text> + </xsl:if> + </xsl:template> + + <xsl:template match="content" mode="examplepcs"> + <xsl:choose> + <xsl:when test="@default 
!= ''"> + <xsl:text>"</xsl:text> + <xsl:value-of select="@default"/> + <xsl:text>"</xsl:text> + </xsl:when> + <xsl:otherwise> + <replaceable><xsl:value-of select="@type"/></replaceable> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template match="actions" mode="examplepcs"> + <!-- In the PCS example, show only the monitor action --> + <xsl:apply-templates select="action[@name = 'monitor']" mode="examplepcs"/> + </xsl:template> + + <xsl:template match="action" mode="examplepcs"> + <xsl:text> + op </xsl:text> + <xsl:value-of select="@name"/> + <xsl:text> </xsl:text> + <xsl:apply-templates select="@*" mode="examplepcs"/> + <xsl:if test="following-sibling::action/@name = 'monitor'"> + <xsl:text>\</xsl:text> + </xsl:if> + </xsl:template> + + <xsl:template match="action/@*" mode="examplepcs"> + <xsl:choose> + <xsl:when test="name() = 'name'"><!-- suppress --></xsl:when> + <xsl:otherwise> + <xsl:choose> + <xsl:when test="name() != 'depth'"> + <xsl:value-of select="name()"/> + </xsl:when> + <xsl:otherwise> + <xsl:text>OCF_CHECK_LEVEL</xsl:text> + </xsl:otherwise> + </xsl:choose> + <xsl:text>="</xsl:text> + <xsl:value-of select="current()"/> + <xsl:text>" </xsl:text> + </xsl:otherwise> + </xsl:choose> + <xsl:if test="following-sibling::*"> + <xsl:text> </xsl:text> + </xsl:if> + </xsl:template> + + <xsl:template match="longdesc" mode="examplepcs"/> + <xsl:template match="shortdesc" mode="examplepcs"/> + + <xsl:template match="resource-agent" mode="seealso"> + <refsection> + <title>See also</title> + <para> + <ulink> + <xsl:attribute name="url"> + <xsl:text>http://clusterlabs.org/</xsl:text> + </xsl:attribute> + </ulink> + </para> + </refsection> + </xsl:template> + +</xsl:stylesheet> diff --git a/doc/man/ralist.sh b/doc/man/ralist.sh new file mode 100755 index 0000000..31444b6 --- /dev/null +++ b/doc/man/ralist.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +RADIR=$1 +PREFIX=$2 +SUFFIX=$3 + +find "$RADIR" -type f -executable | while read -r file; do + echo 
"${PREFIX}$(basename "$file")${SUFFIX}" +done |