diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-03 13:39:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-03 13:39:28 +0000 |
commit | 924f5ea83e48277e014ebf0d19a27187cb93e2f7 (patch) | |
tree | 75920a275bba045f6d108204562c218a9a26ea15 /doc/sphinx | |
parent | Adding upstream version 2.1.7. (diff) | |
download | pacemaker-upstream.tar.xz pacemaker-upstream.zip |
Adding upstream version 2.1.8~rc1. (refs: upstream/2.1.8_rc1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'doc/sphinx')
29 files changed, 3667 insertions, 2387 deletions
diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am index e48e19a..b95f47b 100644 --- a/doc/sphinx/Makefile.am +++ b/doc/sphinx/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2003-2023 the Pacemaker project contributors +# Copyright 2003-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # @@ -84,7 +84,7 @@ EXTRA_DIST = $(wildcard */*.rst) $(DOTS) $(SVGS) \ # don't cross filesystems, sparse, show progress RSYNC_OPTS = -rlptvzxS --progress -PACKAGE_SERIES=$(shell echo "$VERSION" | awk -F. '{ print $1"."$2 }') +PACKAGE_SERIES=$(shell echo "$(VERSION)" | awk -F. '{ print $$1"."$$2 }') BOOK_RSYNC_DEST = $(RSYNC_DEST)/$(PACKAGE)/doc/$(PACKAGE_SERIES) @@ -120,20 +120,22 @@ INKSCAPE_CMD = $(INKSCAPE) --export-dpi=90 -C # Create the book directory in case this is a VPATH build. $(BOOKS:%=%/conf.py): conf.py.in $(AM_V_at)-$(MKDIR_P) "$(@:%/conf.py=%)" - $(AM_V_GEN)sed \ - -e 's/%VERSION%/$(VERSION)/g' \ - -e 's/%BOOK_ID%/$(@:%/conf.py=%)/g' \ - -e 's/%BOOK_TITLE%/$(subst _, ,$(@:%/conf.py=%))/g' \ - -e 's#%SRC_DIR%#$(abs_srcdir)#g' \ + $(AM_V_GEN)sed \ + -e 's/%VERSION%/$(VERSION)/g' \ + -e 's/%BOOK_ID%/$(@:%/conf.py=%)/g' \ + -e 's/%BOOK_TITLE%/$(subst _, ,$(@:%/conf.py=%))/g' \ + -e 's#%SRC_DIR%#$(abs_srcdir)#g' \ -e 's#%ABS_TOP_SRCDIR%#$(abs_top_srcdir)#g' \ -e 's#%CONFIGDIR%#@CONFIGDIR@#g' \ -e 's#%CRM_BLACKBOX_DIR%#@CRM_BLACKBOX_DIR@#g' \ + -e 's#%CRM_CONFIG_DIR%#@CRM_CONFIG_DIR@#g' \ -e 's#%CRM_DAEMON_GROUP%#@CRM_DAEMON_GROUP@#g' \ -e 's#%CRM_DAEMON_USER%#@CRM_DAEMON_USER@#g' \ -e 's#%CRM_LOG_DIR%#@CRM_LOG_DIR@#g' \ -e 's#%CRM_SCHEMA_DIRECTORY%#@CRM_SCHEMA_DIRECTORY@#g' \ -e 's#%PACEMAKER_CONFIG_DIR%#@PACEMAKER_CONFIG_DIR@#g' \ -e 's#%PCMK_GNUTLS_PRIORITIES%#@PCMK_GNUTLS_PRIORITIES@#g' \ + -e 's#%PCMK__REMOTE_SCHEMA_DIR%#@PCMK__REMOTE_SCHEMA_DIR@#g' \ $(<) > "$@" $(BOOK)/_build: $(STATIC_FILES) $(BOOK)/conf.py $(DEPS_$(BOOK)) $(wildcard $(srcdir)/$(BOOK)/*.rst) @@ -176,6 +178,15 @@ if BUILD_SPHINX_DOCS 
"$(RSYNC_DEST)/$(PACKAGE)/doc" endif +.PHONY: vars +vars: + @echo "BOOK_FORMATS='$(BOOK_FORMATS)'" + @echo "PAPER='$(PAPER)'" + @echo "SPHINXFLAGS='$(SPHINXFLAGS)'" + @echo "RSYNC_DEST='$(RSYNC_DEST)'" + @echo "VERSION='$(VERSION)'" + @echo "PACKAGE_SERIES='$(PACKAGE_SERIES)'" + .PHONY: all-local all-local: if BUILD_SPHINX_DOCS diff --git a/doc/sphinx/Pacemaker_Administration/agents.rst b/doc/sphinx/Pacemaker_Administration/agents.rst index e5b17e2..34bea60 100644 --- a/doc/sphinx/Pacemaker_Administration/agents.rst +++ b/doc/sphinx/Pacemaker_Administration/agents.rst @@ -53,123 +53,143 @@ _______ All OCF resource agents are required to implement the following actions. -.. table:: **Required Actions for OCF Agents** - - +--------------+-------------+------------------------------------------------+ - | Action | Description | Instructions | - +==============+=============+================================================+ - | start | Start the | .. index:: | - | | resource | single: OCF resource agent; start | - | | | single: start action | - | | | | - | | | Return 0 on success and an appropriate | - | | | error code otherwise. Must not report | - | | | success until the resource is fully | - | | | active. | - +--------------+-------------+------------------------------------------------+ - | stop | Stop the | .. index:: | - | | resource | single: OCF resource agent; stop | - | | | single: stop action | - | | | | - | | | Return 0 on success and an appropriate | - | | | error code otherwise. Must not report | - | | | success until the resource is fully | - | | | stopped. | - +--------------+-------------+------------------------------------------------+ - | monitor | Check the | .. index:: | - | | resource's | single: OCF resource agent; monitor | - | | state | single: monitor action | - | | | | - | | | Exit 0 if the resource is running, 7 | - | | | if it is stopped, and any other OCF | - | | | exit code if it is failed. 
NOTE: The | - | | | monitor script should test the state | - | | | of the resource on the local machine | - | | | only. | - +--------------+-------------+------------------------------------------------+ - | meta-data | Describe | .. index:: | - | | the | single: OCF resource agent; meta-data | - | | resource | single: meta-data action | - | | | | - | | | Provide information about this | - | | | resource in the XML format defined by | - | | | the OCF standard. Exit with 0. NOTE: | - | | | This is *not* required to be performed | - | | | as root. | - +--------------+-------------+------------------------------------------------+ +.. list-table:: **Required Actions for OCF Agents** + :class: longtable + :widths: 1 4 3 + :header-rows: 1 + + * - Action + - Description + - Instructions + * - .. _start_action: + + .. index:: + single: OCF resource agent; start + single: start action + + start + - Start the resource + - Return :ref:`OCF_SUCCESS <OCF_SUCCESS>` on success and an appropriate + error code otherwise. Must not report success until the resource is fully + active. + * - .. _stop_action: + + .. index:: + single: OCF resource agent; stop + single: stop action + + stop + - Stop the resource + - Return :ref:`OCF_SUCCESS <OCF_SUCCESS>` on success and an appropriate + error code otherwise. Must not report success until the resource is fully + stopped. + * - .. _monitor_action: + + .. index:: + single: OCF resource agent; monitor + single: monitor action + + monitor + - Check the resource's state + - Return :ref:`OCF_SUCCESS <OCF_SUCCESS>` if the resource is running, + :ref:`OCF_NOT_RUNNING <OCF_NOT_RUNNING>` if it is stopped, and any other + :ref:`OCF exit code <ocf_return_codes>` if it is failed. **Note:** The + monitor action should test the state of the resource on the local machine + only. + * - .. _meta_data_action: + + .. 
index:: + single: OCF resource agent; meta-data + single: meta-data action + + meta-data + - Describe the resource + - Provide information about this resource in the XML format defined by the + OCF standard. Return :ref:`OCF_SUCCESS <OCF_SUCCESS>`. **Note:** This is + *not* required to be performed as root. OCF resource agents may optionally implement additional actions. Some are used only with advanced resource types such as clones. -.. table:: **Optional Actions for OCF Resource Agents** - - +--------------+-------------+------------------------------------------------+ - | Action | Description | Instructions | - +==============+=============+================================================+ - | validate-all | This should | .. index:: | - | | validate | single: OCF resource agent; validate-all | - | | the | single: validate-all action | - | | instance | | - | | parameters | Return 0 if parameters are valid, 2 if | - | | provided. | not valid, and 6 if resource is not | - | | | configured. | - +--------------+-------------+------------------------------------------------+ - | promote | Bring the | .. index:: | - | | local | single: OCF resource agent; promote | - | | instance of | single: promote action | - | | a promotable| | - | | clone | Return 0 on success | - | | resource to | | - | | the promoted| | - | | role. | | - +--------------+-------------+------------------------------------------------+ - | demote | Bring the | .. index:: | - | | local | single: OCF resource agent; demote | - | | instance of | single: demote action | - | | a promotable| | - | | clone | Return 0 on success | - | | resource to | | - | | the | | - | | unpromoted | | - | | role. | | - +--------------+-------------+------------------------------------------------+ - | notify | Used by the | .. index:: | - | | cluster to | single: OCF resource agent; notify | - | | send | single: notify action | - | | the agent | | - | | pre- and | Must not fail. 
Must exit with 0 | - | | post- | | - | | notification| | - | | events | | - | | telling the | | - | | resource | | - | | what has | | - | | happened and| | - | | will happen.| | - +--------------+-------------+------------------------------------------------+ - | reload | Reload the | .. index:: | - | | service's | single: OCF resource agent; reload | - | | own | single: reload action | - | | config. | | - | | | Not used by Pacemaker | - +--------------+-------------+------------------------------------------------+ - | reload-agent | Make | .. index:: | - | | effective | single: OCF resource agent; reload-agent | - | | any changes | single: reload-agent action | - | | in instance | | - | | parameters | This is used when the agent can handle a | - | | marked as | change in some of its parameters more | - | | reloadable | efficiently than stopping and starting the | - | | in the | resource. | - | | agent's | | - | | meta-data. | | - +--------------+-------------+------------------------------------------------+ - | recover | Restart the | .. index:: | - | | service. | single: OCF resource agent; recover | - | | | single: recover action | - | | | | - | | | Not used by Pacemaker | - +--------------+-------------+------------------------------------------------+ +.. list-table:: **Optional Actions for OCF Resource Agents** + :class: longtable + :widths: 1 4 3 + :header-rows: 1 + + * - Action + - Description + - Instructions + * - .. _validate_all_action: + + .. index:: + single: OCF resource agent; validate-all + single: validate-all action + + validate-all + - Validate the instance parameters provided. + - Return :ref:`OCF_SUCCESS <OCF_SUCCESS>` if parameters are valid, + :ref:`OCF_ERR_ARGS <OCF_ERR_ARGS>` if not valid, and + :ref:`OCF_ERR_CONFIGURED <OCF_ERR_CONFIGURED>` if resource is not + configured. + * - .. _promote_action: + + .. 
index:: + single: OCF resource agent; promote + single: promote action + + promote + - Bring the local instance of a promotable clone resource to the promoted + role. + - Return :ref:`OCF_SUCCESS <OCF_SUCCESS>` on success. + * - .. _demote_action: + + .. index:: + single: OCF resource agent; demote + single: demote action + + demote + - Bring the local instance of a promotable clone resource to the unpromoted + role. + - Return :ref:`OCF_SUCCESS <OCF_SUCCESS>` on success. + * - .. _notify_action: + + .. index:: + single: OCF resource agent; notify + single: notify action + + notify + - Used by the cluster to send the agent pre- and post-notification events + telling the resource what has happened and what will happen. + - Must not fail. Must return :ref:`OCF_SUCCESS <OCF_SUCCESS>`. + * - .. _reload_action: + + .. index:: + single: OCF resource agent; reload + single: reload action + + reload + - Reload the service's own configuration. + - Not used by Pacemaker. + * - .. _reload_agent_action: + + .. index:: + single: OCF resource agent; reload-agent + single: reload-agent action + + reload-agent + - Make effective any changes in instance parameters marked as reloadable in + the agent's meta-data. + - This is used when the agent can handle a change in some of its parameters + more efficiently than stopping and starting the resource. + * - .. _recover_action: + + .. index:: + single: OCF resource agent; recover + single: recover action + + recover + - Restart the service. + - Not used by Pacemaker. .. important:: @@ -180,159 +200,214 @@ only with advanced resource types such as clones. .. index:: single: OCF resource agent; return code -How are OCF Return Codes Interpreted? +How Are OCF Return Codes Interpreted? _____________________________________ -The first thing the cluster does is to check the return code against -the expected result. 
If the result does not match the expected value, -then the operation is considered to have failed, and recovery action is -initiated. +The first thing the cluster does is to check the return code against the +expected result. If the result does not match the expected value, then the +operation is considered to have failed, and recovery action is initiated. There are three types of failure recovery: -.. table:: **Types of recovery performed by the cluster** - - +-------+--------------------------------------------+--------------------------------------+ - | Type | Description | Action Taken by the Cluster | - +=======+============================================+======================================+ - | soft | .. index:: | Restart the resource or move it to a | - | | single: OCF resource agent; soft error | new location | - | | | | - | | A transient error occurred | | - +-------+--------------------------------------------+--------------------------------------+ - | hard | .. index:: | Move the resource elsewhere and | - | | single: OCF resource agent; hard error | prevent it from being retried on the | - | | | current node | - | | A non-transient error that | | - | | may be specific to the | | - | | current node | | - +-------+--------------------------------------------+--------------------------------------+ - | fatal | .. index:: | Stop the resource and prevent it | - | | single: OCF resource agent; fatal error | from being started on any cluster | - | | | node | - | | A non-transient error that | | - | | will be common to all | | - | | cluster nodes (e.g. a bad | | - | | configuration was specified) | | - +-------+--------------------------------------------+--------------------------------------+ +.. list-table:: **Types of Recovery Performed by the Cluster** + :class: longtable + :widths: 1 5 5 + :header-rows: 1 + + * - Type + - Description + - Action Taken by the Cluster + * - .. _soft_error: + + .. 
index:: + single: OCF resource agent; soft error + + soft + - A transient error + - Restart the resource or move it to a new location + * - .. _hard_error: + + .. index:: + single: OCF resource agent; hard error + + hard + - A non-transient error that may be specific to the current node + - Move the resource elsewhere and prevent it from being retried on the + current node + * - .. _fatal_error: + + .. index:: + single: OCF resource agent; fatal error + + fatal + - A non-transient error that will be common to all cluster nodes (for + example, a bad configuration was specified) + - Stop the resource and prevent it from being started on any cluster node .. _ocf_return_codes: OCF Return Codes ________________ -The following table outlines the different OCF return codes and the type of +The following table outlines the various OCF return codes and the type of recovery the cluster will initiate when a failure code is received. Although -counterintuitive, even actions that return 0 (aka. ``OCF_SUCCESS``) can be -considered to have failed, if 0 was not the expected return value. - -.. table:: **OCF Exit Codes and their Recovery Types** - - +-------+-----------------------+---------------------------------------------------+----------+ - | Exit | OCF Alias | Description | Recovery | - | Code | | | | - +=======+=======================+===================================================+==========+ - | 0 | OCF_SUCCESS | .. index:: | soft | - | | | single: OCF_SUCCESS | | - | | | single: OCF return code; OCF_SUCCESS | | - | | | pair: OCF return code; 0 | | - | | | | | - | | | Success. The command completed successfully. | | - | | | This is the expected result for all start, | | - | | | stop, promote and demote commands. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 1 | OCF_ERR_GENERIC | .. 
index:: | soft | - | | | single: OCF_ERR_GENERIC | | - | | | single: OCF return code; OCF_ERR_GENERIC | | - | | | pair: OCF return code; 1 | | - | | | | | - | | | Generic "there was a problem" error code. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 2 | OCF_ERR_ARGS | .. index:: | hard | - | | | single: OCF_ERR_ARGS | | - | | | single: OCF return code; OCF_ERR_ARGS | | - | | | pair: OCF return code; 2 | | - | | | | | - | | | The resource's parameter values are not valid on | | - | | | this machine (for example, a value refers to a | | - | | | file not found on the local host). | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 3 | OCF_ERR_UNIMPLEMENTED | .. index:: | hard | - | | | single: OCF_ERR_UNIMPLEMENTED | | - | | | single: OCF return code; OCF_ERR_UNIMPLEMENTED | | - | | | pair: OCF return code; 3 | | - | | | | | - | | | The requested action is not implemented. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 4 | OCF_ERR_PERM | .. index:: | hard | - | | | single: OCF_ERR_PERM | | - | | | single: OCF return code; OCF_ERR_PERM | | - | | | pair: OCF return code; 4 | | - | | | | | - | | | The resource agent does not have | | - | | | sufficient privileges to complete the task. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 5 | OCF_ERR_INSTALLED | .. index:: | hard | - | | | single: OCF_ERR_INSTALLED | | - | | | single: OCF return code; OCF_ERR_INSTALLED | | - | | | pair: OCF return code; 5 | | - | | | | | - | | | The tools required by the resource are | | - | | | not installed on this machine. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 6 | OCF_ERR_CONFIGURED | .. 
index:: | fatal | - | | | single: OCF_ERR_CONFIGURED | | - | | | single: OCF return code; OCF_ERR_CONFIGURED | | - | | | pair: OCF return code; 6 | | - | | | | | - | | | The resource's parameter values are inherently | | - | | | invalid (for example, a required parameter was | | - | | | not given). | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 7 | OCF_NOT_RUNNING | .. index:: | N/A | - | | | single: OCF_NOT_RUNNING | | - | | | single: OCF return code; OCF_NOT_RUNNING | | - | | | pair: OCF return code; 7 | | - | | | | | - | | | The resource is safely stopped. This should only | | - | | | be returned by monitor actions, not stop actions. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 8 | OCF_RUNNING_PROMOTED | .. index:: | soft | - | | | single: OCF_RUNNING_PROMOTED | | - | | | single: OCF return code; OCF_RUNNING_PROMOTED | | - | | | pair: OCF return code; 8 | | - | | | | | - | | | The resource is running in the promoted role. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 9 | OCF_FAILED_PROMOTED | .. index:: | soft | - | | | single: OCF_FAILED_PROMOTED | | - | | | single: OCF return code; OCF_FAILED_PROMOTED | | - | | | pair: OCF return code; 9 | | - | | | | | - | | | The resource is (or might be) in the promoted | | - | | | role but has failed. The resource will be | | - | | | demoted, stopped and then started (and possibly | | - | | | promoted) again. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | 190 | OCF_DEGRADED | .. index:: | none | - | | | single: OCF_DEGRADED | | - | | | single: OCF return code; OCF_DEGRADED | | - | | | pair: OCF return code; 190 | | - | | | | | - | | | The resource is properly active, but in such a | | - | | | condition that future failures are more likely. 
| | - +-------+-----------------------+---------------------------------------------------+----------+ - | 191 | OCF_DEGRADED_PROMOTED | .. index:: | none | - | | | single: OCF_DEGRADED_PROMOTED | | - | | | single: OCF return code; OCF_DEGRADED_PROMOTED | | - | | | pair: OCF return code; 191 | | - | | | | | - | | | The resource is properly active in the promoted | | - | | | role, but in such a condition that future | | - | | | failures are more likely. | | - +-------+-----------------------+---------------------------------------------------+----------+ - | other | *none* | Custom error code. | soft | - +-------+-----------------------+---------------------------------------------------+----------+ +counterintuitive, even actions that return ``OCF_SUCCESS`` can be considered to +have failed, if ``OCF_SUCCESS`` was not the expected return value. + +.. list-table:: **OCF Exit Codes and Their Recovery Types** + :class: longtable + :widths: 1 3 6 2 + :header-rows: 1 + + * - Exit Code + - OCF Alias + - Description + - Recovery + * - .. _OCF_SUCCESS: + + .. index:: + single: OCF_SUCCESS + single: OCF return code; OCF_SUCCESS + pair: OCF return code; 0 + + 0 + - OCF_SUCCESS + - Success. The command completed successfully. This is the expected result + for all start, stop, promote, and demote actions. + - :ref:`soft <soft_error>` + * - .. _OCF_ERR_GENERIC: + + .. index:: + single: OCF_ERR_GENERIC + single: OCF return code; OCF_ERR_GENERIC + pair: OCF return code; 1 + + 1 + - OCF_ERR_GENERIC + - Generic "there was a problem" error code. + - :ref:`hard <hard_error>` + * - .. _OCF_ERR_ARGS: + + .. index:: + single: OCF_ERR_ARGS + single: OCF return code; OCF_ERR_ARGS + pair: OCF return code; 2 + + 2 + - OCF_ERR_ARGS + - The resource's parameter values are not valid on this machine (for + example, a value refers to a file not found on the local host). + - :ref:`hard <hard_error>` + * - .. _OCF_ERR_UNIMPLEMENTED: + + .. 
index:: + single: OCF_ERR_UNIMPLEMENTED + single: OCF return code; OCF_ERR_UNIMPLEMENTED + pair: OCF return code; 3 + + 3 + - OCF_ERR_UNIMPLEMENTED + - The requested action is not implemented. + - :ref:`hard <hard_error>` + * - .. _OCF_ERR_PERM: + + .. index:: + single: OCF_ERR_PERM + single: OCF return code; OCF_ERR_PERM + pair: OCF return code; 4 + + 4 + - OCF_ERR_PERM + - The resource agent does not have sufficient privileges to complete the + task. + - :ref:`hard <hard_error>` + * - .. _OCF_ERR_INSTALLED: + + .. index:: + single: OCF_ERR_INSTALLED + single: OCF return code; OCF_ERR_INSTALLED + pair: OCF return code; 5 + + 5 + - OCF_ERR_INSTALLED + - The tools required by the resource are not installed on this machine. + - :ref:`hard <hard_error>` + * - .. _OCF_ERR_CONFIGURED: + + .. index:: + single: OCF_ERR_CONFIGURED + single: OCF return code; OCF_ERR_CONFIGURED + pair: OCF return code; 6 + + 6 + - OCF_ERR_CONFIGURED + - The resource's parameter values are inherently invalid (for example, a + required parameter was not given). + - :ref:`fatal <fatal_error>` + * - .. _OCF_NOT_RUNNING: + + .. index:: + single: OCF_NOT_RUNNING + single: OCF return code; OCF_NOT_RUNNING + pair: OCF return code; 7 + + 7 + - OCF_NOT_RUNNING + - The resource is safely stopped. This should only be returned by monitor + actions, not stop actions. + - N/A + * - .. _OCF_RUNNING_PROMOTED: + + .. index:: + single: OCF_RUNNING_PROMOTED + single: OCF return code; OCF_RUNNING_PROMOTED + pair: OCF return code; 8 + + 8 + - OCF_RUNNING_PROMOTED + - The resource is running in the promoted role. + - :ref:`soft <soft_error>` + * - .. _OCF_FAILED_PROMOTED: + + .. index:: + single: OCF_FAILED_PROMOTED + single: OCF return code; OCF_FAILED_PROMOTED + pair: OCF return code; 9 + + 9 + - OCF_FAILED_PROMOTED + - The resource is (or might be) in the promoted role but has failed. The + resource will be demoted, stopped, and then started (and possibly + promoted) again. + - :ref:`soft <soft_error>` + * - .. 
_OCF_DEGRADED: + + .. index:: + single: OCF_DEGRADED + single: OCF return code; OCF_DEGRADED + pair: OCF return code; 190 + + 190 + - OCF_DEGRADED + - The resource is properly active, but in such a condition that future + failures are more likely. + - none + * - .. _OCF_DEGRADED_PROMOTED: + + .. index:: + single: OCF_DEGRADED_PROMOTED + single: OCF return code; OCF_DEGRADED_PROMOTED + pair: OCF return code; 191 + + 191 + - OCF_DEGRADED_PROMOTED + - The resource is properly active in the promoted role, but in such a + condition that future failures are more likely. + - none + * - other + - *none* + - Custom error code. + - soft Exceptions to the recovery handling described above: @@ -347,6 +422,670 @@ Exceptions to the recovery handling described above: if they had returned success, but status output will indicate that the resource is degraded. +.. _ocf_env_vars: + +Environment Variables +_____________________ + +Pacemaker sets certain environment variables when it executes an OCF resource +agent. Agents can check these variables to get information about resource +parameters or the execution environment. + +**Note:** Pacemaker may set other environment variables for its own purposes. +They may be present in the agent's environment, but Pacemaker is not providing +them for the agent's use, and so the agent should not rely on any variables not +listed in the table below. + +.. list-table:: **OCF Environment Variables** + :class: longtable + :widths: 1 6 + :header-rows: 1 + + * - Environment Variable + - Description + * - .. _OCF_CHECK_LEVEL: + + .. index:: + single: OCF_CHECK_LEVEL + single: environment variable; OCF_CHECK_LEVEL + + OCF_CHECK_LEVEL + - Requested intensity level of checks in ``monitor`` and ``validate-all`` + actions. Usually set as an operation attribute; see Pacemaker Explained + for an example. + * - .. _OCF_EXIT_REASON_PREFIX: + + .. 
index:: + single: OCF_EXIT_REASON_PREFIX + single: environment variable; OCF_EXIT_REASON_PREFIX + + OCF_EXIT_REASON_PREFIX + - Prefix for printing fatal error messages from the resource agent. + * - .. _OCF_RA_VERSION_MAJOR: + + .. index:: + single: OCF_RA_VERSION_MAJOR + single: environment variable; OCF_RA_VERSION_MAJOR + + OCF_RA_VERSION_MAJOR + - Major version number of the OCF Resource Agent API. If the script does + not support this revision, it should report an error. + See the `OCF specification <http://standards.clusterlabs.org>`_ for an + explanation of the versioning scheme used. The version number is split + into two numbers for ease of use in shell scripts. These two may be used + by the agent to determine whether it is run under an OCF-compliant + resource manager. + * - .. _OCF_RA_VERSION_MINOR: + + .. index:: + single: OCF_RA_VERSION_MINOR + single: environment variable; OCF_RA_VERSION_MINOR + + OCF_RA_VERSION_MINOR + - Minor version number of the OCF Resource Agent API. See + :ref:`OCF_RA_VERSION_MAJOR <OCF_RA_VERSION_MAJOR>` for more details. + * - .. _OCF_RESKEY_crm_feature_set: + + .. index:: + single: OCF_RESKEY_crm_feature_set + single: environment variable; OCF_RESKEY_crm_feature_set + + OCF_RESKEY_crm_feature_set + - ``crm_feature_set`` on the DC (or on the local node, if the agent is run + by ``crm_resource``). + * - .. _OCF_RESKEY_CRM_meta_interval: + + .. index:: + single: OCF_RESKEY_CRM_meta_interval + single: environment variable; OCF_RESKEY_CRM_meta_interval + + OCF_RESKEY_CRM_meta_interval + - Interval (in milliseconds) of the current operation. + * - .. _OCF_RESKEY_CRM_meta_name: + + .. index:: + single: OCF_RESKEY_CRM_meta_name + single: environment variable; OCF_RESKEY_CRM_meta_name + + OCF_RESKEY_CRM_meta_name + - Name of the current operation. + * - .. _OCF_RESKEY_CRM_meta_notify: + + .. 
index:: + single: OCF_RESKEY_CRM_meta_notify_* + single: environment variable; OCF_RESKEY_CRM_meta_notify_* + + OCF_RESKEY_CRM_meta_notify_* + - See :ref:`Clone Notifications <clone_notifications>`. + * - .. _OCF_RESKEY_CRM_meta_on_node: + + .. index:: + single: OCF_RESKEY_CRM_meta_on_node + single: environment variable; OCF_RESKEY_CRM_meta_on_node + + OCF_RESKEY_CRM_meta_on_node + - Name of the node where the current operation is running. + * - .. _OCF_RESKEY_CRM_meta_on_node_uuid: + + .. index:: + single: OCF_RESKEY_CRM_meta_on_node_uuid + single: environment variable; OCF_RESKEY_CRM_meta_on_node_uuid + + OCF_RESKEY_CRM_meta_on_node_uuid + - Cluster-layer ID of the node where the current operation is running (or + node name for Pacemaker Remote nodes). + * - .. _OCF_RESKEY_CRM_meta_physical_host: + + .. index:: + single: OCF_RESKEY_CRM_meta_physical_host + single: environment variable; OCF_RESKEY_CRM_meta_physical_host + + OCF_RESKEY_CRM_meta_physical_host + - If the node where the current operation is running is a guest node, the + host on which the container is running. + * - .. _OCF_RESKEY_CRM_meta_timeout: + + .. index:: + single: OCF_RESKEY_CRM_meta_timeout + single: environment variable; OCF_RESKEY_CRM_meta_timeout + + OCF_RESKEY_CRM_meta_timeout + - Timeout (in milliseconds) of the current operation. + * - .. _OCF_RESKEY_CRM_meta: + + .. index:: + single: OCF_RESKEY_CRM_meta_* + single: environment variable; OCF_RESKEY_CRM_meta_* + + OCF_RESKEY_CRM_meta_* + - Each of a resource's meta-attributes is converted to an environment + variable prefixed with "OCF_RESKEY_CRM_meta\_". See Pacemaker Explained + for some meta-attributes that have special meaning to Pacemaker. + * - .. _OCF_RESKEY: + + .. index:: + single: OCF_RESKEY_* + single: environment variable; OCF_RESKEY_* + + OCF_RESKEY_* + - Each of a resource's instance parameters is converted to an environment + variable prefixed with "OCF_RESKEY\_". + * - .. _OCF_RESOURCE_INSTANCE: + + .. 
index:: + single: OCF_RESOURCE_INSTANCE + single: environment variable; OCF_RESOURCE_INSTANCE + + OCF_RESOURCE_INSTANCE + - The name of the resource instance. + * - .. _OCF_RESOURCE_PROVIDER: + + .. index:: + single: OCF_RESOURCE_PROVIDER + single: environment variable; OCF_RESOURCE_PROVIDER + + OCF_RESOURCE_PROVIDER + - The name of the resource agent provider. + * - .. _OCF_RESOURCE_TYPE: + + .. index:: + single: OCF_RESOURCE_TYPE + single: environment variable; OCF_RESOURCE_TYPE + + OCF_RESOURCE_TYPE + - The name of the resource type. + * - .. _OCF_ROOT: + + .. index:: + single: OCF_ROOT + single: environment variable; OCF_ROOT + + OCF_ROOT + - The root of the OCF directory hierarchy. + * - .. _OCF_TRACE_FILE: + + .. index:: + single: OCF_TRACE_FILE + single: environment variable; OCF_TRACE_FILE + + OCF_TRACE_FILE + - The absolute path or file descriptor to write trace output to, if + ``OCF_TRACE_RA`` is set to true. Pacemaker sets this only to + ``/dev/stderr`` and only when running a resource agent via + ``crm_resource``. + * - .. _OCF_TRACE_RA: + + .. index:: + single: OCF_TRACE_RA + single: environment variable; OCF_TRACE_RA + + OCF_TRACE_RA + - If set to true, enable tracing of the resource agent. Trace output is + written to ``OCF_TRACE_FILE`` if set; otherwise, it's written to a file + in ``OCF_RESKEY_trace_dir`` if set or in a default directory if not. + Pacemaker sets this to true only when running a resource agent via + ``crm_resource`` with one or more ``-V`` flags. + * - .. _PCMK_DEBUGLOG: + .. _HA_DEBUGLOG: + + .. index:: + single: PCMK_DEBUGLOG + single: environment variable; PCMK_DEBUGLOG + single: HA_DEBUGLOG + single: environment variable; HA_DEBUGLOG + + PCMK_DEBUGLOG (and HA_DEBUGLOG) + - Where to write resource agent debug logs. Pacemaker sets this to + ``PCMK_logfile`` if set to a value other than ``none`` and if debugging + is enabled for the executor. + * - .. _PCMK_LOGFACILITY: + .. _HA_LOGFACILITY: + + .. 
index:: + single: PCMK_LOGFACILITY + single: environment variable; PCMK_LOGFACILITY + single: HA_LOGFACILITY + single: environment variable; HA_LOGFACILITY + + PCMK_LOGFACILITY (and HA_LOGFACILITY) + - Syslog facility for resource agent logs. Pacemaker sets this to + ``PCMK_logfacility`` if set to a value other than ``none`` or + ``/dev/null``. + * - .. _PCMK_LOGFILE: + .. _HA_LOGFILE: + + .. index:: + single: PCMK_LOGFILE + single: environment variable; PCMK_LOGFILE + single: HA_LOGFILE + single: environment variable; HA_LOGFILE + + PCMK_LOGFILE (and HA_LOGFILE) + - Where to write resource agent logs. Pacemaker sets this to + ``PCMK_logfile`` if set to a value other than ``none``. + * - .. _PCMK_service: + + .. index:: + single: PCMK_service + single: environment variable; PCMK_service + + PCMK_service + - The name of the Pacemaker subsystem or command-line tool that's executing + the resource agent. Specific values are subject to change; useful mainly + for logging. + +Clone Resource Agent Requirements +_________________________________ + +Any resource can be used as an anonymous clone, as it requires no additional +support from the resource agent. Whether it makes sense to do so depends on your +resource and its resource agent. + +Resource Agent Requirements for Globally Unique Clones +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Globally unique clones require additional support in the resource agent. In +particular, it must respond with ``OCF_SUCCESS`` only if the node has that exact +instance active. All other probes for instances of the clone should result in +``OCF_NOT_RUNNING`` (or one of the other OCF error codes if they are failed). + +Individual instances of a clone are identified by appending a colon and a +numerical offset (for example, ``apache:2``). 
+ +A resource agent can find out how many copies there are by examining the +``OCF_RESKEY_CRM_meta_clone_max`` environment variable and which instance it is +by examining ``OCF_RESKEY_CRM_meta_clone``. + +The resource agent must not make any assumptions (based on +``OCF_RESKEY_CRM_meta_clone``) about which numerical instances are active. In +particular, the list of active copies is not always an unbroken sequence, nor +does it always start at 0. + +Resource Agent Requirements for Promotable Clones +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Promotable clone resources require two extra actions, ``demote`` and ``promote``, +which are responsible for changing the state of the resource. Like ``start`` and +``stop``, they should return ``OCF_SUCCESS`` if they completed successfully or a +relevant error code if they did not. + +The states can mean whatever you wish, but when the resource is started, it must +begin in the unpromoted role. From there, the cluster will decide which +instances to promote. + +In addition to the clone requirements for monitor actions, agents must also +*accurately* report which state they are in. The cluster relies on the agent to +report its status (including role) accurately and does not indicate to the agent +what role it currently believes it to be in. + +.. list-table:: **Role Implications of OCF Return Codes** + :class: longtable + :widths: 1 3 + :header-rows: 1 + + * - Monitor Return Code + - Description + * - :ref:`OCF_NOT_RUNNING <OCF_NOT_RUNNING>` + - .. index:: + single: OCF_NOT_RUNNING + single: OCF return code; OCF_NOT_RUNNING + + Stopped + * - :ref:`OCF_SUCCESS <OCF_SUCCESS>` + - .. index:: + single: OCF_SUCCESS + single: OCF return code; OCF_SUCCESS + + Running (Unpromoted) + * - :ref:`OCF_RUNNING_PROMOTED <OCF_RUNNING_PROMOTED>` + - .. index:: + single: OCF_RUNNING_PROMOTED + single: OCF return code; OCF_RUNNING_PROMOTED + + Running (Promoted) + * - :ref:`OCF_FAILED_PROMOTED <OCF_FAILED_PROMOTED>` + - .. 
index:: + single: OCF_FAILED_PROMOTED + single: OCF return code; OCF_FAILED_PROMOTED + + Failed (Promoted) + * - Other + - Failed (Unpromoted) + +.. _clone_notifications: + +Clone Notifications +~~~~~~~~~~~~~~~~~~~ + +If the clone has the ``notify`` meta-attribute set to ``true`` and the resource +agent supports the ``notify`` action, Pacemaker will call the action when +appropriate, passing a number of extra variables. These variables, when combined +with additional context, can be used to calculate the current state of the +cluster and what is about to happen to it. + +.. index:: + single: clone; environment variables + single: notify; environment variables + +.. list-table:: **Environment Variables Supplied with Clone Notify Actions** + :class: longtable + :widths: 1 1 + :header-rows: 1 + + * - Variable + - Description + * - .. _OCF_RESKEY_CRM_meta_notify_type: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_type + single: OCF_RESKEY_CRM_meta_notify_type + + OCF_RESKEY_CRM_meta_notify_type + - Allowed values: ``pre``, ``post`` + * - .. _OCF_RESKEY_CRM_meta_notify_operation: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_operation + single: OCF_RESKEY_CRM_meta_notify_operation + + OCF_RESKEY_CRM_meta_notify_operation + - Allowed values: ``start``, ``stop`` + * - .. _OCF_RESKEY_CRM_meta_notify_start_resource: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_start_resource + single: OCF_RESKEY_CRM_meta_notify_start_resource + + OCF_RESKEY_CRM_meta_notify_start_resource + - Resources to be started + * - .. _OCF_RESKEY_CRM_meta_notify_stop_resource: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_stop_resource + single: OCF_RESKEY_CRM_meta_notify_stop_resource + + OCF_RESKEY_CRM_meta_notify_stop_resource + - Resources to be stopped + * - .. _OCF_RESKEY_CRM_meta_notify_active_resource: + + .. 
index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_active_resource + single: OCF_RESKEY_CRM_meta_notify_active_resource + + OCF_RESKEY_CRM_meta_notify_active_resource + - Resources that are running + * - .. _OCF_RESKEY_CRM_meta_notify_inactive_resource: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_inactive_resource + single: OCF_RESKEY_CRM_meta_notify_inactive_resource + + OCF_RESKEY_CRM_meta_notify_inactive_resource + - Resources that are not running + * - .. _OCF_RESKEY_CRM_meta_notify_start_uname: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_start_uname + single: OCF_RESKEY_CRM_meta_notify_start_uname + + OCF_RESKEY_CRM_meta_notify_start_uname + - Nodes on which resources will be started + * - .. _OCF_RESKEY_CRM_meta_notify_stop_uname: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_stop_uname + single: OCF_RESKEY_CRM_meta_notify_stop_uname + + OCF_RESKEY_CRM_meta_notify_stop_uname + - Nodes on which resources will be stopped + * - .. _OCF_RESKEY_CRM_meta_notify_active_uname: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_active_uname + single: OCF_RESKEY_CRM_meta_notify_active_uname + + OCF_RESKEY_CRM_meta_notify_active_uname + - Nodes on which resources are running + +The variables come in pairs, such as +``OCF_RESKEY_CRM_meta_notify_start_resource`` and +``OCF_RESKEY_CRM_meta_notify_start_uname``, and should be treated as an array of +whitespace-separated elements. + +``OCF_RESKEY_CRM_meta_notify_inactive_resource`` is an exception, as the +matching ``uname`` variable does not exist since inactive resources are not +running on any node. + +Thus, in order to indicate that ``clone:0`` will be started on ``sles-1``, +``clone:2`` will be started on ``sles-3``, and ``clone:3`` will be started +on ``sles-2``, the cluster would set: + +.. topic:: Notification Variables + + .. 
code-block:: none + + OCF_RESKEY_CRM_meta_notify_start_resource="clone:0 clone:2 clone:3" + OCF_RESKEY_CRM_meta_notify_start_uname="sles-1 sles-3 sles-2" + +.. note:: + + Pacemaker will log but otherwise ignore failures of notify actions. + +Interpretation of Notification Variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Pre-notification (stop):** + +* Active resources: ``$OCF_RESKEY_CRM_meta_notify_active_resource`` +* Inactive resources: ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` +* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + +**Post-notification (stop) / Pre-notification (start):** + +* Active resources + * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Inactive resources + * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + +**Post-notification (start):** + +* Active resources: + * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Inactive resources: + * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + +Extra Notifications for Promotable Clones +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. index:: + single: clone; environment variables + single: promotable; environment variables + +.. 
list-table:: **Extra Environment Variables Supplied for Promotable Clones** + :class: longtable + :widths: 1 1 + :header-rows: 1 + + * - Variable + - Description + * - .. _OCF_RESKEY_CRM_meta_notify_promoted_resource: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_promoted_resource + single: OCF_RESKEY_CRM_meta_notify_promoted_resource + + OCF_RESKEY_CRM_meta_notify_promoted_resource + - Resources that are running in the promoted role + * - .. _OCF_RESKEY_CRM_meta_notify_unpromoted_resource: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_unpromoted_resource + single: OCF_RESKEY_CRM_meta_notify_unpromoted_resource + + OCF_RESKEY_CRM_meta_notify_unpromoted_resource + - Resources that are running in the unpromoted role + * - .. _OCF_RESKEY_CRM_meta_notify_promote_resource: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_promote_resource + single: OCF_RESKEY_CRM_meta_notify_promote_resource + + OCF_RESKEY_CRM_meta_notify_promote_resource + - Resources to be promoted + * - .. _OCF_RESKEY_CRM_meta_notify_demote_resource: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_demote_resource + single: OCF_RESKEY_CRM_meta_notify_demote_resource + + OCF_RESKEY_CRM_meta_notify_demote_resource + - Resources to be demoted + * - .. _OCF_RESKEY_CRM_meta_notify_promote_uname: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_promote_uname + single: OCF_RESKEY_CRM_meta_notify_promote_uname + + OCF_RESKEY_CRM_meta_notify_promote_uname + - Nodes on which resources will be promoted + * - .. _OCF_RESKEY_CRM_meta_notify_demote_uname: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_demote_uname + single: OCF_RESKEY_CRM_meta_notify_demote_uname + + OCF_RESKEY_CRM_meta_notify_demote_uname + - Nodes on which resources will be demoted + * - .. _OCF_RESKEY_CRM_meta_notify_promoted_uname: + + .. 
index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_promoted_uname + single: OCF_RESKEY_CRM_meta_notify_promoted_uname + + OCF_RESKEY_CRM_meta_notify_promoted_uname + - Nodes on which resources are running in the promoted role + * - .. _OCF_RESKEY_CRM_meta_notify_unpromoted_uname: + + .. index:: + single: environment variable; OCF_RESKEY_CRM_meta_notify_unpromoted_uname + single: OCF_RESKEY_CRM_meta_notify_unpromoted_uname + + OCF_RESKEY_CRM_meta_notify_unpromoted_uname + - Nodes on which resources are running in the unpromoted role + +Interpretation of Promotable Notification Variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Pre-notification (demote):** + +* Active resources: ``$OCF_RESKEY_CRM_meta_notify_active_resource`` +* Promoted resources: ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` +* Unpromoted resources: ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` +* Inactive resources: ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` +* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + +**Post-notification (demote) / Pre-notification (stop):** + +* Active resources: ``$OCF_RESKEY_CRM_meta_notify_active_resource`` +* Promoted resources: + * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Unpromoted resources: ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` +* Inactive resources: ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` +* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources to be stopped: 
``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` + +**Post-notification (stop) / Pre-notification (start)** + +* Active resources: + * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Promoted resources: + * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Unpromoted resources: + * ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Inactive resources: + * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + +**Post-notification (start) / Pre-notification (promote)** + +* Active resources: + * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Promoted resources: + * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Unpromoted resources: + * ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Inactive resources: + * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be started: 
``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + +**Post-notification (promote)** + +* Active resources: + * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Promoted resources: + * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Unpromoted resources: + * ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Inactive resources: + * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` + * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + * minus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` +* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` +* Resources that were promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` +* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` +* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` + .. 
index:: single: resource agent; LSB diff --git a/doc/sphinx/Pacemaker_Administration/configuring.rst b/doc/sphinx/Pacemaker_Administration/configuring.rst index 295c96a..e4d70c4 100644 --- a/doc/sphinx/Pacemaker_Administration/configuring.rst +++ b/doc/sphinx/Pacemaker_Administration/configuring.rst @@ -186,53 +186,14 @@ Connecting from a Remote Machine Provided Pacemaker is installed on a machine, it is possible to connect to the cluster even if the machine itself is not in the same cluster. To do this, one -simply sets up a number of environment variables and runs the same commands as -when working on a cluster node. - -.. list-table:: **Environment Variables Used to Connect to Remote Instances of the CIB** - :class: longtable - :widths: 2 2 5 - :header-rows: 1 - - * - Environment Variable - - Default - - Description - * - .. index:: - single: CIB_user - single: environment variable; CIB_user - - CIB_user - - |CRM_DAEMON_USER_RAW| - - The user to connect as. Needs to be part of the |CRM_DAEMON_GROUP| group - on the target host. - * - .. index:: - single: CIB_passwd - single: environment variable; CIB_passwd - - CIB_passwd - - - - The user's password. Read from the command line if unset. - * - .. index:: - single: CIB_server - single: environment variable; CIB_server - - CIB_server - - localhost - - The host to contact - * - .. index:: - single: CIB_port - single: environment variable; CIB_port - - CIB_port - - - - The port on which to contact the server; required - * - .. 
index:: - single: CIB_encrypted - single: environment variable; CIB_encrypted - - CIB_encrypted - - true - - Whether to encrypt network traffic +simply sets the following environment variables and runs the same commands as +when working on a cluster node: + +* :ref:`CIB_port <CIB_port>` (required) +* :ref:`CIB_server <CIB_server>` +* :ref:`CIB_user <CIB_user>` +* :ref:`CIB_passwd <CIB_passwd>` +* :ref:`CIB_encrypted <CIB_encrypted>` So, if **c001n01** is an active cluster node and is listening on port 1234 for connections, and **someuser** is a member of the |CRM_DAEMON_GROUP| group, diff --git a/doc/sphinx/Pacemaker_Administration/index.rst b/doc/sphinx/Pacemaker_Administration/index.rst index af89380..c8fd722 100644 --- a/doc/sphinx/Pacemaker_Administration/index.rst +++ b/doc/sphinx/Pacemaker_Administration/index.rst @@ -20,6 +20,7 @@ Table of Contents intro installing cluster + options configuring tools administrative diff --git a/doc/sphinx/Pacemaker_Administration/installing.rst b/doc/sphinx/Pacemaker_Administration/installing.rst index 44a3f5f..feea962 100644 --- a/doc/sphinx/Pacemaker_Administration/installing.rst +++ b/doc/sphinx/Pacemaker_Administration/installing.rst @@ -4,6 +4,6 @@ Installing Cluster Software .. index:: installation Most major Linux distributions have pacemaker packages in their standard -package repositories, or the software can be built from source code. -See the `Install wiki page <https://wiki.clusterlabs.org/wiki/Install>`_ -for details. +package repositories, or the software can be built from source code. See +`How to Install <https://projects.clusterlabs.org/w/cluster_administration/how_to_install/>`_ +on the ClusterLabs wiki for details. diff --git a/doc/sphinx/Pacemaker_Administration/options.rst b/doc/sphinx/Pacemaker_Administration/options.rst new file mode 100644 index 0000000..731d17f --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/options.rst @@ -0,0 +1,178 @@ +.. 
index:: client options + +Client Options +-------------- + +Pacemaker uses several environment variables set on the client side. + +.. note:: Directory and file paths below may differ on your system depending on + your Pacemaker build settings. Check your Pacemaker configuration + file to find the correct paths. + +.. list-table:: **Client-side Environment Variables** + :class: longtable + :widths: 2 4 5 + :header-rows: 1 + + * - Environment Variable + - Default + - Description + * - .. _CIB_encrypted: + + .. index:: + single: CIB_encrypted + single: environment variable; CIB_encrypted + + CIB_encrypted + - true + - Whether to encrypt network traffic. Used with :ref:`CIB_port <CIB_port>` + for connecting to a remote CIB instance; ignored if + :ref:`CIB_port <CIB_port>` is not set. + * - .. _CIB_file: + + .. index:: + single: CIB_file + single: environment variable; CIB_file + + CIB_file + - + - If set, CIB connections are created against the named XML file. Clients + read an input CIB from, and write the result CIB to, the named file. + Ignored if :ref:`CIB_shadow <CIB_shadow>` is set. + * - .. _CIB_passwd: + + .. index:: + single: CIB_passwd + single: environment variable; CIB_passwd + + CIB_passwd + - + - :ref:`$CIB_user <CIB_user>`'s password. Read from the command line if + unset. Used with :ref:`CIB_port <CIB_port>` for connecting to a remote + CIB instance; ignored if :ref:`CIB_port <CIB_port>` is not set. + * - .. _CIB_port: + + .. index:: + single: CIB_port + single: environment variable; CIB_port + + CIB_port + - + - If set, CIB connections are created as clients to a remote CIB instance + on :ref:`$CIB_server <CIB_server>` via this port. Ignored if + :ref:`CIB_shadow <CIB_shadow>` or :ref:`CIB_file <CIB_file>` is set. + * - .. _CIB_server: + + .. index:: + single: CIB_server + single: environment variable; CIB_server + + CIB_server + - localhost + - The host to connect to. 
Used with :ref:`CIB_port <CIB_port>` for + connecting to a remote CIB instance; ignored if + :ref:`CIB_port <CIB_port>` is not set. + * - .. _CIB_shadow: + + .. index:: + single: CIB_shadow + single: environment variable; CIB_shadow + + CIB_shadow + - + - If set, CIB connections are created against a temporary working + ("shadow") CIB file called ``shadow.$CIB_shadow`` in + :ref:`$CIB_shadow_dir <CIB_shadow_dir>`. Should be set only to the name + of a shadow CIB created by :ref:`crm_shadow <crm_shadow>`. Otherwise, + behavior is undefined. + * - .. _CIB_shadow_dir: + + .. index:: + single: CIB_shadow_dir + single: environment variable; CIB_shadow_dir + + CIB_shadow_dir + - |CRM_CONFIG_DIR| if the current user is ``root`` or |CRM_DAEMON_USER|; + otherwise ``$HOME/.cib`` if :ref:`$HOME <HOME>` is set; otherwise + ``$TMPDIR/.cib`` if :ref:`$TMPDIR <TMPDIR>` is set to an absolute path; + otherwise ``/tmp/.cib`` + - If set, shadow files are created in this directory. Ignored if + :ref:`CIB_shadow <CIB_shadow>` is not set. + * - .. _CIB_user: + + .. index:: + single: CIB_user + single: environment variable; CIB_user + + CIB_user + - |CRM_DAEMON_USER| if used with :ref:`CIB_port <CIB_port>`, or the current + effective user otherwise + - If used with :ref:`CIB_port <CIB_port>`, connect to + :ref:`$CIB_server <CIB_server>` as this user. Must be part of the + |CRM_DAEMON_GROUP| group on :ref:`$CIB_server <CIB_server>`. Otherwise + (without :ref:`CIB_port <CIB_port>`), this is used only for ACL and + display purposes. + * - .. _EDITOR: + + .. index:: + single: EDITOR + single: environment variable; EDITOR + + EDITOR + - + - Text editor to use for editing shadow files. Required for the ``--edit`` + command of :ref:`crm_shadow <crm_shadow>`. + * - .. _HOME: + + .. 
index:: + single: HOME + single: environment variable; HOME + + HOME + - Current user's home directory as configured in the passwd database, if an + entry exists + - Used to create a default :ref:`CIB_shadow_dir <CIB_shadow_dir>` for + non-privileged users. + * - .. _PE_fail: + + .. index:: + single: PE_fail + single: environment variable; PE_fail + + PE_fail + - 0 + - Advanced use only: A dummy graph action with action ID matching this + option will be marked as failed. Primarily for developer use with + scheduler simulations. + * - .. _PS1: + + .. index:: + single: PS1 + single: environment variable; PS1 + + PS1 + - + - The shell's primary prompt string. Used by + :ref:`crm_shadow <crm_shadow>`: set to indicate that the user is in an + interactive shadow CIB session, and checked to determine whether the user + is already in an interactive session before creating a new one. + * - .. _SHELL: + + .. index:: + single: SHELL + single: environment variable; SHELL + + SHELL + - + - Absolute path to a shell. Used by :ref:`crm_shadow <crm_shadow>` when + launching an interactive session. + * - .. _TMPDIR: + + .. index:: + single: TMPDIR + single: environment variable; TMPDIR + + TMPDIR + - /tmp + - Directory for temporary files. If not an absolute path, the default is + used instead. diff --git a/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst b/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst index 3eda60a..06fb24f 100644 --- a/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst +++ b/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst @@ -4,7 +4,7 @@ Quick Comparison of pcs and crm shell ``pcs`` and ``crm shell`` are two popular higher-level command-line interfaces to Pacemaker. Each has its own syntax; this chapter gives a quick comparison of how to accomplish the same tasks using either one. Some examples also show the -equivalent command using low-level Pacmaker command-line tools. +equivalent command using low-level Pacemaker command-line tools. 
These examples show the simplest syntax; see the respective man pages for all possible options. @@ -118,6 +118,7 @@ Manage Resources .. topic:: Create a Resource .. code-block:: none + crmsh # crm configure primitive ClusterIP IPaddr2 params ip=192.168.122.120 cidr_netmask=24 pcs # pcs resource create ClusterIP IPaddr2 ip=192.168.122.120 cidr_netmask=24 diff --git a/doc/sphinx/Pacemaker_Administration/upgrading.rst b/doc/sphinx/Pacemaker_Administration/upgrading.rst index 1ca2a4e..bccfc22 100644 --- a/doc/sphinx/Pacemaker_Administration/upgrading.rst +++ b/doc/sphinx/Pacemaker_Administration/upgrading.rst @@ -159,11 +159,12 @@ Special considerations when planning a rolling upgrade: * If the Pacemaker Remote protocol version is changing, all cluster nodes should be upgraded before upgrading any Pacemaker Remote nodes. -See the ClusterLabs wiki's -`release calendar <https://wiki.clusterlabs.org/wiki/ReleaseCalendar>`_ -to figure out whether the CRM feature set and/or Pacemaker Remote protocol -version changed between the the Pacemaker release versions in your rolling -upgrade. +See the +`Pacemaker release calendar +<https://projects.clusterlabs.org/w/projects/pacemaker/pacemaker_release_calendar/>`_ +on the ClusterLabs wiki to figure out whether the CRM feature set and/or +Pacemaker Remote protocol version changed between the Pacemaker release +versions in your rolling upgrade. To perform a rolling upgrade, on each node in turn: @@ -302,9 +303,8 @@ A more cautious approach would proceed like this: #. The transformation was successful but produced an invalid result. If the result of the transformation is invalid, you may see a number of - errors from the validation library. If these are not helpful, visit the - `Validation FAQ wiki page <https://wiki.clusterlabs.org/wiki/Validation_FAQ>`_ - and/or try the manual upgrade procedure described below. + errors from the validation library. If these are not helpful, try the manual + upgrade procedure described below. #. 
Check the changes: @@ -398,9 +398,10 @@ the C API. Highlights: higher-level tools are strongly recommended to use instead of trying to parse the text output, which may change from release to release). -For a detailed list of changes, see the release notes and the -`Pacemaker 2.1 Changes <https://wiki.clusterlabs.org/wiki/Pacemaker_2.1_Changes>`_ -page on the ClusterLabs wiki. +For a detailed list of changes, see the release notes and +`Pacemaker 2.1 Changes +<https://projects.clusterlabs.org/w/projects/pacemaker/pacemaker_2.1_changes/>`_ +on the ClusterLabs wiki. What Changed in 2.0 @@ -431,9 +432,10 @@ behavior. Highlights: * The public API for Pacemaker libraries that software applications can use has changed significantly. -For a detailed list of changes, see the release notes and the -`Pacemaker 2.0 Changes <https://wiki.clusterlabs.org/wiki/Pacemaker_2.0_Changes>`_ -page on the ClusterLabs wiki. +For a detailed list of changes, see the release notes and +`Pacemaker 2.0 Changes +<https://projects.clusterlabs.org/w/projects/pacemaker/pacemaker_2.0_changes/>`_ +on the ClusterLabs wiki. What Changed in 1.0 diff --git a/doc/sphinx/Pacemaker_Development/c.rst b/doc/sphinx/Pacemaker_Development/c.rst index b03ddae..8bc5e80 100644 --- a/doc/sphinx/Pacemaker_Development/c.rst +++ b/doc/sphinx/Pacemaker_Development/c.rst @@ -752,12 +752,35 @@ Function names should be unique across the entire project, to allow for individual tracing via ``PCMK_trace_functions``, and make it easier to search code and follow detail logs. -A common function signature is a comparison function that returns 0 if its -arguments are equal for sorting purposes, -1 if the first argument should sort -first, and 1 is the second argument should sort first. Such a function should -have ``cmp`` in its name, to parallel ``strcmp()``; ``sort`` should only be -used in the names of functions that sort an entire list (typically using a -``cmp`` function). +.. 
_sort_func: + +Sorting +^^^^^^^ + +A function that sorts an entire list should have ``sort`` in its name. It sorts +elements using a :ref:`comparison <compare_func>` function, which may be either +hard-coded or passed as an argument. + +.. _compare_func: + +Comparison +^^^^^^^^^^ + +A comparison function for :ref:`sorting <sort_func>` should have ``cmp`` in its +name and should *not* have ``sort`` in its name. + +.. _constructor_func: + +Constructors +^^^^^^^^^^^^ + +A constructor creates a new dynamically allocated object. It may perform some +initialization procedure on the new object. + +* If the constructor always creates an independent object instance, its name + should include ``new``. +* If the constructor may add the new object to some existing object, its name + should include ``create``. Function Definitions @@ -832,6 +855,12 @@ messages and converting from one to another, can be found in Of course, functions may have return values that aren't success/failure indicators, such as a pointer, integer count, or bool. +:ref:`Comparison <compare_func>` functions should return + +* a negative integer if the first argument should sort first +* 0 if its arguments are equal for sorting purposes +* a positive integer if the second argument should sort first + Public API Functions ____________________ @@ -880,6 +909,30 @@ __________________________________ * The convenience macros ``pcmk__plural_s()`` and ``pcmk__plural_alt()`` are handy when logging a word that may be singular or plural. +Log Levels +__________ + +When to use each log level: + +* **critical:** fatal error (usually something that would make a daemon exit) +* **error:** failure of something that affects the cluster (such as a resource + action, fencing action, etc.) 
or daemon operation +* **warning:** minor, potential, or recoverable failures (such as something + only affecting a daemon client, or invalid configuration that can be left to + default) +* **notice:** important successful events (such as a node joining or leaving, + resource action results, or configuration changes) +* **info:** events that would be helpful with troubleshooting (such as status + section updates or elections) +* **debug:** information that would be helpful for debugging code or complex + problems +* **trace:** like debug but for very noisy or low-level stuff + +By default, critical through notice are logged to the system log and detail +log, info is logged to the detail log only, and debug and trace are not logged +(if enabled, they go to the detail log only). + + Logging _______ @@ -912,6 +965,34 @@ using libqb's "extended logging" feature: pcmk_rc_str(rc), rc, id); +Assertion Logging +_________________ + +``CRM_ASSERT(expr)`` + If ``expr`` is false, this will call ``crm_err()`` with a "Triggered + fatal assert" message (with details), then abort execution. This should be + used for logic errors that should be impossible (such as a NULL function + argument where not accepted) and environmental errors that can't be handled + gracefully (for example, memory allocation failures, though returning + ``ENOMEM`` is often better). + +``CRM_LOG_ASSERT(expr)`` + If ``expr`` is false, this will generally log a message without aborting. If + the log level is below trace, it just calls ``crm_err()`` with a "Triggered + assert" message (with details). If the log level is trace, and the caller is + a daemon, then it will fork a child process in which to dump core, as well as + logging the message. If the log level is trace, and the caller is not a + daemon, then it will behave like ``CRM_ASSERT()`` (i.e. log and abort). This + should be used for logic or protocol errors that require no special handling. 
+ +``CRM_CHECK(expr, failed_action)`` + If ``expr`` is false, behave like ``CRM_LOG_ASSERT(expr)`` (that is, log a + message and dump core if requested) then perform ``failed_action`` (which + must not contain ``continue``, ``break``, or ``errno``). This should be used + for logic or protocol errors that can be handled, usually by returning an + error status. + + Output ______ @@ -924,12 +1005,40 @@ A custom message can be defined with a unique string identifier, plus implementation functions for each supported format. The caller invokes the message using the identifier. The user selects the output format via ``--output-as``, and the output code automatically calls the appropriate -implementation function. +implementation function. Custom messages are useful when you want to output +messages that are more complex than a one-line error or informational message, +reproducible, and automatically handled by the output formatting system. +Custom messages can contain other custom messages. + +Custom message functions are implemented as follows: Start with the macro +``PCMK__OUTPUT_ARGS``, whose arguments are the message name, followed by the +arguments to the message. Then there is the function declaration, for which the +arguments are the pointer to the current output object, then a variable argument +list. + +To output a custom message, you first need to create, i.e. register, the custom +message that you want to output. Either call ``register_message``, which +registers a custom message at runtime, or make use of the collection of +predefined custom messages in ``fmt_functions``, which is defined in +``lib/pacemaker/pcmk_output.c``. Once you have the message to be outputted, +output it by calling ``message``. + +Note: The ``fmt_functions`` functions accommodate all of the output formats; +the default implementation accommodates any format that isn't explicitly +accommodated. 
The default output provides valid output for any output format, +but you may still want to implement a specific output, i.e. xml, text, or html. +The ``message`` function automatically knows which implementation to use, +because the ``pcmk__output_s`` contains this information. The interface (most importantly ``pcmk__output_t``) is declared in ``include/crm/common/output*h``. See the API comments and existing tools for -examples. +examples. +Some of its important member functions are ``err``, which formats error messages +and ``info``, which formats informational messages. Also, ``list_item``, +which formats list items, ``begin_list``, which starts lists, and ``end_list``, +which ends lists, are important because lists can be useful, yet differently +handled by the different output types. .. index:: single: Makefile.am diff --git a/doc/sphinx/Pacemaker_Development/components.rst b/doc/sphinx/Pacemaker_Development/components.rst index 5086fa8..ce6b36b 100644 --- a/doc/sphinx/Pacemaker_Development/components.rst +++ b/doc/sphinx/Pacemaker_Development/components.rst @@ -27,10 +27,10 @@ As might be expected, it has the most code of any of the daemons. Join sequence _____________ -Most daemons track their cluster peers using Corosync's membership and CPG -only. The controller additionally requires peers to `join`, which ensures they -are ready to be assigned tasks. Joining proceeds through a series of phases -referred to as the `join sequence` or `join process`. +Most daemons track their cluster peers using Corosync's membership and +:term:`CPG` only. The controller additionally requires peers to `join`, which +ensures they are ready to be assigned tasks. Joining proceeds through a series +of phases referred to as the `join sequence` or `join process`. A node's current join phase is tracked by the ``join`` member of ``crm_node_t`` (used in the peer cache). 
It is an ``enum crm_join_phase`` that (ideally) @@ -141,7 +141,7 @@ _______________ The function calls for a fencing request go something like this: -The local fencer receives the client's request via an IPC or messaging +The local fencer receives the client's request via an :term:`IPC` or messaging layer callback, which calls * ``stonith_command()``, which (for requests) calls @@ -199,8 +199,8 @@ __________________ Each ``STONITH_OP_FENCE`` request goes something like this: -The chosen peer fencer receives the ``STONITH_OP_FENCE`` request via IPC or -messaging layer callback, which calls: +The chosen peer fencer receives the ``STONITH_OP_FENCE`` request via +:term:`IPC` or messaging layer callback, which calls: * ``stonith_command()``, which (for requests) calls @@ -240,7 +240,7 @@ returns, and calls Fencing replies _______________ -The original fencer receives the ``STONITH_OP_FENCE`` reply via IPC or +The original fencer receives the ``STONITH_OP_FENCE`` reply via :term:`IPC` or messaging layer callback, which calls: * ``stonith_command()``, which (for replies) calls @@ -295,10 +295,10 @@ The purpose of the scheduler is to take a CIB as input and generate a transition graph (list of actions that need to be taken) as output. The controller invokes the scheduler by contacting the scheduler daemon via -local IPC. Tools such as ``crm_simulate``, ``crm_mon``, and ``crm_resource`` -can also invoke the scheduler, but do so by calling the library functions -directly. This allows them to run using a ``CIB_file`` without the cluster -needing to be active. +local :term:`IPC`. Tools such as ``crm_simulate``, ``crm_mon``, and +``crm_resource`` can also invoke the scheduler, but do so by calling the +library functions directly. This allows them to run using a ``CIB_file`` +without the cluster needing to be active. The main entry point for the scheduler code is ``lib/pacemaker/pcmk_scheduler.c:pcmk__schedule_actions()``. 
It sets @@ -315,7 +315,7 @@ defaults and calls a series of functions for the scheduling. Some key steps: the CIB status section. This is used to decide whether certain actions need to be done, such as deleting orphan resources, forcing a restart when a resource definition changes, etc. -* ``assign_resources()`` assigns resources to nodes. +* ``assign_resources()`` :term:`assigns <assign>` resources to nodes. * ``schedule_resource_actions()`` schedules resource-specific actions (which might or might not end up in the final graph). * ``pcmk__apply_orderings()`` processes ordering constraints in order to modify @@ -364,7 +364,7 @@ Resources _________ ``pcmk_resource_t`` is the data object representing cluster resources. A -resource has a variant: primitive (a.k.a. native), group, clone, or bundle. +resource has a variant: :term:`primitive`, group, clone, or :term:`bundle`. The resource object has members for two sets of methods, ``resource_object_functions_t`` from the ``libpe_status`` public API, and @@ -374,9 +374,9 @@ The resource object has members for two sets of methods, The object functions have basic capabilities such as unpacking the resource XML, and determining the current or planned location of the resource. -The assignment functions have more obscure capabilities needed for scheduling, -such as processing location and ordering constraints. For example, -``pcmk__create_internal_constraints()`` simply calls the +The :term:`assignment <assign>` functions have more obscure capabilities needed +for scheduling, such as processing location and ordering constraints. For +example, ``pcmk__create_internal_constraints()`` simply calls the ``internal_constraints()`` method for each top-level resource in the cluster. .. index:: @@ -385,9 +385,10 @@ such as processing location and ordering constraints. For example, Nodes _____ -Assignment of resources to nodes is done by choosing the node with the highest -score for a given resource. 
The scheduler does a bunch of processing to -generate the scores, then the actual assignment is straightforward. +:term:`Assignment <assign>` of resources to nodes is done by choosing the node +with the highest :term:`score` for a given resource. The scheduler does a bunch +of processing to generate the scores, then the actual assignment is +straightforward. Node lists are frequently used. For example, ``pcmk_scheduler_t`` has a ``nodes`` member which is a list of all nodes in the cluster, and @@ -435,8 +436,8 @@ ___________ Colocation constraints come into play in these parts of the scheduler code: -* When sorting resources for assignment, so resources with highest node score - are assigned first (see ``cmp_resources()``) +* When sorting resources for :term:`assignment <assign>`, so resources with + highest node :term:`score` are assigned first (see ``cmp_resources()``) * When updating node scores for resource assigment or promotion priority * When assigning resources, so any resources to be colocated with can be assigned first, and so colocations affect where the resource is assigned @@ -449,7 +450,8 @@ The resource assignment functions have several methods related to colocations: dependent's allowed node scores (if called while resources are being assigned) or the dependent's priority (if called while choosing promotable instance roles). It can behave differently depending on whether it is being - called as the primary's method or as the dependent's method. + called as the :term:`primary's <primary>` method or as the :term:`dependent's + <dependent>` method. * ``add_colocated_node_scores():`` This updates a table of nodes for a given colocation attribute and score. 
It goes through colocations involving a given resource, and updates the scores of the nodes in the table with the best diff --git a/doc/sphinx/Pacemaker_Development/documentation.rst b/doc/sphinx/Pacemaker_Development/documentation.rst new file mode 100644 index 0000000..6880bb0 --- /dev/null +++ b/doc/sphinx/Pacemaker_Development/documentation.rst @@ -0,0 +1,35 @@ +.. index:: + pair: documentation; guidelines + +Documentation Guidelines +------------------------ + +See `doc/README.md +<https://github.com/ClusterLabs/pacemaker/blob/main/doc/README.md>`_ in the +source code repository for the kinds of documentation that Pacemaker provides. + +Books +##### + +The ``doc/sphinx`` subdirectory has a subdirectory for each book by title. Each +book's directory contains .rst files, which are the chapter sources in +`reStructuredText +<https://www.sphinx-doc.org/en/master/usage/restructuredtext/>`_ format (with +index.rst as the starting point). + +Once you have edited the sources as desired, run ``make`` in the ``doc`` or +``doc/sphinx`` directory to generate all the books locally. You can view the +results by pointing your web browser to (replacing PATH\_TO\_CHECKOUT and +BOOK\_TITLE appropriately): + + file:///PATH_TO_CHECKOUT/doc/sphinx/BOOK_TITLE/_build/html/index.html + +See the comments at the top of ``doc/sphinx/Makefile.am`` for options you can +use. + +Recommended practices: + +* Use ``list-table`` instead of ``table`` for tables +* When documenting newly added features and syntax, add "\*(since X.Y.Z)\*" + with the version introducing them. These comments can be removed when rolling + upgrades from that version are no longer supported. 
diff --git a/doc/sphinx/Pacemaker_Development/faq.rst b/doc/sphinx/Pacemaker_Development/faq.rst index e738b7d..b1b1e5a 100644 --- a/doc/sphinx/Pacemaker_Development/faq.rst +++ b/doc/sphinx/Pacemaker_Development/faq.rst @@ -32,21 +32,20 @@ Frequently Asked Questions :Q: What are the different Git branches and repositories used for? :A: * The `main branch <https://github.com/ClusterLabs/pacemaker/tree/main>`_ - is the primary branch used for development. - * The `2.1 branch <https://github.com/ClusterLabs/pacemaker/tree/2.1>`_ is - the current release branch. Normally, it does not receive any changes, but - during the release cycle for a new release, it will contain release - candidates. During the release cycle, certain bug fixes will go to the - 2.1 branch first (and be pulled into main later). + is used for all new development. + * The `3.0 <https://github.com/ClusterLabs/pacemaker/tree/3.0>`_ and + `2.1 <https://github.com/ClusterLabs/pacemaker/tree/2.1>`_ branches are + for the currently supported major and minor version release series. + Normally, they do not receive any changes, but during the release cycle + for a new release, they will contain release candidates. The main branch + is pulled into 3.0 just before the first release candidate of a new + release, but otherwise, separate pull requests must be submitted to + backport changes from the main branch into a release branch. * The `2.0 branch <https://github.com/ClusterLabs/pacemaker/tree/2.0>`_, `1.1 branch <https://github.com/ClusterLabs/pacemaker/tree/1.1>`_, and separate `1.0 repository <https://github.com/ClusterLabs/pacemaker-1.0>`_ are frozen snapshots of earlier release series, no longer being developed. - * Messages will be posted to the - `developers@ClusterLabs.org <https://lists.ClusterLabs.org/mailman/listinfo/developers>`_ - mailing list during the release cycle, with instructions about which - branches to use when submitting requests. 
---- @@ -163,9 +162,5 @@ Frequently Asked Questions :Q: What if I still have questions? :A: Ask on the - `developers@ClusterLabs.org <https://lists.ClusterLabs.org/mailman/listinfo/developers>`_ - mailing list for development-related questions, or on the - `users@ClusterLabs.org <https://lists.ClusterLabs.org/mailman/listinfo/users>`_ - mailing list for general questions about using Pacemaker. - Developers often also hang out on the - [ClusterLabs IRC channel](https://wiki.clusterlabs.org/wiki/ClusterLabs_IRC_channel). + `ClusterLabs mailing lists + <https://projects.clusterlabs.org/w/clusterlabs/clusterlabs_mailing_lists/>`_. diff --git a/doc/sphinx/Pacemaker_Development/general.rst b/doc/sphinx/Pacemaker_Development/general.rst index 9d9dcec..94015c9 100644 --- a/doc/sphinx/Pacemaker_Development/general.rst +++ b/doc/sphinx/Pacemaker_Development/general.rst @@ -38,3 +38,13 @@ may put more specific copyright notices in their commit messages if desired. `"Updating Copyright Notices" <https://techwhirl.com/updating-copyright-notices/>`_ for a more readable summary. + +Terminology +########### + +Pacemaker is extremely complex, and it helps to use terminology consistently +throughout documentation, symbol names and comments in code, and so forth. It +also helps to use natural language when practical instead of technical jargon +and acronyms. + +For specific recommendations, see the :ref:`glossary`. diff --git a/doc/sphinx/Pacemaker_Development/glossary.rst b/doc/sphinx/Pacemaker_Development/glossary.rst new file mode 100644 index 0000000..6f73e96 --- /dev/null +++ b/doc/sphinx/Pacemaker_Development/glossary.rst @@ -0,0 +1,84 @@ +.. index:: + single: glossary + +.. _glossary: + +Glossary +-------- + +.. glossary:: + + assign + In the scheduler, this refers to associating a resource with a node. Do + not use *allocate* for this purpose. + + bundle + The collective resource type associating instances of a container with + storage and networking. 
Do not use :term:`container` when referring to + the bundle as a whole. + + cluster layer + The layer of the :term:`cluster stack` that provides membership and + messaging capabilities (such as Corosync). + + cluster stack + The core components of a high-availability cluster: the + :term:`cluster layer` at the "bottom" of the stack, then Pacemaker, then + resource agents, and then the actual services managed by the cluster at + the "top" of the stack. Do not use *stack* for the cluster layer alone. + + CPG + Corosync Process Group. This is the messaging layer in a Corosync-based + cluster. Pacemaker daemons use CPG to communicate with their counterparts + on other nodes. + + container + This can mean either a container in the usual sense (whether as a + standalone resource or as part of a bundle), or as the container resource + meta-attribute (which does not necessarily reference a container in the + usual sense). + + dangling migration + Live migration of a resource consists of a **migrate_to** action on the + source node, followed by a **migrate_from** on the target node, followed + by a **stop** on the source node. If the **migrate_to** and + **migrate_from** have completed successfully, but the **stop** has not + yet been done, the migration is considered to be *dangling*. + + dependent + In colocation constraints, this refers to the resource located relative + to the :term:`primary` resource. Do not use *rh* or *right-hand* for this + purpose. + + IPC + Inter-process communication. In Pacemaker, clients send requests to + daemons using libqb IPC. + + message + This can refer to log messages, custom messages defined for a + **pcmk_output_t** object, or XML messages sent via :term:`CPG` or + :term:`IPC`. + + metadata + In the context of options and resource agents, this refers to OCF-style + metadata. Do not use a hyphen except when referring to the OCF-defined + action name *meta-data*. 
+ + primary + In colocation constraints, this refers to the resource that the + :term:`dependent` resource is located relative to. Do not use *lh* or + *left-hand* for this purpose. + + primitive + The fundamental resource type in Pacemaker. Do not use *native* for this + purpose. + + score + An integer value constrained between **-PCMK_SCORE_INFINITY** and + **+PCMK_SCORE_INFINITY**. Certain strings (such as + **PCMK_VALUE_INFINITY**) parse as particular score values. Do not use + *weight* for this purpose. + + self-fencing + When a node is chosen to execute its own fencing. Do not use *suicide* + for this purpose. diff --git a/doc/sphinx/Pacemaker_Development/index.rst b/doc/sphinx/Pacemaker_Development/index.rst index cbe1499..a3f624f 100644 --- a/doc/sphinx/Pacemaker_Development/index.rst +++ b/doc/sphinx/Pacemaker_Development/index.rst @@ -20,11 +20,13 @@ Table of Contents faq general + documentation python c components helpers evolution + glossary Index ----- diff --git a/doc/sphinx/Pacemaker_Explained/alerts.rst b/doc/sphinx/Pacemaker_Explained/alerts.rst index 1d02187..f4cad72 100644 --- a/doc/sphinx/Pacemaker_Explained/alerts.rst +++ b/doc/sphinx/Pacemaker_Explained/alerts.rst @@ -1,3 +1,5 @@ +.. _alerts: + .. index:: single: alert single: resource; alert @@ -209,7 +211,28 @@ By default, an alert agent will be called for node events, fencing events, and resource events. An agent may choose to ignore certain types of events, but there is still the overhead of calling it for those events. To eliminate that overhead, you may select which types of events the agent should receive. - + +Alert filters are configured within a ``select`` element inside an ``alert`` +element. + +.. list-table:: **Possible alert filters** + :class: longtable + :widths: 1 3 + :header-rows: 1 + + * - Name + - Events alerted + * - select_nodes + - A node joins or leaves the cluster (whether at the cluster layer for + cluster nodes, or via a remote connection for Pacemaker Remote nodes). 
+ * - select_fencing + - Fencing or unfencing of a node completes (whether successfully or not). + * - select_resources + - A resource action other than meta-data completes (whether successfully + or not). + * - select_attributes + - A transient attribute value update is sent to the CIB. + .. topic:: Alert configuration to receive only node events and fencing events .. code-block:: xml @@ -227,9 +250,6 @@ overhead, you may select which types of events the agent should receive. </alerts> </configuration> -The possible options within ``<select>`` are ``<select_nodes>``, -``<select_fencing>``, ``<select_resources>``, and ``<select_attributes>``. - With ``<select_attributes>`` (the only event type not enabled by default), the agent will receive alerts when a node attribute changes. If you wish the agent to be called only when certain attributes change, you can configure that as well. diff --git a/doc/sphinx/Pacemaker_Explained/cluster-options.rst b/doc/sphinx/Pacemaker_Explained/cluster-options.rst index 77bd7e6..042ed0b 100644 --- a/doc/sphinx/Pacemaker_Explained/cluster-options.rst +++ b/doc/sphinx/Pacemaker_Explained/cluster-options.rst @@ -62,143 +62,6 @@ Normally, you will use command-line tools that abstract the XML, so the distinction will be unimportant; both properties and options are cluster settings you can tweak. -Configuration Value Types -######################### - -Throughout this document, configuration values will be designated as having one -of the following types: - -.. list-table:: **Configuration Value Types** - :class: longtable - :widths: 1 3 - :header-rows: 1 - - * - Type - - Description - * - .. _boolean: - - .. index:: - pair: type; boolean - - boolean - - Case-insensitive text value where ``1``, ``yes``, ``y``, ``on``, - and ``true`` evaluate as true and ``0``, ``no``, ``n``, ``off``, - ``false``, and unset evaluate as false - * - .. _date_time: - - .. 
index:: - pair: type; date/time - - date/time - - Textual timestamp like ``Sat Dec 21 11:47:45 2013`` - * - .. _duration: - - .. index:: - pair: type; duration - - duration - - A time duration, specified either like a :ref:`timeout <timeout>` or an - `ISO 8601 duration <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_. - A duration may be up to approximately 49 days but is intended for much - smaller time periods. - * - .. _enumeration: - - .. index:: - pair: type; enumeration - - enumeration - - Text that must be one of a set of defined values (which will be listed - in the description) - * - .. _integer: - - .. index:: - pair: type; integer - - integer - - 32-bit signed integer value (-2,147,483,648 to 2,147,483,647) - * - .. _nonnegative_integer: - - .. index:: - pair: type; nonnegative integer - - nonnegative integer - - 32-bit nonnegative integer value (0 to 2,147,483,647) - * - .. _port: - - .. index:: - pair: type; port - - port - - Integer TCP port number (0 to 65535) - * - .. _score: - - .. index:: - pair: type; score - - score - - A Pacemaker score can be an integer between -1,000,000 and 1,000,000, or - a string alias: ``INFINITY`` or ``+INFINITY`` is equivalent to - 1,000,000, ``-INFINITY`` is equivalent to -1,000,000, and ``red``, - ``yellow``, and ``green`` are equivalent to integers as described in - :ref:`node-health`. - * - .. _text: - - .. index:: - pair: type; text - - text - - A text string - * - .. _timeout: - - .. index:: - pair: type; timeout - - timeout - - A time duration, specified as a bare number (in which case it is - considered to be in seconds) or a number with a unit (``ms`` or ``msec`` - for milliseconds, ``us`` or ``usec`` for microseconds, ``s`` or ``sec`` - for seconds, ``m`` or ``min`` for minutes, ``h`` or ``hr`` for hours) - optionally with whitespace before and/or after the number. - * - .. _version: - - .. 
index:: - pair: type; version - - version - - Version number (any combination of alphanumeric characters, dots, and - dashes, starting with a number). - - -Scores -______ - -Scores are integral to how Pacemaker works. Practically everything from moving -a resource to deciding which resource to stop in a degraded cluster is achieved -by manipulating scores in some way. - -Scores are calculated per resource and node. Any node with a negative score for -a resource can't run that resource. The cluster places a resource on the node -with the highest score for it. - -Score addition and subtraction follow these rules: - -* Any value (including ``INFINITY``) - ``INFINITY`` = ``-INFINITY`` -* ``INFINITY`` + any value other than ``-INFINITY`` = ``INFINITY`` - -.. note:: - - What if you want to use a score higher than 1,000,000? Typically this possibility - arises when someone wants to base the score on some external metric that might - go above 1,000,000. - - The short answer is you can't. - - The long answer is it is sometimes possible work around this limitation - creatively. You may be able to set the score to some computed value based on - the external metric rather than use the metric directly. For nodes, you can - store the metric as a node attribute, and query the attribute when computing - the score (possibly as part of a custom resource agent). - CIB Properties ############## @@ -321,6 +184,15 @@ holds. So the decision was made to place them in an easy-to-find location. - - Node ID of the cluster's current designated controller (DC). Used and maintained by the cluster. + * - .. _execution_date: + + .. index:: + pair: execution-date; cib + + execution-date + - :ref:`epoch time <epoch_time>` + - + - Time to use when evaluating rules. .. _cluster_options: @@ -427,6 +299,29 @@ values, by running the ``man pacemaker-schedulerd`` and - The number of :ref:`live migration <live-migration>` actions that the cluster is allowed to execute in parallel on a node. 
A value of -1 means unlimited. + * - .. _load_threshold: + + .. index:: + pair: cluster option; load-threshold + + load-threshold + - :ref:`percentage <percentage>` + - 80% + - Maximum amount of system load that should be used by cluster nodes. The + cluster will slow down its recovery process when the amount of system + resources used (currently CPU) approaches this limit. + * - .. _node_action_limit: + + .. index:: + pair: cluster option; node-action-limit + + node-action-limit + - :ref:`integer <integer>` + - 0 + - Maximum number of jobs that can be scheduled per node. If nonpositive or + invalid, double the number of cores is used as the maximum number of jobs + per node. :ref:`PCMK_node_action_limit <pcmk_node_action_limit>` + overrides this option on a per-node basis. * - .. _symmetric_cluster: .. index:: @@ -558,6 +453,22 @@ values, by running the ``man pacemaker-schedulerd`` and - How many times fencing can fail for a target before the cluster will no longer immediately re-attempt it. Any value below 1 will be ignored, and the default will be used instead. + * - .. _have_watchdog: + + .. index:: + pair: cluster option; have-watchdog + + have-watchdog + - :ref:`boolean <boolean>` + - *detected* + - Whether watchdog integration is enabled. This is set automatically by the + cluster according to whether SBD is detected to be in use. + User-configured values are ignored. The value ``true`` is meaningful if + diskless SBD is used and + :ref:`stonith-watchdog-timeout <stonith_watchdog_timeout>` is nonzero. In + that case, if fencing is required, watchdog-based self-fencing will be + performed via SBD without requiring a fencing resource explicitly + configured. * - .. _stonith_watchdog_timeout: .. 
index:: @@ -568,23 +479,29 @@ values, by running the ``man pacemaker-schedulerd`` and - 0 - If nonzero, and the cluster detects ``have-watchdog`` as ``true``, then watchdog-based self-fencing will be performed via SBD when fencing is - required, without requiring a fencing resource explicitly configured. - - If this is set to a positive value, unseen nodes are assumed to - self-fence within this much time. + required. - **Warning:** It must be ensured that this value is larger than the - ``SBD_WATCHDOG_TIMEOUT`` environment variable on all nodes. Pacemaker - verifies the settings individually on all nodes and prevents startup or - shuts down if configured wrongly on the fly. It is strongly recommended - that ``SBD_WATCHDOG_TIMEOUT`` be set to the same value on all nodes. + If this is set to a positive value, lost nodes are assumed to achieve + self-fencing within this much time. + + This does not require a fencing resource to be explicitly configured, + though a fence_watchdog resource can be configured, to limit use to + specific nodes. + + If this is set to 0 (the default), the cluster will never assume + watchdog-based self-fencing. + + If this is set to a negative value, the cluster will use twice the local + value of the ``SBD_WATCHDOG_TIMEOUT`` environment variable if that is + positive, or otherwise treat this as 0. - If this is set to a negative value, and ``SBD_WATCHDOG_TIMEOUT`` is set, - twice that value will be used. + **Warning:** When used, this timeout must be larger than + ``SBD_WATCHDOG_TIMEOUT`` on all nodes that use watchdog-based SBD, and + Pacemaker will refuse to start on any of those nodes where this is not + true for the local value or SBD is not active. When this is set to a + negative value, ``SBD_WATCHDOG_TIMEOUT`` must be set to the same value + on all nodes that use SBD, otherwise data corruption or loss could occur. 
- **Warning:** In this case, it is essential (and currently not verified - by pacemaker) that ``SBD_WATCHDOG_TIMEOUT`` is set to the same value on - all nodes. * - .. _concurrent-fencing: .. index:: @@ -607,12 +524,13 @@ values, by running the ``man pacemaker-schedulerd`` and - :ref:`enumeration <enumeration>` - stop - How should a cluster node react if notified of its own fencing? A - cluster node may receive notification of its own fencing if fencing is - misconfigured, or if fabric fencing is in use that doesn't cut cluster - communication. Allowed values are ``stop`` to attempt to immediately - stop Pacemaker and stay stopped, or ``panic`` to attempt to immediately - reboot the local node, falling back to stop on failure. The default is - likely to be changed to ``panic`` in a future release. *(since 2.0.3)* + cluster node may receive notification of a "succeeded" fencing that + targeted it if fencing is misconfigured, or if fabric fencing is in use + that doesn't cut cluster communication. Allowed values are ``stop`` to + attempt to immediately stop Pacemaker and stay stopped, or ``panic`` to + attempt to immediately reboot the local node, falling back to stop on + failure. The default is likely to be changed to ``panic`` in a future + release. *(since 2.0.3)* * - .. _priority_fencing_delay: .. index:: @@ -784,7 +702,7 @@ values, by running the ``man pacemaker-schedulerd`` and node-health-red - :ref:`score <score>` - - 0 + - -INFINITY - The score to use for a node health attribute whose value is ``red``. Only used when ``node-health-strategy`` is ``progressive`` or ``custom``. @@ -797,10 +715,10 @@ values, by running the ``man pacemaker-schedulerd`` and - :ref:`duration <duration>` - 15min - Pacemaker is primarily event-driven, and looks ahead to know when to - recheck the cluster for failure timeouts and most time-based rules - *(since 2.0.3)*. However, it will also recheck the cluster after this - amount of inactivity. 
This has two goals: rules with ``date_spec`` are - only guaranteed to be checked this often, and it also serves as a + recheck the cluster for failure-timeout settings and most time-based + rules *(since 2.0.3)*. However, it will also recheck the cluster after + this amount of inactivity. This has two goals: rules with ``date_spec`` + are only guaranteed to be checked this often, and it also serves as a fail-safe for some kinds of scheduler bugs. A value of 0 disables this polling. * - .. _shutdown_lock: diff --git a/doc/sphinx/Pacemaker_Explained/collective.rst b/doc/sphinx/Pacemaker_Explained/collective.rst index a4fa9dc..dc6832c 100644 --- a/doc/sphinx/Pacemaker_Explained/collective.rst +++ b/doc/sphinx/Pacemaker_Explained/collective.rst @@ -569,418 +569,7 @@ instances around the cluster. apply to clone instances as well. This means an explicit ``resource-stickiness`` of 0 in ``rsc_defaults`` works differently from the implicit default used when ``resource-stickiness`` is not specified. - -Clone Resource Agent Requirements -_________________________________ - -Any resource can be used as an anonymous clone, as it requires no -additional support from the resource agent. Whether it makes sense to -do so depends on your resource and its resource agent. - -Resource Agent Requirements for Globally Unique Clones -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Globally unique clones require additional support in the resource agent. In -particular, it must only respond with ``${OCF_SUCCESS}`` if the node has that -exact instance active. All other probes for instances of the clone should -result in ``${OCF_NOT_RUNNING}`` (or one of the other OCF error codes if -they are failed). - -Individual instances of a clone are identified by appending a colon and a -numerical offset, e.g. **apache:2**. 
- -Resource agents can find out how many copies there are by examining -the ``OCF_RESKEY_CRM_meta_clone_max`` environment variable and which -instance it is by examining ``OCF_RESKEY_CRM_meta_clone``. - -The resource agent must not make any assumptions (based on -``OCF_RESKEY_CRM_meta_clone``) about which numerical instances are active. In -particular, the list of active copies will not always be an unbroken -sequence, nor always start at 0. - -Resource Agent Requirements for Promotable Clones -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Promotable clone resources require two extra actions, ``demote`` and ``promote``, -which are responsible for changing the state of the resource. Like **start** and -**stop**, they should return ``${OCF_SUCCESS}`` if they completed successfully or -a relevant error code if they did not. - -The states can mean whatever you wish, but when the resource is -started, it must come up in the unpromoted role. From there, the -cluster will decide which instances to promote. - -In addition to the clone requirements for monitor actions, agents must -also *accurately* report which state they are in. The cluster relies -on the agent to report its status (including role) accurately and does -not indicate to the agent what role it currently believes it to be in. - -.. table:: **Role implications of OCF return codes** - :widths: 1 3 - - +----------------------+--------------------------------------------------+ - | Monitor Return Code | Description | - +======================+==================================================+ - | OCF_NOT_RUNNING | .. index:: | - | | single: OCF_NOT_RUNNING | - | | single: OCF return code; OCF_NOT_RUNNING | - | | | - | | Stopped | - +----------------------+--------------------------------------------------+ - | OCF_SUCCESS | .. 
index:: | - | | single: OCF_SUCCESS | - | | single: OCF return code; OCF_SUCCESS | - | | | - | | Running (Unpromoted) | - +----------------------+--------------------------------------------------+ - | OCF_RUNNING_PROMOTED | .. index:: | - | | single: OCF_RUNNING_PROMOTED | - | | single: OCF return code; OCF_RUNNING_PROMOTED | - | | | - | | Running (Promoted) | - +----------------------+--------------------------------------------------+ - | OCF_FAILED_PROMOTED | .. index:: | - | | single: OCF_FAILED_PROMOTED | - | | single: OCF return code; OCF_FAILED_PROMOTED | - | | | - | | Failed (Promoted) | - +----------------------+--------------------------------------------------+ - | Other | .. index:: | - | | single: return code | - | | | - | | Failed (Unpromoted) | - +----------------------+--------------------------------------------------+ - -Clone Notifications -~~~~~~~~~~~~~~~~~~~ - -If the clone has the ``notify`` meta-attribute set to **true**, and the resource -agent supports the ``notify`` action, Pacemaker will call the action when -appropriate, passing a number of extra variables which, when combined with -additional context, can be used to calculate the current state of the cluster -and what is about to happen to it. - -.. index:: - single: clone; environment variables - single: notify; environment variables - -.. table:: **Environment variables supplied with Clone notify actions** - :widths: 1 1 - - +----------------------------------------------+-------------------------------------------------------------------------------+ - | Variable | Description | - +==============================================+===============================================================================+ - | OCF_RESKEY_CRM_meta_notify_type | .. 
index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_type | - | | single: OCF_RESKEY_CRM_meta_notify_type | - | | | - | | Allowed values: **pre**, **post** | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_operation | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_operation | - | | single: OCF_RESKEY_CRM_meta_notify_operation | - | | | - | | Allowed values: **start**, **stop** | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_start_resource | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_start_resource | - | | single: OCF_RESKEY_CRM_meta_notify_start_resource | - | | | - | | Resources to be started | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_stop_resource | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_stop_resource | - | | single: OCF_RESKEY_CRM_meta_notify_stop_resource | - | | | - | | Resources to be stopped | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_active_resource | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_active_resource | - | | single: OCF_RESKEY_CRM_meta_notify_active_resource | - | | | - | | Resources that are running | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_inactive_resource | .. 
index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_inactive_resource | - | | single: OCF_RESKEY_CRM_meta_notify_inactive_resource | - | | | - | | Resources that are not running | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_start_uname | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_start_uname | - | | single: OCF_RESKEY_CRM_meta_notify_start_uname | - | | | - | | Nodes on which resources will be started | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_stop_uname | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_stop_uname | - | | single: OCF_RESKEY_CRM_meta_notify_stop_uname | - | | | - | | Nodes on which resources will be stopped | - +----------------------------------------------+-------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_active_uname | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_active_uname | - | | single: OCF_RESKEY_CRM_meta_notify_active_uname | - | | | - | | Nodes on which resources are running | - +----------------------------------------------+-------------------------------------------------------------------------------+ - -The variables come in pairs, such as -``OCF_RESKEY_CRM_meta_notify_start_resource`` and -``OCF_RESKEY_CRM_meta_notify_start_uname``, and should be treated as an -array of whitespace-separated elements. - -``OCF_RESKEY_CRM_meta_notify_inactive_resource`` is an exception, as the -matching **uname** variable does not exist since inactive resources -are not running on any node. 
- -Thus, in order to indicate that **clone:0** will be started on **sles-1**, -**clone:2** will be started on **sles-3**, and **clone:3** will be started -on **sles-2**, the cluster would set: - -.. topic:: Notification variables - - .. code-block:: none - - OCF_RESKEY_CRM_meta_notify_start_resource="clone:0 clone:2 clone:3" - OCF_RESKEY_CRM_meta_notify_start_uname="sles-1 sles-3 sles-2" - -.. note:: - - Pacemaker will log but otherwise ignore failures of notify actions. - -Interpretation of Notification Variables -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Pre-notification (stop):** - -* Active resources: ``$OCF_RESKEY_CRM_meta_notify_active_resource`` -* Inactive resources: ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` -* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -**Post-notification (stop) / Pre-notification (start):** - -* Active resources - - * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` -* Inactive resources - - * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -**Post-notification (start):** - -* Active resources: - - * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - -* Inactive resources: - - * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - -* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -Extra Notifications for Promotable Clones 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. index:: - single: clone; environment variables - single: promotable; environment variables - -.. table:: **Extra environment variables supplied for promotable clones** - :widths: 1 1 - - +------------------------------------------------+---------------------------------------------------------------------------------+ - | Variable | Description | - +================================================+=================================================================================+ - | OCF_RESKEY_CRM_meta_notify_promoted_resource | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_promoted_resource | - | | single: OCF_RESKEY_CRM_meta_notify_promoted_resource | - | | | - | | Resources that are running in the promoted role | - +------------------------------------------------+---------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_unpromoted_resource | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_unpromoted_resource | - | | single: OCF_RESKEY_CRM_meta_notify_unpromoted_resource | - | | | - | | Resources that are running in the unpromoted role | - +------------------------------------------------+---------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_promote_resource | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_promote_resource | - | | single: OCF_RESKEY_CRM_meta_notify_promote_resource | - | | | - | | Resources to be promoted | - +------------------------------------------------+---------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_demote_resource | .. 
index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_demote_resource | - | | single: OCF_RESKEY_CRM_meta_notify_demote_resource | - | | | - | | Resources to be demoted | - +------------------------------------------------+---------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_promote_uname | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_promote_uname | - | | single: OCF_RESKEY_CRM_meta_notify_promote_uname | - | | | - | | Nodes on which resources will be promoted | - +------------------------------------------------+---------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_demote_uname | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_demote_uname | - | | single: OCF_RESKEY_CRM_meta_notify_demote_uname | - | | | - | | Nodes on which resources will be demoted | - +------------------------------------------------+---------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_promoted_uname | .. index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_promoted_uname | - | | single: OCF_RESKEY_CRM_meta_notify_promoted_uname | - | | | - | | Nodes on which resources are running in the promoted role | - +------------------------------------------------+---------------------------------------------------------------------------------+ - | OCF_RESKEY_CRM_meta_notify_unpromoted_uname | .. 
index:: | - | | single: environment variable; OCF_RESKEY_CRM_meta_notify_unpromoted_uname | - | | single: OCF_RESKEY_CRM_meta_notify_unpromoted_uname | - | | | - | | Nodes on which resources are running in the unpromoted role | - +------------------------------------------------+---------------------------------------------------------------------------------+ - -Interpretation of Promotable Notification Variables -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Pre-notification (demote):** - -* Active resources: ``$OCF_RESKEY_CRM_meta_notify_active_resource`` -* Promoted resources: ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` -* Unpromoted resources: ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` -* Inactive resources: ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` -* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` -* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -**Post-notification (demote) / Pre-notification (stop):** - -* Active resources: ``$OCF_RESKEY_CRM_meta_notify_active_resource`` -* Promoted resources: - - * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` - -* Unpromoted resources: ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` -* Inactive resources: ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` -* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` -* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` -* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` - -**Post-notification (stop) / Pre-notification (start)** - -* Active resources: - - * 
``$OCF_RESKEY_CRM_meta_notify_active_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -* Promoted resources: - - * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` - -* Unpromoted resources: - - * ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -* Inactive resources: - - * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` -* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` -* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -**Post-notification (start) / Pre-notification (promote)** - -* Active resources: - - * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - -* Promoted resources: - - * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` - -* Unpromoted resources: - - * ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - -* Inactive resources: - - * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - -* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` -* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* 
Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` -* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - -**Post-notification (promote)** - -* Active resources: - - * ``$OCF_RESKEY_CRM_meta_notify_active_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - -* Promoted resources: - - * ``$OCF_RESKEY_CRM_meta_notify_promoted_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` - -* Unpromoted resources: - - * ``$OCF_RESKEY_CRM_meta_notify_unpromoted_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` - -* Inactive resources: - - * ``$OCF_RESKEY_CRM_meta_notify_inactive_resource`` - * plus ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - * minus ``$OCF_RESKEY_CRM_meta_notify_start_resource`` - -* Resources to be started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources to be promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` -* Resources to be demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* Resources to be stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` -* Resources that were started: ``$OCF_RESKEY_CRM_meta_notify_start_resource`` -* Resources that were promoted: ``$OCF_RESKEY_CRM_meta_notify_promote_resource`` -* Resources that were demoted: ``$OCF_RESKEY_CRM_meta_notify_demote_resource`` -* Resources that were stopped: ``$OCF_RESKEY_CRM_meta_notify_stop_resource`` - Monitoring Promotable Clone Resources _____________________________________ diff --git a/doc/sphinx/Pacemaker_Explained/constraints.rst b/doc/sphinx/Pacemaker_Explained/constraints.rst index 
a78d6c2..cff65c5 100644 --- a/doc/sphinx/Pacemaker_Explained/constraints.rst +++ b/doc/sphinx/Pacemaker_Explained/constraints.rst @@ -38,109 +38,146 @@ configuration might be simpler. Location Properties ___________________ -.. table:: **Attributes of a rsc_location Element** +.. list-table:: **Attributes of a rsc_location Element** :class: longtable - :widths: 1 1 4 - - +--------------------+---------+----------------------------------------------------------------------------------------------+ - | Attribute | Default | Description | - +====================+=========+==============================================================================================+ - | id | | .. index:: | - | | | single: rsc_location; attribute, id | - | | | single: attribute; id (rsc_location) | - | | | single: id; rsc_location attribute | - | | | | - | | | A unique name for the constraint (required) | - +--------------------+---------+----------------------------------------------------------------------------------------------+ - | rsc | | .. index:: | - | | | single: rsc_location; attribute, rsc | - | | | single: attribute; rsc (rsc_location) | - | | | single: rsc; rsc_location attribute | - | | | | - | | | The name of the resource to which this constraint | - | | | applies. A location constraint must either have a | - | | | ``rsc``, have a ``rsc-pattern``, or contain at | - | | | least one resource set. | - +--------------------+---------+----------------------------------------------------------------------------------------------+ - | rsc-pattern | | .. index:: | - | | | single: rsc_location; attribute, rsc-pattern | - | | | single: attribute; rsc-pattern (rsc_location) | - | | | single: rsc-pattern; rsc_location attribute | - | | | | - | | | A pattern matching the names of resources to which | - | | | this constraint applies. 
The syntax is the same as | - | | | `POSIX <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04>`_ | - | | | extended regular expressions, with the addition of an | - | | | initial ``!`` indicating that resources *not* matching | - | | | the pattern are selected. If the regular expression | - | | | contains submatches, and the constraint is governed by | - | | | a :ref:`rule <rules>`, the submatches can be | - | | | referenced as ``%1`` through ``%9`` in the rule's | - | | | ``score-attribute`` or a rule expression's ``attribute`` | - | | | (see :ref:`s-rsc-pattern-rules`). A location constraint | - | | | must either have a ``rsc``, have a ``rsc-pattern``, or | - | | | contain at least one resource set. | - +--------------------+---------+----------------------------------------------------------------------------------------------+ - | node | | .. index:: | - | | | single: rsc_location; attribute, node | - | | | single: attribute; node (rsc_location) | - | | | single: node; rsc_location attribute | - | | | | - | | | The name of the node to which this constraint applies. | - | | | A location constraint must either have a ``node`` and | - | | | ``score``, or contain at least one rule. | - +--------------------+---------+----------------------------------------------------------------------------------------------+ - | score | | .. index:: | - | | | single: rsc_location; attribute, score | - | | | single: attribute; score (rsc_location) | - | | | single: score; rsc_location attribute | - | | | | - | | | Positive values indicate a preference for running the | - | | | affected resource(s) on ``node`` -- the higher the value, | - | | | the stronger the preference. Negative values indicate | - | | | the resource(s) should avoid this node (a value of | - | | | **-INFINITY** changes "should" to "must"). A location | - | | | constraint must either have a ``node`` and ``score``, | - | | | or contain at least one rule. 
| - +--------------------+---------+----------------------------------------------------------------------------------------------+ - | resource-discovery | always | .. index:: | - | | | single: rsc_location; attribute, resource-discovery | - | | | single: attribute; resource-discovery (rsc_location) | - | | | single: resource-discovery; rsc_location attribute | - | | | | - | | | Whether Pacemaker should perform resource discovery | - | | | (that is, check whether the resource is already running) | - | | | for this resource on this node. This should normally be | - | | | left as the default, so that rogue instances of a | - | | | service can be stopped when they are running where they | - | | | are not supposed to be. However, there are two | - | | | situations where disabling resource discovery is a good | - | | | idea: when a service is not installed on a node, | - | | | discovery might return an error (properly written OCF | - | | | agents will not, so this is usually only seen with other | - | | | agent types); and when Pacemaker Remote is used to scale | - | | | a cluster to hundreds of nodes, limiting resource | - | | | discovery to allowed nodes can significantly boost | - | | | performance. | - | | | | - | | | * ``always:`` Always perform resource discovery for | - | | | the specified resource on this node. | - | | | | - | | | * ``never:`` Never perform resource discovery for the | - | | | specified resource on this node. This option should | - | | | generally be used with a -INFINITY score, although | - | | | that is not strictly required. | - | | | | - | | | * ``exclusive:`` Perform resource discovery for the | - | | | specified resource only on this node (and other nodes | - | | | similarly marked as ``exclusive``). Multiple location | - | | | constraints using ``exclusive`` discovery for the | - | | | same resource across different nodes creates a subset | - | | | of nodes resource-discovery is exclusive to. 
If a | - | | | resource is marked for ``exclusive`` discovery on one | - | | | or more nodes, that resource is only allowed to be | - | | | placed within that subset of nodes. | - +--------------------+---------+----------------------------------------------------------------------------------------------+ + :widths: 1 1 1 4 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _rsc_location_id: + + .. index:: + single: rsc_location; attribute, id + single: attribute; id (rsc_location) + single: id; rsc_location attribute + + id + - :ref:`id <id>` + - + - A unique name for the constraint (required) + * - .. _rsc_location_rsc: + + .. index:: + single: rsc_location; attribute, rsc + single: attribute; rsc (rsc_location) + single: rsc; rsc_location attribute + + rsc + - :ref:`id <id>` + - + - The name of the resource to which this constraint applies. A location + constraint must either have a ``rsc``, have a ``rsc-pattern``, or + contain at least one resource set. + * - .. _rsc_pattern: + + .. index:: + single: rsc_location; attribute, rsc-pattern + single: attribute; rsc-pattern (rsc_location) + single: rsc-pattern; rsc_location attribute + + rsc-pattern + - :ref:`text <text>` + - + - A pattern matching the names of resources to which this constraint + applies. The syntax is the same as `POSIX + <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04>`_ + extended regular expressions, with the addition of an initial ``!`` + indicating that resources *not* matching the pattern are selected. If + the regular expression contains submatches, and the constraint contains + a :ref:`rule <rules>`, the submatches can be referenced as ``%1`` + through ``%9`` in the rule's ``score-attribute`` or a rule expression's + ``attribute`` (see :ref:`s-rsc-pattern-rules`). A location constraint + must either have a ``rsc``, have a ``rsc-pattern``, or contain at least + one resource set. + * - .. _rsc_location_node: + + ..
index:: + single: rsc_location; attribute, node + single: attribute; node (rsc_location) + single: node; rsc_location attribute + + node + - :ref:`text <text>` + - + - The name of the node to which this constraint applies. A location + constraint must either have a ``node`` and ``score``, or contain at + least one rule. + * - .. _rsc_location_score: + + .. index:: + single: rsc_location; attribute, score + single: attribute; score (rsc_location) + single: score; rsc_location attribute + + score + - :ref:`score <score>` + - + - Positive values indicate a preference for running the affected + resource(s) on ``node`` -- the higher the value, the stronger the + preference. Negative values indicate the resource(s) should avoid this + node (a value of **-INFINITY** changes "should" to "must"). A location + constraint must either have a ``node`` and ``score``, or contain at + least one rule. + * - .. _rsc_location_role: + + .. index:: + single: rsc_location; attribute, role + single: attribute; role (rsc_location) + single: role; rsc_location attribute + + role + - :ref:`enumeration <enumeration>` + - ``Started`` + - This is significant only for + :ref:`promotable clones <s-resource-promotable>`, is allowed only if + ``rsc`` or ``rsc-pattern`` is set, and is ignored if the constraint + contains a rule. Allowed values: + + * ``Started`` or ``Unpromoted``: The constraint affects the location of + all instances of the resource. (A promoted instance must start in the + unpromoted role before being promoted, so any location requirement for + unpromoted instances also affects promoted instances.) + * ``Promoted``: The constraint does not affect the location of + instances, but instead affects which of the instances will be + promoted. + + * - .. _resource_discovery: + + ..
index:: + single: rsc_location; attribute, resource-discovery + single: attribute; resource-discovery (rsc_location) + single: resource-discovery; rsc_location attribute + + resource-discovery + - :ref:`enumeration <enumeration>` + - always + - Whether Pacemaker should perform resource discovery (that is, check + whether the resource is already running) for this resource on this node. + This should normally be left as the default, so that rogue instances of + a service can be stopped when they are running where they are not + supposed to be. However, there are two situations where disabling + resource discovery is a good idea: when a service is not installed on a + node, discovery might return an error (properly written OCF agents will + not, so this is usually only seen with other agent types); and when + Pacemaker Remote is used to scale a cluster to hundreds of nodes, + limiting resource discovery to allowed nodes can significantly boost + performance. Allowed values: + + * ``always``: Always perform resource discovery for the specified + resource on this node. + * ``never``: Never perform resource discovery for the specified resource + on this node. This option should generally be used with a -INFINITY + score, although that is not strictly required. + * ``exclusive``: Perform resource discovery for the specified resource + only on this node (and other nodes similarly marked as ``exclusive``). + Multiple location constraints using ``exclusive`` discovery for the + same resource across different nodes create a subset of nodes + resource-discovery is exclusive to. If a resource is marked for + ``exclusive`` discovery on one or more nodes, that resource is only + allowed to be placed within that subset of nodes. ..
warning:: diff --git a/doc/sphinx/Pacemaker_Explained/fencing.rst b/doc/sphinx/Pacemaker_Explained/fencing.rst index 109b4da..302699f 100644 --- a/doc/sphinx/Pacemaker_Explained/fencing.rst +++ b/doc/sphinx/Pacemaker_Explained/fencing.rst @@ -147,8 +147,6 @@ These limitations could be revisited if there is sufficient user demand. .. index:: single: fencing; special instance attributes -.. _fencing-attributes: - Special Meta-Attributes for Fencing Resources ############################################# @@ -171,6 +169,8 @@ fencing resource. | | | | :ref:`unfencing <unfencing>`. | +----------------------+---------+--------------------+----------------------------------------+ +.. _fencing-attributes: + Special Instance Attributes for Fencing Resources ################################################# @@ -190,319 +190,316 @@ for ``pacemaker-fenced``. | | | | priority to lowest. | +----------------------+---------+--------------------+----------------------------------------+ -.. table:: **Additional Properties of Fencing Resources** +.. list-table:: **Additional Properties of Fencing Resources** :class: longtable :widths: 2 1 2 4 - - +----------------------+---------+--------------------+----------------------------------------+ - | Field | Type | Default | Description | - +======================+=========+====================+========================================+ - | stonith-timeout | time | | .. index:: | - | | | | single: stonith-timeout | - | | | | | - | | | | This is not used by Pacemaker (see the | - | | | | ``pcmk_reboot_timeout``, | - | | | | ``pcmk_off_timeout``, etc. properties | - | | | | instead), but it may be used by | - | | | | Linux-HA fence agents. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_host_map | string | | .. 
index:: | - | | | | single: pcmk_host_map | - | | | | | - | | | | A mapping of node names to ports | - | | | | for devices that do not understand | - | | | | the node names. | - | | | | | - | | | | Example: ``node1:1;node2:2,3`` tells | - | | | | the cluster to use port 1 for | - | | | | ``node1`` and ports 2 and 3 for | - | | | | ``node2``. If ``pcmk_host_check`` is | - | | | | explicitly set to ``static-list``, | - | | | | either this or ``pcmk_host_list`` must | - | | | | be set. The port portion of the map | - | | | | may contain special characters such as | - | | | | spaces if preceded by a backslash | - | | | | *(since 2.1.2)*. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_host_list | string | | .. index:: | - | | | | single: pcmk_host_list | - | | | | | - | | | | A list of machines controlled by this | - | | | | device. If ``pcmk_host_check`` is | - | | | | explicitly set to ``static-list``, | - | | | | either this or ``pcmk_host_map`` must | - | | | | be set. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_host_check | string | Value appropriate | .. index:: | - | | | to other | single: pcmk_host_check | - | | | parameters (see | | - | | | "Default Check | The method Pacemaker should use to | - | | | Type" below) | determine which nodes can be targeted | - | | | | by this device. Allowed values: | - | | | | | - | | | | * ``static-list:`` targets are listed | - | | | | in the ``pcmk_host_list`` or | - | | | | ``pcmk_host_map`` attribute | - | | | | * ``dynamic-list:`` query the device | - | | | | via the agent's ``list`` action | - | | | | * ``status:`` query the device via the | - | | | | agent's ``status`` action | - | | | | * ``none:`` assume the device can | - | | | | fence any node | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_delay_max | time | 0s | .. 
index:: | - | | | | single: pcmk_delay_max | - | | | | | - | | | | Enable a delay of no more than the | - | | | | time specified before executing | - | | | | fencing actions. Pacemaker derives the | - | | | | overall delay by taking the value of | - | | | | pcmk_delay_base and adding a random | - | | | | delay value such that the sum is kept | - | | | | below this maximum. This is sometimes | - | | | | used in two-node clusters to ensure | - | | | | that the nodes don't fence each other | - | | | | at the same time. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_delay_base | time | 0s | .. index:: | - | | | | single: pcmk_delay_base | - | | | | | - | | | | Enable a static delay before executing | - | | | | fencing actions. This can be used, for | - | | | | example, in two-node clusters to | - | | | | ensure that the nodes don't fence each | - | | | | other, by having separate fencing | - | | | | resources with different values. The | - | | | | node that is fenced with the shorter | - | | | | delay will lose a fencing race. The | - | | | | overall delay introduced by pacemaker | - | | | | is derived from this value plus a | - | | | | random delay such that the sum is kept | - | | | | below the maximum delay. A single | - | | | | device can have different delays per | - | | | | node using a host map *(since 2.1.2)*, | - | | | | for example ``node1:0s;node2:5s.`` | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_action_limit | integer | 1 | .. index:: | - | | | | single: pcmk_action_limit | - | | | | | - | | | | The maximum number of actions that can | - | | | | be performed in parallel on this | - | | | | device. A value of -1 means unlimited. 
| - | | | | Node fencing actions initiated by the | - | | | | cluster (as opposed to an administrator| - | | | | running the ``stonith_admin`` tool or | - | | | | the fencer running recurring device | - | | | | monitors and ``status`` and ``list`` | - | | | | commands) are additionally subject to | - | | | | the ``concurrent-fencing`` cluster | - | | | | property. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_host_argument | string | ``port`` otherwise | .. index:: | - | | | ``plug`` if | single: pcmk_host_argument | - | | | supported | | - | | | according to the | *Advanced use only.* Which parameter | - | | | metadata of the | should be supplied to the fence agent | - | | | fence agent | to identify the node to be fenced. | - | | | | Some devices support neither the | - | | | | standard ``plug`` nor the deprecated | - | | | | ``port`` parameter, or may provide | - | | | | additional ones. Use this to specify | - | | | | an alternate, device-specific | - | | | | parameter. A value of ``none`` tells | - | | | | the cluster not to supply any | - | | | | additional parameters. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_reboot_action | string | reboot | .. index:: | - | | | | single: pcmk_reboot_action | - | | | | | - | | | | *Advanced use only.* The command to | - | | | | send to the resource agent in order to | - | | | | reboot a node. Some devices do not | - | | | | support the standard commands or may | - | | | | provide additional ones. Use this to | - | | | | specify an alternate, device-specific | - | | | | command. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_reboot_timeout | time | 60s | .. 
index:: | - | | | | single: pcmk_reboot_timeout | - | | | | | - | | | | *Advanced use only.* Specify an | - | | | | alternate timeout to use for | - | | | | ``reboot`` actions instead of the | - | | | | value of ``stonith-timeout``. Some | - | | | | devices need much more or less time to | - | | | | complete than normal. Use this to | - | | | | specify an alternate, device-specific | - | | | | timeout. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_reboot_retries | integer | 2 | .. index:: | - | | | | single: pcmk_reboot_retries | - | | | | | - | | | | *Advanced use only.* The maximum | - | | | | number of times to retry the | - | | | | ``reboot`` command within the timeout | - | | | | period. Some devices do not support | - | | | | multiple connections, and operations | - | | | | may fail if the device is busy with | - | | | | another task, so Pacemaker will | - | | | | automatically retry the operation, if | - | | | | there is time remaining. Use this | - | | | | option to alter the number of times | - | | | | Pacemaker retries before giving up. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_off_action | string | off | .. index:: | - | | | | single: pcmk_off_action | - | | | | | - | | | | *Advanced use only.* The command to | - | | | | send to the resource agent in order to | - | | | | shut down a node. Some devices do not | - | | | | support the standard commands or may | - | | | | provide additional ones. Use this to | - | | | | specify an alternate, device-specific | - | | | | command. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_off_timeout | time | 60s | .. 
index:: | - | | | | single: pcmk_off_timeout | - | | | | | - | | | | *Advanced use only.* Specify an | - | | | | alternate timeout to use for | - | | | | ``off`` actions instead of the | - | | | | value of ``stonith-timeout``. Some | - | | | | devices need much more or less time to | - | | | | complete than normal. Use this to | - | | | | specify an alternate, device-specific | - | | | | timeout. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_off_retries | integer | 2 | .. index:: | - | | | | single: pcmk_off_retries | - | | | | | - | | | | *Advanced use only.* The maximum | - | | | | number of times to retry the | - | | | | ``off`` command within the timeout | - | | | | period. Some devices do not support | - | | | | multiple connections, and operations | - | | | | may fail if the device is busy with | - | | | | another task, so Pacemaker will | - | | | | automatically retry the operation, if | - | | | | there is time remaining. Use this | - | | | | option to alter the number of times | - | | | | Pacemaker retries before giving up. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_list_action | string | list | .. index:: | - | | | | single: pcmk_list_action | - | | | | | - | | | | *Advanced use only.* The command to | - | | | | send to the resource agent in order to | - | | | | list nodes. Some devices do not | - | | | | support the standard commands or may | - | | | | provide additional ones. Use this to | - | | | | specify an alternate, device-specific | - | | | | command. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_list_timeout | time | 60s | .. index:: | - | | | | single: pcmk_list_timeout | - | | | | | - | | | | *Advanced use only.* Specify an | - | | | | alternate timeout to use for | - | | | | ``list`` actions instead of the | - | | | | value of ``stonith-timeout``. 
Some | - | | | | devices need much more or less time to | - | | | | complete than normal. Use this to | - | | | | specify an alternate, device-specific | - | | | | timeout. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_list_retries | integer | 2 | .. index:: | - | | | | single: pcmk_list_retries | - | | | | | - | | | | *Advanced use only.* The maximum | - | | | | number of times to retry the | - | | | | ``list`` command within the timeout | - | | | | period. Some devices do not support | - | | | | multiple connections, and operations | - | | | | may fail if the device is busy with | - | | | | another task, so Pacemaker will | - | | | | automatically retry the operation, if | - | | | | there is time remaining. Use this | - | | | | option to alter the number of times | - | | | | Pacemaker retries before giving up. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_monitor_action | string | monitor | .. index:: | - | | | | single: pcmk_monitor_action | - | | | | | - | | | | *Advanced use only.* The command to | - | | | | send to the resource agent in order to | - | | | | report extended status. Some devices do| - | | | | not support the standard commands or | - | | | | may provide additional ones. Use this | - | | | | to specify an alternate, | - | | | | device-specific command. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_monitor_timeout | time | 60s | .. index:: | - | | | | single: pcmk_monitor_timeout | - | | | | | - | | | | *Advanced use only.* Specify an | - | | | | alternate timeout to use for | - | | | | ``monitor`` actions instead of the | - | | | | value of ``stonith-timeout``. Some | - | | | | devices need much more or less time to | - | | | | complete than normal. Use this to | - | | | | specify an alternate, device-specific | - | | | | timeout. 
| - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_monitor_retries | integer | 2 | .. index:: | - | | | | single: pcmk_monitor_retries | - | | | | | - | | | | *Advanced use only.* The maximum | - | | | | number of times to retry the | - | | | | ``monitor`` command within the timeout | - | | | | period. Some devices do not support | - | | | | multiple connections, and operations | - | | | | may fail if the device is busy with | - | | | | another task, so Pacemaker will | - | | | | automatically retry the operation, if | - | | | | there is time remaining. Use this | - | | | | option to alter the number of times | - | | | | Pacemaker retries before giving up. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_status_action | string | status | .. index:: | - | | | | single: pcmk_status_action | - | | | | | - | | | | *Advanced use only.* The command to | - | | | | send to the resource agent in order to | - | | | | report status. Some devices do | - | | | | not support the standard commands or | - | | | | may provide additional ones. Use this | - | | | | to specify an alternate, | - | | | | device-specific command. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_status_timeout | time | 60s | .. index:: | - | | | | single: pcmk_status_timeout | - | | | | | - | | | | *Advanced use only.* Specify an | - | | | | alternate timeout to use for | - | | | | ``status`` actions instead of the | - | | | | value of ``stonith-timeout``. Some | - | | | | devices need much more or less time to | - | | | | complete than normal. Use this to | - | | | | specify an alternate, device-specific | - | | | | timeout. | - +----------------------+---------+--------------------+----------------------------------------+ - | pcmk_status_retries | integer | 2 | .. 
index:: | - | | | | single: pcmk_status_retries | - | | | | | - | | | | *Advanced use only.* The maximum | - | | | | number of times to retry the | - | | | | ``status`` command within the timeout | - | | | | period. Some devices do not support | - | | | | multiple connections, and operations | - | | | | may fail if the device is busy with | - | | | | another task, so Pacemaker will | - | | | | automatically retry the operation, if | - | | | | there is time remaining. Use this | - | | | | option to alter the number of times | - | | | | Pacemaker retries before giving up. | - +----------------------+---------+--------------------+----------------------------------------+ + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _primitive_stonith_timeout: + + .. index:: + single: stonith-timeout (primitive instance attribute) + + stonith-timeout + - :ref:`timeout <timeout>` + - + - This is not used by Pacemaker (see the ``pcmk_reboot_timeout``, + ``pcmk_off_timeout``, etc., properties instead), but it may be used by + Linux-HA fence agents. + * - .. _pcmk_host_map: + + .. index:: + single: pcmk_host_map + + pcmk_host_map + - :ref:`text <text>` + - + - A mapping of node names to ports for devices that do not understand the + node names. For example, ``node1:1;node2:2,3`` tells the cluster to use + port 1 for ``node1`` and ports 2 and 3 for ``node2``. If + ``pcmk_host_check`` is explicitly set to ``static-list``, either this or + ``pcmk_host_list`` must be set. The port portion of the map may contain + special characters such as spaces if preceded by a backslash *(since 2.1.2)*. + * - .. _pcmk_host_list: + + .. index:: + single: pcmk_host_list + + pcmk_host_list + - :ref:`text <text>` + - + - Comma-separated list of nodes that can be targeted by this device (for + example, ``node1,node2,node3``). If pcmk_host_check is ``static-list``, + either this or ``pcmk_host_map`` must be set. + * - .. _pcmk_host_check: + + .. 
index:: + single: pcmk_host_check + + pcmk_host_check + - :ref:`text <text>` + - See :ref:`pcmk_host_check_default` + - The method Pacemaker should use to determine which nodes can be targeted + by this device. Allowed values: + + * ``static-list:`` targets are listed in the ``pcmk_host_list`` or ``pcmk_host_map`` attribute + * ``dynamic-list:`` query the device via the agent's ``list`` action + * ``status:`` query the device via the agent's ``status`` action + * ``none:`` assume the device can fence any node + * - .. _pcmk_delay_max: + + .. index:: + single: pcmk_delay_max + + pcmk_delay_max + - :ref:`duration <duration>` + - 0s + - Enable a delay of no more than the time specified before executing + fencing actions. Pacemaker derives the overall delay by taking the value + of pcmk_delay_base and adding a random delay value such that the sum is + kept below this maximum. This is sometimes used in two-node clusters to + ensure that the nodes don't fence each other at the same time. + * - .. _pcmk_delay_base: + + .. index:: + single: pcmk_delay_base + + pcmk_delay_base + - :ref:`text <text>` + - 0s + - Enable a static delay before executing fencing actions. This can be + used, for example, in two-node clusters to ensure that the nodes don't + fence each other, by having separate fencing resources with different + values. The node that is fenced with the shorter delay will lose a + fencing race. The overall delay introduced by pacemaker is derived from + this value plus a random delay such that the sum is kept below the + maximum delay. A single device can have different delays per node using + a host map *(since 2.1.2)*, for example ``node1:0s;node2:5s.`` + * - .. _pcmk_action_limit: + + .. index:: + single: pcmk_action_limit + + pcmk_action_limit + - :ref:`integer <integer>` + - 1 + - The maximum number of actions that can be performed in parallel on this + device. A value of -1 means unlimited. 
Node fencing actions initiated by + the cluster (as opposed to an administrator running the + ``stonith_admin`` tool or the fencer running recurring device monitors + and ``status`` and ``list`` commands) are additionally subject to the + ``concurrent-fencing`` cluster property. + * - .. _pcmk_host_argument: + + .. index:: + single: pcmk_host_argument + + pcmk_host_argument + - :ref:`text <text>` + - ``port`` otherwise ``plug`` if supported according to the metadata of + the fence agent + - *Advanced use only.* Which parameter should be supplied to the fence + agent to identify the node to be fenced. Some devices support neither + the standard ``plug`` nor the deprecated ``port`` parameter, or may + provide additional ones. Use this to specify an alternate, + device-specific parameter. A value of ``none`` tells the cluster not to + supply any additional parameters. + * - .. _pcmk_reboot_action: + + .. index:: + single: pcmk_reboot_action + + pcmk_reboot_action + - :ref:`text <text>` + - ``reboot`` + - *Advanced use only.* The command to send to the resource agent in order + to reboot a node. Some devices do not support the standard commands or + may provide additional ones. Use this to specify an alternate, + device-specific command. + * - .. _pcmk_reboot_timeout: + + .. index:: + single: pcmk_reboot_timeout + + pcmk_reboot_timeout + - :ref:`timeout <timeout>` + - 60s + - *Advanced use only.* Specify an alternate timeout (in seconds) to use + for ``reboot`` actions instead of the value of ``stonith-timeout``. Some + devices need much more or less time to complete than normal. Use this to + specify an alternate, device-specific timeout. + * - .. _pcmk_reboot_retries: + + .. index:: + single: pcmk_reboot_retries + + pcmk_reboot_retries + - :ref:`integer <integer>` + - 2 + - *Advanced use only.* The maximum number of times to retry the ``reboot`` + command within the timeout period. 
Some devices do not support multiple + connections, and operations may fail if the device is busy with another + task, so Pacemaker will automatically retry the operation, if there is + time remaining. Use this option to alter the number of times Pacemaker + retries before giving up. + * - .. _pcmk_off_action: + + .. index:: + single: pcmk_off_action + + pcmk_off_action + - :ref:`text <text>` + - ``off`` + - *Advanced use only.* The command to send to the resource agent in order + to shut down a node. Some devices do not support the standard commands or + may provide additional ones. Use this to specify an alternate, + device-specific command. + * - .. _pcmk_off_timeout: + + .. index:: + single: pcmk_off_timeout + + pcmk_off_timeout + - :ref:`timeout <timeout>` + - 60s + - *Advanced use only.* Specify an alternate timeout (in seconds) to use + for ``off`` actions instead of the value of ``stonith-timeout``. Some + devices need much more or less time to complete than normal. Use this to + specify an alternate, device-specific timeout. + * - .. _pcmk_off_retries: + + .. index:: + single: pcmk_off_retries + + pcmk_off_retries + - :ref:`integer <integer>` + - 2 + - *Advanced use only.* The maximum number of times to retry the ``off`` + command within the timeout period. Some devices do not support multiple + connections, and operations may fail if the device is busy with another + task, so Pacemaker will automatically retry the operation, if there is + time remaining. Use this option to alter the number of times Pacemaker + retries before giving up. + * - .. _pcmk_list_action: + + .. index:: + single: pcmk_list_action + + pcmk_list_action + - :ref:`text <text>` + - ``list`` + - *Advanced use only.* The command to send to the resource agent in order + to list nodes. Some devices do not support the standard commands or may + provide additional ones. Use this to specify an alternate, + device-specific command. + * - .. _pcmk_list_timeout: + + .. 
index:: + single: pcmk_list_timeout + + pcmk_list_timeout + - :ref:`timeout <timeout>` + - 60s + - *Advanced use only.* Specify an alternate timeout (in seconds) to use + for ``list`` actions instead of the value of ``stonith-timeout``. Some + devices need much more or less time to complete than normal. Use this to + specify an alternate, device-specific timeout. + * - .. _pcmk_list_retries: + + .. index:: + single: pcmk_list_retries + + pcmk_list_retries + - :ref:`integer <integer>` + - 2 + - *Advanced use only.* The maximum number of times to retry the ``list`` + command within the timeout period. Some devices do not support multiple + connections, and operations may fail if the device is busy with another + task, so Pacemaker will automatically retry the operation, if there is + time remaining. Use this option to alter the number of times Pacemaker + retries before giving up. + * - .. _pcmk_monitor_action: + + .. index:: + single: pcmk_monitor_action + + pcmk_monitor_action + - :ref:`text <text>` + - ``monitor`` + - *Advanced use only.* The command to send to the resource agent in order + to report extended status. Some devices do not support the standard + commands or may provide additional ones. Use this to specify an + alternate, device-specific command. + * - .. _pcmk_monitor_timeout: + + .. index:: + single: pcmk_monitor_timeout + + pcmk_monitor_timeout + - :ref:`timeout <timeout>` + - 60s + - *Advanced use only.* Specify an alternate timeout (in seconds) to use + for ``monitor`` actions instead of the value of ``stonith-timeout``. Some + devices need much more or less time to complete than normal. Use this to + specify an alternate, device-specific timeout. + * - .. _pcmk_monitor_retries: + + .. index:: + single: pcmk_monitor_retries + + pcmk_monitor_retries + - :ref:`integer <integer>` + - 2 + - *Advanced use only.* The maximum number of times to retry the ``monitor`` + command within the timeout period. 
Some devices do not support multiple + connections, and operations may fail if the device is busy with another + task, so Pacemaker will automatically retry the operation, if there is + time remaining. Use this option to alter the number of times Pacemaker + retries before giving up. + * - .. _pcmk_status_action: + + .. index:: + single: pcmk_status_action + + pcmk_status_action + - :ref:`text <text>` + - ``status`` + - *Advanced use only.* The command to send to the resource agent in order + to report status. Some devices do not support the standard commands or + may provide additional ones. Use this to specify an alternate, + device-specific command. + * - .. _pcmk_status_timeout: + + .. index:: + single: pcmk_status_timeout + + pcmk_status_timeout + - :ref:`timeout <timeout>` + - 60s + - *Advanced use only.* Specify an alternate timeout (in seconds) to use + for ``status`` actions instead of the value of ``stonith-timeout``. Some + devices need much more or less time to complete than normal. Use this to + specify an alternate, device-specific timeout. + * - .. _pcmk_status_retries: + + .. index:: + single: pcmk_status_retries + + pcmk_status_retries + - :ref:`integer <integer>` + - 2 + - *Advanced use only.* The maximum number of times to retry the ``status`` + command within the timeout period. Some devices do not support multiple + connections, and operations may fail if the device is busy with another + task, so Pacemaker will automatically retry the operation, if there is + time remaining. Use this option to alter the number of times Pacemaker + retries before giving up. + +.. 
_pcmk_host_check_default: Default Check Type ################## diff --git a/doc/sphinx/Pacemaker_Explained/local-options.rst b/doc/sphinx/Pacemaker_Explained/local-options.rst index 91eda66..915a65b 100644 --- a/doc/sphinx/Pacemaker_Explained/local-options.rst +++ b/doc/sphinx/Pacemaker_Explained/local-options.rst @@ -8,6 +8,188 @@ Host-Local Configuration your Pacemaker build settings. Check your Pacemaker configuration file to find the correct paths. +Configuration Value Types +######################### + +Throughout this document, configuration values will be designated as having one +of the following types: + +.. list-table:: **Configuration Value Types** + :class: longtable + :widths: 1 3 + :header-rows: 1 + + * - Type + - Description + * - .. _boolean: + + .. index:: + pair: type; boolean + + boolean + - Case-insensitive text value where ``1``, ``yes``, ``y``, ``on``, + and ``true`` evaluate as true and ``0``, ``no``, ``n``, ``off``, + ``false``, and unset evaluate as false + * - .. _date_time: + + .. index:: + pair: type; date/time + + date/time + - Textual timestamp like ``Sat Dec 21 11:47:45 2013`` + * - .. _duration: + + .. index:: + pair: type; duration + + duration + - A time duration, specified either like a :ref:`timeout <timeout>` or an + `ISO 8601 duration <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_. + A duration may be up to approximately 49 days but is intended for much + smaller time periods. + * - .. _enumeration: + + .. index:: + pair: type; enumeration + + enumeration + - Text that must be one of a set of defined values (which will be listed + in the description) + * - .. _epoch_time: + + .. index:: + pair: type; epoch_time + + epoch_time + - Time as the integer number of seconds since the Unix epoch, + ``1970-01-01 00:00:00 +0000 (UTC)``. + * - .. _id: + + .. 
index:: + pair: type; id + + id + - A text string starting with a letter or underbar, followed by any + combination of letters, numbers, dashes, dots, and/or underbars; when + used for a property named ``id``, the string must be unique across all + ``id`` properties in the CIB + * - .. _integer: + + .. index:: + pair: type; integer + + integer + - 32-bit signed integer value (-2,147,483,648 to 2,147,483,647) + * - .. _iso8601: + + .. index:: + pair: type; iso8601 + + ISO 8601 + - An `ISO 8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ date/time. + * - .. _nonnegative_integer: + + .. index:: + pair: type; nonnegative integer + + nonnegative integer + - 32-bit nonnegative integer value (0 to 2,147,483,647) + * - .. _percentage: + + .. index:: + pair: type; percentage + + percentage + - Floating-point number followed by an optional percent sign ('%') + * - .. _port: + + .. index:: + pair: type; port + + port + - Integer TCP port number (0 to 65535) + * - .. _range: + + .. index:: + pair: type; range + + range + - A range may be a single nonnegative integer or a dash-separated range of + nonnegative integers. Either the first or last value may be omitted to + leave the range open-ended. Examples: ``0``, ``3-``, ``-5``, ``4-6``. + * - .. _score: + + .. index:: + pair: type; score + + score + - A Pacemaker score can be an integer between -1,000,000 and 1,000,000, or + a string alias: ``INFINITY`` or ``+INFINITY`` is equivalent to + 1,000,000, ``-INFINITY`` is equivalent to -1,000,000, and ``red``, + ``yellow``, and ``green`` are equivalent to integers as described in + :ref:`node-health`. + * - .. _text: + + .. index:: + pair: type; text + + text + - A text string + * - .. _timeout: + + .. 
index:: + pair: type; timeout + + timeout + - A time duration, specified as a bare number (in which case it is + considered to be in seconds) or a number with a unit (``ms`` or ``msec`` + for milliseconds, ``us`` or ``usec`` for microseconds, ``s`` or ``sec`` + for seconds, ``m`` or ``min`` for minutes, ``h`` or ``hr`` for hours) + optionally with whitespace before and/or after the number. + * - .. _version: + + .. index:: + pair: type; version + + version + - Version number (any combination of alphanumeric characters, dots, and + dashes, starting with a number). + + +Scores +______ + +Scores are integral to how Pacemaker works. Practically everything from moving +a resource to deciding which resource to stop in a degraded cluster is achieved +by manipulating scores in some way. + +Scores are calculated per resource and node. Any node with a negative score for +a resource can't run that resource. The cluster places a resource on the node +with the highest score for it. + +Score addition and subtraction follow these rules: + +* Any value (including ``INFINITY``) - ``INFINITY`` = ``-INFINITY`` +* ``INFINITY`` + any value other than ``-INFINITY`` = ``INFINITY`` + +.. note:: + + What if you want to use a score higher than 1,000,000? Typically this possibility + arises when someone wants to base the score on some external metric that might + go above 1,000,000. + + The short answer is you can't. + + The long answer is it is sometimes possible to work around this limitation + creatively. You may be able to set the score to some computed value based on + the external metric rather than use the metric directly. For nodes, you can + store the metric as a node attribute, and query the attribute when computing + the score (possibly as part of a custom resource agent). + + +Local Options +############# + Pacemaker supports several host-local configuration options.
These options can be configured on each node in the main Pacemaker configuration file (|PCMK_CONFIG_FILE|) in the format ``<NAME>="<VALUE>"``. They work by setting @@ -22,6 +204,18 @@ environment variables when Pacemaker daemons start up. - Type - Default - Description + + * - .. _cib_pam_service: + + .. index:: + pair: node option; CIB_pam_service + + CIB_pam_service + - :ref:`text <text>` + - login + - PAM service to use for remote CIB client authentication (passed to + ``pam_start``). + * - .. _pcmk_logfacility: .. index:: @@ -50,7 +244,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_logpriority: .. index:: - pair:: node option; PCMK_logpriority + pair: node option; PCMK_logpriority PCMK_logpriority - :ref:`enumeration <enumeration>` @@ -72,7 +266,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_logfile: .. index:: - pair:: node option; PCMK_logfile + pair: node option; PCMK_logfile PCMK_logfile - :ref:`text <text>` @@ -81,12 +275,15 @@ environment variables when Pacemaker daemons start up. specified file (in addition to the system log, if enabled). These messages may have extended information, and will include messages of info severity. This log is of more use to developers and advanced system - administrators, and when reporting problems. + administrators, and when reporting problems. Note: The default is + |PCMK_CONTAINER_LOG_FILE| (inside the container) for bundled container + nodes; this would typically be mapped to a different path on the host + running the container. * - .. _pcmk_logfile_mode: .. index:: - pair:: node option; PCMK_logfile_mode + pair: node option; PCMK_logfile_mode PCMK_logfile_mode - :ref:`text <text>` @@ -97,7 +294,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_debug: .. index:: - pair:: node option; PCMK_debug + pair: node option; PCMK_debug PCMK_debug - :ref:`enumeration <enumeration>` @@ -119,7 +316,7 @@ environment variables when Pacemaker daemons start up. 
* - .. _pcmk_stderr: .. index:: - pair:: node option; PCMK_stderr + pair: node option; PCMK_stderr PCMK_stderr - :ref:`boolean <boolean>` @@ -135,7 +332,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_trace_functions: .. index:: - pair:: node option; PCMK_trace_functions + pair: node option; PCMK_trace_functions PCMK_trace_functions - :ref:`text <text>` @@ -149,7 +346,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_trace_files: .. index:: - pair:: node option; PCMK_trace_files + pair: node option; PCMK_trace_files PCMK_trace_files - :ref:`text <text>` @@ -162,7 +359,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_trace_formats: .. index:: - pair:: node option; PCMK_trace_formats + pair: node option; PCMK_trace_formats PCMK_trace_formats - :ref:`text <text>` @@ -176,7 +373,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_trace_tags: .. index:: - pair:: node option; PCMK_trace_tags + pair: node option; PCMK_trace_tags PCMK_trace_tags - :ref:`text <text>` @@ -189,7 +386,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_blackbox: .. index:: - pair:: node option; PCMK_blackbox + pair: node option; PCMK_blackbox PCMK_blackbox - :ref:`enumeration <enumeration>` @@ -214,7 +411,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_trace_blackbox: .. index:: - pair:: node option; PCMK_trace_blackbox + pair: node option; PCMK_trace_blackbox PCMK_trace_blackbox - :ref:`enumeration <enumeration>` @@ -228,7 +425,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_node_start_state: .. index:: - pair:: node option; PCMK_node_start_state + pair: node option; PCMK_node_start_state PCMK_node_start_state - :ref:`enumeration <enumeration>` @@ -241,19 +438,19 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_node_action_limit: .. 
index:: - pair:: node option; PCMK_node_action_limit + pair: node option; PCMK_node_action_limit PCMK_node_action_limit - :ref:`nonnegative integer <nonnegative_integer>` - - Specify the maximum number of jobs that can be scheduled on this node. If - set, this overrides the ``node-action-limit`` cluster property for this - node. + set, this overrides the :ref:`node-action-limit <node_action_limit>` + cluster option on this node. * - .. _pcmk_shutdown_delay: .. index:: - pair:: node option; PCMK_shutdown_delay + pair: node option; PCMK_shutdown_delay PCMK_shutdown_delay - :ref:`timeout <timeout>` @@ -264,7 +461,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_fail_fast: .. index:: - pair:: node option; PCMK_fail_fast + pair: node option; PCMK_fail_fast PCMK_fail_fast - :ref:`boolean <boolean>` @@ -276,7 +473,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_panic_action: .. index:: - pair:: node option; PCMK_panic_action + pair: node option; PCMK_panic_action PCMK_panic_action - :ref:`enumeration <enumeration>` @@ -292,7 +489,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_authkey_location: .. index:: - pair:: node option; PCMK_authkey_location + pair: node option; PCMK_authkey_location PCMK_authkey_location - :ref:`text <text>` @@ -306,7 +503,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_remote_address: .. index:: - pair:: node option; PCMK_remote_address + pair: node option; PCMK_remote_address PCMK_remote_address - :ref:`text <text>` @@ -323,7 +520,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_remote_port: .. index:: - pair:: node option; PCMK_remote_port + pair: node option; PCMK_remote_port PCMK_remote_port - :ref:`port <port>` @@ -334,7 +531,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_remote_pid1: .. 
index:: - pair:: node option; PCMK_remote_pid1 + pair: node option; PCMK_remote_pid1 PCMK_remote_pid1 - :ref:`enumeration <enumeration>` @@ -362,7 +559,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_tls_priorities: .. index:: - pair:: node option; PCMK_tls_priorities + pair: node option; PCMK_tls_priorities PCMK_tls_priorities - :ref:`text <text>` @@ -383,7 +580,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_dh_min_bits: .. index:: - pair:: node option; PCMK_dh_min_bits + pair: node option; PCMK_dh_min_bits PCMK_dh_min_bits - :ref:`nonnegative integer <nonnegative_integer>` @@ -407,7 +604,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_dh_max_bits: .. index:: - pair:: node option; PCMK_dh_max_bits + pair: node option; PCMK_dh_max_bits PCMK_dh_max_bits - :ref:`nonnegative integer <nonnegative_integer>` @@ -428,7 +625,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_ipc_type: .. index:: - pair:: node option; PCMK_ipc_type + pair: node option; PCMK_ipc_type PCMK_ipc_type - :ref:`enumeration <enumeration>` @@ -443,7 +640,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_ipc_buffer: .. index:: - pair:: node option; PCMK_ipc_buffer + pair: node option; PCMK_ipc_buffer PCMK_ipc_buffer - :ref:`nonnegative integer <nonnegative_integer>` @@ -456,7 +653,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_cluster_type: .. index:: - pair:: node option; PCMK_cluster_type + pair: node option; PCMK_cluster_type PCMK_cluster_type - :ref:`enumeration <enumeration>` @@ -470,7 +667,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_schema_directory: .. index:: - pair:: node option; PCMK_schema_directory + pair: node option; PCMK_schema_directory PCMK_schema_directory - :ref:`text <text>` @@ -478,10 +675,22 @@ environment variables when Pacemaker daemons start up. 
- *Advanced Use Only:* Specify an alternate location for RNG schemas and XSL transforms. + * - .. _pcmk_remote_schema_directory: + + .. index:: + pair: node option; PCMK_remote_schema_directory + + PCMK_remote_schema_directory + - :ref:`text <text>` + - |PCMK__REMOTE_SCHEMA_DIR| + - *Advanced Use Only:* Specify an alternate location on Pacemaker Remote + nodes for storing newer RNG schemas and XSL transforms fetched from + the cluster. + * - .. _pcmk_valgrind_enabled: .. index:: - pair:: node option; PCMK_valgrind_enabled + pair: node option; PCMK_valgrind_enabled PCMK_valgrind_enabled - :ref:`enumeration <enumeration>` @@ -492,7 +701,7 @@ environment variables when Pacemaker daemons start up. * - .. _pcmk_callgrind_enabled: .. index:: - pair:: node option; PCMK_callgrind_enabled + pair: node option; PCMK_callgrind_enabled PCMK_callgrind_enabled - :ref:`enumeration <enumeration>` @@ -501,10 +710,36 @@ environment variables when Pacemaker daemons start up. ``valgrind`` with the ``callgrind`` tool enabled. Allowed values are the same as for ``PCMK_debug``. + * - .. _sbd_sync_resource_startup: + + .. index:: + pair: node option; SBD_SYNC_RESOURCE_STARTUP + + SBD_SYNC_RESOURCE_STARTUP + - :ref:`boolean <boolean>` + - + - If true, ``pacemakerd`` waits for a ping from ``sbd`` during startup + before starting other Pacemaker daemons, and during shutdown after + stopping other Pacemaker daemons but before exiting. Default value is set + based on the ``--with-sbd-sync-default`` configure script option. + + * - .. _sbd_watchdog_timeout: + + .. index:: + pair: node option; SBD_WATCHDOG_TIMEOUT + + SBD_WATCHDOG_TIMEOUT + - :ref:`duration <duration>` + - + - If the ``stonith-watchdog-timeout`` cluster property is set to a negative + or invalid value, use double this value as the default if positive, or + use 0 as the default otherwise. This value must be greater than the value + of ``stonith-watchdog-timeout`` if both are set. + * - .. _valgrind_opts: ..
index:: - pair:: node option; VALGRIND_OPTS + pair: node option; VALGRIND_OPTS VALGRIND_OPTS - :ref:`text <text>` diff --git a/doc/sphinx/Pacemaker_Explained/nodes.rst b/doc/sphinx/Pacemaker_Explained/nodes.rst index 378b067..b700010 100644 --- a/doc/sphinx/Pacemaker_Explained/nodes.rst +++ b/doc/sphinx/Pacemaker_Explained/nodes.rst @@ -105,6 +105,44 @@ To read back the value that was just set: The ``--type nodes`` indicates that this is a permanent node attribute; ``--type status`` would indicate a transient node attribute. +.. warning:: + + Attribute values with newline or tab characters are currently displayed with + newlines as ``"\n"`` and tabs as ``"\t"``, when ``crm_attribute`` or + ``attrd_updater`` query commands use ``--output-as=text`` or leave + ``--output-as`` unspecified: + + .. code-block:: none + + # crm_attribute -N node1 -n test_attr -v "$(echo -e "a\nb\tc")" -t status + # crm_attribute -N node1 -n test_attr --query -t status + scope=status name=test_attr value=a\nb\tc + + This format is deprecated. In a future release, the values will be displayed + with literal whitespace characters: + + .. code-block:: none + + # crm_attribute -N node1 -n test_attr --query -t status + scope=status name=test_attr value=a + b c + + Users should either avoid attribute values with newlines and tabs, or ensure + that they can handle both formats. + + However, it's best to use ``--output-as=xml`` when parsing attribute values + from output. Newlines, tabs, and special characters are replaced with XML + character references that a conforming XML processor can recognize and + convert to literals *(since 2.1.8)*: + + .. code-block:: none + + # crm_attribute -N node1 -n test_attr --query -t status --output-as=xml + <pacemaker-result api-version="2.35" request="crm_attribute -N laptop -n test_attr --query -t status --output-as=xml"> + <attribute name="test_attr" value="a&#10;b&#9;c" scope="status"/> + <status code="0" message="OK"/> + </pacemaker-result> + ..
_special_node_attributes: diff --git a/doc/sphinx/Pacemaker_Explained/operations.rst b/doc/sphinx/Pacemaker_Explained/operations.rst index b1ad65d..c831f81 100644 --- a/doc/sphinx/Pacemaker_Explained/operations.rst +++ b/doc/sphinx/Pacemaker_Explained/operations.rst @@ -38,113 +38,160 @@ two operations for the same resource with the same name and interval. Operation Properties #################### -Operation properties may be specified directly in the ``op`` element as -XML attributes, or in a separate ``meta_attributes`` block as ``nvpair`` elements. -XML attributes take precedence over ``nvpair`` elements if both are specified. +The ``id``, ``name``, ``interval``, and ``role`` operation properties may be +specified only as XML attributes of the ``op`` element. Other operation +properties may be specified in any of the following ways, from highest +precedence to lowest: -.. table:: **Properties of an Operation** +* directly in the ``op`` element as an XML attribute +* in an ``nvpair`` element within a ``meta_attributes`` element within the + ``op`` element +* in an ``nvpair`` element within a ``meta_attributes`` element within + :ref:`operation defaults <s-operation-defaults>` + +If not specified, the default from the table below is used. + +.. list-table:: **Operation Properties** :class: longtable - :widths: 1 2 3 - - +----------------+-----------------------------------+-----------------------------------------------------+ - | Field | Default | Description | - +================+===================================+=====================================================+ - | id | | .. index:: | - | | | single: id; action property | - | | | single: action; property, id | - | | | | - | | | A unique name for the operation. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | name | | .. 
index:: | - | | | single: name; action property | - | | | single: action; property, name | - | | | | - | | | The action to perform. This can be any action | - | | | supported by the agent; common values include | - | | | ``monitor``, ``start``, and ``stop``. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | interval | 0 | .. index:: | - | | | single: interval; action property | - | | | single: action; property, interval | - | | | | - | | | How frequently (in seconds) to perform the | - | | | operation. A value of 0 means "when needed". | - | | | A positive value defines a *recurring action*, | - | | | which is typically used with | - | | | :ref:`monitor <s-resource-monitoring>`. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | timeout | | .. index:: | - | | | single: timeout; action property | - | | | single: action; property, timeout | - | | | | - | | | How long to wait before declaring the action | - | | | has failed | - +----------------+-----------------------------------+-----------------------------------------------------+ - | on-fail | Varies by action: | .. index:: | - | | | single: on-fail; action property | - | | * ``stop``: ``fence`` if | single: action; property, on-fail | - | | ``stonith-enabled`` is true | | - | | or ``block`` otherwise | The action to take if this action ever fails. | - | | * ``demote``: ``on-fail`` of the | Allowed values: | - | | ``monitor`` action with | | - | | ``role`` set to ``Promoted``, | * ``ignore:`` Pretend the resource did not fail. | - | | if present, enabled, and | * ``block:`` Don't perform any further operations | - | | configured to a value other | on the resource. | - | | than ``demote``, or ``restart`` | * ``stop:`` Stop the resource and do not start | - | | otherwise | it elsewhere. 
| - | | * all other actions: ``restart`` | * ``demote:`` Demote the resource, without a | - | | | full restart. This is valid only for ``promote`` | - | | | actions, and for ``monitor`` actions with both | - | | | a nonzero ``interval`` and ``role`` set to | - | | | ``Promoted``; for any other action, a | - | | | configuration error will be logged, and the | - | | | default behavior will be used. *(since 2.0.5)* | - | | | * ``restart:`` Stop the resource and start it | - | | | again (possibly on a different node). | - | | | * ``fence:`` STONITH the node on which the | - | | | resource failed. | - | | | * ``standby:`` Move *all* resources away from the | - | | | node on which the resource failed. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | enabled | TRUE | .. _op_enabled: | - | | | | - | | | .. index:: | - | | | single: enabled; action property | - | | | single: action; property, enabled | - | | | | - | | | If ``false``, ignore this operation definition. | - | | | This does not suppress all actions of this type, | - | | | but is typically used to pause a recurring monitor. | - | | | This can complement the resource being unmanaged | - | | | (:ref:`is-managed <is_managed>` set to ``false``), | - | | | which does not stop recurring operations. | - | | | Maintenance mode, which does stop configured this | - | | | monitors, overrides this setting. Allowed values: | - | | | ``true``, ``false``. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | record-pending | TRUE | .. index:: | - | | | single: record-pending; action property | - | | | single: action; property, record-pending | - | | | | - | | | If ``true``, the intention to perform the operation | - | | | is recorded so that GUIs and CLI tools can indicate | - | | | that an operation is in progress. 
This is best set | - | | | as an *operation default* | - | | | (see :ref:`s-operation-defaults`). Allowed values: | - | | | ``true``, ``false``. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | role | | .. index:: | - | | | single: role; action property | - | | | single: action; property, role | - | | | | - | | | Run the operation only on node(s) that the cluster | - | | | thinks should be in the specified role. This only | - | | | makes sense for recurring ``monitor`` operations. | - | | | Allowed (case-sensitive) values: ``Stopped``, | - | | | ``Started``, and in the case of :ref:`promotable | - | | | clone resources <s-resource-promotable>`, | - | | | ``Unpromoted`` and ``Promoted``. | - +----------------+-----------------------------------+-----------------------------------------------------+ + :widths: 2 2 3 4 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _op_id: + + .. index:: + pair: op; id + single: id; action property + single: action; property, id + + id + - :ref:`id <id>` + - + - A unique identifier for the XML element *(required)* + * - .. _op_name: + + .. index:: + pair: op; name + single: name; action property + single: action; property, name + + name + - :ref:`text <text>` + - + - An action name supported by the resource agent *(required)* + * - .. _op_interval: + + .. index:: + pair: op; interval + single: interval; action property + single: action; property, interval + + interval + - :ref:`duration <duration>` + - 0 + - If this is a positive value, Pacemaker will schedule recurring instances + of this operation at the given interval (which makes sense only with + :ref:`name <op_name>` set to :ref:`monitor <s-resource-monitoring>`). If + this is 0, Pacemaker will apply other properties configured for this + operation to instances that are scheduled as needed during normal + cluster operation. *(required)* + * - .. _op_role: + + .. 
index:: + pair: op; role + single: role; action property + single: action; property, role + + role + - :ref:`enumeration <enumeration>` + - + - If this is set, the operation configuration applies only on nodes where + the cluster expects the resource to be in the specified role. This makes + sense only for recurring monitors. Allowed values: ``Started``, + ``Stopped``, and in the case of :ref:`promotable clone resources + <s-resource-promotable>`, ``Unpromoted`` and ``Promoted``. + * - .. _op_timeout: + + .. index:: + pair: op; timeout + single: timeout; action property + single: action; property, timeout + + timeout + - :ref:`timeout <timeout>` + - 20s + - If resource agent execution does not complete within this amount of + time, the action will be considered failed. **Note:** timeouts for + fencing agents are handled specially (see the :ref:`fencing` chapter). + * - .. _op_on_fail: + + .. index:: + pair: op; on-fail + single: on-fail; action property + single: action; property, on-fail + + on-fail + - :ref:`enumeration <enumeration>` + - * If ``name`` is ``stop``: ``fence`` if + :ref:`stonith-enabled <stonith_enabled>` is true, otherwise ``block`` + * If ``name`` is ``demote``: ``on-fail`` of the ``monitor`` action with + ``role`` set to ``Promoted``, if present, enabled, and configured to a + value other than ``demote``, or ``restart`` otherwise + * Otherwise: ``restart`` + - How the cluster should respond to a failure of this action. Allowed + values: + + * ``ignore:`` Pretend the resource did not fail + * ``block:`` Do not perform any further operations on the resource + * ``stop:`` Stop the resource and leave it stopped + * ``demote:`` Demote the resource, without a full restart. This is valid + only for ``promote`` actions, and for ``monitor`` actions with both a + nonzero ``interval`` and ``role`` set to ``Promoted``; for any other + action, a configuration error will be logged, and the default behavior + will be used. 
*(since 2.0.5)* + * ``restart:`` Stop the resource, and start it again if allowed + (possibly on a different node) + * ``fence:`` Fence the node on which the resource failed + * ``standby:`` Put the node on which the resource failed in standby mode + (forcing *all* resources away) + * - .. _op_enabled: + + .. index:: + pair: op; enabled + single: enabled; action property + single: action; property, enabled + + enabled + - :ref:`boolean <boolean>` + - true + - If ``false``, ignore this operation definition. This does not suppress + all actions of this type, but is typically used to pause a recurring + monitor. This can complement the resource being unmanaged + (:ref:`is-managed <is_managed>` set to ``false``), which does not stop + recurring operations. Maintenance mode, which does stop configured + monitors, overrides this setting. + * - .. _op_record_pending: + + .. index:: + pair: op; record-pending + single: record-pending; action property + single: action; property, record-pending + + record-pending + - :ref:`boolean <boolean>` + - true + - Operation results are always recorded when the operation completes + (successful or not). If this is ``true``, operations will also be + recorded when initiated, so that status output can indicate that the + operation is in progress. + +.. note:: + + Only one action can be configured for any given combination of ``name`` and + ``interval``. .. note:: diff --git a/doc/sphinx/Pacemaker_Explained/resources.rst b/doc/sphinx/Pacemaker_Explained/resources.rst index a971c44..99bd84f 100644 --- a/doc/sphinx/Pacemaker_Explained/resources.rst +++ b/doc/sphinx/Pacemaker_Explained/resources.rst @@ -339,193 +339,291 @@ Meta-attributes are used by the cluster to decide how a resource should behave and can be easily set using the ``--meta`` option of the **crm_resource** command. -.. table:: **Meta-attributes of a Primitive Resource** +.. 
list-table:: **Meta-attributes of a Primitive Resource** :class: longtable - :widths: 2 2 3 - - +----------------------------+----------------------------------+------------------------------------------------------+ - | Field | Default | Description | - +============================+==================================+======================================================+ - | priority | 0 | .. index:: | - | | | single: priority; resource option | - | | | single: resource; option, priority | - | | | | - | | | If not all resources can be active, the cluster | - | | | will stop lower priority resources in order to | - | | | keep higher priority ones active. | - +----------------------------+----------------------------------+------------------------------------------------------+ - | critical | true | .. index:: | - | | | single: critical; resource option | - | | | single: resource; option, critical | - | | | | - | | | Use this value as the default for ``influence`` in | - | | | all :ref:`colocation constraints | - | | | <s-resource-colocation>` involving this resource, | - | | | as well as the implicit colocation constraints | - | | | created if this resource is in a | - | | | :ref:`group <group-resources>`. For details, see | - | | | :ref:`s-coloc-influence`. *(since 2.1.0)* | - +----------------------------+----------------------------------+------------------------------------------------------+ - | target-role | Started | .. index:: | - | | | single: target-role; resource option | - | | | single: resource; option, target-role | - | | | | - | | | What state should the cluster attempt to keep this | - | | | resource in? 
Allowed values: | - | | | | - | | | * ``Stopped:`` Force the resource to be stopped | - | | | * ``Started:`` Allow the resource to be started | - | | | (and in the case of | - | | | :ref:`promotable <s-resource-promotable>` clone | - | | | resources, promoted if appropriate) | - | | | * ``Unpromoted:`` Allow the resource to be started, | - | | | but only in the unpromoted role if the resource is | - | | | :ref:`promotable <s-resource-promotable>` | - | | | * ``Promoted:`` Equivalent to ``Started`` | - +----------------------------+----------------------------------+------------------------------------------------------+ - | is-managed | TRUE | .. _is_managed: | - | | | | - | | | .. index:: | - | | | single: is-managed; resource option | - | | | single: resource; option, is-managed | - | | | | - | | | If false, the cluster will not start or stop the | - | | | resource on any node. Recurring actions for the | - | | | resource are unaffected. Maintenance mode overrides | - | | | this setting. Allowed values: ``true``, ``false`` | - +----------------------------+----------------------------------+------------------------------------------------------+ - | maintenance | FALSE | .. _rsc_maintenance: | - | | | | - | | | .. index:: | - | | | single: maintenance; resource option | - | | | single: resource; option, maintenance | - | | | | - | | | If true, the cluster will not start or stop the | - | | | resource on any node, and will pause any recurring | - | | | monitors (except those specifying ``role`` as | - | | | ``Stopped``). If true, the | - | | | :ref:`maintenance-mode <maintenance_mode>` cluster | - | | | option or :ref:`maintenance <node_maintenance>` | - | | | node attribute override this. Allowed values: | - | | | ``true``, ``false`` | - +----------------------------+----------------------------------+------------------------------------------------------+ - | resource-stickiness | 1 for individual clone | .. 
_resource-stickiness: | - | | instances, 0 for all | | - | | other resources | .. index:: | - | | | single: resource-stickiness; resource option | - | | | single: resource; option, resource-stickiness | - | | | | - | | | A score that will be added to the current node when | - | | | a resource is already active. This allows running | - | | | resources to stay where they are, even if they | - | | | would be placed elsewhere if they were being | - | | | started from a stopped state. | - +----------------------------+----------------------------------+------------------------------------------------------+ - | requires | ``quorum`` for resources | .. _requires: | - | | with a ``class`` of ``stonith``, | | - | | otherwise ``unfencing`` if | .. index:: | - | | unfencing is active in the | single: requires; resource option | - | | cluster, otherwise ``fencing`` | single: resource; option, requires | - | | if ``stonith-enabled`` is true, | | - | | otherwise ``quorum`` | Conditions under which the resource can be | - | | | started. Allowed values: | - | | | | - | | | * ``nothing:`` can always be started | - | | | * ``quorum:`` The cluster can only start this | - | | | resource if a majority of the configured nodes | - | | | are active | - | | | * ``fencing:`` The cluster can only start this | - | | | resource if a majority of the configured nodes | - | | | are active *and* any failed or unknown nodes | - | | | have been :ref:`fenced <fencing>` | - | | | * ``unfencing:`` The cluster can only start this | - | | | resource if a majority of the configured nodes | - | | | are active *and* any failed or unknown nodes have | - | | | been fenced *and* only on nodes that have been | - | | | :ref:`unfenced <unfencing>` | - +----------------------------+----------------------------------+------------------------------------------------------+ - | migration-threshold | INFINITY | .. 
index:: | - | | | single: migration-threshold; resource option | - | | | single: resource; option, migration-threshold | - | | | | - | | | How many failures may occur for this resource on | - | | | a node, before this node is marked ineligible to | - | | | host this resource. A value of 0 indicates that this | - | | | feature is disabled (the node will never be marked | - | | | ineligible); by constrast, the cluster treats | - | | | INFINITY (the default) as a very large but finite | - | | | number. This option has an effect only if the | - | | | failed operation specifies ``on-fail`` as | - | | | ``restart`` (the default), and additionally for | - | | | failed ``start`` operations, if the cluster | - | | | property ``start-failure-is-fatal`` is ``false``. | - +----------------------------+----------------------------------+------------------------------------------------------+ - | failure-timeout | 0 | .. index:: | - | | | single: failure-timeout; resource option | - | | | single: resource; option, failure-timeout | - | | | | - | | | How many seconds to wait before acting as if the | - | | | failure had not occurred, and potentially allowing | - | | | the resource back to the node on which it failed. | - | | | A value of 0 indicates that this feature is | - | | | disabled. | - +----------------------------+----------------------------------+------------------------------------------------------+ - | multiple-active | stop_start | .. index:: | - | | | single: multiple-active; resource option | - | | | single: resource; option, multiple-active | - | | | | - | | | What should the cluster do if it ever finds the | - | | | resource active on more than one node? 
Allowed | - | | | values: | - | | | | - | | | * ``block``: mark the resource as unmanaged | - | | | * ``stop_only``: stop all active instances and | - | | | leave them that way | - | | | * ``stop_start``: stop all active instances and | - | | | start the resource in one location only | - | | | * ``stop_unexpected``: stop all active instances | - | | | except where the resource should be active (this | - | | | should be used only when extra instances are not | - | | | expected to disrupt existing instances, and the | - | | | resource agent's monitor of an existing instance | - | | | is capable of detecting any problems that could be | - | | | caused; note that any resources ordered after this | - | | | will still need to be restarted) *(since 2.1.3)* | - +----------------------------+----------------------------------+------------------------------------------------------+ - | allow-migrate | TRUE for ocf:pacemaker:remote | Whether the cluster should try to "live migrate" | - | | resources, FALSE otherwise | this resource when it needs to be moved (see | - | | | :ref:`live-migration`) | - +----------------------------+----------------------------------+------------------------------------------------------+ - | allow-unhealthy-nodes | FALSE | Whether the resource should be able to run on a node | - | | | even if the node's health score would otherwise | - | | | prevent it (see :ref:`node-health`) *(since 2.1.3)* | - +----------------------------+----------------------------------+------------------------------------------------------+ - | container-attribute-target | | Specific to bundle resources; see | - | | | :ref:`s-bundle-attributes` | - +----------------------------+----------------------------------+------------------------------------------------------+ - | remote-node | | The name of the Pacemaker Remote guest node this | - | | | resource is associated with, if any. 
If | - | | | specified, this both enables the resource as a | - | | | guest node and defines the unique name used to | - | | | identify the guest node. The guest must be | - | | | configured to run the Pacemaker Remote daemon | - | | | when it is started. **WARNING:** This value | - | | | cannot overlap with any resource or node IDs. | - +----------------------------+----------------------------------+------------------------------------------------------+ - | remote-port | 3121 | If ``remote-node`` is specified, the port on the | - | | | guest used for its Pacemaker Remote connection. | - | | | The Pacemaker Remote daemon on the guest must | - | | | be configured to listen on this port. | - +----------------------------+----------------------------------+------------------------------------------------------+ - | remote-addr | value of ``remote-node`` | If ``remote-node`` is specified, the IP | - | | | address or hostname used to connect to the | - | | | guest via Pacemaker Remote. The Pacemaker Remote | - | | | daemon on the guest must be configured to accept | - | | | connections on this address. | - +----------------------------+----------------------------------+------------------------------------------------------+ - | remote-connect-timeout | 60s | If ``remote-node`` is specified, how long before | - | | | a pending guest connection will time out. | - +----------------------------+----------------------------------+------------------------------------------------------+ + :widths: 2 2 3 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + + * - .. _meta_priority: + + .. index:: + single: priority; resource option + single: resource; option, priority + + priority + - :ref:`score <score>` + - 0 + - If not all resources can be active, the cluster will stop lower-priority + resources in order to keep higher-priority ones active. + + * - .. _meta_critical: + + .. 
index:: + single: critical; resource option + single: resource; option, critical + + critical + - :ref:`boolean <boolean>` + - true + - Use this value as the default for ``influence`` in all + :ref:`colocation constraints <s-resource-colocation>` involving this + resource, as well as in the implicit colocation constraints created if + this resource is in a :ref:`group <group-resources>`. For details, see + :ref:`s-coloc-influence`. *(since 2.1.0)* + + * - .. _meta_target_role: + + .. index:: + single: target-role; resource option + single: resource; option, target-role + + target-role + - :ref:`enumeration <enumeration>` + - Started + - What state should the cluster attempt to keep this resource in? Allowed + values: + + * ``Stopped:`` Force the resource to be stopped + * ``Started:`` Allow the resource to be started (and in the case of + :ref:`promotable <s-resource-promotable>` clone resources, promoted if + appropriate) + * ``Unpromoted:`` Allow the resource to be started, but only in the + unpromoted role if the resource is + :ref:`promotable <s-resource-promotable>` + * ``Promoted:`` Equivalent to ``Started`` + + * - .. _meta_is_managed: + .. _is_managed: + + .. index:: + single: is-managed; resource option + single: resource; option, is-managed + + is-managed + - :ref:`boolean <boolean>` + - true + - If false, the cluster will not start, stop, promote, or demote the + resource on any node. Recurring actions for the resource are + unaffected. Maintenance mode overrides this setting. + + * - .. _meta_maintenance: + .. _rsc_maintenance: + + .. index:: + single: maintenance; resource option + single: resource; option, maintenance + + maintenance + - :ref:`boolean <boolean>` + - false + - If true, the cluster will not start, stop, promote, or demote the + resource on any node, and will pause any recurring monitors (except those + specifying ``role`` as ``Stopped``). 
If true, the + :ref:`maintenance-mode <maintenance_mode>` cluster option or + :ref:`maintenance <node_maintenance>` node attribute overrides this. + + * - .. _meta_resource_stickiness: + .. _resource-stickiness: + + .. index:: + single: resource-stickiness; resource option + single: resource; option, resource-stickiness + + resource-stickiness + - :ref:`score <score>` + - 1 for individual clone instances, 0 for all other resources + - A score that will be added to the current node when a resource is already + active. This allows running resources to stay where they are, even if + they would be placed elsewhere if they were being started from a stopped + state. + + * - .. _meta_requires: + .. _requires: + + .. index:: + single: requires; resource option + single: resource; option, requires + + requires + - :ref:`enumeration <enumeration>` + - ``quorum`` for resources with a ``class`` of ``stonith``, otherwise + ``unfencing`` if unfencing is active in the cluster, otherwise + ``fencing`` if ``stonith-enabled`` is true, otherwise ``quorum`` + - Conditions under which the resource can be started. Allowed values: + + * ``nothing:`` The cluster can always start this resource. + * ``quorum:`` The cluster can start this resource only if a majority of + the configured nodes are active. + * ``fencing:`` The cluster can start this resource only if a majority of + the configured nodes are active *and* any failed or unknown nodes have + been :ref:`fenced <fencing>`. + * ``unfencing:`` The cluster can only start this resource if a majority + of the configured nodes are active *and* any failed or unknown nodes + have been fenced *and* only on nodes that have been + :ref:`unfenced <unfencing>`. + + * - .. _meta_migration_threshold: + + .. 
index:: + single: migration-threshold; resource option + single: resource; option, migration-threshold + + migration-threshold + - :ref:`score <score>` + - INFINITY + - How many failures may occur for this resource on a node, before this node + is marked ineligible to host this resource. A value of 0 indicates that + this feature is disabled (the node will never be marked ineligible); by + contrast, the cluster treats ``INFINITY`` (the default) as a very large + but finite number. This option has an effect only if the failed operation + specifies ``on-fail`` as ``restart`` (the default), and additionally for + failed ``start`` operations, if the cluster property + ``start-failure-is-fatal`` is ``false``. + + * - .. _meta_failure_timeout: + + .. index:: + single: failure-timeout; resource option + single: resource; option, failure-timeout + + failure-timeout + - :ref:`duration <duration>` + - 0 + - How many seconds to wait before acting as if the failure had not + occurred, and potentially allowing the resource back to the node on which + it failed. A value of 0 indicates that this feature is disabled. + + * - .. _meta_multiple_active: + + .. index:: + single: multiple-active; resource option + single: resource; option, multiple-active + + multiple-active + - :ref:`enumeration <enumeration>` + - stop_start + - What should the cluster do if it ever finds the resource active on more + than one node? 
Allowed values: + + * ``block``: mark the resource as unmanaged + * ``stop_only``: stop all active instances and leave them that way + * ``stop_start``: stop all active instances and start the resource in one + location only + * ``stop_unexpected``: stop all active instances except where the + resource should be active (this should be used only when extra + instances are not expected to disrupt existing instances, and the + resource agent's monitor of an existing instance is capable of + detecting any problems that could be caused; note that any resources + ordered after this will still need to be restarted) *(since 2.1.3)* + + * - .. _meta_allow_migrate: + + .. index:: + single: allow-migrate; resource option + single: resource; option, allow-migrate + + allow-migrate + - :ref:`boolean <boolean>` + - true for ``ocf:pacemaker:remote`` resources, false otherwise + - Whether the cluster should try to "live migrate" this resource when it + needs to be moved (see :ref:`live-migration`) + + * - .. _meta_allow_unhealthy_nodes: + + .. index:: + single: allow-unhealthy-nodes; resource option + single: resource; option, allow-unhealthy-nodes + + allow-unhealthy-nodes + - :ref:`boolean <boolean>` + - false + - Whether the resource should be able to run on a node even if the node's + health score would otherwise prevent it (see :ref:`node-health`) *(since + 2.1.3)* + + * - .. _meta_container_attribute_target: + + .. index:: + single: container-attribute-target; resource option + single: resource; option, container-attribute-target + + container-attribute-target + - :ref:`enumeration <enumeration>` + - + - Specific to bundle resources; see :ref:`s-bundle-attributes` + + * - .. _meta_remote_node: + + .. index:: + single: remote-node; resource option + single: resource; option, remote-node + + remote-node + - :ref:`text <text>` + - + - The name of the Pacemaker Remote guest node this resource is associated + with, if any. 
If specified, this both enables the resource as a guest + node and defines the unique name used to identify the guest node. The + guest must be configured to run the Pacemaker Remote daemon when it is + started. **WARNING:** This value cannot overlap with any resource or node + IDs. + + * - .. _meta_remote_addr: + + .. index:: + single: remote-addr; resource option + single: resource; option, remote-addr + + remote-addr + - :ref:`text <text>` + - value of ``remote-node`` + - If ``remote-node`` is specified, the IP address or hostname used to + connect to the guest via Pacemaker Remote. The Pacemaker Remote daemon on + the guest must be configured to accept connections on this address. + + * - .. _meta_remote_port: + + .. index:: + single: remote-port; resource option + single: resource; option, remote-port + + remote-port + - :ref:`port <port>` + - 3121 + - If ``remote-node`` is specified, the port on the guest used for its + Pacemaker Remote connection. The Pacemaker Remote daemon on the guest + must be configured to listen on this port. + + * - .. _meta_remote_connect_timeout: + + .. index:: + single: remote-connect-timeout; resource option + single: resource; option, remote-connect-timeout + + remote-connect-timeout + - :ref:`timeout <timeout>` + - 60s + - If ``remote-node`` is specified, how long before a pending guest + connection will time out. + + * - .. _meta_remote_allow_migrate: + + .. index:: + single: remote-allow-migrate; resource option + single: resource; option, remote-allow-migrate + + remote-allow-migrate + - :ref:`boolean <boolean>` + - true + - If ``remote-node`` is specified, this acts as the ``allow-migrate`` + meta-attribute for the implicit remote connection resource + (``ocf:pacemaker:remote``). 
+ As an example of setting resource options, if you performed the following commands on an LSB Email resource: diff --git a/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst b/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst index 06c00f0..39f736f 100644 --- a/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst +++ b/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst @@ -262,10 +262,16 @@ Then instead of duplicating the rule for all your other resources, you can inste .. important:: The cluster will insist that the ``rule`` exists somewhere. Attempting - to add a reference to a non-existing rule will cause a validation - failure, as will attempting to remove a ``rule`` that is referenced + to add a reference to a nonexistent ``id`` will cause a validation failure, + as will attempting to remove a ``rule`` with an ``id`` that is referenced elsewhere. + Some rule syntax is allowed only in + :ref:`certain contexts <rule_conditions>`. Validation cannot ensure that the + referenced rule is allowed in the context of the rule containing ``id-ref``, + so such errors will be caught (and logged) only after the new configuration + is accepted. It is the administrator's responsibility to check for these. 
+ The same principle applies for ``meta_attributes`` and ``instance_attributes`` as illustrated in the example below: @@ -288,7 +294,7 @@ The same principle applies for ``meta_attributes`` and <op id="health-check" name="monitor" interval="30min"/> </operations> </primitive> - <primitive id="myOtherlRsc" class="ocf" type="Other" provider="me"> + <primitive id="myOtherRsc" class="ocf" type="Other" provider="me"> <instance_attributes id-ref="mySpecialRsc-attrs"/> <meta_attributes id-ref="mySpecialRsc-options"/> <operations id-ref="health-checks"/> diff --git a/doc/sphinx/Pacemaker_Explained/rules.rst b/doc/sphinx/Pacemaker_Explained/rules.rst index e9d85e0..13134da 100644 --- a/doc/sphinx/Pacemaker_Explained/rules.rst +++ b/doc/sphinx/Pacemaker_Explained/rules.rst @@ -6,226 +6,103 @@ Rules ----- -Rules can be used to make your configuration more dynamic, allowing values to -change depending on the time or the value of a node attribute. Examples of -things rules are useful for: +Rules make a configuration more dynamic, allowing values to depend on +conditions such as time of day or the value of a node attribute. For example, +rules can: * Set a higher value for :ref:`resource-stickiness <resource-stickiness>` - during working hours, to minimize downtime, and a lower value on weekends, to + during working hours to minimize downtime, and a lower value on weekends to allow resources to move to their most preferred locations when people aren't - around to notice. + around * Automatically place the cluster into maintenance mode during a scheduled - maintenance window. + maintenance window -* Assign certain nodes and resources to a particular department via custom - node attributes and meta-attributes, and add a single location constraint - that restricts the department's resources to run only on those nodes. 
- -Each constraint type or property set that supports rules may contain one or more -``rule`` elements specifying conditions under which the constraint or properties -take effect. Examples later in this chapter will make this clearer. +* Restrict a particular department's resources to run on certain nodes, as + determined by custom resource meta-attributes and node attributes .. index:: - pair: XML element; rule - -Rule Properties -############### - -.. table:: **Attributes of a rule Element** - :widths: 1 1 3 - - +-----------------+-------------+-------------------------------------------+ - | Attribute | Default | Description | - +=================+=============+===========================================+ - | id | | .. index:: | - | | | pair: rule; id | - | | | | - | | | A unique name for this element (required) | - +-----------------+-------------+-------------------------------------------+ - | role | ``Started`` | .. index:: | - | | | pair: rule; role | - | | | | - | | | The rule is in effect only when the | - | | | resource is in the specified role. | - | | | Allowed values are ``Started``, | - | | | ``Unpromoted``, and ``Promoted``. A rule | - | | | with a ``role`` of ``Promoted`` cannot | - | | | determine the initial location of a clone | - | | | instance and will only affect which of | - | | | the active instances will be promoted. | - +-----------------+-------------+-------------------------------------------+ - | score | | .. index:: | - | | | pair: rule; score | - | | | | - | | | If this rule is used in a location | - | | | constraint and evaluates to true, apply | - | | | this score to the constraint. Only one of | - | | | ``score`` and ``score-attribute`` may be | - | | | used. | - +-----------------+-------------+-------------------------------------------+ - | score-attribute | | .. 
index:: | - | | | pair: rule; score-attribute | - | | | | - | | | If this rule is used in a location | - | | | constraint and evaluates to true, use the | - | | | value of this node attribute as the score | - | | | to apply to the constraint. Only one of | - | | | ``score`` and ``score-attribute`` may be | - | | | used. | - +-----------------+-------------+-------------------------------------------+ - | boolean-op | ``and`` | .. index:: | - | | | pair: rule; boolean-op | - | | | | - | | | If this rule contains more than one | - | | | condition, a value of ``and`` specifies | - | | | that the rule evaluates to true only if | - | | | all conditions are true, and a value of | - | | | ``or`` specifies that the rule evaluates | - | | | to true if any condition is true. | - +-----------------+-------------+-------------------------------------------+ - -A ``rule`` element must contain one or more conditions. A condition may be an -``expression`` element, a ``date_expression`` element, or another ``rule`` element. + pair: rule; XML element + pair: rule; options +Rule Options +############ -.. index:: - single: rule; node attribute expression - single: node attribute; rule expression - pair: XML element; expression +Each context that supports rules may contain a single ``rule`` element. -.. _node_attribute_expressions: +.. list-table:: **Attributes of a rule Element** + :class: longtable + :widths: 2 2 2 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + + * - .. _rule_id: + + .. index:: + pair: rule; id + + id + - :ref:`id <id>` + - + - A unique name for this element (required) + * - .. _boolean_op: + + .. index:: + pair: rule; boolean-op + + boolean-op + - :ref:`enumeration <enumeration>` + - ``and`` + - How to combine conditions if this rule contains more than one. Allowed + values: + + * ``and``: the rule is satisfied only if all conditions are satisfied + * ``or``: the rule is satisfied if any condition is satisfied + +.. 
_rule_conditions: -Node Attribute Expressions -########################## +.. index:: + single: rule; conditions + single: rule; contexts -Expressions are rule conditions based on the values of node attributes. +Rule Conditions and Contexts +############################ -.. table:: **Attributes of an expression Element** - :class: longtable - :widths: 1 2 3 - - +--------------+---------------------------------+-------------------------------------------+ - | Attribute | Default | Description | - +==============+=================================+===========================================+ - | id | | .. index:: | - | | | pair: expression; id | - | | | | - | | | A unique name for this element (required) | - +--------------+---------------------------------+-------------------------------------------+ - | attribute | | .. index:: | - | | | pair: expression; attribute | - | | | | - | | | The node attribute to test (required) | - +--------------+---------------------------------+-------------------------------------------+ - | type | The default type for | .. index:: | - | | ``lt``, ``gt``, ``lte``, and | pair: expression; type | - | | ``gte`` operations is ``number``| | - | | if either value contains a | How the node attributes should be | - | | decimal point character, or | compared. Allowed values are ``string``, | - | | ``integer`` otherwise. The | ``integer`` *(since 2.0.5)*, ``number``, | - | | default type for all other | and ``version``. ``integer`` truncates | - | | operations is ``string``. If a | floating-point values if necessary before | - | | numeric parse fails for either | performing a 64-bit integer comparison. | - | | value, then the values are | ``number`` performs a double-precision | - | | compared as type ``string``. | floating-point comparison | - | | | *(32-bit integer before 2.0.5)*. | - +--------------+---------------------------------+-------------------------------------------+ - | operation | | .. 
index:: | - | | | pair: expression; operation | - | | | | - | | | The comparison to perform (required). | - | | | Allowed values: | - | | | | - | | | * ``lt:`` True if the node attribute value| - | | | is less than the comparison value | - | | | * ``gt:`` True if the node attribute value| - | | | is greater than the comparison value | - | | | * ``lte:`` True if the node attribute | - | | | value is less than or equal to the | - | | | comparison value | - | | | * ``gte:`` True if the node attribute | - | | | value is greater than or equal to the | - | | | comparison value | - | | | * ``eq:`` True if the node attribute value| - | | | is equal to the comparison value | - | | | * ``ne:`` True if the node attribute value| - | | | is not equal to the comparison value | - | | | * ``defined:`` True if the node has the | - | | | named attribute | - | | | * ``not_defined:`` True if the node does | - | | | not have the named attribute | - +--------------+---------------------------------+-------------------------------------------+ - | value | | .. index:: | - | | | pair: expression; value | - | | | | - | | | User-supplied value for comparison | - | | | (required for operations other than | - | | | ``defined`` and ``not_defined``) | - +--------------+---------------------------------+-------------------------------------------+ - | value-source | ``literal`` | .. index:: | - | | | pair: expression; value-source | - | | | | - | | | How the ``value`` is derived. 
Allowed | - | | | values: | - | | | | - | | | * ``literal``: ``value`` is a literal | - | | | string to compare against | - | | | * ``param``: ``value`` is the name of a | - | | | resource parameter to compare against | - | | | (only valid in location constraints) | - | | | * ``meta``: ``value`` is the name of a | - | | | resource meta-attribute to compare | - | | | against (only valid in location | - | | | constraints) | - +--------------+---------------------------------+-------------------------------------------+ +A ``rule`` element must contain one or more conditions. A condition is any of +the following, which will be described in more detail later: -.. _node-attribute-expressions-special: +* a :ref:`date/time expression <date_expression>` +* a :ref:`node attribute expression <node_attribute_expressions>` +* a :ref:`resource type expression <rsc_expression>` +* an :ref:`operation type expression <op_expression>` +* another ``rule`` (allowing for complex combinations of conditions) -In addition to custom node attributes defined by the administrator, the cluster -defines special, built-in node attributes for each node that can also be used -in rule expressions. +Each type of condition is allowed only in certain contexts. Although any given +context may contain only one ``rule`` element, that element may contain any +number of conditions, including other ``rule`` elements. -.. table:: **Built-in Node Attributes** - :widths: 1 4 +Rules may be used in the following contexts, which also will be described in +more detail later: - +---------------+-----------------------------------------------------------+ - | Name | Value | - +===============+===========================================================+ - | #uname | :ref:`Node name <node_name>` | - +---------------+-----------------------------------------------------------+ - | #id | Node ID | - +---------------+-----------------------------------------------------------+ - | #kind | Node type. 
Possible values are ``cluster``, ``remote``, | - | | and ``container``. Kind is ``remote`` for Pacemaker Remote| - | | nodes created with the ``ocf:pacemaker:remote`` resource, | - | | and ``container`` for Pacemaker Remote guest nodes and | - | | bundle nodes | - +---------------+-----------------------------------------------------------+ - | #is_dc | ``true`` if this node is the cluster's Designated | - | | Controller (DC), ``false`` otherwise | - +---------------+-----------------------------------------------------------+ - | #cluster-name | The value of the ``cluster-name`` cluster property, if set| - +---------------+-----------------------------------------------------------+ - | #site-name | The value of the ``site-name`` node attribute, if set, | - | | otherwise identical to ``#cluster-name`` | - +---------------+-----------------------------------------------------------+ - | #role | The role the relevant promotable clone resource has on | - | | this node. Valid only within a rule for a location | - | | constraint for a promotable clone resource. | - +---------------+-----------------------------------------------------------+ - -.. Add_to_above_table_if_released: - - +---------------+-----------------------------------------------------------+ - | #ra-version | The installed version of the resource agent on the node, | - | | as defined by the ``version`` attribute of the | - | | ``resource-agent`` tag in the agent's metadata. Valid only| - | | within rules controlling resource options. This can be | - | | useful during rolling upgrades of a backward-incompatible | - | | resource agent. 
*(since x.x.x)* | +* a :ref:`location constraint <location_rule>` +* a :ref:`cluster_property_set <cluster_options>` element (within the + ``crm_config`` element) +* an :ref:`instance_attributes <option_rule>` element (within an ``alert``, + ``bundle``, ``clone``, ``group``, ``node``, ``op``, ``primitive``, + ``recipient``, or ``template`` element) +* a :ref:`meta_attributes <option_rule>` element (within an ``alert``, + ``bundle``, ``clone``, ``group``, ``op``, ``op_defaults``, ``primitive``, + ``recipient``, ``rsc_defaults``, or ``template`` element) +* a :ref:`utilization <option_rule>` element (within a ``node``, ``primitive``, + or ``template`` element) +.. _date_expression: + .. index:: single: rule; date/time expression pair: XML element; date_expression @@ -233,66 +110,77 @@ in rule expressions. Date/Time Expressions ##################### -Date/time expressions are rule conditions based (as the name suggests) on the -current date and time. +The ``date_expression`` element configures a rule condition based on the +current date and time. It is allowed in rules in any context. -A ``date_expression`` element may optionally contain a ``date_spec`` or -``duration`` element depending on the context. - -.. table:: **Attributes of a date_expression Element** - :widths: 1 4 - - +---------------+-----------------------------------------------------------+ - | Attribute | Description | - +===============+===========================================================+ - | id | .. index:: | - | | pair: id; date_expression | - | | | - | | A unique name for this element (required) | - +---------------+-----------------------------------------------------------+ - | start | .. index:: | - | | pair: start; date_expression | - | | | - | | A date/time conforming to the | - | | `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ | - | | specification. 
May be used when ``operation`` is | - | | ``in_range`` (in which case at least one of ``start`` or | - | | ``end`` must be specified) or ``gt`` (in which case | - | | ``start`` is required). | - +---------------+-----------------------------------------------------------+ - | end | .. index:: | - | | pair: end; date_expression | - | | | - | | A date/time conforming to the | - | | `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ | - | | specification. May be used when ``operation`` is | - | | ``in_range`` (in which case at least one of ``start`` or | - | | ``end`` must be specified) or ``lt`` (in which case | - | | ``end`` is required). | - +---------------+-----------------------------------------------------------+ - | operation | .. index:: | - | | pair: operation; date_expression | - | | | - | | Compares the current date/time with the start and/or end | - | | date, depending on the context. Allowed values: | - | | | - | | * ``gt:`` True if the current date/time is after ``start``| - | | * ``lt:`` True if the current date/time is before ``end`` | - | | * ``in_range:`` True if the current date/time is after | - | | ``start`` (if specified) and before either ``end`` (if | - | | specified) or ``start`` plus the value of the | - | | ``duration`` element (if one is contained in the | - | | ``date_expression``). If both ``end`` and ``duration`` | - | | are specified, ``duration`` is ignored. | - | | * ``date_spec:`` True if the current date/time matches | - | | the specification given in the contained ``date_spec`` | - | | element (described below) | - +---------------+-----------------------------------------------------------+ - - -.. note:: There is no ``eq``, ``neq``, ``gte``, or ``lte`` operation, since - they would be valid only for a single second. +It may contain a ``date_spec`` or ``duration`` element depending on the +``operation`` as described below. +.. 
list-table:: **Attributes of a date_expression Element** + :class: longtable + :widths: 1 1 1 4 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _date_expression_id: + + .. index:: + pair: date_expression; id + + id + - :ref:`id <id>` + - + - A unique name for this element (required) + * - .. _date_expression_start: + + .. index:: + pair: date_expression; start + + start + - :ref:`ISO 8601 <iso8601>` + - + - The beginning of the desired time range. Meaningful with an + ``operation`` of ``in_range`` or ``gt``. + * - .. _date_expression_end: + + .. index:: + pair: date_expression; end + + end + - :ref:`ISO 8601 <iso8601>` + - + - The end of the desired time range. Meaningful with an ``operation`` of + ``in_range`` or ``lt``. + * - .. _date_expression_operation: + + .. index:: + pair: date_expression; operation + + operation + - :ref:`enumeration <enumeration>` + - ``in_range`` + - Specifies how to compare the current date/time against a desired time + range. Allowed values: + + * ``gt:`` The expression is satisfied if the current date/time is after + ``start`` (which is required) + * ``lt:`` The expression is satisfied if the current date/time is before + ``end`` (which is required) + * ``in_range:`` The expression is satisfied if the current date/time is + greater than or equal to ``start`` (if specified) and less than or + equal to either ``end`` (if specified) or ``start`` plus the value of + the :ref:`duration <duration_element>` element (if one is contained in + the ``date_expression``). At least one of ``start`` or ``end`` must be + specified. If both ``end`` and ``duration`` are specified, + ``duration`` is ignored. + * ``date_spec:`` The expression is satisfied if the current date/time + matches the specification given in the contained + :ref:`date_spec <date_spec>` element (which is required) + +.. _date_spec: .. 
index:: single: date specification @@ -301,87 +189,142 @@ A ``date_expression`` element may optionally contain a ``date_spec`` or Date Specifications ___________________ -A ``date_spec`` element is used to create a cron-like expression relating -to time. Each field can contain a single number or range. Any field not -supplied is ignored. - -.. table:: **Attributes of a date_spec Element** - :widths: 1 3 - - +---------------+-----------------------------------------------------------+ - | Attribute | Description | - +===============+===========================================================+ - | id | .. index:: | - | | pair: id; date_spec | - | | | - | | A unique name for this element (required) | - +---------------+-----------------------------------------------------------+ - | seconds | .. index:: | - | | pair: seconds; date_spec | - | | | - | | Allowed values: 0-59 | - +---------------+-----------------------------------------------------------+ - | minutes | .. index:: | - | | pair: minutes; date_spec | - | | | - | | Allowed values: 0-59 | - +---------------+-----------------------------------------------------------+ - | hours | .. index:: | - | | pair: hours; date_spec | - | | | - | | Allowed values: 0-23 (where 0 is midnight and 23 is | - | | 11 p.m.) | - +---------------+-----------------------------------------------------------+ - | monthdays | .. index:: | - | | pair: monthdays; date_spec | - | | | - | | Allowed values: 1-31 (depending on month and year) | - +---------------+-----------------------------------------------------------+ - | weekdays | .. index:: | - | | pair: weekdays; date_spec | - | | | - | | Allowed values: 1-7 (where 1 is Monday and 7 is Sunday) | - +---------------+-----------------------------------------------------------+ - | yeardays | .. 
index:: | - | | pair: yeardays; date_spec | - | | | - | | Allowed values: 1-366 (depending on the year) | - +---------------+-----------------------------------------------------------+ - | months | .. index:: | - | | pair: months; date_spec | - | | | - | | Allowed values: 1-12 | - +---------------+-----------------------------------------------------------+ - | weeks | .. index:: | - | | pair: weeks; date_spec | - | | | - | | Allowed values: 1-53 (depending on weekyear) | - +---------------+-----------------------------------------------------------+ - | years | .. index:: | - | | pair: years; date_spec | - | | | - | | Year according to the Gregorian calendar | - +---------------+-----------------------------------------------------------+ - | weekyears | .. index:: | - | | pair: weekyears; date_spec | - | | | - | | Year in which the week started; for example, 1 January | - | | 2005 can be specified in ISO 8601 as "2005-001 Ordinal", | - | | "2005-01-01 Gregorian" or "2004-W53-6 Weekly" and thus | - | | would match ``years="2005"`` or ``weekyears="2004"`` | - +---------------+-----------------------------------------------------------+ - | moon | .. index:: | - | | pair: moon; date_spec | - | | | - | | Allowed values are 0-7 (where 0 is the new moon and 4 is | - | | full moon). *(deprecated since 2.1.6)* | - +---------------+-----------------------------------------------------------+ - -For example, ``monthdays="1"`` matches the first day of every month, and -``hours="09-17"`` matches the hours between 9 a.m. and 5 p.m. (inclusive). - -At this time, multiple ranges (e.g. ``weekdays="1,2"`` or ``weekdays="1-2,5-6"``) -are not supported. +A ``date_spec`` element is used within a ``date_expression`` to specify a +combination of dates and times that satisfy the expression. + +.. list-table:: **Attributes of a date_spec Element** + :class: longtable + :widths: 1 1 1 4 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _date_spec_id: + + .. 
index:: + pair: date_spec; id + + id + - :ref:`id <id>` + - + - A unique name for this element (required) + * - .. _date_spec_seconds: + + .. index:: + pair: date_spec; seconds + + seconds + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current time's + second is within this range. Allowed integers: 0 to 59. + * - .. _date_spec_minutes: + + .. index:: + pair: date_spec; minutes + + minutes + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current time's + minute is within this range. Allowed integers: 0 to 59. + * - .. _date_spec_hours: + + .. index:: + pair: date_spec; hours + + hours + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current time's + hour is within this range. Allowed integers: 0 to 23 where 0 is midnight + and 23 is 11 p.m. + * - .. _date_spec_monthdays: + + .. index:: + pair: date_spec; monthdays + + monthdays + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + day of the month is in this range. Allowed integers: 1 to 31. + * - .. _date_spec_weekdays: + + .. index:: + pair: date_spec; weekdays + + weekdays + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + ordinal day of the week is in this range. Allowed integers: 1-7 (where 1 + is Monday and 7 is Sunday). + * - .. _date_spec_yeardays: + + .. index:: + pair: date_spec; yeardays + + yeardays + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + ordinal day of the year is in this range. Allowed integers: 1-366. + * - .. _date_spec_months: + + .. index:: + pair: date_spec; months + + months + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + month is in this range. Allowed integers: 1-12 where 1 is January and 12 + is December. + * - .. _date_spec_weeks: + + .. 
index:: + pair: date_spec; weeks + + weeks + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + ordinal week of the year is in this range. Allowed integers: 1-53. + * - .. _date_spec_years: + + .. index:: + pair: date_spec; years + + years + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + year according to the Gregorian calendar is in this range. + * - .. _date_spec_weekyears: + + .. index:: + pair: date_spec; weekyears + + weekyears + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + year in which the week started (according to the ISO 8601 standard) is + in this range. + * - .. _date_spec_moon: + + .. index:: + pair: date_spec; moon + + moon + - :ref:`range <range>` + - + - If this is set, the expression is satisfied only if the current date's + phase of the moon is in this range. Allowed values are 0 to 7 where 0 is + the new moon and 4 is the full moon. *(deprecated since 2.1.6)* .. note:: Pacemaker can calculate when evaluation of a ``date_expression`` with an ``operation`` of ``gt``, ``lt``, or ``in_range`` will next change, @@ -400,6 +343,8 @@ are not supported. need to perform first, and the load of the machine. +.. _duration_element: + .. index:: single: duration pair: XML element; duration @@ -407,64 +352,97 @@ are not supported. Durations _________ -A ``duration`` is used to calculate a value for ``end`` when one is not -supplied to ``in_range`` operations. It contains one or more attributes each -containing a single number. Any attribute not supplied is ignored. - -.. table:: **Attributes of a duration Element** - :widths: 1 3 - - +---------------+-----------------------------------------------------------+ - | Attribute | Description | - +===============+===========================================================+ - | id | .. 
index:: | - | | pair: id; duration | - | | | - | | A unique name for this element (required) | - +---------------+-----------------------------------------------------------+ - | seconds | .. index:: | - | | pair: seconds; duration | - | | | - | | This many seconds will be added to the total duration | - +---------------+-----------------------------------------------------------+ - | minutes | .. index:: | - | | pair: minutes; duration | - | | | - | | This many minutes will be added to the total duration | - +---------------+-----------------------------------------------------------+ - | hours | .. index:: | - | | pair: hours; duration | - | | | - | | This many hours will be added to the total duration | - +---------------+-----------------------------------------------------------+ - | days | .. index:: | - | | pair: days; duration | - | | | - | | This many days will be added to the total duration | - +---------------+-----------------------------------------------------------+ - | weeks | .. index:: | - | | pair: weeks; duration | - | | | - | | This many weeks will be added to the total duration | - +---------------+-----------------------------------------------------------+ - | months | .. index:: | - | | pair: months; duration | - | | | - | | This many months will be added to the total duration | - +---------------+-----------------------------------------------------------+ - | years | .. index:: | - | | pair: years; duration | - | | | - | | This many years will be added to the total duration | - +---------------+-----------------------------------------------------------+ - - -Example Time-Based Expressions -______________________________ - -A small sample of how time-based expressions can be used: - -.. topic:: True if now is any time in the year 2005 +A ``duration`` element is used within a ``date_expression`` to calculate an +ending value for ``in_range`` operations when ``end`` is not supplied. + +.. 
list-table:: **Attributes of a duration Element** + :class: longtable + :widths: 1 1 1 4 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _duration_id: + + .. index:: + pair: duration; id + + id + - :ref:`id <id>` + - + - A unique name for this element (required) + * - .. _duration_seconds: + + .. index:: + pair: duration; seconds + + seconds + - :ref:`integer <integer>` + - 0 + - Number of seconds to add to the total duration + * - .. _duration_minutes: + + .. index:: + pair: duration; minutes + + minutes + - :ref:`integer <integer>` + - 0 + - Number of minutes to add to the total duration + * - .. _duration_hours: + + .. index:: + pair: duration; hours + + hours + - :ref:`integer <integer>` + - 0 + - Number of hours to add to the total duration + * - .. _duration_days: + + .. index:: + pair: duration; days + + days + - :ref:`integer <integer>` + - 0 + - Number of days to add to the total duration + * - .. _duration_weeks: + + .. index:: + pair: duration; weeks + + weeks + - :ref:`integer <integer>` + - 0 + - Number of weeks to add to the total duration + * - .. _duration_months: + + .. index:: + pair: duration; months + + months + - :ref:`integer <integer>` + - 0 + - Number of months to add to the total duration + * - .. _duration_years: + + .. index:: + pair: duration; years + + years + - :ref:`integer <integer>` + - 0 + - Number of years to add to the total duration + + +Example Date/Time Expressions +_____________________________ + + +.. topic:: Satisfied if the current year is 2005 .. code-block:: xml @@ -497,7 +475,7 @@ A small sample of how time-based expressions can be used: Note that the ``16`` matches all the way through ``16:59:59``, because the numeric value of the hour still matches. -.. topic:: 9 a.m. to 6 p.m. Monday through Friday or anytime Saturday +.. topic:: 9 a.m. to 6 p.m. Monday through Friday, or anytime Saturday .. 
code-block:: xml @@ -538,63 +516,227 @@ A small sample of how time-based expressions can be used: </date_expression> <date_expression id="date_expr6-2" operation="in_range" start="2005-03-01" end="2005-04-01"/> + </date_expression> </rule> .. note:: Because no time is specified with the above dates, 00:00:00 is implied. This means that the range includes all of 2005-03-01 but - none of 2005-04-01. You may wish to write ``end`` as - ``"2005-03-31T23:59:59"`` to avoid confusion. + only the first second of 2005-04-01. You may wish to write ``end`` + as ``"2005-03-31T23:59:59"`` to avoid confusion. .. index:: + single: rule; node attribute expression + single: node attribute; rule expression + pair: XML element; expression + +.. _node_attribute_expressions: + +Node Attribute Expressions +########################## + +The ``expression`` element configures a rule condition based on the value of a +node attribute. It is allowed in rules in location constraints and in +``instance_attributes`` elements within ``bundle``, ``clone``, ``group``, +``op``, ``primitive``, and ``template`` elements. + +.. list-table:: **Attributes of an expression Element** + :class: longtable + :widths: 1 1 3 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + + * - .. _expression_id: + + .. index:: + pair: expression; id + + id + - :ref:`id <id>` + - + - A unique name for this element (required) + * - .. _expression_attribute: + + .. index:: + pair: expression; attribute + + attribute + - :ref:`text <text>` + - + - Name of the node attribute to test (required) + * - .. _expression_operation: + + .. index:: + pair: expression; operation + + operation + - :ref:`enumeration <enumeration>` + - + - The comparison to perform (required). 
Allowed values: + + * ``defined``: The expression is satisfied if the node has the named + attribute + * ``not_defined``: The expression is satisfied if the node does not have + the named attribute + * ``lt``: The expression is satisfied if the node attribute value is + less than the reference value + * ``gt``: The expression is satisfied if the node attribute value is + greater than the reference value + * ``lte``: The expression is satisfied if the node attribute value is + less than or equal to the reference value + * ``gte``: The expression is satisfied if the node attribute value is + greater than or equal to the reference value + * ``eq``: The expression is satisfied if the node attribute value is + equal to the reference value + * ``ne``: The expression is satisfied if the node attribute value is not + equal to the reference value + * - .. _expression_type: + + .. index:: + pair: expression; type + + type + - :ref:`enumeration <enumeration>` + - The default type for ``lt``, ``gt``, ``lte``, and ``gte`` operations is + ``number`` if either value contains a decimal point character, or + ``integer`` otherwise. The default type for all other operations is + ``string``. If a numeric parse fails for either value, then the values + are compared as type ``string``. + - How to interpret values. Allowed values are ``string``, ``integer`` + *(since 2.0.5)*, ``number``, and ``version``. ``integer`` truncates + floating-point values if necessary before performing a 64-bit integer + comparison. ``number`` performs a double-precision floating-point + comparison *(32-bit integer before 2.0.5)*. + * - .. _expression_value: + + .. index:: + pair: expression; value + + value + - :ref:`text <text>` + - + - Reference value to compare node attribute against (used only with, and + required for, operations other than ``defined`` and ``not_defined``) + * - .. _expression_value_source: + + .. 
index:: + pair: expression; value-source + + value-source + - :ref:`enumeration <enumeration>` + - ``literal`` + - How the reference value is obtained. Allowed values: + + * ``literal``: ``value`` contains the literal reference value to compare + * ``param``: ``value`` contains the name of a resource parameter to + compare (valid only in the context of a location constraint) + * ``meta``: ``value`` is the name of a resource meta-attribute to + compare (valid only in the context of a location constraint) + +.. _node-attribute-expressions-special: + +In addition to custom node attributes defined by the administrator, the cluster +defines special, built-in node attributes for each node that can also be used +in rule expressions. + +.. list-table:: **Built-in Node Attributes** + :class: longtable + :widths: 1 4 + :header-rows: 1 + + * - Name + - Description + * - #uname + - :ref:`Node name <node_name>` + * - #id + - Node ID + * - #kind + - Node type (``cluster`` for cluster nodes, ``remote`` for Pacemaker + Remote nodes created with the ``ocf:pacemaker:remote`` resource, and + ``container`` for Pacemaker Remote guest nodes and bundle nodes) + * - #is_dc + - ``true`` if this node is the cluster's Designated Controller (DC), + ``false`` otherwise + * - #cluster-name + - The value of the ``cluster-name`` cluster property, if set + * - #site-name + - The value of the ``site-name`` node attribute, if set, otherwise + identical to ``#cluster-name`` + + +.. _rsc_expression: + +.. index:: single: rule; resource expression single: resource; rule expression pair: XML element; rsc_expression -Resource Expressions -#################### - -An ``rsc_expression`` *(since 2.0.5)* is a rule condition based on a resource -agent's properties. This rule is only valid within an ``rsc_defaults`` or -``op_defaults`` context. None of the matching attributes of ``class``, -``provider``, and ``type`` are required. If one is omitted, all values of that -attribute will match. 
For instance, omitting ``type`` means every type will -match. - -.. table:: **Attributes of a rsc_expression Element** - :widths: 1 3 - - +---------------+-----------------------------------------------------------+ - | Attribute | Description | - +===============+===========================================================+ - | id | .. index:: | - | | pair: id; rsc_expression | - | | | - | | A unique name for this element (required) | - +---------------+-----------------------------------------------------------+ - | class | .. index:: | - | | pair: class; rsc_expression | - | | | - | | The standard name to be matched against resource agents | - +---------------+-----------------------------------------------------------+ - | provider | .. index:: | - | | pair: provider; rsc_expression | - | | | - | | If given, the vendor to be matched against resource | - | | agents (only relevant when ``class`` is ``ocf``) | - +---------------+-----------------------------------------------------------+ - | type | .. index:: | - | | pair: type; rsc_expression | - | | | - | | The name of the resource agent to be matched | - +---------------+-----------------------------------------------------------+ - -Example Resource-Based Expressions -__________________________________ +Resource Type Expressions +######################### + +The ``rsc_expression`` element *(since 2.0.5)* configures a rule condition +based on the agent used for a resource. It is allowed in rules in a +``meta_attributes`` element within a ``rsc_defaults`` or ``op_defaults`` +element. + +.. list-table:: **Attributes of a rsc_expression Element** + :class: longtable + :widths: 1 1 1 4 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _rsc_expression_id: + + .. index:: + pair: rsc_expression; id + + id + - :ref:`id <id>` + - + - A unique name for this element (required) + * - .. _rsc_expression_class: + + .. 
index:: + pair: rsc_expression; class + + class + - :ref:`text <text>` + - + - If this is set, the expression is satisfied only if the resource's agent + standard matches this value + * - .. _rsc_expression_provider: + + .. index:: + pair: rsc_expression; provider + + provider + - :ref:`text <text>` + - + - If this is set, the expression is satisfied only if the resource's agent + provider matches this value + * - .. _rsc_expression_type: + + .. index:: + pair: rsc_expression; type -A small sample of how resource-based expressions can be used: + type + - :ref:`text <text>` + - + - If this is set, the expression is satisfied only if the resource's agent + type matches this value -.. topic:: True for all ``ocf:heartbeat:IPaddr2`` resources + +Example Resource Type Expressions +_________________________________ + +.. topic:: Satisfied for ``ocf:heartbeat:IPaddr2`` resources .. code-block:: xml @@ -602,7 +744,7 @@ A small sample of how resource-based expressions can be used: <rsc_expression id="rule_expr1" class="ocf" provider="heartbeat" type="IPaddr2"/> </rule> -.. topic:: Provider doesn't apply to non-OCF resources +.. topic:: Satisfied for ``stonith:fence_xvm`` resources .. code-block:: xml @@ -611,49 +753,64 @@ A small sample of how resource-based expressions can be used: </rule> +.. _op_expression: + .. index:: single: rule; operation expression single: operation; rule expression pair: XML element; op_expression -Operation Expressions -##################### +Operation Type Expressions +########################## + +The ``op_expression`` element *(since 2.0.5)* configures a rule condition based +on a resource operation name and interval. It is allowed in rules in a +``meta_attributes`` element within an ``op_defaults`` element. + +.. list-table:: **Attributes of an op_expression Element** + :class: longtable + :widths: 1 1 1 4 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _op_expression_id: + + .. 
index:: + pair: op_expression; id + + id + - :ref:`id <id>` + - + - A unique name for this element (required) + * - .. _op_expression_name: + + .. index:: + pair: op_expression; name + + name + - :ref:`text <text>` + - + - The expression is satisfied only if the operation's name matches this + value (required) + * - .. _op_expression_interval: + + .. index:: + pair: op_expression; interval + interval + - :ref:`duration <duration>` + - + - If this is set, the expression is satisfied only if the operation's + interval matches this value -An ``op_expression`` *(since 2.0.5)* is a rule condition based on an action of -some resource agent. This rule is only valid within an ``op_defaults`` context. - -.. table:: **Attributes of an op_expression Element** - :widths: 1 3 - - +---------------+-----------------------------------------------------------+ - | Attribute | Description | - +===============+===========================================================+ - | id | .. index:: | - | | pair: id; op_expression | - | | | - | | A unique name for this element (required) | - +---------------+-----------------------------------------------------------+ - | name | .. index:: | - | | pair: name; op_expression | - | | | - | | The action name to match against. This can be any action | - | | supported by the resource agent; common values include | - | | ``monitor``, ``start``, and ``stop`` (required). | - +---------------+-----------------------------------------------------------+ - | interval | .. index:: | - | | pair: interval; op_expression | - | | | - | | The interval of the action to match against. If not given,| - | | only the name attribute will be used to match. | - +---------------+-----------------------------------------------------------+ - -Example Operation-Based Expressions -___________________________________ - -A small sample of how operation-based expressions can be used: - -.. 
topic:: True for all monitor actions + +Example Operation Type Expressions +__________________________________ + +.. topic:: Expression is satisfied for all monitor actions .. code-block:: xml @@ -661,7 +818,7 @@ A small sample of how operation-based expressions can be used: <op_expression id="rule_expr1" name="monitor"/> </rule> -.. topic:: True for all monitor actions with a 10 second interval +.. topic:: Expression is satisfied for all monitor actions with a 10-second interval .. code-block:: xml @@ -670,15 +827,68 @@ A small sample of how operation-based expressions can be used: </rule> +.. _location_rule: + .. index:: pair: location constraint; rule Using Rules to Determine Resource Location ########################################## -A location constraint may contain one or more top-level rules. The cluster will -act as if there is a separate location constraint for each rule that evaluates -as true. +If a :ref:`location constraint <location-constraint>` contains a rule, the +cluster will apply the constraint to all nodes where the rule is satisfied. +This acts as if identical location constraints without rules were defined for +each of the nodes. + +In the context of a location constraint, ``rule`` elements may take additional +attributes. These have an effect only when set for the constraint's top-level +``rule``; they are ignored if set on a subrule. + +.. list-table:: **Extra Attributes of a rule Element in a Location Constraint** + :class: longtable + :widths: 2 2 1 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + + * - .. _rule_role: + + .. index:: + pair: rule; role + + role + - :ref:`enumeration <enumeration>` + - ``Started`` + - If this is set in the constraint's top-level rule, the constraint acts + as if ``role`` were set to this in the ``rsc_location`` element. + + * - .. _rule_score: + + .. 
index:: + pair: rule; score + + score + - :ref:`score <score>` + - + - If this is set in the constraint's top-level rule, the constraint acts + as if ``score`` were set to this in the ``rsc_location`` element. + Only one of ``score`` and ``score-attribute`` may be set. + + * - .. _rule_score_attribute: + + .. index:: + pair: rule; score-attribute + + score-attribute + - :ref:`text <text>` + - + - If this is set in the constraint's top-level rule, the constraint acts + as if ``score`` were set to the value of this node attribute on each + node where the rule is satisfied. Only one of ``score`` and + ``score-attribute`` may be set. Consider the following simple location constraint: @@ -689,7 +899,7 @@ Consider the following simple location constraint: <rsc_location id="ban-apache-on-node3" rsc="webserver" score="-INFINITY" node="node3"/> -The same constraint can be more verbosely written using a rule: +The same constraint can be written more verbosely using a rule: .. topic:: Prevent resource ``webserver`` from running on node ``node3`` using a rule @@ -703,15 +913,14 @@ The same constraint can be more verbosely written using a rule: </rsc_location> The advantage of using the expanded form is that one could add more expressions -(for example, limiting the constraint to certain days of the week), or activate -the constraint by some node attribute other than node name. +(for example, limiting the constraint to certain days of the week). Location Rules Based on Other Node Properties _____________________________________________ -The expanded form allows us to match on node properties other than its name. -If we rated each machine's CPU power such that the cluster had the following -nodes section: +The expanded form allows us to match node attributes other than its name. As an +example, consider this configuration of custom node attributes specifying each +node's CPU capacity: .. 
topic:: Sample node section with node attributes @@ -730,8 +939,7 @@ nodes section: </node> </nodes> -then we could prevent resources from running on underpowered machines with this -rule: +We can use a rule to prevent a resource from running on underpowered machines: .. topic:: Rule using a node attribute (to be used inside a location constraint) @@ -746,11 +954,13 @@ Using ``score-attribute`` Instead of ``score`` ______________________________________________ When using ``score-attribute`` instead of ``score``, each node matched by the -rule has its score adjusted differently, according to its value for the named -node attribute. Thus, in the previous example, if a rule inside a location -constraint for a resource used ``score-attribute="cpu_mips"``, ``c001n01`` -would have its preference to run the resource increased by ``1234`` whereas -``c001n02`` would have its preference increased by ``5678``. +rule has its score adjusted according to its value for the named node +attribute. + +In the previous example, if the location constraint rule used +``score-attribute="cpu_mips"`` instead of ``score="-INFINITY"``, node +``c001n01`` would have its preference to run the resource increased by 1234 +whereas node ``c001n02`` would have its preference increased by 5678. .. _s-rsc-pattern-rules: @@ -758,16 +968,15 @@ would have its preference to run the resource increased by ``1234`` whereas Specifying location scores using pattern submatches ___________________________________________________ -Location constraints may use ``rsc-pattern`` to apply the constraint to all -resources whose IDs match the given pattern (see :ref:`s-rsc-pattern`). The -pattern may contain up to 9 submatches in parentheses, whose values may be used -as ``%1`` through ``%9`` in a rule's ``score-attribute`` or a rule expression's -``attribute``. +Location constraints may use :ref:`rsc-pattern <s-rsc-pattern>` to apply the +constraint to all resources whose IDs match the given pattern. 
The pattern may +contain up to 9 submatches in parentheses, whose values may be used as ``%1`` +through ``%9`` in a ``rule`` element's ``score-attribute`` or an ``expression`` +element's ``attribute``. -As an example, the following configuration (only relevant parts are shown) -gives the resources **server-httpd** and **ip-httpd** a preference of 100 on -**node1** and 50 on **node2**, and **ip-gateway** a preference of -100 on -**node1** and 200 on **node2**. +For example, the following configuration excerpt gives the resources +**server-httpd** and **ip-httpd** a preference of 100 on node1 and 50 on node2, +and **ip-gateway** a preference of -100 on node1 and 200 on node2. .. topic:: Location constraint using submatches @@ -807,6 +1016,8 @@ gives the resources **server-httpd** and **ip-httpd** a preference of 100 on </constraints> +.. _option_rule: + .. index:: pair: cluster option; rule pair: instance attribute; rule @@ -820,37 +1031,34 @@ Using Rules to Define Options Rules may be used to control a variety of options: -* :ref:`Cluster options <cluster_options>` (``cluster_property_set`` elements) -* :ref:`Node attributes <node_attributes>` (``instance_attributes`` or +* :ref:`Cluster options <cluster_options>` (as ``cluster_property_set`` + elements) +* :ref:`Node attributes <node_attributes>` (as ``instance_attributes`` or ``utilization`` elements inside a ``node`` element) -* :ref:`Resource options <resource_options>` (``utilization``, +* :ref:`Resource options <resource_options>` (as ``utilization``, ``meta_attributes``, or ``instance_attributes`` elements inside a resource definition element or ``op``, ``rsc_defaults``, ``op_defaults``, or ``template`` element) -* :ref:`Operation properties <operation_properties>` (``meta_attributes`` +* :ref:`Operation options <operation_properties>` (as ``meta_attributes`` elements inside an ``op`` or ``op_defaults`` element) +* :ref:`Alert options <alerts>` (as ``instance_attributes`` or + ``meta_attributes`` elements 
inside an ``alert`` or ``recipient`` element) -.. note:: - - Attribute-based expressions for meta-attributes can only be used within - ``operations`` and ``op_defaults``. They will not work with resource - configuration or ``rsc_defaults``. Additionally, attribute-based - expressions cannot be used with cluster options. Using Rules to Control Resource Options _______________________________________ Often some cluster nodes will be different from their peers. Sometimes, -these differences -- e.g. the location of a binary or the names of network -interfaces -- require resources to be configured differently depending +these differences (for example, the location of a binary, or the names of +network interfaces) require resources to be configured differently depending on the machine they're hosted on. -By defining multiple ``instance_attributes`` objects for the resource and +By defining multiple ``instance_attributes`` elements for the resource and adding a rule to each, we can easily handle these special cases. In the example below, ``mySpecialRsc`` will use eth1 and port 9999 when run on -``node1``, eth2 and port 8888 on ``node2`` and default to eth0 and port 9999 -for all other nodes. +node1, eth2 and port 8888 on node2 and default to eth0 and port 9999 for all +other nodes. .. topic:: Defining different resource options based on the node name @@ -878,20 +1086,20 @@ for all other nodes. </instance_attributes> </primitive> -The order in which ``instance_attributes`` objects are evaluated is determined -by their score (highest to lowest). If not supplied, the score defaults to -zero. Objects with an equal score are processed in their listed order. If the -``instance_attributes`` object has no rule, or a ``rule`` that evaluates to -``true``, then for any parameter the resource does not yet have a value for, -the resource will use the parameter values defined by the ``instance_attributes``. 
+Multiple ``instance_attributes`` elements are evaluated from highest score to +lowest. If not supplied, the score defaults to zero. Objects with equal scores +are processed in their listed order. If an ``instance_attributes`` object has +no rule or a satisfied ``rule``, then for any parameter the resource does not +yet have a value for, the resource will use the value defined by the +``instance_attributes``. For example, given the configuration above, if the resource is placed on ``node1``: * ``special-node1`` has the highest score (3) and so is evaluated first; its - rule evaluates to ``true``, so ``interface`` is set to ``eth1``. -* ``special-node2`` is evaluated next with score 2, but its rule evaluates to - ``false``, so it is ignored. + rule is satisfied, so ``interface`` is set to ``eth1``. +* ``special-node2`` is evaluated next with score 2, but its rule is not + satisfied, so it is ignored. * ``defaults`` is evaluated last with score 1, and has no rule, so its values are examined; ``interface`` is already defined, so the value here is not used, but ``port`` is not yet defined, so ``port`` is set to ``9999``. @@ -899,11 +1107,12 @@ For example, given the configuration above, if the resource is placed on Using Rules to Control Resource Defaults ________________________________________ -Rules can be used for resource and operation defaults. The following example -illustrates how to set a different ``resource-stickiness`` value during and -outside work hours. This allows resources to automatically move back to their -most preferred hosts, but at a time that (in theory) does not interfere with -business activities. +Rules can be used for resource and operation defaults. + +The following example illustrates how to set a different +``resource-stickiness`` value during and outside work hours. This allows +resources to automatically move back to their most preferred hosts, but at a +time that (in theory) does not interfere with business activities. .. 
topic:: Change ``resource-stickiness`` during working hours @@ -923,20 +1132,8 @@ business activities. </meta_attributes> </rsc_defaults> -Rules may be used similarly in ``instance_attributes`` or ``utilization`` -blocks. - -Any single block may directly contain only a single rule, but that rule may -itself contain any number of rules. - -``rsc_expression`` and ``op_expression`` blocks may additionally be used to -set defaults on either a single resource or across an entire class of resources -with a single rule. ``rsc_expression`` may be used to select resource agents -within both ``rsc_defaults`` and ``op_defaults``, while ``op_expression`` may -only be used within ``op_defaults``. If multiple rules succeed for a given -resource agent, the last one specified will be the one that takes effect. As -with any other rule, boolean operations may be used to make more complicated -expressions. +``rsc_expression`` is valid within both ``rsc_defaults`` and ``op_defaults``; +``op_expression`` is valid only within ``op_defaults``. .. topic:: Default all IPaddr2 resources to stopped diff --git a/doc/sphinx/Pacemaker_Explained/utilization.rst b/doc/sphinx/Pacemaker_Explained/utilization.rst index 87eef60..9f3b640 100644 --- a/doc/sphinx/Pacemaker_Explained/utilization.rst +++ b/doc/sphinx/Pacemaker_Explained/utilization.rst @@ -3,39 +3,34 @@ Utilization and Placement Strategy ---------------------------------- -Pacemaker decides where to place a resource according to the resource -assignment scores on every node. The resource will be assigned to the -node where the resource has the highest score. +Pacemaker decides where a resource should run by assigning a score to every +node, considering factors such as the resource's constraints and stickiness, +then assigning the resource to the node with the highest score. 
-If the resource assignment scores on all the nodes are equal, by the default -placement strategy, Pacemaker will choose a node with the least number of -assigned resources for balancing the load. If the number of resources on each -node is equal, the first eligible node listed in the CIB will be chosen to run -the resource. +If more than one node has the highest score, Pacemaker by default chooses +the one with the least number of assigned resources, or if that is also the +same, the one listed first in the CIB. This results in simple load balancing. -Often, in real-world situations, different resources use significantly -different proportions of a node's capacities (memory, I/O, etc.). -We cannot balance the load ideally just according to the number of resources -assigned to a node. Besides, if resources are placed such that their combined -requirements exceed the provided capacity, they may fail to start completely or -run with degraded performance. +Sometimes, simple load balancing is insufficient. Different resources can use +significantly different amounts of a node's memory, CPU, and other capacities. +Some combinations of resources may strain a node's capacity, causing them to +fail or have degraded performance. Or, an administrator may prefer to +concentrate resources rather than balance them, to minimize energy consumption +by spare nodes. -To take these factors into account, Pacemaker allows you to configure: - -#. The capacity a certain node provides. - -#. The capacity a certain resource requires. - -#. An overall strategy for placement of resources. +Pacemaker offers flexibility by allowing you to configure *utilization +attributes* specifying capacities that each node provides and each resource +requires, as well as a *placement strategy*. Utilization attributes ###################### -To configure the capacity that a node provides or a resource requires, -you can use *utilization attributes* in ``node`` and ``resource`` objects. 
-You can name utilization attributes according to your preferences and define as -many name/value pairs as your configuration needs. However, the attributes' -values must be integers. +You can define any number of utilization attributes to represent capacities of +interest (CPU, memory, I/O bandwidth, etc.). Their values must be integers. + +The nature and units of the capacities are irrelevant to Pacemaker. It just +makes sure that each node has sufficient capacity to run the resources assigned +to it. .. topic:: Specifying CPU and RAM capacities of two nodes @@ -77,16 +72,9 @@ values must be integers. </utilization> </primitive> -A node is considered eligible for a resource if it has sufficient free -capacity to satisfy the resource's requirements. The nature of the required -or provided capacities is completely irrelevant to Pacemaker -- it just makes -sure that all capacity requirements of a resource are satisfied before placing -a resource to a node. - -Utilization attributes used on a node object can also be *transient* *(since 2.1.6)*. -These attributes are added to a ``transient_attributes`` section for the node -and are forgotten by the cluster when the node goes offline. The ``attrd_updater`` -tool can be used to set these attributes. +Utilization attributes for a node may be permanent or *(since 2.1.6)* +transient. Permanent attributes persist after Pacemaker is restarted, while +transient attributes do not. .. topic:: Transient utilization attribute for node cluster-1 @@ -98,98 +86,70 @@ tool can be used to set these attributes. </utilization> </transient_attributes> +Utilization attributes may be configured only on primitive resources. Pacemaker +will consider a collective resource's utilization based on the primitives it +contains. + .. note:: Utilization is supported for bundles *(since 2.1.3)*, but only for bundles - with an inner primitive. 
Any resource utilization values should be specified - for the inner primitive, but any priority meta-attribute should be specified - for the outer bundle. + with an inner primitive. Placement Strategy ################## -After you have configured the capacities your nodes provide and the -capacities your resources require, you need to set the ``placement-strategy`` -in the global cluster options, otherwise the capacity configurations have -*no effect*. +The ``placement-strategy`` cluster option determines how utilization attributes +are used. Its allowed values are: -Four values are available for the ``placement-strategy``: +* ``default``: The cluster ignores utilization values, and places resources + according to (from highest to lowest precedence) assignment scores, the + number of resources already assigned to each node, and the order nodes are + listed in the CIB. -* **default** +* ``utilization``: The cluster uses the same method as the default strategy to + assign a resource to a node, but only nodes with sufficient free capacity to + meet the resource's requirements are eligible. - Utilization values are not taken into account at all. - Resources are assigned according to assignment scores. If scores are equal, - resources are evenly distributed across nodes. +* ``balanced``: Only nodes with sufficient free capacity are eligible to run a + resource, and the cluster load-balances based on the sum of resource + utilization values rather than the number of resources. -* **utilization** +* ``minimal``: Only nodes with sufficient free capacity are eligible to run a + resource, and the cluster concentrates resources on as few nodes as possible. - Utilization values are taken into account *only* when deciding whether a node - is considered eligible (i.e. whether it has sufficient free capacity to satisfy - the resource's requirements). Load-balancing is still done based on the - number of resources assigned to a node. 
-* **balanced** +To look at it another way, when deciding where to run a resource, the cluster +starts by considering all nodes, then applies these criteria one by one until +a single node remains: - Utilization values are taken into account when deciding whether a node - is eligible to serve a resource *and* when load-balancing, so an attempt is - made to spread the resources in a way that optimizes resource performance. +* If ``placement-strategy`` is ``utilization``, ``balanced``, or ``minimal``, + consider only nodes that have sufficient spare capacities to meet the + resource's requirements. -* **minimal** +* Consider only nodes with the highest score for the resource. Scores take into + account factors such as the node's health; the resource's stickiness, failure + count on the node, and migration threshold; and constraints. - Utilization values are taken into account *only* when deciding whether a node - is eligible to serve a resource. For load-balancing, an attempt is made to - concentrate the resources on as few nodes as possible, thereby enabling - possible power savings on the remaining nodes. - -Set ``placement-strategy`` with ``crm_attribute``: - - .. code-block:: none - - # crm_attribute --name placement-strategy --update balanced - -Now Pacemaker will ensure the load from your resources will be distributed -evenly throughout the cluster, without the need for convoluted sets of -colocation constraints. - -Assignment Details -################## +* If ``placement-strategy`` is ``balanced``, consider only nodes with the most + free capacity. -Which node is preferred to get consumed first when assigning resources? -_______________________________________________________________________ +* If ``placement-strategy`` is ``default``, ``utilization``, or ``balanced``, + consider only nodes with the least number of assigned resources. -* The node with the highest node weight gets consumed first. 
Node weight - is a score maintained by the cluster to represent node health. +* If more than one node is eligible after considering all other criteria, + choose the one listed first in the CIB. -* If multiple nodes have the same node weight: +How Multiple Capacities Combine +############################### - * If ``placement-strategy`` is ``default`` or ``utilization``, - the node that has the least number of assigned resources gets consumed first. +If only one type of utilization attribute has been defined, free capacity is a +simple numeric comparison. - * If their numbers of assigned resources are equal, - the first eligible node listed in the CIB gets consumed first. +If multiple utilization attributes have been defined, then the node that has +the highest value in the most attribute types has the most free capacity. - * If ``placement-strategy`` is ``balanced``, - the node that has the most free capacity gets consumed first. - - * If the free capacities of the nodes are equal, - the node that has the least number of assigned resources gets consumed first. - - * If their numbers of assigned resources are equal, - the first eligible node listed in the CIB gets consumed first. - - * If ``placement-strategy`` is ``minimal``, - the first eligible node listed in the CIB gets consumed first. - -Which node has more free capacity? -__________________________________ - -If only one type of utilization attribute has been defined, free capacity -is a simple numeric comparison. - -If multiple types of utilization attributes have been defined, then -the node that is numerically highest in the the most attribute types -has the most free capacity. For example: +For example: * If ``nodeA`` has more free ``cpus``, and ``nodeB`` has more free ``memory``, then their free capacities are equal. @@ -197,41 +157,46 @@ has the most free capacity. 
For example: * If ``nodeA`` has more free ``cpus``, while ``nodeB`` has more free ``memory`` and ``storage``, then ``nodeB`` has more free capacity. -Which resource is preferred to be assigned first? -_________________________________________________ +Order of Resource Assignment +############################ -* The resource that has the highest ``priority`` (see :ref:`resource_options`) gets - assigned first. +When assigning resources to nodes, the cluster chooses the next one to assign +by considering the following criteria one by one until a single resource is +selected: -* If their priorities are equal, check whether they are already running. The - resource that has the highest score on the node where it's running gets assigned - first, to prevent resource shuffling. +* Assign the resource with the highest :ref:`priority <meta_priority>`. -* If the scores above are equal or the resources are not running, the resource has - the highest score on the preferred node gets assigned first. +* If any resources are already active, assign the one with the highest score on + its current node. This avoids unnecessary resource shuffling. -* If the scores above are equal, the first runnable resource listed in the CIB - gets assigned first. +* Assign the resource with the highest score on its preferred node. -Limitations and Workarounds -########################### +* If more than one resource remains after considering all other criteria, + assign the one of them that is listed first in the CIB. + +.. note:: + + For bundles, only the priority set for the bundle itself matters. If the + bundle contains a primitive, the primitive's priority is ignored. 
+ +Limitations +########### The type of problem Pacemaker is dealing with here is known as the -`knapsack problem <http://en.wikipedia.org/wiki/Knapsack_problem>`_ and falls into -the `NP-complete <http://en.wikipedia.org/wiki/NP-complete>`_ category of computer -science problems -- a fancy way of saying "it takes a really long time -to solve". +`knapsack problem <https://en.wikipedia.org/wiki/Knapsack_problem>`_ and falls +into the `NP-complete <https://en.wikipedia.org/wiki/NP-completeness>`_ +category of computer science problems -- a fancy way of saying "it takes a +really long time to solve". -Clearly in a HA cluster, it's not acceptable to spend minutes, let alone hours -or days, finding an optimal solution while services remain unavailable. +In a high-availability cluster, it is unacceptable to spend minutes, let alone +hours or days, finding an optimal solution while services are down. -So instead of trying to solve the problem completely, Pacemaker uses a -*best effort* algorithm for determining which node should host a particular -service. This means it arrives at a solution much faster than traditional -linear programming algorithms, but by doing so at the price of leaving some -services stopped. +Instead of trying to solve the problem completely, Pacemaker uses a "best +effort" algorithm. This arrives at a quick solution, but at the cost of +possibly leaving some resources stopped unnecessarily. -In the contrived example at the start of this chapter: +Using the example configuration at the start of this chapter, and the balanced +placement strategy: * ``rsc-small`` would be assigned to ``node1`` @@ -239,26 +204,23 @@ In the contrived example at the start of this chapter: * ``rsc-large`` would remain inactive -Which is not ideal. - -There are various approaches to dealing with the limitations of -pacemaker's placement strategy: +That is not ideal. 
There are various approaches to dealing with the limitations +of Pacemaker's placement strategy: * **Ensure you have sufficient physical capacity.** - It might sound obvious, but if the physical capacity of your nodes is (close to) - maxed out by the cluster under normal conditions, then failover isn't going to - go well. Even without the utilization feature, you'll start hitting timeouts and - getting secondary failures. + It might sound obvious, but if the physical capacity of your nodes is maxed + out even under normal conditions, failover isn't going to go well. Even + without the utilization feature, you'll start hitting timeouts and getting + secondary failures. -* **Build some buffer into the capabilities advertised by the nodes.** +* **Build some buffer into the capacities advertised by the nodes.** - Advertise slightly more resources than we physically have, on the (usually valid) - assumption that a resource will not use 100% of the configured amount of - CPU, memory and so forth *all* the time. This practice is sometimes called *overcommit*. + Advertise slightly more resources than we physically have, on the (usually + valid) assumption that resources will not always use 100% of their + configured utilization. This practice is sometimes called *overcommitting*. * **Specify resource priorities.** - If the cluster is going to sacrifice services, it should be the ones you care - about (comparatively) the least. Ensure that resource priorities are properly set - so that your most important resources are scheduled first. + If the cluster is going to sacrifice services, it should be the ones you + care about the least. diff --git a/doc/sphinx/Pacemaker_Remote/options.rst b/doc/sphinx/Pacemaker_Remote/options.rst index 4821829..00c56fb 100644 --- a/doc/sphinx/Pacemaker_Remote/options.rst +++ b/doc/sphinx/Pacemaker_Remote/options.rst @@ -53,6 +53,10 @@ and define its connection parameters. 
+------------------------+-----------------+-----------------------------------------------------------+ | remote-connect-timeout | 60s | How long before a pending guest connection will time out. | +------------------------+-----------------+-----------------------------------------------------------+ + | remote-allow-migrate | TRUE | The ``allow-migrate`` meta-attribute value for the | + | | | implicit remote connection resource | + | | | (``ocf:pacemaker:remote``). | + +------------------------+-----------------+-----------------------------------------------------------+ .. index:: pair: configuration; remote node diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in index 556eb72..a921b3a 100644 --- a/doc/sphinx/conf.py.in +++ b/doc/sphinx/conf.py.in @@ -31,15 +31,17 @@ rst_prolog=""" .. |CFS_DISTRO| replace:: AlmaLinux .. |CFS_DISTRO_VER| replace:: 9 .. |CRM_BLACKBOX_DIR| replace:: ``%CRM_BLACKBOX_DIR%`` +.. |CRM_CONFIG_DIR| replace:: ``%CRM_CONFIG_DIR%`` .. |CRM_DAEMON_GROUP| replace:: ``%CRM_DAEMON_GROUP%`` .. |CRM_DAEMON_USER| replace:: ``%CRM_DAEMON_USER%`` -.. |CRM_DAEMON_USER_RAW| replace:: %CRM_DAEMON_USER% .. |CRM_SCHEMA_DIRECTORY| replace:: %CRM_SCHEMA_DIRECTORY% .. |PCMK_AUTHKEY_FILE| replace:: %PACEMAKER_CONFIG_DIR%/authkey .. |PCMK_CONFIG_FILE| replace:: ``%CONFIGDIR%/pacemaker`` +.. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES% .. |PCMK_INIT_ENV_FILE| replace:: ``%PACEMAKER_CONFIG_DIR%/pcmk-init.env`` .. |PCMK_LOG_FILE| replace:: %CRM_LOG_DIR%/pacemaker.log -.. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES% +.. |PCMK_CONTAINER_LOG_FILE| replace:: ``/var/log/pcmk-init.log`` +.. |PCMK__REMOTE_SCHEMA_DIR| replace:: %PCMK__REMOTE_SCHEMA_DIR% .. |REMOTE_DISTRO| replace:: AlmaLinux .. |REMOTE_DISTRO_VER| replace:: 9 """ |