diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 07:45:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 07:45:40 +0000 |
commit | 07d7f4cfa4b10de87a31b68191036ff446add675 (patch) | |
tree | 7162524d8aaf1aef62d2f4fa51f595ed113981ff /doc | |
parent | Adding upstream version 2.1.6. (diff) | |
download | pacemaker-07d7f4cfa4b10de87a31b68191036ff446add675.tar.xz pacemaker-07d7f4cfa4b10de87a31b68191036ff446add675.zip |
Adding upstream version 2.1.7. (tag: upstream/2.1.7)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'doc')
31 files changed, 2555 insertions, 1598 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am index 1400145..a40ddfe 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2003-2021 the Pacemaker project contributors +# Copyright 2003-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # @@ -13,7 +13,10 @@ include $(top_srcdir)/mk/release.mk # What formats to use for book uploads (i.e. "make www"; # use BOOK_FORMATS in sphinx subdirectory to change local builds) -BOOK_FORMATS ?= html singlehtml pdf epub +BOOK_FORMATS ?= html \ + singlehtml \ + pdf \ + epub # SNMP MIB mibdir = $(datadir)/snmp/mibs @@ -25,7 +28,8 @@ DEPRECATED_GENERATED = if BUILD_ASCIIDOC DEPRECATED_GENERATED += $(DEPRECATED_ORIGINAL:%.txt=%.html) endif -DEPRECATED_ALL = $(DEPRECATED_ORIGINAL) $(DEPRECATED_GENERATED) +DEPRECATED_ALL = $(DEPRECATED_ORIGINAL) \ + $(DEPRECATED_GENERATED) doc_DATA = $(DEPRECATED_ALL) noinst_SCRIPTS = abi-check @@ -73,14 +77,17 @@ deprecated-clean: # Annotated source code as HTML # Cleaning first ensures we don't index unrelated stuff like RPM sources +.PHONY: global global: $(MAKE) $(AM_MAKEFLAGS) -C .. clean-generic $(MAKE) $(AM_MAKEFLAGS) -C ../rpm rpm-clean cd .. && gtags -q && htags -sanhIT doc +.PHONY: global-upload global-upload: global rsync $(RSYNC_OPTS) HTML/ "$(RSYNC_DEST)/$(PACKAGE)/global/$(TAG)/" +.PHONY: global-clean global-clean: -rm -rf HTML @@ -93,43 +100,53 @@ global-clean: %.7.html: %.7 groff -mandoc `man -w ./$<` -T html > $@ +.PHONY: manhtml manhtml: $(MAKE) $(AM_MAKEFLAGS) -C .. all find .. -name "[a-z]*.[78]" -exec $(MAKE) $(AM_MAKEFLAGS) \{\}.html \; +.PHONY: manhtml-upload manhtml-upload: manhtml find .. -name "[a-z]*.[78].html" -exec \ rsync $(RSYNC_OPTS) \{\} "$(RSYNC_DEST)/$(PACKAGE)/man/" \; +.PHONY: manhtml-clean manhtml-clean: -find .. 
-name "[a-z]*.[78].html" -exec rm \{\} \; # API documentation as HTML +.PHONY: doxygen doxygen: Doxyfile doxygen Doxyfile +.PHONY: doxygen-upload doxygen-upload: doxygen rsync $(RSYNC_OPTS) api/html/ "$(RSYNC_DEST)/$(PACKAGE)/doxygen/$(TAG)/" +.PHONY: doxygen-clean doxygen-clean: -rm -rf api # ABI compatibility report as HTML +.PHONY: abi abi: abi-check ./abi-check $(PACKAGE) $(LAST_RELEASE) $(TAG) +.PHONY: abi-www abi-www: export RSYNC_DEST=$(RSYNC_DEST); ./abi-check -u $(PACKAGE) $(LAST_RELEASE) $(TAG) +.PHONY: abi-clean abi-clean: -rm -rf abi_dumps compat_reports # The main documentation books (which are actually in the sphinx subdirectory) +.PHONY: books-upload books-upload: $(MAKE) $(AM_MAKEFLAGS) -C sphinx clean $(MAKE) $(AM_MAKEFLAGS) -C sphinx \ @@ -142,11 +159,13 @@ books-upload: .PHONY: www www: clean-local deprecated-upload manhtml-upload global-upload doxygen-upload books-upload +.PHONY: clean-local clean-local: global-clean manhtml-clean doxygen-clean abi-clean deprecated-clean # "make check" will cause "make all" to be run, which means docs will get built # as a part of running tests if they haven't already. That seems unnecessary, so # override the default check-recursive rule with this one that just returns. If # we ever need to add tests to this directory, this rule will have to come out. +.PHONY: check-recursive check-recursive: @true diff --git a/doc/abi-check.in b/doc/abi-check.in index 5a5e253..6b6a8d3 100755 --- a/doc/abi-check.in +++ b/doc/abi-check.in @@ -1,6 +1,6 @@ #!@BASH_PATH@ # -# Copyright 2011-2022 the Pacemaker project contributors +# Copyright 2011-2023 the Pacemaker project contributors # # The version control history for this file may have further details. 
# @@ -29,6 +29,12 @@ tag() { fi } +sed_in_place() { + cp -p "$1" "$1.$$" + sed -e "$2" "$1" > "$1.$$" + mv "$1.$$" "$1" +} + # Strip anything up to and including a dash from the argument version() { echo "$1" | sed s:.*-:: @@ -103,7 +109,7 @@ extract_one() { # Remove "doc" from SUBDIRS in Makefile (but why?) BUILD_ROOT="$(pwd)/$BUILD_ROOT" - sed -i.sed 's: doc::' "$BUILD_ROOT/Makefile.am" + sed_in_place "$BUILD_ROOT/Makefile.am" 's: doc::' # Run ABI dump abi_config "$PACKAGE" "$VERSION" "$BUILD_ROOT" "$DESC" diff --git a/doc/sphinx/Clusters_from_Scratch/apache.rst b/doc/sphinx/Clusters_from_Scratch/apache.rst index e4eddff..c5c155e 100644 --- a/doc/sphinx/Clusters_from_Scratch/apache.rst +++ b/doc/sphinx/Clusters_from_Scratch/apache.rst @@ -316,7 +316,7 @@ have to worry about whether you can handle the load after a failover. To do this, we create a location constraint. In the location constraint below, we are saying the ``WebSite`` resource -prefers the node ``pcmk-1`` with a score of ``50``. Here, the score indicates +prefers the node ``pcmk-2`` with a score of ``50``. Here, the score indicates how strongly we'd like the resource to run at this location. .. code-block:: console diff --git a/doc/sphinx/Clusters_from_Scratch/cluster-setup.rst b/doc/sphinx/Clusters_from_Scratch/cluster-setup.rst index 0a7a7a5..437b5f8 100644 --- a/doc/sphinx/Clusters_from_Scratch/cluster-setup.rst +++ b/doc/sphinx/Clusters_from_Scratch/cluster-setup.rst @@ -114,14 +114,14 @@ Start and enable the daemon by issuing the following commands on each node: # systemctl enable pcsd.service Created symlink from /etc/systemd/system/multi-user.target.wants/pcsd.service to /usr/lib/systemd/system/pcsd.service. -The installed packages will create an ``hacluster`` user with a disabled password. -While this is fine for running ``pcs`` commands locally, +The installed packages will create an |CRM_DAEMON_USER| user with a disabled +password. 
While this is fine for running ``pcs`` commands locally, the account needs a login password in order to perform such tasks as syncing the Corosync configuration, or starting and stopping the cluster on other nodes. This tutorial will make use of such commands, -so now we will set a password for the ``hacluster`` user, using the same password -on both nodes: +so now we will set a password for the |CRM_DAEMON_USER| user, using the same +password on both nodes: .. code-block:: console diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am index c4ade5c..e48e19a 100644 --- a/doc/sphinx/Makefile.am +++ b/doc/sphinx/Makefile.am @@ -55,7 +55,8 @@ DOTS = $(wildcard shared/images/*.dot) # Vector sources for generated PNGs (including SVG equivalents of DOTS, created # manually using dot) -SVGS = $(wildcard shared/images/pcmk-*.svg) $(DOTS:%.dot=%.svg) +SVGS = $(wildcard shared/images/pcmk-*.svg) \ + $(DOTS:%.dot=%.svg) # PNG images generated from SVGS # @@ -71,28 +72,33 @@ PNGS_Pacemaker_Remote = $(wildcard Pacemaker_Remote/images/*.png) STATIC_FILES = $(wildcard _static/*.css) -EXTRA_DIST = $(wildcard */*.rst) $(DOTS) $(SVGS) \ - $(PNGS_Clusters_from_Scratch) \ - $(PNGS_Pacemaker_Explained) \ - $(PNGS_Pacemaker_Remote) \ - $(wildcard Pacemaker_Python_API/_templates/*rst) \ - $(STATIC_FILES) \ +EXTRA_DIST = $(wildcard */*.rst) $(DOTS) $(SVGS) \ + $(PNGS_Clusters_from_Scratch) \ + $(PNGS_Pacemaker_Explained) \ + $(PNGS_Pacemaker_Remote) \ + $(wildcard Pacemaker_Python_API/_templates/*rst) \ + $(STATIC_FILES) \ conf.py.in # recursive, preserve symlinks/permissions/times, verbose, compress, # don't cross filesystems, sparse, show progress RSYNC_OPTS = -rlptvzxS --progress +PACKAGE_SERIES=$(shell echo "$VERSION" | awk -F. 
'{ print $1"."$2 }') + BOOK_RSYNC_DEST = $(RSYNC_DEST)/$(PACKAGE)/doc/$(PACKAGE_SERIES) BOOK = none -DEPS_intro = shared/pacemaker-intro.rst $(PNGS_GENERATED) +DEPS_intro = shared/pacemaker-intro.rst \ + $(PNGS_GENERATED) -DEPS_Clusters_from_Scratch = $(DEPS_intro) $(PNGS_Clusters_from_Scratch) +DEPS_Clusters_from_Scratch = $(DEPS_intro) \ + $(PNGS_Clusters_from_Scratch) DEPS_Pacemaker_Administration = $(DEPS_intro) DEPS_Pacemaker_Development = -DEPS_Pacemaker_Explained = $(DEPS_intro) $(PNGS_Pacemaker_Explained) +DEPS_Pacemaker_Explained = $(DEPS_intro) \ + $(PNGS_Pacemaker_Explained) DEPS_Pacemaker_Python_API = ../../python DEPS_Pacemaker_Remote = $(PNGS_Pacemaker_Remote) @@ -120,6 +126,14 @@ $(BOOKS:%=%/conf.py): conf.py.in -e 's/%BOOK_TITLE%/$(subst _, ,$(@:%/conf.py=%))/g' \ -e 's#%SRC_DIR%#$(abs_srcdir)#g' \ -e 's#%ABS_TOP_SRCDIR%#$(abs_top_srcdir)#g' \ + -e 's#%CONFIGDIR%#@CONFIGDIR@#g' \ + -e 's#%CRM_BLACKBOX_DIR%#@CRM_BLACKBOX_DIR@#g' \ + -e 's#%CRM_DAEMON_GROUP%#@CRM_DAEMON_GROUP@#g' \ + -e 's#%CRM_DAEMON_USER%#@CRM_DAEMON_USER@#g' \ + -e 's#%CRM_LOG_DIR%#@CRM_LOG_DIR@#g' \ + -e 's#%CRM_SCHEMA_DIRECTORY%#@CRM_SCHEMA_DIRECTORY@#g' \ + -e 's#%PACEMAKER_CONFIG_DIR%#@PACEMAKER_CONFIG_DIR@#g' \ + -e 's#%PCMK_GNUTLS_PRIORITIES%#@PCMK_GNUTLS_PRIORITIES@#g' \ $(<) > "$@" $(BOOK)/_build: $(STATIC_FILES) $(BOOK)/conf.py $(DEPS_$(BOOK)) $(wildcard $(srcdir)/$(BOOK)/*.rst) @@ -160,15 +174,21 @@ if BUILD_SPHINX_DOCS done @rsync $(RSYNC_OPTS) "$(builddir)/build-$(PACKAGE_SERIES).txt" \ "$(RSYNC_DEST)/$(PACKAGE)/doc" +endif +.PHONY: all-local all-local: +if BUILD_SPHINX_DOCS @for book in $(BOOKS); do \ $(MAKE) $(AM_MAKEFLAGS) BOOK=$$book \ PAPER="$(PAPER)" SPHINXFLAGS="$(SPHINXFLAGS)" \ BOOK_FORMATS="$(BOOK_FORMATS)" $$book/_build; \ done +endif +.PHONY: install-data-local install-data-local: all-local +if BUILD_SPHINX_DOCS $(AM_V_at)for book in $(BOOKS); do \ for format in $(BOOK_FORMATS); do \ formatdir="$$book/_build/$$format"; \ @@ -183,13 +203,17 @@ 
install-data-local: all-local done; \ done; \ done +endif +.PHONY: uninstall-local uninstall-local: +if BUILD_SPHINX_DOCS $(AM_V_at)for book in $(BOOKS); do \ rm -rf "$(DESTDIR)/$(docdir)/$$book"; \ done endif +.PHONY: clean-local clean-local: $(AM_V_at)-rm -rf \ $(BOOKS:%="$(builddir)/%/_build") \ diff --git a/doc/sphinx/Pacemaker_Administration/administrative.rst b/doc/sphinx/Pacemaker_Administration/administrative.rst new file mode 100644 index 0000000..7c8b346 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/administrative.rst @@ -0,0 +1,150 @@ +.. index:: + single: administrative mode + +Administrative Modes +-------------------- + +Intrusive administration can be performed on a Pacemaker cluster without +causing resource failures, recovery, and fencing, by putting the cluster or a +subset of it into an administrative mode. + +Pacemaker supports several administrative modes: + +* Maintenance mode for the entire cluster, specific nodes, or specific + resources +* Unmanaged resources +* Disabled configuration items +* Standby mode for specific nodes + +Rules may be used to automatically set any of these modes for specific times or +other conditions. + + +.. index:: + pair: administrative mode; maintenance mode + +.. _maintenance_mode: + +Maintenance Mode +################ + +In maintenance mode, the cluster will not start or stop resources. Recurring +monitors for affected resources will be paused, except those specifying +``role`` as ``Stopped``. + +To put a specific resource into maintenance mode, set the resource's +``maintenance`` meta-attribute to ``true``. + +To put all active resources on a specific node into maintenance mode, set the +node's ``maintenance`` node attribute to ``true``. When enabled, this overrides +resource-specific maintenance mode. + +.. warning:: + + Restarting Pacemaker on a node that is in single-node maintenance mode will + likely lead to undesirable effects. 
If ``maintenance`` is set as a transient + attribute, it will be erased when Pacemaker is stopped, which will + immediately take the node out of maintenance mode and likely get it fenced. + If set as a permanent attribute, any resources active on the node will have + their local history erased when Pacemaker is restarted, so the cluster will + no longer consider them running on the node and thus will consider them + managed again, allowing them to be started elsewhere. + +To put all resources in the cluster into maintenance mode, set the +``maintenance-mode`` cluster option to ``true``. When enabled, this overrides +node- or resource- specific maintenance mode. + +Maintenance mode, at any level, overrides other administrative modes. + + +.. index:: + pair: administrative mode; unmanaged resources + +.. _unmanaged_resources: + +Unmanaged Resources +################### + +An unmanaged resource will not be started or stopped by the cluster. A resource +may become unmanaged in several ways: + +* The administrator may set the ``is-managed`` resource meta-attribute to + ``false`` (whether for a specific resource, or all resources without an + explicit setting via ``rsc_defaults``) +* :ref:`Maintenance mode <maintenance_mode>` causes affected resources to + become unmanaged (and overrides any ``is-managed`` setting) +* Certain types of failure cause affected resources to become unmanaged. 
These + include: + + * Failed stop operations when the ``stonith-enabled`` cluster property is set + to ``false`` + * Failure of an operation that has ``on-fail`` set to ``block`` + * A resource detected as incorrectly active on more than one node when its + ``multiple-active`` meta-attribute is set to ``block`` + * A resource constrained by a revoked ``rsc_ticket`` with ``loss-policy`` set + to ``freeze`` + * Resources with ``requires`` set (or defaulting) to anything other than + ``nothing`` in a partition that loses quorum when the ``no-quorum-policy`` + cluster option is set to ``freeze`` + +Recurring actions are not affected by unmanaging a resource. + +.. warning:: + + Manually starting an unmanaged resource on a different node is strongly + discouraged. It will at least cause the cluster to consider the resource + failed, and may require the resource's ``target-role`` to be set to + ``Stopped`` then ``Started`` in order for recovery to succeed. + + +.. index:: + pair: administrative mode; disabled configuration + +.. _disabled_configuration: + +Disabled Configuration +###################### + +Some configuration elements disable particular behaviors: + +* The ``stonith-enabled`` cluster option, when set to ``false``, disables node + fencing. This is highly discouraged, as it can lead to data unavailability, + loss, or corruption. + +* The ``stop-all-resources`` cluster option, when set to ``true``, causes all + resources to be stopped. + +* Certain elements support an ``enabled`` meta-attribute, which if set to + ``false``, causes the cluster to act as if the specific element is not + configured. These include ``op``, ``alert`` *(since 2.1.6)*, and + ``recipient`` *(since 2.1.6)*. ``enabled`` may be set for specific ``op`` + elements, or all operations without an explicit setting via ``op_defaults``. + + +.. index:: + pair: administrative mode; standby + +.. 
_standby: + +Standby Mode +############ + +When a node is put into standby, all resources will be moved away from the +node, and all recurring operations will be stopped on the node, except those +specifying ``role`` as ``Stopped`` (which will be newly initiated if +appropriate). + +A node may be put into standby mode by setting its ``standby`` node attribute +to ``true``. The attribute may be queried and set using the ``crm_standby`` +tool. + + +.. index:: + pair: administrative mode; rules + +Rules +##### + +Rules may be used to set administrative mode options automatically according to +various criteria such as date and time. See the "Rules" chapter of the +*Pacemaker Explained* document for details. diff --git a/doc/sphinx/Pacemaker_Administration/alerts.rst b/doc/sphinx/Pacemaker_Administration/alerts.rst index c0f54c6..42efc8d 100644 --- a/doc/sphinx/Pacemaker_Administration/alerts.rst +++ b/doc/sphinx/Pacemaker_Administration/alerts.rst @@ -287,7 +287,7 @@ Special concerns when writing alert agents: this into consideration, for example by queueing resource-intensive actions into some other instance, instead of directly executing them. -* Alert agents are run as the ``hacluster`` user, which has a minimal set +* Alert agents are run as the |CRM_DAEMON_USER| user, which has a minimal set of permissions. If an agent requires additional privileges, it is recommended to configure ``sudo`` to allow the agent to run the necessary commands as another user with the appropriate privileges. @@ -297,7 +297,7 @@ Special concerns when writing alert agents: user-configured ``timestamp-format``), ``CRM_alert_recipient,`` and all instance attributes. 
Mostly this is needed simply to protect against configuration errors, but if some user can modify the CIB without having - ``hacluster``-level access to the cluster nodes, it is a potential security + |CRM_DAEMON_USER| access to the cluster nodes, it is a potential security concern as well, to avoid the possibility of code injection. .. note:: **ocf:pacemaker:ClusterMon compatibility** @@ -308,4 +308,4 @@ Special concerns when writing alert agents: passed to alert agents are available prepended with ``CRM_notify_`` as well as ``CRM_alert_``. One break in compatibility is that ``ClusterMon`` ran external scripts as the ``root`` user, while alert agents are run as the - ``hacluster`` user. + |CRM_DAEMON_USER| user. diff --git a/doc/sphinx/Pacemaker_Administration/configuring.rst b/doc/sphinx/Pacemaker_Administration/configuring.rst index 415dd81..295c96a 100644 --- a/doc/sphinx/Pacemaker_Administration/configuring.rst +++ b/doc/sphinx/Pacemaker_Administration/configuring.rst @@ -189,48 +189,53 @@ cluster even if the machine itself is not in the same cluster. To do this, one simply sets up a number of environment variables and runs the same commands as when working on a cluster node. -.. table:: **Environment Variables Used to Connect to Remote Instances of the CIB** - - +----------------------+-----------+------------------------------------------------+ - | Environment Variable | Default | Description | - +======================+===========+================================================+ - | CIB_user | $USER | .. index:: | - | | | single: CIB_user | - | | | single: environment variable; CIB_user | - | | | | - | | | The user to connect as. Needs to be | - | | | part of the ``haclient`` group on | - | | | the target host. | - +----------------------+-----------+------------------------------------------------+ - | CIB_passwd | | .. index:: | - | | | single: CIB_passwd | - | | | single: environment variable; CIB_passwd | - | | | | - | | | The user's password. 
Read from the | - | | | command line if unset. | - +----------------------+-----------+------------------------------------------------+ - | CIB_server | localhost | .. index:: | - | | | single: CIB_server | - | | | single: environment variable; CIB_server | - | | | | - | | | The host to contact | - +----------------------+-----------+------------------------------------------------+ - | CIB_port | | .. index:: | - | | | single: CIB_port | - | | | single: environment variable; CIB_port | - | | | | - | | | The port on which to contact the server; | - | | | required. | - +----------------------+-----------+------------------------------------------------+ - | CIB_encrypted | TRUE | .. index:: | - | | | single: CIB_encrypted | - | | | single: environment variable; CIB_encrypted | - | | | | - | | | Whether to encrypt network traffic | - +----------------------+-----------+------------------------------------------------+ +.. list-table:: **Environment Variables Used to Connect to Remote Instances of the CIB** + :class: longtable + :widths: 2 2 5 + :header-rows: 1 + + * - Environment Variable + - Default + - Description + * - .. index:: + single: CIB_user + single: environment variable; CIB_user + + CIB_user + - |CRM_DAEMON_USER_RAW| + - The user to connect as. Needs to be part of the |CRM_DAEMON_GROUP| group + on the target host. + * - .. index:: + single: CIB_passwd + single: environment variable; CIB_passwd + + CIB_passwd + - + - The user's password. Read from the command line if unset. + * - .. index:: + single: CIB_server + single: environment variable; CIB_server + + CIB_server + - localhost + - The host to contact + * - .. index:: + single: CIB_port + single: environment variable; CIB_port + + CIB_port + - + - The port on which to contact the server; required + * - .. 
index:: + single: CIB_encrypted + single: environment variable; CIB_encrypted + + CIB_encrypted + - true + - Whether to encrypt network traffic So, if **c001n01** is an active cluster node and is listening on port 1234 -for connections, and **someuser** is a member of the **haclient** group, +for connections, and **someuser** is a member of the |CRM_DAEMON_GROUP| group, then the following would prompt for **someuser**'s password and return the cluster's current configuration: @@ -243,27 +248,9 @@ For security reasons, the cluster does not listen for remote connections by default. If you wish to allow remote access, you need to set the ``remote-tls-port`` (encrypted) or ``remote-clear-port`` (unencrypted) CIB properties (i.e., those kept in the ``cib`` tag, like ``num_updates`` and -``epoch``). - -.. table:: **Extra top-level CIB properties for remote access** - - +----------------------+-----------+------------------------------------------------------+ - | CIB Property | Default | Description | - +======================+===========+======================================================+ - | remote-tls-port | | .. index:: | - | | | single: remote-tls-port | - | | | single: CIB property; remote-tls-port | - | | | | - | | | Listen for encrypted remote connections | - | | | on this port. | - +----------------------+-----------+------------------------------------------------------+ - | remote-clear-port | | .. index:: | - | | | single: remote-clear-port | - | | | single: CIB property; remote-clear-port | - | | | | - | | | Listen for plaintext remote connections | - | | | on this port. | - +----------------------+-----------+------------------------------------------------------+ +``epoch``). Encrypted communication is keyless, which makes it subject to +man-in-the-middle attacks, and thus either option should be used only on +protected networks. .. 
important:: diff --git a/doc/sphinx/Pacemaker_Administration/index.rst b/doc/sphinx/Pacemaker_Administration/index.rst index 327ad31..af89380 100644 --- a/doc/sphinx/Pacemaker_Administration/index.rst +++ b/doc/sphinx/Pacemaker_Administration/index.rst @@ -22,6 +22,8 @@ Table of Contents cluster configuring tools + administrative + moving troubleshooting upgrading alerts diff --git a/doc/sphinx/Pacemaker_Explained/advanced-options.rst b/doc/sphinx/Pacemaker_Administration/moving.rst index 20ab79e..3d6a92a 100644 --- a/doc/sphinx/Pacemaker_Explained/advanced-options.rst +++ b/doc/sphinx/Pacemaker_Administration/moving.rst @@ -1,171 +1,11 @@ -Advanced Configuration ----------------------- - -.. index:: - single: start-delay; operation attribute - single: interval-origin; operation attribute - single: interval; interval-origin - single: operation; interval-origin - single: operation; start-delay - -Specifying When Recurring Actions are Performed -############################################### - -By default, recurring actions are scheduled relative to when the resource -started. In some cases, you might prefer that a recurring action start relative -to a specific date and time. For example, you might schedule an in-depth -monitor to run once every 24 hours, and want it to run outside business hours. - -To do this, set the operation's ``interval-origin``. The cluster uses this point -to calculate the correct ``start-delay`` such that the operation will occur -at ``interval-origin`` plus a multiple of the operation interval. - -For example, if the recurring operation's interval is 24h, its -``interval-origin`` is set to 02:00, and it is currently 14:32, then the -cluster would initiate the operation after 11 hours and 28 minutes. - -The value specified for ``interval`` and ``interval-origin`` can be any -date/time conforming to the -`ISO8601 standard <https://en.wikipedia.org/wiki/ISO_8601>`_. 
By way of -example, to specify an operation that would run on the first Monday of -2021 and every Monday after that, you would add: - -.. topic:: Example recurring action that runs relative to base date/time - - .. code-block:: xml - - <op id="intensive-monitor" name="monitor" interval="P7D" interval-origin="2021-W01-1"/> - -.. index:: - single: resource; failure recovery - single: operation; failure recovery - -.. _failure-handling: - -Handling Resource Failure -######################### - -By default, Pacemaker will attempt to recover failed resources by restarting -them. However, failure recovery is highly configurable. - -.. index:: - single: resource; failure count - single: operation; failure count - -Failure Counts -______________ - -Pacemaker tracks resource failures for each combination of node, resource, and -operation (start, stop, monitor, etc.). - -You can query the fail count for a particular node, resource, and/or operation -using the ``crm_failcount`` command. For example, to see how many times the -10-second monitor for ``myrsc`` has failed on ``node1``, run: - -.. code-block:: none - - # crm_failcount --query -r myrsc -N node1 -n monitor -I 10s - -If you omit the node, ``crm_failcount`` will use the local node. If you omit -the operation and interval, ``crm_failcount`` will display the sum of the fail -counts for all operations on the resource. - -You can use ``crm_resource --cleanup`` or ``crm_failcount --delete`` to clear -fail counts. For example, to clear the above monitor failures, run: - -.. code-block:: none - - # crm_resource --cleanup -r myrsc -N node1 -n monitor -I 10s - -If you omit the resource, ``crm_resource --cleanup`` will clear failures for -all resources. If you omit the node, it will clear failures on all nodes. If -you omit the operation and interval, it will clear the failures for all -operations on the resource. - -.. 
note:: - - Even when cleaning up only a single operation, all failed operations will - disappear from the status display. This allows us to trigger a re-check of - the resource's current status. - -Higher-level tools may provide other commands for querying and clearing -fail counts. - -The ``crm_mon`` tool shows the current cluster status, including any failed -operations. To see the current fail counts for any failed resources, call -``crm_mon`` with the ``--failcounts`` option. This shows the fail counts per -resource (that is, the sum of any operation fail counts for the resource). - -.. index:: - single: migration-threshold; resource meta-attribute - single: resource; migration-threshold - -Failure Response -________________ - -Normally, if a running resource fails, pacemaker will try to stop it and start -it again. Pacemaker will choose the best location to start it each time, which -may be the same node that it failed on. - -However, if a resource fails repeatedly, it is possible that there is an -underlying problem on that node, and you might desire trying a different node -in such a case. Pacemaker allows you to set your preference via the -``migration-threshold`` resource meta-attribute. [#]_ - -If you define ``migration-threshold`` to *N* for a resource, it will be banned -from the original node after *N* failures there. - -.. note:: - - The ``migration-threshold`` is per *resource*, even though fail counts are - tracked per *operation*. The operation fail counts are added together - to compare against the ``migration-threshold``. - -By default, fail counts remain until manually cleared by an administrator -using ``crm_resource --cleanup`` or ``crm_failcount --delete`` (hopefully after -first fixing the failure's cause). It is possible to have fail counts expire -automatically by setting the ``failure-timeout`` resource meta-attribute. - -.. important:: - - A successful operation does not clear past failures. 
If a recurring monitor - operation fails once, succeeds many times, then fails again days later, its - fail count is 2. Fail counts are cleared only by manual intervention or - failure timeout. - -For example, setting ``migration-threshold`` to 2 and ``failure-timeout`` to -``60s`` would cause the resource to move to a new node after 2 failures, and -allow it to move back (depending on stickiness and constraint scores) after one -minute. - -.. note:: - - ``failure-timeout`` is measured since the most recent failure. That is, older - failures do not individually time out and lower the fail count. Instead, all - failures are timed out simultaneously (and the fail count is reset to 0) if - there is no new failure for the timeout period. - -There are two exceptions to the migration threshold: when a resource either -fails to start or fails to stop. - -If the cluster property ``start-failure-is-fatal`` is set to ``true`` (which is -the default), start failures cause the fail count to be set to ``INFINITY`` and -thus always cause the resource to move immediately. - -Stop failures are slightly different and crucial. If a resource fails to stop -and fencing is enabled, then the cluster will fence the node in order to be -able to start the resource elsewhere. If fencing is disabled, then the cluster -has no way to continue and will not try to start the resource elsewhere, but -will try to stop it again after any failure timeout or clearing. +Moving Resources +---------------- .. index:: single: resource; move -Moving Resources -################ - Moving Resources Manually -_________________________ +######################### There are primarily two occasions when you would want to move a resource from its current location: when the whole node is under maintenance, and when a @@ -176,7 +16,7 @@ single resource needs to be moved. 
single: node; standby mode Standby Mode -~~~~~~~~~~~~ +____________ Since everything eventually comes down to a score, you could create constraints for every resource to prevent them from running on one node. While Pacemaker @@ -215,7 +55,7 @@ A cluster node in standby mode will not run resources, but still contributes to quorum, and may fence or be fenced by nodes. Moving One Resource -~~~~~~~~~~~~~~~~~~~ +___________________ When only one resource is required to move, we could do this by creating location constraints. However, once again we provide a user-friendly shortcut @@ -281,9 +121,10 @@ constraint will prevent the resource from running on that node until cluster node is no longer available! In some cases, such as when ``resource-stickiness`` is set to ``INFINITY``, it -is possible that you will end up with the problem described in -:ref:`node-score-equal`. The tool can detect some of these cases and deals with -them by creating both positive and negative constraints. For example: +is possible that you will end up with nodes with the same score, forcing the +cluster to choose one (which may not be the one you want). The tool can detect +some of these cases and deals with them by creating both positive and negative +constraints. For example: .. code-block:: xml @@ -293,7 +134,7 @@ them by creating both positive and negative constraints. For example: which has the same long-term consequences as discussed earlier. Moving Resources Due to Connectivity Changes -____________________________________________ +############################################ You can configure the cluster to move resources when external connectivity is lost in two steps. @@ -303,7 +144,7 @@ lost in two steps. single: ping resource Tell Pacemaker to Monitor Connectivity -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +______________________________________ First, add an ``ocf:pacemaker:ping`` resource to the cluster. 
The ``ping`` resource uses the system utility of the same name to test whether a list of @@ -372,12 +213,12 @@ with a description of the most interesting parameters. deal with the connectivity status that ``ocf:pacemaker:ping`` is recording. Tell Pacemaker How to Interpret the Connectivity Data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +_____________________________________________________ .. important:: - Before attempting the following, make sure you understand - :ref:`rules`. + Before attempting the following, make sure you understand rules. See the + "Rules" chapter of the *Pacemaker Explained* document for details. There are a number of ways to use the connectivity data. @@ -462,125 +303,3 @@ nodes provided they have connectivity to at least three (again assuming that <expression id="ping-prefer" attribute="pingd" operation="defined"/> </rule> </rsc_location> - - -.. _live-migration: - -Migrating Resources -___________________ - -Normally, when the cluster needs to move a resource, it fully restarts the -resource (that is, it stops the resource on the current node and starts it on -the new node). - -However, some types of resources, such as many virtual machines, are able to -move to another location without loss of state (often referred to as live -migration or hot migration). In pacemaker, this is called resource migration. -Pacemaker can be configured to migrate a resource when moving it, rather than -restarting it. - -Not all resources are able to migrate; see the -:ref:`migration checklist <migration_checklist>` below. Even those that can, -won't do so in all situations. Conceptually, there are two requirements from -which the other prerequisites follow: - -* The resource must be active and healthy at the old location; and -* everything required for the resource to run must be available on both the old - and new locations.
- -The cluster is able to accommodate both *push* and *pull* migration models by -requiring the resource agent to support two special actions: ``migrate_to`` -(performed on the current location) and ``migrate_from`` (performed on the -destination). - -In push migration, the process on the current location transfers the resource -to the new location where is it later activated. In this scenario, most of the -work would be done in the ``migrate_to`` action and, if anything, the -activation would occur during ``migrate_from``. - -Conversely for pull, the ``migrate_to`` action is practically empty and -``migrate_from`` does most of the work, extracting the relevant resource state -from the old location and activating it. - -There is no wrong or right way for a resource agent to implement migration, as -long as it works. - -.. _migration_checklist: - -.. topic:: Migration Checklist - - * The resource may not be a clone. - * The resource agent standard must be OCF. - * The resource must not be in a failed or degraded state. - * The resource agent must support ``migrate_to`` and ``migrate_from`` - actions, and advertise them in its meta-data. - * The resource must have the ``allow-migrate`` meta-attribute set to - ``true`` (which is not the default). - -If an otherwise migratable resource depends on another resource via an ordering -constraint, there are special situations in which it will be restarted rather -than migrated. - -For example, if the resource depends on a clone, and at the time the resource -needs to be moved, the clone has instances that are stopping and instances that -are starting, then the resource will be restarted. The scheduler is not yet -able to model this situation correctly and so takes the safer (if less optimal) -path. - -Also, if a migratable resource depends on a non-migratable resource, and both -need to be moved, the migratable resource will be restarted. - - -.. 
index:: - single: reload - single: reload-agent - -Reloading an Agent After a Definition Change -############################################ - -The cluster automatically detects changes to the configuration of active -resources. The cluster's normal response is to stop the service (using the old -definition) and start it again (with the new definition). This works, but some -resource agents are smarter and can be told to use a new set of options without -restarting. - -To take advantage of this capability, the resource agent must: - -* Implement the ``reload-agent`` action. What it should do depends completely - on your application! - - .. note:: - - Resource agents may also implement a ``reload`` action to make the managed - service reload its own *native* configuration. This is different from - ``reload-agent``, which makes effective changes in the resource's - *Pacemaker* configuration (specifically, the values of the agent's - reloadable parameters). - -* Advertise the ``reload-agent`` operation in the ``actions`` section of its - meta-data. - -* Set the ``reloadable`` attribute to 1 in the ``parameters`` section of - its meta-data for any parameters eligible to be reloaded after a change. - -Once these requirements are satisfied, the cluster will automatically know to -reload the resource (instead of restarting) when a reloadable parameter -changes. - -.. note:: - - Metadata will not be re-read unless the resource needs to be started. If you - edit the agent of an already active resource to set a parameter reloadable, - the resource may restart the first time the parameter value changes. - -.. note:: - - If both a reloadable and non-reloadable parameter are changed - simultaneously, the resource will be restarted. - -.. rubric:: Footnotes - -.. [#] The naming of this option was perhaps unfortunate as it is easily - confused with live migration, the process of moving a resource from one - node to another without stopping it. 
Xen virtual guests are the most - common example of resources that can be migrated in this manner. diff --git a/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst b/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst index 61ab4e6..3eda60a 100644 --- a/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst +++ b/doc/sphinx/Pacemaker_Administration/pcs-crmsh.rst @@ -118,14 +118,11 @@ Manage Resources .. topic:: Create a Resource .. code-block:: none - - crmsh # crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \ - params ip=192.168.122.120 cidr_netmask=24 \ - op monitor interval=30s + crmsh # crm configure primitive ClusterIP IPaddr2 params ip=192.168.122.120 cidr_netmask=24 pcs # pcs resource create ClusterIP IPaddr2 ip=192.168.122.120 cidr_netmask=24 -pcs determines the standard and provider (``ocf:heartbeat``) automatically -since ``IPaddr2`` is unique, and automatically creates operations (including +Both crmsh and pcs determine the standard and provider (``ocf:heartbeat``) automatically +since ``IPaddr2`` is unique, and automatically create operations (including monitor) based on the agent's meta-data. .. topic:: Show Configuration of All Resources @@ -270,6 +267,10 @@ edited and verified before committing to the live configuration: crmsh # crm configure ms WebDataClone WebData \ meta master-max=1 master-node-max=1 \ clone-max=2 clone-node-max=1 notify=true + crmsh # crm configure clone WebDataClone WebData \ + meta promotable=true \ + promoted-max=1 promoted-node-max=1 \ + clone-max=2 clone-node-max=1 notify=true pcs-0.9 # pcs resource master WebDataClone WebData \ master-max=1 master-node-max=1 \ clone-max=2 clone-node-max=1 notify=true @@ -277,6 +278,7 @@ edited and verified before committing to the live configuration: promoted-max=1 promoted-node-max=1 \ clone-max=2 clone-node-max=1 notify=true +crmsh supports both ways ('configure ms' is deprecated) to configure promotable clone since crmsh 4.4.0. 
pcs will generate the clone name automatically if it is omitted from the command line. diff --git a/doc/sphinx/Pacemaker_Development/c.rst b/doc/sphinx/Pacemaker_Development/c.rst index 66ce3b2..b03ddae 100644 --- a/doc/sphinx/Pacemaker_Development/c.rst +++ b/doc/sphinx/Pacemaker_Development/c.rst @@ -225,8 +225,8 @@ a ``GHashTable *`` member, the argument should be marked as ``[in,out]`` if the function inserts data into the table, even if the struct members themselves are not changed. However, an argument is not ``[in,out]`` if something reachable via the argument is modified via a separate argument. For example, both -``pe_resource_t`` and ``pe_node_t`` contain pointers to their -``pe_working_set_t`` and thus indirectly to each other, but if the function +``pcmk_resource_t`` and ``pcmk_node_t`` contain pointers to their +``pcmk_scheduler_t`` and thus indirectly to each other, but if the function modifies the resource via the resource argument, the node argument does not have to be ``[in,out]``. @@ -745,10 +745,20 @@ readability and logging consistency. Functions ######### +Function Naming +_______________ + Function names should be unique across the entire project, to allow for individual tracing via ``PCMK_trace_functions``, and make it easier to search code and follow detail logs. +A common function signature is a comparison function that returns 0 if its +arguments are equal for sorting purposes, -1 if the first argument should sort +first, and 1 if the second argument should sort first. Such a function should +have ``cmp`` in its name, to parallel ``strcmp()``; ``sort`` should only be +used in the names of functions that sort an entire list (typically using a +``cmp`` function).
+ Function Definitions ____________________ diff --git a/doc/sphinx/Pacemaker_Development/components.rst b/doc/sphinx/Pacemaker_Development/components.rst index e14df26..5086fa8 100644 --- a/doc/sphinx/Pacemaker_Development/components.rst +++ b/doc/sphinx/Pacemaker_Development/components.rst @@ -301,7 +301,7 @@ directly. This allows them to run using a ``CIB_file`` without the cluster needing to be active. The main entry point for the scheduler code is -``lib/pacemaker/pcmk_sched_allocate.c:pcmk__schedule_actions()``. It sets +``lib/pacemaker/pcmk_scheduler.c:pcmk__schedule_actions()``. It sets defaults and calls a series of functions for the scheduling. Some key steps: * ``unpack_cib()`` parses most of the CIB XML into data structures, and @@ -315,7 +315,7 @@ defaults and calls a series of functions for the scheduling. Some key steps: the CIB status section. This is used to decide whether certain actions need to be done, such as deleting orphan resources, forcing a restart when a resource definition changes, etc. -* ``allocate_resources()`` assigns resources to nodes. +* ``assign_resources()`` assigns resources to nodes. * ``schedule_resource_actions()`` schedules resource-specific actions (which might or might not end up in the final graph). * ``pcmk__apply_orderings()`` processes ordering constraints in order to modify @@ -335,7 +335,7 @@ Working with the scheduler is difficult. Challenges include: * It produces an insane amount of log messages at debug and trace levels. You can put resource ID(s) in the ``PCMK_trace_tags`` environment variable to enable trace-level messages only when related to specific resources. -* Different parts of the main ``pe_working_set_t`` structure are finalized at +* Different parts of the main ``pcmk_scheduler_t`` structure are finalized at different points in the scheduling process, so you have to keep in mind whether information you're using at one point of the code can possibly change later. 
For example, data unpacked from the CIB can safely be used anytime @@ -347,24 +347,24 @@ Working with the scheduler is difficult. Challenges include: .. index:: - single: pe_working_set_t + single: pcmk_scheduler_t Cluster Working Set ___________________ -The main data object for the scheduler is ``pe_working_set_t``, which contains +The main data object for the scheduler is ``pcmk_scheduler_t``, which contains all information needed about nodes, resources, constraints, etc., both as the raw CIB XML and parsed into more usable data structures, plus the resulting -transition graph XML. The variable name is usually ``data_set``. +transition graph XML. The variable name is usually ``scheduler``. .. index:: - single: pe_resource_t + single: pcmk_resource_t Resources _________ -``pe_resource_t`` is the data object representing cluster resources. A resource -has a variant: primitive (a.k.a. native), group, clone, or bundle. +``pcmk_resource_t`` is the data object representing cluster resources. A +resource has a variant: primitive (a.k.a. native), group, clone, or bundle. The resource object has members for two sets of methods, ``resource_object_functions_t`` from the ``libpe_status`` public API, and @@ -374,45 +374,45 @@ The resource object has members for two sets of methods, The object functions have basic capabilities such as unpacking the resource XML, and determining the current or planned location of the resource. -The allocation functions have more obscure capabilities needed for scheduling, +The assignment functions have more obscure capabilities needed for scheduling, such as processing location and ordering constraints. For example, ``pcmk__create_internal_constraints()`` simply calls the ``internal_constraints()`` method for each top-level resource in the cluster. .. 
index:: - single: pe_node_t + single: pcmk_node_t Nodes _____ -Allocation of resources to nodes is done by choosing the node with the highest +Assignment of resources to nodes is done by choosing the node with the highest score for a given resource. The scheduler does a bunch of processing to -generate the scores, then the actual allocation is straightforward. +generate the scores, then the actual assignment is straightforward. -Node lists are frequently used. For example, ``pe_working_set_t`` has a +Node lists are frequently used. For example, ``pcmk_scheduler_t`` has a ``nodes`` member which is a list of all nodes in the cluster, and -``pe_resource_t`` has a ``running_on`` member which is a list of all nodes on -which the resource is (or might be) active. These are lists of ``pe_node_t`` +``pcmk_resource_t`` has a ``running_on`` member which is a list of all nodes on +which the resource is (or might be) active. These are lists of ``pcmk_node_t`` objects. -The ``pe_node_t`` object contains a ``struct pe_node_shared_s *details`` member -with all node information that is independent of resource allocation (the node -name, etc.). +The ``pcmk_node_t`` object contains a ``struct pe_node_shared_s *details`` +member with all node information that is independent of resource assignment +(the node name, etc.). The working set's ``nodes`` member contains the original of this information. -All other node lists contain copies of ``pe_node_t`` where only the ``details`` -member points to the originals in the working set's ``nodes`` list. In this -way, the other members of ``pe_node_t`` (such as ``weight``, which is the node -score) may vary by node list, while the common details are shared. +All other node lists contain copies of ``pcmk_node_t`` where only the +``details`` member points to the originals in the working set's ``nodes`` list. 
+In this way, the other members of ``pcmk_node_t`` (such as ``weight``, which is +the node score) may vary by node list, while the common details are shared. .. index:: - single: pe_action_t + single: pcmk_action_t single: pe_action_flags Actions _______ -``pe_action_t`` is the data object representing actions that might need to be +``pcmk_action_t`` is the data object representing actions that might need to be taken. These could be resource actions, cluster-wide actions such as fencing a node, or "pseudo-actions" which are abstractions used as convenient points for ordering other actions against. @@ -443,7 +443,7 @@ Colocation constraints come into play in these parts of the scheduler code: * When choosing roles for promotable clone instances, so colocations involving a specific role can affect which instances are promoted -The resource allocation functions have several methods related to colocations: +The resource assignment functions have several methods related to colocations: * ``apply_coloc_score():`` This applies a colocation's score to either the dependent's allowed node scores (if called while resources are being diff --git a/doc/sphinx/Pacemaker_Development/helpers.rst b/doc/sphinx/Pacemaker_Development/helpers.rst index 3fcb48d..6bd1926 100644 --- a/doc/sphinx/Pacemaker_Development/helpers.rst +++ b/doc/sphinx/Pacemaker_Development/helpers.rst @@ -476,14 +476,13 @@ The Pacemaker build process uses ``lcov`` and special make targets to generate an HTML coverage report that can be inspected with any web browser. To start, you'll need to install the ``lcov`` package which is included in most -distributions. Next, reconfigure and rebuild the source tree: +distributions. Next, reconfigure the source tree: .. code-block:: none $ ./configure --with-coverage - $ make -Then simply run ``make coverage``. This will do the same thing as ``make check``, +Then run ``make -C devel coverage``. 
This will do the same thing as ``make check``, but will generate a bunch of intermediate files as part of the compiler's output. Essentially, the coverage tools run all the unit tests and make a note if a given line of code is executed as a part of some test program. This will include not diff --git a/doc/sphinx/Pacemaker_Explained/acls.rst b/doc/sphinx/Pacemaker_Explained/acls.rst index 67d5d15..c3de39d 100644 --- a/doc/sphinx/Pacemaker_Explained/acls.rst +++ b/doc/sphinx/Pacemaker_Explained/acls.rst @@ -6,9 +6,9 @@ Access Control Lists (ACLs) --------------------------- -By default, the ``root`` user or any user in the ``haclient`` group can modify -Pacemaker's CIB without restriction. Pacemaker offers *access control lists -(ACLs)* to provide more fine-grained authorization. +By default, the ``root`` user or any user in the |CRM_DAEMON_GROUP| group can +modify Pacemaker's CIB without restriction. Pacemaker offers *access control +lists (ACLs)* to provide more fine-grained authorization. .. important:: @@ -24,7 +24,7 @@ In order to use ACLs: * The ``enable-acl`` :ref:`cluster option <cluster_options>` must be set to true. -* Desired users must have user accounts in the ``haclient`` group on all +* Desired users must have user accounts in the |CRM_DAEMON_GROUP| group on all cluster nodes in the cluster. * If your CIB was created before Pacemaker 1.1.12, it might need to be updated @@ -275,9 +275,9 @@ elements. .. important:: - The ``root`` and ``hacluster`` user accounts always have full access to the - CIB, regardless of ACLs. For all other user accounts, when ``enable-acl`` is - true, permission to all parts of the CIB is denied by default (permissions + The ``root`` and |CRM_DAEMON_USER| user accounts always have full access to + the CIB, regardless of ACLs. For all other user accounts, when ``enable-acl`` + is true, permission to all parts of the CIB is denied by default (permissions must be explicitly granted).
ACL Examples @@ -436,8 +436,8 @@ the CIB, such as ``crm_attribute`` when managing permanent node attributes, ``crm_mon``, and ``cibadmin``. However, command-line tools that communicate directly with Pacemaker daemons -via IPC are not affected by ACLs. For example, users in the ``haclient`` group -may still do the following, regardless of ACLs: +via IPC are not affected by ACLs. For example, users in the |CRM_DAEMON_GROUP| +group may still do the following, regardless of ACLs: * Query transient node attribute values using ``crm_attribute`` and ``attrd_updater``. diff --git a/doc/sphinx/Pacemaker_Explained/cluster-options.rst b/doc/sphinx/Pacemaker_Explained/cluster-options.rst new file mode 100644 index 0000000..77bd7e6 --- /dev/null +++ b/doc/sphinx/Pacemaker_Explained/cluster-options.rst @@ -0,0 +1,921 @@ +Cluster-Wide Configuration +-------------------------- + +.. index:: + pair: XML element; cib + pair: XML element; configuration + +Configuration Layout +#################### + +The cluster is defined by the Cluster Information Base (CIB), which uses XML +notation. The simplest CIB, an empty one, looks like this: + +.. topic:: An empty configuration + + .. code-block:: xml + + <cib crm_feature_set="3.6.0" validate-with="pacemaker-3.5" epoch="1" num_updates="0" admin_epoch="0"> + <configuration> + <crm_config/> + <nodes/> + <resources/> + <constraints/> + </configuration> + <status/> + </cib> + +The empty configuration above contains the major sections that make up a CIB: + +* ``cib``: The entire CIB is enclosed with a ``cib`` element. Certain + fundamental settings are defined as attributes of this element. + + * ``configuration``: This section -- the primary focus of this document -- + contains traditional configuration information such as what resources the + cluster serves and the relationships among them. 
+ + * ``crm_config``: cluster-wide configuration options + + * ``nodes``: the machines that host the cluster + + * ``resources``: the services run by the cluster + + * ``constraints``: indications of how resources should be placed + + * ``status``: This section contains the history of each resource on each + node. Based on this data, the cluster can construct the complete current + state of the cluster. The authoritative source for this section is the + local executor (pacemaker-execd process) on each cluster node, and the + cluster will occasionally repopulate the entire section. For this reason, + it is never written to disk, and administrators are advised against + modifying it in any way. + +In this document, configuration settings will be described as properties or +options based on how they are defined in the CIB: + +* Properties are XML attributes of an XML element. + +* Options are name-value pairs expressed as ``nvpair`` child elements of an XML + element. + +Normally, you will use command-line tools that abstract the XML, so the +distinction will be unimportant; both properties and options are cluster +settings you can tweak. + +Configuration Value Types +######################### + +Throughout this document, configuration values will be designated as having one +of the following types: + +.. list-table:: **Configuration Value Types** + :class: longtable + :widths: 1 3 + :header-rows: 1 + + * - Type + - Description + * - .. _boolean: + + .. index:: + pair: type; boolean + + boolean + - Case-insensitive text value where ``1``, ``yes``, ``y``, ``on``, + and ``true`` evaluate as true and ``0``, ``no``, ``n``, ``off``, + ``false``, and unset evaluate as false + * - .. _date_time: + + .. index:: + pair: type; date/time + + date/time + - Textual timestamp like ``Sat Dec 21 11:47:45 2013`` + * - .. _duration: + + .. 
index:: + pair: type; duration + + duration + - A time duration, specified either like a :ref:`timeout <timeout>` or an + `ISO 8601 duration <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_. + A duration may be up to approximately 49 days but is intended for much + smaller time periods. + * - .. _enumeration: + + .. index:: + pair: type; enumeration + + enumeration + - Text that must be one of a set of defined values (which will be listed + in the description) + * - .. _integer: + + .. index:: + pair: type; integer + + integer + - 32-bit signed integer value (-2,147,483,648 to 2,147,483,647) + * - .. _nonnegative_integer: + + .. index:: + pair: type; nonnegative integer + + nonnegative integer + - 32-bit nonnegative integer value (0 to 2,147,483,647) + * - .. _port: + + .. index:: + pair: type; port + + port + - Integer TCP port number (0 to 65535) + * - .. _score: + + .. index:: + pair: type; score + + score + - A Pacemaker score can be an integer between -1,000,000 and 1,000,000, or + a string alias: ``INFINITY`` or ``+INFINITY`` is equivalent to + 1,000,000, ``-INFINITY`` is equivalent to -1,000,000, and ``red``, + ``yellow``, and ``green`` are equivalent to integers as described in + :ref:`node-health`. + * - .. _text: + + .. index:: + pair: type; text + + text + - A text string + * - .. _timeout: + + .. index:: + pair: type; timeout + + timeout + - A time duration, specified as a bare number (in which case it is + considered to be in seconds) or a number with a unit (``ms`` or ``msec`` + for milliseconds, ``us`` or ``usec`` for microseconds, ``s`` or ``sec`` + for seconds, ``m`` or ``min`` for minutes, ``h`` or ``hr`` for hours) + optionally with whitespace before and/or after the number. + * - .. _version: + + .. index:: + pair: type; version + + version + - Version number (any combination of alphanumeric characters, dots, and + dashes, starting with a number). + + +Scores +______ + +Scores are integral to how Pacemaker works. 
Practically everything from moving +a resource to deciding which resource to stop in a degraded cluster is achieved +by manipulating scores in some way. + +Scores are calculated per resource and node. Any node with a negative score for +a resource can't run that resource. The cluster places a resource on the node +with the highest score for it. + +Score addition and subtraction follow these rules: + +* Any value (including ``INFINITY``) - ``INFINITY`` = ``-INFINITY`` +* ``INFINITY`` + any value other than ``-INFINITY`` = ``INFINITY`` + +.. note:: + + What if you want to use a score higher than 1,000,000? Typically this possibility + arises when someone wants to base the score on some external metric that might + go above 1,000,000. + + The short answer is you can't. + + The long answer is it is sometimes possible to work around this limitation + creatively. You may be able to set the score to some computed value based on + the external metric rather than use the metric directly. For nodes, you can + store the metric as a node attribute, and query the attribute when computing + the score (possibly as part of a custom resource agent). + + +CIB Properties +############## + +Certain settings are defined by CIB properties (that is, attributes of the +``cib`` tag) rather than with the rest of the cluster configuration in the +``configuration`` section. + +The reason is simply a matter of parsing. These options are used by the +configuration database which is, by design, mostly ignorant of the content it +holds. So the decision was made to place them in an easy-to-find location. + +.. list-table:: **CIB Properties** + :class: longtable + :widths: 2 2 2 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _admin_epoch: + + ..
index:: + pair: admin_epoch; cib + + admin_epoch + - :ref:`nonnegative integer <nonnegative_integer>` + - 0 + - When a node joins the cluster, the cluster asks the node with the + highest (``admin_epoch``, ``epoch``, ``num_updates``) tuple to replace + the configuration on all the nodes -- which makes setting them correctly + very important. ``admin_epoch`` is never modified by the cluster; you + can use this to make the configurations on any inactive nodes obsolete. + * - .. _epoch: + + .. index:: + pair: epoch; cib + + epoch + - :ref:`nonnegative integer <nonnegative_integer>` + - 0 + - The cluster increments this every time the CIB's configuration section + is updated. + * - .. _num_updates: + + .. index:: + pair: num_updates; cib + + num_updates + - :ref:`nonnegative integer <nonnegative_integer>` + - 0 + - The cluster increments this every time the CIB's configuration or status + sections are updated, and resets it to 0 when epoch changes. + * - .. _validate_with: + + .. index:: + pair: validate-with; cib + + validate-with + - :ref:`enumeration <enumeration>` + - + - Determines the type of XML validation that will be done on the + configuration. Allowed values are ``none`` (in which case the cluster + will not require that updates conform to expected syntax) and the base + names of schema files installed on the local machine (for example, + "pacemaker-3.9") + * - .. _remote_tls_port: + + .. index:: + pair: remote-tls-port; cib + + remote-tls-port + - :ref:`port <port>` + - + - If set, the CIB manager will listen for anonymously encrypted remote + connections on this port, to allow CIB administration from hosts not in + the cluster. No key is used, so this should be used only on a protected + network where man-in-the-middle attacks can be avoided. + * - .. _remote_clear_port: + + .. 
index:: + pair: remote-clear-port; cib + + remote-clear-port + - :ref:`port <port>` + - + - If set to a TCP port number, the CIB manager will listen for remote + connections on this port, to allow for CIB administration from hosts not + in the cluster. No encryption is used, so this should be used only on a + protected network. + * - .. _cib_last_written: + + .. index:: + pair: cib-last-written; cib + + cib-last-written + - :ref:`date/time <date_time>` + - + - Indicates when the configuration was last written to disk. Maintained by + the cluster; for informational purposes only. + * - .. _have_quorum: + + .. index:: + pair: have-quorum; cib + + have-quorum + - :ref:`boolean <boolean>` + - + - Indicates whether the cluster has quorum. If false, the cluster's + response is determined by ``no-quorum-policy`` (see below). Maintained + by the cluster. + * - .. _dc_uuid: + + .. index:: + pair: dc-uuid; cib + + dc-uuid + - :ref:`text <text>` + - + - Node ID of the cluster's current designated controller (DC). Used and + maintained by the cluster. + + +.. _cluster_options: + +Cluster Options +############### + +Cluster options, as you might expect, control how the cluster behaves when +confronted with various situations. + +They are grouped into sets within the ``crm_config`` section. In advanced +configurations, there may be more than one set. (This will be described later +in the chapter on :ref:`rules` where we will show how to have the cluster use +different sets of options during working hours than during weekends.) For now, +we will describe the simple case where each option is present at most once. + +You can obtain an up-to-date list of cluster options, including their default +values, by running the ``man pacemaker-schedulerd`` and +``man pacemaker-controld`` commands. + +.. list-table:: **Cluster Options** + :class: longtable + :widths: 2 2 2 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _cluster_name: + + .. 
index:: + pair: cluster option; cluster-name + + cluster-name + - :ref:`text <text>` + - + - An (optional) name for the cluster as a whole. This is mostly for users' + convenience for use as desired in administration, but can be used in the + Pacemaker configuration in :ref:`rules` (as the ``#cluster-name`` + :ref:`node attribute <node-attribute-expressions-special>`). It may also + be used by higher-level tools when displaying cluster information, and + by certain resource agents (for example, the ``ocf:heartbeat:GFS2`` + agent stores the cluster name in filesystem meta-data). + * - .. _dc_version: + + .. index:: + pair: cluster option; dc-version + + dc-version + - :ref:`version <version>` + - *detected* + - Version of Pacemaker on the cluster's designated controller (DC). + Maintained by the cluster, and intended for diagnostic purposes. + * - .. _cluster_infrastructure: + + .. index:: + pair: cluster option; cluster-infrastructure + + cluster-infrastructure + - :ref:`text <text>` + - *detected* + - The messaging layer with which Pacemaker is currently running. + Maintained by the cluster, and intended for informational and diagnostic + purposes. + * - .. _no_quorum_policy: + + .. index:: + pair: cluster option; no-quorum-policy + + no-quorum-policy + - :ref:`enumeration <enumeration>` + - stop + - What to do when the cluster does not have quorum. Allowed values: + + * ``ignore:`` continue all resource management + * ``freeze:`` continue resource management, but don't recover resources + from nodes not in the affected partition + * ``stop:`` stop all resources in the affected cluster partition + * ``demote:`` demote promotable resources and stop all other resources + in the affected cluster partition *(since 2.0.5)* + * ``suicide:`` fence all nodes in the affected cluster partition + * - .. _batch_limit: + + .. 
index:: + pair: cluster option; batch-limit + + batch-limit + - :ref:`integer <integer>` + - 0 + - The maximum number of actions that the cluster may execute in parallel + across all nodes. The ideal value will depend on the speed and load + of your network and cluster nodes. If zero, the cluster will impose a + dynamically calculated limit only when any node has high load. If -1, + the cluster will not impose any limit. + * - .. _migration_limit: + + .. index:: + pair: cluster option; migration-limit + + migration-limit + - :ref:`integer <integer>` + - -1 + - The number of :ref:`live migration <live-migration>` actions that the + cluster is allowed to execute in parallel on a node. A value of -1 means + unlimited. + * - .. _symmetric_cluster: + + .. index:: + pair: cluster option; symmetric-cluster + + symmetric-cluster + - :ref:`boolean <boolean>` + - true + - If true, resources can run on any node by default. If false, a resource + is allowed to run on a node only if a + :ref:`location constraint <location-constraint>` enables it. + * - .. _stop_all_resources: + + .. index:: + pair: cluster option; stop-all-resources + + stop-all-resources + - :ref:`boolean <boolean>` + - false + - Whether all resources should be disallowed from running (can be useful + during maintenance or troubleshooting) + * - .. _stop_orphan_resources: + + .. index:: + pair: cluster option; stop-orphan-resources + + stop-orphan-resources + - :ref:`boolean <boolean>` + - true + - Whether resources that have been deleted from the configuration should + be stopped. This value takes precedence over + :ref:`is-managed <is_managed>` (that is, even unmanaged resources will + be stopped when orphaned if this value is ``true``). + * - .. _stop_orphan_actions: + + .. 
index:: + pair: cluster option; stop-orphan-actions + + stop-orphan-actions + - :ref:`boolean <boolean>` + - true + - Whether recurring :ref:`operations <operation>` that have been deleted + from the configuration should be cancelled + * - .. _start_failure_is_fatal: + + .. index:: + pair: cluster option; start-failure-is-fatal + + start-failure-is-fatal + - :ref:`boolean <boolean>` + - true + - Whether a failure to start a resource on a particular node prevents + further start attempts on that node. If ``false``, the cluster will + decide whether the node is still eligible based on the resource's + current failure count and ``migration-threshold``. + * - .. _enable_startup_probes: + + .. index:: + pair: cluster option; enable-startup-probes + + enable-startup-probes + - :ref:`boolean <boolean>` + - true + - Whether the cluster should check the pre-existing state of resources + when the cluster starts + * - .. _maintenance_mode: + + .. index:: + pair: cluster option; maintenance-mode + + maintenance-mode + - :ref:`boolean <boolean>` + - false + - If true, the cluster will not start or stop any resource in the cluster, + and any recurring operations (except those specifying ``role`` as + ``Stopped``) will be paused. If true, this overrides the + :ref:`maintenance <node_maintenance>` node attribute, + :ref:`is-managed <is_managed>` and :ref:`maintenance <rsc_maintenance>` + resource meta-attributes, and :ref:`enabled <op_enabled>` operation + meta-attribute. + * - .. _stonith_enabled: + + .. index:: + pair: cluster option; stonith-enabled + + stonith-enabled + - :ref:`boolean <boolean>` + - true + - Whether the cluster is allowed to fence nodes (for example, failed nodes + and nodes with resources that can't be stopped). + + If true, at least one fence device must be configured before resources + are allowed to run.
+ + If false, unresponsive nodes are immediately assumed to be running no + resources, and resource recovery on online nodes starts without any + further protection (which can mean *data loss* if the unresponsive node + still accesses shared storage, for example). See also the + :ref:`requires <requires>` resource meta-attribute. + * - .. _stonith_action: + + .. index:: + pair: cluster option; stonith-action + + stonith-action + - :ref:`enumeration <enumeration>` + - reboot + - Action the cluster should send to the fence agent when a node must be + fenced. Allowed values are ``reboot``, ``off``, and (for legacy agents + only) ``poweroff``. + * - .. _stonith_timeout: + + .. index:: + pair: cluster option; stonith-timeout + + stonith-timeout + - :ref:`duration <duration>` + - 60s + - How long to wait for ``on``, ``off``, and ``reboot`` fence actions to + complete by default. + * - .. _stonith_max_attempts: + + .. index:: + pair: cluster option; stonith-max-attempts + + stonith-max-attempts + - :ref:`score <score>` + - 10 + - How many times fencing can fail for a target before the cluster will no + longer immediately re-attempt it. Any value below 1 will be ignored, and + the default will be used instead. + * - .. _stonith_watchdog_timeout: + + .. index:: + pair: cluster option; stonith-watchdog-timeout + + stonith-watchdog-timeout + - :ref:`timeout <timeout>` + - 0 + - If nonzero, and the cluster detects ``have-watchdog`` as ``true``, then + watchdog-based self-fencing will be performed via SBD when fencing is + required, without requiring a fencing resource explicitly configured. + + If this is set to a positive value, unseen nodes are assumed to + self-fence within this much time. + + **Warning:** It must be ensured that this value is larger than the + ``SBD_WATCHDOG_TIMEOUT`` environment variable on all nodes. Pacemaker + verifies the settings individually on all nodes and prevents startup or + shuts down if configured wrongly on the fly. 
It is strongly recommended + that ``SBD_WATCHDOG_TIMEOUT`` be set to the same value on all nodes. + + If this is set to a negative value, and ``SBD_WATCHDOG_TIMEOUT`` is set, + twice that value will be used. + + **Warning:** In this case, it is essential (and currently not verified + by pacemaker) that ``SBD_WATCHDOG_TIMEOUT`` is set to the same value on + all nodes. + * - .. _concurrent-fencing: + + .. index:: + pair: cluster option; concurrent-fencing + + concurrent-fencing + - :ref:`boolean <boolean>` + - false + - Whether the cluster is allowed to initiate multiple fence actions + concurrently. Fence actions initiated externally, such as via the + ``stonith_admin`` tool or an application such as DLM, or by the fencer + itself such as recurring device monitors and ``status`` and ``list`` + commands, are not limited by this option. + * - .. _fence_reaction: + + .. index:: + pair: cluster option; fence-reaction + + fence-reaction + - :ref:`enumeration <enumeration>` + - stop + - How should a cluster node react if notified of its own fencing? A + cluster node may receive notification of its own fencing if fencing is + misconfigured, or if fabric fencing is in use that doesn't cut cluster + communication. Allowed values are ``stop`` to attempt to immediately + stop Pacemaker and stay stopped, or ``panic`` to attempt to immediately + reboot the local node, falling back to stop on failure. The default is + likely to be changed to ``panic`` in a future release. *(since 2.0.3)* + * - .. _priority_fencing_delay: + + .. index:: + pair: cluster option; priority-fencing-delay + + priority-fencing-delay + - :ref:`duration <duration>` + - 0 + - Apply this delay to any fencing targeting the lost nodes with the + highest total resource priority in case we don't have the majority of + the nodes in our cluster partition, so that the more significant nodes + potentially win any fencing match (especially meaningful in a + split-brain of a 2-node cluster). 
A promoted resource instance takes the + resource's priority plus 1 if the resource's priority is not 0. Any + static or random delays introduced by ``pcmk_delay_base`` and + ``pcmk_delay_max`` configured for the corresponding fencing resources + will be added to this delay. This delay should be significantly greater + than (safely twice) the maximum delay from those parameters. *(since + 2.0.4)* + * - .. _node_pending_timeout: + + .. index:: + pair: cluster option; node-pending-timeout + + node-pending-timeout + - :ref:`duration <duration>` + - 0 + - Fence nodes that do not join the controller process group within this + much time after joining the cluster, to allow the cluster to continue + managing resources. A value of 0 means never fence pending nodes. Setting the value to 2h means fence nodes after 2 hours. + *(since 2.1.7)* + * - .. _cluster_delay: + + .. index:: + pair: cluster option; cluster-delay + + cluster-delay + - :ref:`duration <duration>` + - 60s + - If the DC requires an action to be executed on another node, it will + consider the action failed if it does not get a response from the other + node within this time (beyond the action's own timeout). The ideal value + will depend on the speed and load of your network and cluster nodes. + * - .. _dc_deadtime: + + .. index:: + pair: cluster option; dc-deadtime + + dc-deadtime + - :ref:`duration <duration>` + - 20s + - How long to wait for a response from other nodes when electing a DC. The + ideal value will depend on the speed and load of your network and + cluster nodes. + * - .. _cluster_ipc_limit: + + .. index:: + pair: cluster option; cluster-ipc-limit + + cluster-ipc-limit + - :ref:`nonnegative integer <nonnegative_integer>` + - 500 + - The maximum IPC message backlog before one cluster daemon will + disconnect another. This is of use in large clusters, for which a good + value is the number of resources in the cluster multiplied by the number + of nodes. The default of 500 is also the minimum. 
Raise this if you see + "Evicting client" log messages for cluster daemon process IDs. + * - .. _pe_error_series_max: + + .. index:: + pair: cluster option; pe-error-series-max + + pe-error-series-max + - :ref:`integer <integer>` + - -1 + - The number of scheduler inputs resulting in errors to save. These inputs + can be helpful during troubleshooting and when reporting issues. A + negative value means save all inputs, and 0 means save none. + * - .. _pe_warn_series_max: + + .. index:: + pair: cluster option; pe-warn-series-max + + pe-warn-series-max + - :ref:`integer <integer>` + - 5000 + - The number of scheduler inputs resulting in warnings to save. These + inputs can be helpful during troubleshooting and when reporting issues. + A negative value means save all inputs, and 0 means save none. + * - .. _pe_input_series_max: + + .. index:: + pair: cluster option; pe-input-series-max + + pe-input-series-max + - :ref:`integer <integer>` + - 4000 + - The number of "normal" scheduler inputs to save. These inputs can be + helpful during troubleshooting and when reporting issues. A negative + value means save all inputs, and 0 means save none. + * - .. _enable_acl: + + .. index:: + pair: cluster option; enable-acl + + enable-acl + - :ref:`boolean <boolean>` + - false + - Whether :ref:`access control lists <acl>` should be used to authorize + CIB modifications + * - .. _placement_strategy: + + .. index:: + pair: cluster option; placement-strategy + + placement-strategy + - :ref:`enumeration <enumeration>` + - default + - How the cluster should assign resources to nodes (see + :ref:`utilization`). Allowed values are ``default``, ``utilization``, + ``balanced``, and ``minimal``. + * - .. _node_health_strategy: + + .. index:: + pair: cluster option; node-health-strategy + + node-health-strategy + - :ref:`enumeration <enumeration>` + - none + - How the cluster should react to :ref:`node health <node-health>` + attributes. 
Allowed values are ``none``, ``migrate-on-red``, + ``only-green``, ``progressive``, and ``custom``. + * - .. _node_health_base: + + .. index:: + pair: cluster option; node-health-base + + node-health-base + - :ref:`score <score>` + - 0 + - The base health score assigned to a node. Only used when + ``node-health-strategy`` is ``progressive``. + * - .. _node_health_green: + + .. index:: + pair: cluster option; node-health-green + + node-health-green + - :ref:`score <score>` + - 0 + - The score to use for a node health attribute whose value is ``green``. + Only used when ``node-health-strategy`` is ``progressive`` or + ``custom``. + * - .. _node_health_yellow: + + .. index:: + pair: cluster option; node-health-yellow + + node-health-yellow + - :ref:`score <score>` + - 0 + - The score to use for a node health attribute whose value is ``yellow``. + Only used when ``node-health-strategy`` is ``progressive`` or + ``custom``. + * - .. _node_health_red: + + .. index:: + pair: cluster option; node-health-red + + node-health-red + - :ref:`score <score>` + - 0 + - The score to use for a node health attribute whose value is ``red``. + Only used when ``node-health-strategy`` is ``progressive`` or + ``custom``. + * - .. _cluster_recheck_interval: + + .. index:: + pair: cluster option; cluster-recheck-interval + + cluster-recheck-interval + - :ref:`duration <duration>` + - 15min + - Pacemaker is primarily event-driven, and looks ahead to know when to + recheck the cluster for failure timeouts and most time-based rules + *(since 2.0.3)*. However, it will also recheck the cluster after this + amount of inactivity. This has two goals: rules with ``date_spec`` are + only guaranteed to be checked this often, and it also serves as a + fail-safe for some kinds of scheduler bugs. A value of 0 disables this + polling. + * - .. _shutdown_lock: + + .. 
index:: + pair: cluster option; shutdown-lock + + shutdown-lock + - :ref:`boolean <boolean>` + - false + - The default of false allows active resources to be recovered elsewhere + when their node is cleanly shut down, which is what the vast majority of + users will want. However, some users prefer to make resources highly + available only for failures, with no recovery for clean shutdowns. If + this option is true, resources active on a node when it is cleanly shut + down are kept "locked" to that node (not allowed to run elsewhere) until + they start again on that node after it rejoins (or for at most + ``shutdown-lock-limit``, if set). Stonith resources and Pacemaker Remote + connections are never locked. Clone and bundle instances and the + promoted role of promotable clones are currently never locked, though + support could be added in a future release. Locks may be manually + cleared using the ``--refresh`` option of ``crm_resource`` (both the + resource and node must be specified; this works with remote nodes if + their connection resource's ``target-role`` is set to ``Stopped``, but + not if Pacemaker Remote is stopped on the remote node without disabling + the connection resource). *(since 2.0.4)* + * - .. _shutdown_lock_limit: + + .. index:: + pair: cluster option; shutdown-lock-limit + + shutdown-lock-limit + - :ref:`duration <duration>` + - 0 + - If ``shutdown-lock`` is true, and this is set to a nonzero time + duration, locked resources will be allowed to start after this much time + has passed since the node shutdown was initiated, even if the node has + not rejoined. (This works with remote nodes only if their connection + resource's ``target-role`` is set to ``Stopped``.) *(since 2.0.4)* + * - .. _remove_after_stop: + + .. index:: + pair: cluster option; remove-after-stop + + remove-after-stop + - :ref:`boolean <boolean>` + - false + - *Deprecated* Whether the cluster should remove resources from + Pacemaker's executor after they are stopped. 
Values other than the + default are, at best, poorly tested and potentially dangerous. This + option is deprecated and will be removed in a future release. + * - .. _startup_fencing: + + .. index:: + pair: cluster option; startup-fencing + + startup-fencing + - :ref:`boolean <boolean>` + - true + - *Advanced Use Only:* Whether the cluster should fence unseen nodes at + start-up. Setting this to false is unsafe, because the unseen nodes + could be active and running resources but unreachable. ``dc-deadtime`` + acts as a grace period before this fencing, since a DC must be elected + to schedule fencing. + * - .. _election_timeout: + + .. index:: + pair: cluster option; election-timeout + + election-timeout + - :ref:`duration <duration>` + - 2min + - *Advanced Use Only:* If a winner is not declared within this much time + of starting an election, the node that initiated the election will + declare itself the winner. + * - .. _shutdown_escalation: + + .. index:: + pair: cluster option; shutdown-escalation + + shutdown-escalation + - :ref:`duration <duration>` + - 20min + - *Advanced Use Only:* The controller will exit immediately if a shutdown + does not complete within this much time. + * - .. _join_integration_timeout: + + .. index:: + pair: cluster option; join-integration-timeout + + join-integration-timeout + - :ref:`duration <duration>` + - 3min + - *Advanced Use Only:* If you need to adjust this value, it probably + indicates the presence of a bug. + * - .. _join_finalization_timeout: + + .. index:: + pair: cluster option; join-finalization-timeout + + join-finalization-timeout + - :ref:`duration <duration>` + - 30min + - *Advanced Use Only:* If you need to adjust this value, it probably + indicates the presence of a bug. + * - .. _transition_delay: + + .. 
index:: + pair: cluster option; transition-delay + + transition-delay + - :ref:`duration <duration>` + - 0s + - *Advanced Use Only:* Delay cluster recovery for the configured interval + to allow for additional or related events to occur. This can be useful + if your configuration is sensitive to the order in which ping updates + arrive. Enabling this option will slow down cluster recovery under all + conditions. diff --git a/doc/sphinx/Pacemaker_Explained/advanced-resources.rst b/doc/sphinx/Pacemaker_Explained/collective.rst index a61b76d..a4fa9dc 100644 --- a/doc/sphinx/Pacemaker_Explained/advanced-resources.rst +++ b/doc/sphinx/Pacemaker_Explained/collective.rst @@ -1,5 +1,13 @@ -Advanced Resource Types ------------------------ +.. index: + single: collective resource + single: resource; collective + +Collective Resources +-------------------- + +Pacemaker supports several types of *collective* resources, which consist of +multiple, related resource instances. + .. index: single: group resource @@ -540,11 +548,11 @@ been promoted before they can start. Clone Stickiness ________________ -To achieve a stable allocation pattern, clones are slightly sticky by -default. If no value for ``resource-stickiness`` is provided, the clone -will use a value of 1. Being a small value, it causes minimal -disturbance to the score calculations of other resources but is enough -to prevent Pacemaker from needlessly moving copies around the cluster. +To achieve stable assignments, clones are slightly sticky by default. If no +value for ``resource-stickiness`` is provided, the clone will use a value of 1. +Being a small value, it causes minimal disturbance to the score calculations of +other resources but is enough to prevent Pacemaker from needlessly moving +instances around the cluster. .. 
note:: diff --git a/doc/sphinx/Pacemaker_Explained/constraints.rst b/doc/sphinx/Pacemaker_Explained/constraints.rst index ab34c9f..a78d6c2 100644 --- a/doc/sphinx/Pacemaker_Explained/constraints.rst +++ b/doc/sphinx/Pacemaker_Explained/constraints.rst @@ -7,49 +7,6 @@ Resource Constraints -------------------- -.. index:: - single: resource; score - single: node; score - -Scores -###### - -Scores of all kinds are integral to how the cluster works. -Practically everything from moving a resource to deciding which -resource to stop in a degraded cluster is achieved by manipulating -scores in some way. - -Scores are calculated per resource and node. Any node with a -negative score for a resource can't run that resource. The cluster -places a resource on the node with the highest score for it. - -Infinity Math -_____________ - -Pacemaker implements **INFINITY** (or equivalently, **+INFINITY**) internally as a -score of 1,000,000. Addition and subtraction with it follow these three basic -rules: - -* Any value + **INFINITY** = **INFINITY** - -* Any value - **INFINITY** = -**INFINITY** - -* **INFINITY** - **INFINITY** = **-INFINITY** - -.. note:: - - What if you want to use a score higher than 1,000,000? Typically this possibility - arises when someone wants to base the score on some external metric that might - go above 1,000,000. - - The short answer is you can't. - - The long answer is it is sometimes possible work around this limitation - creatively. You may be able to set the score to some computed value based on - the external metric rather than use the metric directly. For nodes, you can - store the metric as a node attribute, and query the attribute when computing - the score (possibly as part of a custom resource agent). - .. _location-constraint: .. 
index:: @@ -434,6 +391,20 @@ Because the above example lets ``symmetrical`` default to TRUE, **Webserver** must be stopped before **Database** can be stopped, and **Webserver** should be stopped before **IP** if they both need to be stopped. +Symmetric and asymmetric ordering +_________________________________ + +A mandatory symmetric ordering of "start A then start B" implies not only that +the start actions must be ordered, but that B is not allowed to be active +unless A is active. For example, if the ordering is added to the configuration +when A is stopped (due to target-role, failure, etc.) and B is already active, +then B will be stopped. + +By contrast, asymmetric ordering of "start A then start B" means the stops can +occur in either order, which implies that B *can* remain active in the same +situation. + + .. index:: single: colocation single: constraint; colocation @@ -535,8 +506,8 @@ _____________________ | | | If ``rsc`` and ``with-rsc`` are specified, and ``rsc`` | | | | is a :ref:`promotable clone <s-resource-promotable>`, | | | | the constraint applies only to ``rsc`` instances in | - | | | this role. Allowed values: ``Started``, ``Promoted``, | - | | | ``Unpromoted``. For details, see | + | | | this role. Allowed values: ``Started``, ``Stopped``, | + | | | ``Promoted``, ``Unpromoted``. For details, see | | | | :ref:`promotable-clone-constraints`. | +----------------+----------------+--------------------------------------------------------+ | with-rsc-role | Started | .. index:: | @@ -548,8 +519,8 @@ _____________________ | | | ``with-rsc`` is a | | | | :ref:`promotable clone <s-resource-promotable>`, the | | | | constraint applies only to ``with-rsc`` instances in | - | | | this role. Allowed values: ``Started``, ``Promoted``, | - | | | ``Unpromoted``. For details, see | + | | | this role. Allowed values: ``Started``, ``Stopped``, | + | | | ``Promoted``, ``Unpromoted``. For details, see | | | | :ref:`promotable-clone-constraints`. 
| +----------------+----------------+--------------------------------------------------------+ | influence | value of | .. index:: | diff --git a/doc/sphinx/Pacemaker_Explained/index.rst b/doc/sphinx/Pacemaker_Explained/index.rst index de2ddd9..63387f3 100644 --- a/doc/sphinx/Pacemaker_Explained/index.rst +++ b/doc/sphinx/Pacemaker_Explained/index.rst @@ -18,15 +18,16 @@ Table of Contents :numbered: intro - options + local-options + cluster-options nodes resources + operations constraints fencing alerts rules - advanced-options - advanced-resources + collective reusing-configuration utilization acls diff --git a/doc/sphinx/Pacemaker_Explained/local-options.rst b/doc/sphinx/Pacemaker_Explained/local-options.rst new file mode 100644 index 0000000..91eda66 --- /dev/null +++ b/doc/sphinx/Pacemaker_Explained/local-options.rst @@ -0,0 +1,515 @@ +Host-Local Configuration +------------------------ + +.. index:: + pair: XML element; configuration + +.. note:: Directory and file paths below may differ on your system depending on + your Pacemaker build settings. Check your Pacemaker configuration + file to find the correct paths. + +Pacemaker supports several host-local configuration options. These options can +be configured on each node in the main Pacemaker configuration file +(|PCMK_CONFIG_FILE|) in the format ``<NAME>="<VALUE>"``. They work by setting +environment variables when Pacemaker daemons start up. + +.. list-table:: **Local Options** + :class: longtable + :widths: 2 2 2 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _pcmk_logfacility: + + .. index:: + pair: node option; PCMK_logfacility + + PCMK_logfacility + - :ref:`enumeration <enumeration>` + - daemon + - Enable logging via the system log or journal, using the specified log + facility. Messages sent here are of value to all Pacemaker + administrators. This can be disabled using ``none``, but that is not + recommended. 
Allowed values: + + * ``none`` + * ``daemon`` + * ``user`` + * ``local0`` + * ``local1`` + * ``local2`` + * ``local3`` + * ``local4`` + * ``local5`` + * ``local6`` + * ``local7`` + + * - .. _pcmk_logpriority: + + .. index:: + pair: node option; PCMK_logpriority + + PCMK_logpriority + - :ref:`enumeration <enumeration>` + - notice + - Unless system logging is disabled using ``PCMK_logfacility=none``, + messages of the specified log severity and higher will be sent to the + system log. The default is appropriate for most installations. Allowed + values: + + * ``emerg`` + * ``alert`` + * ``crit`` + * ``error`` + * ``warning`` + * ``notice`` + * ``info`` + * ``debug`` + + * - .. _pcmk_logfile: + + .. index:: + pair: node option; PCMK_logfile + + PCMK_logfile + - :ref:`text <text>` + - |PCMK_LOG_FILE| + - Unless set to ``none``, more detailed log messages will be sent to the + specified file (in addition to the system log, if enabled). These + messages may have extended information, and will include messages of info + severity. This log is of more use to developers and advanced system + administrators, and when reporting problems. + + * - .. _pcmk_logfile_mode: + + .. index:: + pair: node option; PCMK_logfile_mode + + PCMK_logfile_mode + - :ref:`text <text>` + - 0660 + - Pacemaker will set the permissions on the detail log to this value (see + ``chmod(1)``). + + * - .. _pcmk_debug: + + .. index:: + pair: node option; PCMK_debug + + PCMK_debug + - :ref:`enumeration <enumeration>` + - no + - Whether to send debug severity messages to the detail log. This may be + set for all subsystems (``yes`` or ``no``) or for specific (comma- + separated) subsystems. Allowed subsystems are: + + * ``pacemakerd`` + * ``pacemaker-attrd`` + * ``pacemaker-based`` + * ``pacemaker-controld`` + * ``pacemaker-execd`` + * ``pacemaker-fenced`` + * ``pacemaker-schedulerd`` + + Example: ``PCMK_debug="pacemakerd,pacemaker-execd"`` + + * - .. _pcmk_stderr: + + ..
index:: + pair: node option; PCMK_stderr + + PCMK_stderr + - :ref:`boolean <boolean>` + - no + - *Advanced Use Only:* Whether to send daemon log messages to stderr. This + would be useful only during troubleshooting, when starting Pacemaker + manually on the command line. + + Setting this option in the configuration file is pointless, since the + file is not read when starting Pacemaker manually. However, it can be set + directly as an environment variable on the command line. + + * - .. _pcmk_trace_functions: + + .. index:: + pair: node option; PCMK_trace_functions + + PCMK_trace_functions + - :ref:`text <text>` + - + - *Advanced Use Only:* Send debug and trace severity messages from these + (comma-separated) source code functions to the detail log. + + Example: + ``PCMK_trace_functions="func1,func2"`` + + * - .. _pcmk_trace_files: + + .. index:: + pair: node option; PCMK_trace_files + + PCMK_trace_files + - :ref:`text <text>` + - + - *Advanced Use Only:* Send debug and trace severity messages from all + functions in these (comma-separated) source file names to the detail log. + + Example: ``PCMK_trace_files="file1.c,file2.c"`` + + * - .. _pcmk_trace_formats: + + .. index:: + pair: node option; PCMK_trace_formats + + PCMK_trace_formats + - :ref:`text <text>` + - + - *Advanced Use Only:* Send trace severity messages that are generated by + these (comma-separated) format strings in the source code to the detail + log. + + Example: ``PCMK_trace_formats="Error: %s (%d)"`` + + * - .. _pcmk_trace_tags: + + .. index:: + pair: node option; PCMK_trace_tags + + PCMK_trace_tags + - :ref:`text <text>` + - + - *Advanced Use Only:* Send debug and trace severity messages related to + these (comma-separated) resource IDs to the detail log. + + Example: ``PCMK_trace_tags="client-ip,dbfs"`` + + * - .. _pcmk_blackbox: + + ..
index:: + pair: node option; PCMK_blackbox + + PCMK_blackbox + - :ref:`enumeration <enumeration>` + - no + - *Advanced Use Only:* Enable blackbox logging globally (``yes`` or ``no``) + or by subsystem. A blackbox contains a rolling buffer of all logs (of all + severities). Blackboxes are stored under |CRM_BLACKBOX_DIR| by default, + and their contents can be viewed using the ``qb-blackbox(8)`` + command. + + The blackbox recorder can be enabled at start using this variable, or at + runtime by sending a Pacemaker subsystem daemon process a ``SIGUSR1`` or + ``SIGTRAP`` signal, and disabled by sending ``SIGUSR2`` (see + ``kill(1)``). The blackbox will be written after a crash, assertion + failure, or ``SIGTRAP`` signal. + + See :ref:`PCMK_debug <pcmk_debug>` for allowed subsystems. + + Example: + ``PCMK_blackbox="pacemakerd,pacemaker-execd"`` + + * - .. _pcmk_trace_blackbox: + + .. index:: + pair: node option; PCMK_trace_blackbox + + PCMK_trace_blackbox + - :ref:`enumeration <enumeration>` + - + - *Advanced Use Only:* Write a blackbox whenever the message at the + specified function and line is logged. Multiple entries may be comma- + separated. + + Example: ``PCMK_trace_blackbox="remote.c:144,remote.c:149"`` + + * - .. _pcmk_node_start_state: + + .. index:: + pair: node option; PCMK_node_start_state + + PCMK_node_start_state + - :ref:`enumeration <enumeration>` + - default + - By default, the local host will join the cluster in an online or standby + state when Pacemaker first starts depending on whether it was previously + put into standby mode. If this variable is set to ``standby`` or + ``online``, it will force the local host to join in the specified state. + + * - .. _pcmk_node_action_limit: + + .. index:: + pair: node option; PCMK_node_action_limit + + PCMK_node_action_limit + - :ref:`nonnegative integer <nonnegative_integer>` + - + - Specify the maximum number of jobs that can be scheduled on this node.
If + set, this overrides the ``node-action-limit`` cluster property for this + node. + + * - .. _pcmk_shutdown_delay: + + .. index:: + pair: node option; PCMK_shutdown_delay + + PCMK_shutdown_delay + - :ref:`timeout <timeout>` + - + - Specify a delay before shutting down ``pacemakerd`` after shutting down + all other Pacemaker daemons. + + * - .. _pcmk_fail_fast: + + .. index:: + pair: node option; PCMK_fail_fast + + PCMK_fail_fast + - :ref:`boolean <boolean>` + - no + - By default, if a Pacemaker subsystem crashes, the main ``pacemakerd`` + process will attempt to restart it. If this variable is set to ``yes``, + ``pacemakerd`` will panic the local host instead. + + * - .. _pcmk_panic_action: + + .. index:: + pair: node option; PCMK_panic_action + + PCMK_panic_action + - :ref:`enumeration <enumeration>` + - reboot + - Pacemaker will panic the local host under certain conditions. By default, + this means rebooting the host. This variable can change that behavior: if + ``crash``, trigger a kernel crash (useful if you want a kernel dump to + investigate); if ``sync-reboot`` or ``sync-crash``, synchronize + filesystems before rebooting the host or triggering a kernel crash. The + sync values are more likely to preserve log messages, but with the risk + that the host may be left active if the synchronization hangs. + + * - .. _pcmk_authkey_location: + + .. index:: + pair: node option; PCMK_authkey_location + + PCMK_authkey_location + - :ref:`text <text>` + - |PCMK_AUTHKEY_FILE| + - Use the contents of this file as the authorization key to use with + Pacemaker Remote connections. This file must be readable by Pacemaker + daemons (that is, it must allow read permissions to either the + |CRM_DAEMON_USER| user or the |CRM_DAEMON_GROUP| group), and its contents + must be identical on all nodes. + + * - .. _pcmk_remote_address: + + ..
index:: + pair:: node option; PCMK_remote_address + + PCMK_remote_address + - :ref:`text <text>` + - + - By default, if the Pacemaker Remote service is run on the local node, it + will listen for connections on all IP addresses. This may be set to one + address to listen on instead, as a resolvable hostname or as a numeric + IPv4 or IPv6 address. When resolving names or listening on all addresses, + IPv6 will be preferred if available. When listening on an IPv6 address, + IPv4 clients will be supported via IPv4-mapped IPv6 addresses. + + Example: ``PCMK_remote_address="192.0.2.1"`` + + * - .. _pcmk_remote_port: + + .. index:: + pair:: node option; PCMK_remote_port + + PCMK_remote_port + - :ref:`port <port>` + - 3121 + - Use this TCP port number for Pacemaker Remote node connections. This + value must be the same on all nodes. + + * - .. _pcmk_remote_pid1: + + .. index:: + pair:: node option; PCMK_remote_pid1 + + PCMK_remote_pid1 + - :ref:`enumeration <enumeration>` + - default + - *Advanced Use Only:* When a bundle resource's ``run-command`` option is + left to default, Pacemaker Remote runs as PID 1 in the bundle's + containers. When it does so, it loads environment variables from the + container's |PCMK_INIT_ENV_FILE| and performs the PID 1 responsibility of + reaping dead subprocesses. + + This option controls whether those actions are performed when Pacemaker + Remote is not running as PID 1. It is intended primarily for developer + testing but can be useful when ``run-command`` is set to a separate, + custom PID 1 process that launches Pacemaker Remote. + + * ``full``: Pacemaker Remote loads environment variables from + |PCMK_INIT_ENV_FILE| and reaps dead subprocesses. + * ``vars``: Pacemaker Remote loads environment variables from + |PCMK_INIT_ENV_FILE| but does not reap dead subprocesses. + * ``default``: Pacemaker Remote performs neither action. 
+ + If Pacemaker Remote is running as PID 1, this option is ignored, and the + behavior is the same as for ``full``. + + * - .. _pcmk_tls_priorities: + + .. index:: + pair:: node option; PCMK_tls_priorities + + PCMK_tls_priorities + - :ref:`text <text>` + - |PCMK_GNUTLS_PRIORITIES| + - *Advanced Use Only:* These GnuTLS cipher priorities will be used for TLS + connections (whether for Pacemaker Remote connections or remote CIB + access, when enabled). See: + + https://gnutls.org/manual/html_node/Priority-Strings.html + + Pacemaker will append ``":+ANON-DH"`` for remote CIB access and + ``":+DHE-PSK:+PSK"`` for Pacemaker Remote connections, as they are + required for the respective functionality. + + Example: + ``PCMK_tls_priorities="SECURE128:+SECURE192"`` + + * - .. _pcmk_dh_min_bits: + + .. index:: + pair:: node option; PCMK_dh_min_bits + + PCMK_dh_min_bits + - :ref:`nonnegative integer <nonnegative_integer>` + - 0 (no minimum) + - *Advanced Use Only:* Set a lower bound on the bit length of the prime + number generated for Diffie-Hellman parameters needed by TLS connections. + The default is no minimum. + + The server (Pacemaker Remote daemon, or CIB manager configured to accept + remote clients) will use this value to provide a floor for the value + recommended by the GnuTLS library. The library will only accept a limited + number of specific values, which vary by library version, so setting + these is recommended only when required for compatibility with specific + client versions. + + Clients (connecting cluster nodes or remote CIB commands) will require + that the server use a prime of at least this size. This is recommended + only when the value must be lowered in order for the client's GnuTLS + library to accept a connection to an older server. + + * - .. _pcmk_dh_max_bits: + + .. 
index:: + pair:: node option; PCMK_dh_max_bits + + PCMK_dh_max_bits + - :ref:`nonnegative integer <nonnegative_integer>` + - 0 (no maximum) + - *Advanced Use Only:* Set an upper bound on the bit length of the prime + number generated for Diffie-Hellman parameters needed by TLS connections. + The default is no maximum. + + The server (Pacemaker Remote daemon, or CIB manager configured to accept + remote clients) will use this value to provide a ceiling for the value + recommended by the GnuTLS library. The library will only accept a limited + number of specific values, which vary by library version, so setting + these is recommended only when required for compatibility with specific + client versions. + + Clients do not use ``PCMK_dh_max_bits``. + + * - .. _pcmk_ipc_type: + + .. index:: + pair:: node option; PCMK_ipc_type + + PCMK_ipc_type + - :ref:`enumeration <enumeration>` + - shared-mem + - *Advanced Use Only:* Force use of a particular IPC method. Allowed values: + + * ``shared-mem`` + * ``socket`` + * ``posix`` + * ``sysv`` + + * - .. _pcmk_ipc_buffer: + + .. index:: + pair:: node option; PCMK_ipc_buffer + + PCMK_ipc_buffer + - :ref:`nonnegative integer <nonnegative_integer>` + - 131072 + - *Advanced Use Only:* Specify an IPC buffer size in bytes. This can be + useful when connecting to large clusters that result in messages + exceeding the default size (which will also result in log messages + referencing this variable). + + * - .. _pcmk_cluster_type: + + .. index:: + pair:: node option; PCMK_cluster_type + + PCMK_cluster_type + - :ref:`enumeration <enumeration>` + - corosync + - *Advanced Use Only:* Specify the cluster layer to be used. If unset, + Pacemaker will detect and use a supported cluster layer, if available. + Currently, ``"corosync"`` is the only supported cluster layer. If + multiple layers are supported in the future, this will allow overriding + Pacemaker's automatic detection to select a specific one. + + * - .. _pcmk_schema_directory: + + .. 
index:: + pair:: node option; PCMK_schema_directory + + PCMK_schema_directory + - :ref:`text <text>` + - |CRM_SCHEMA_DIRECTORY| + - *Advanced Use Only:* Specify an alternate location for RNG schemas and + XSL transforms. + + * - .. _pcmk_valgrind_enabled: + + .. index:: + pair:: node option; PCMK_valgrind_enabled + + PCMK_valgrind_enabled + - :ref:`enumeration <enumeration>` + - no + - *Advanced Use Only:* Whether subsystem daemons should be run under + ``valgrind``. Allowed values are the same as for ``PCMK_debug``. + + * - .. _pcmk_callgrind_enabled: + + .. index:: + pair:: node option; PCMK_callgrind_enabled + + PCMK_callgrind_enabled + - :ref:`enumeration <enumeration>` + - no + - *Advanced Use Only:* Whether subsystem daemons should be run under + ``valgrind`` with the ``callgrind`` tool enabled. Allowed values are the + same as for ``PCMK_debug``. + + * - .. _valgrind_opts: + + .. index:: + pair:: node option; VALGRIND_OPTS + + VALGRIND_OPTS + - :ref:`text <text>` + - + - *Advanced Use Only:* Pass these options to valgrind, when enabled (see + ``valgrind(1)``). ``"--vgdb=no"`` should usually be specified because + ``pacemaker-execd`` can lower privileges when executing commands, which + would otherwise leave a bunch of unremovable files in ``/tmp``. diff --git a/doc/sphinx/Pacemaker_Explained/nodes.rst b/doc/sphinx/Pacemaker_Explained/nodes.rst index 6fcadb3..378b067 100644 --- a/doc/sphinx/Pacemaker_Explained/nodes.rst +++ b/doc/sphinx/Pacemaker_Explained/nodes.rst @@ -105,6 +105,9 @@ To read back the value that was just set: The ``--type nodes`` indicates that this is a permanent node attribute; ``--type status`` would indicate a transient node attribute. + +.. _special_node_attributes: + Special node attributes ####################### @@ -154,35 +157,26 @@ unset to be false, and anything else to be an error. | | ``crm_resource --cleanup`` commands rather | | | than directly. 
| +----------------------------+-----------------------------------------------------+ - | maintenance | .. index:: | - | | pair: node attribute; maintenance | + | maintenance | .. _node_maintenance: | | | | - | | Similar to the ``maintenance-mode`` | - | | :ref:`cluster option <cluster_options>`, but | - | | for a single node. If true, resources will | - | | not be started or stopped on the node, | - | | resources and individual clone instances | - | | running on the node will become unmanaged, | - | | and any recurring operations for those will | - | | be cancelled. | + | | .. index:: | + | | pair: node attribute; maintenance | | | | - | | **Warning:** Restarting pacemaker on a node that is | - | | in single-node maintenance mode will likely | - | | lead to undesirable effects. If | - | | ``maintenance`` is set as a transient | - | | attribute, it will be erased when | - | | Pacemaker is stopped, which will | - | | immediately take the node out of | - | | maintenance mode and likely get it | - | | fenced. Even if permanent, if Pacemaker | - | | is restarted, any resources active on the | - | | node will have their local history erased | - | | when the node rejoins, so the cluster | - | | will no longer consider them running on | - | | the node and thus will consider them | - | | managed again, leading them to be started | - | | elsewhere. This behavior might be | - | | improved in a future release. | + | | If true, the cluster will not start or stop any | + | | resources on this node. Any resources active on the | + | | node become unmanaged, and any recurring operations | + | | for those resources (except those specifying | + | | ``role`` as ``Stopped``) will be paused. The | + | | :ref:`maintenance-mode <maintenance_mode>` cluster | + | | option, if true, overrides this. 
If this attribute | + | | is true, it overrides the | + | | :ref:`is-managed <is_managed>` and | + | | :ref:`maintenance <rsc_maintenance>` | + | | meta-attributes of affected resources and | + | | :ref:`enabled <op_enabled>` meta-attribute for | + | | affected recurring actions. Pacemaker should not be | + | | restarted on a node that is in single-node | + | | maintenance mode. | +----------------------------+-----------------------------------------------------+ | probe_complete | .. index:: | | | pair: node attribute; probe_complete | diff --git a/doc/sphinx/Pacemaker_Explained/operations.rst b/doc/sphinx/Pacemaker_Explained/operations.rst new file mode 100644 index 0000000..b1ad65d --- /dev/null +++ b/doc/sphinx/Pacemaker_Explained/operations.rst @@ -0,0 +1,623 @@ +.. index:: + single: resource; action + single: resource; operation + +.. _operation: + +Resource Operations +------------------- + +*Operations* are actions the cluster can perform on a resource by calling the +resource agent. Resource agents must support certain common operations such as +start, stop, and monitor, and may implement any others. + +Operations may be explicitly configured for two purposes: to override defaults +for options (such as timeout) that the cluster will use whenever it initiates +the operation, and to run an operation on a recurring basis (for example, to +monitor the resource for failure). + +.. topic:: An OCF resource with a non-default start timeout + + .. code-block:: xml + + <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> + <operations> + <op id="Public-IP-start" name="start" timeout="60s"/> + </operations> + <instance_attributes id="params-public-ip"> + <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> + </instance_attributes> + </primitive> + +Pacemaker identifies operations by a combination of name and interval, so this +combination must be unique for each resource. 
That is, you should not configure +two operations for the same resource with the same name and interval. + +.. _operation_properties: + +Operation Properties +#################### + +Operation properties may be specified directly in the ``op`` element as +XML attributes, or in a separate ``meta_attributes`` block as ``nvpair`` elements. +XML attributes take precedence over ``nvpair`` elements if both are specified. + +.. table:: **Properties of an Operation** + :class: longtable + :widths: 1 2 3 + + +----------------+-----------------------------------+-----------------------------------------------------+ + | Field | Default | Description | + +================+===================================+=====================================================+ + | id | | .. index:: | + | | | single: id; action property | + | | | single: action; property, id | + | | | | + | | | A unique name for the operation. | + +----------------+-----------------------------------+-----------------------------------------------------+ + | name | | .. index:: | + | | | single: name; action property | + | | | single: action; property, name | + | | | | + | | | The action to perform. This can be any action | + | | | supported by the agent; common values include | + | | | ``monitor``, ``start``, and ``stop``. | + +----------------+-----------------------------------+-----------------------------------------------------+ + | interval | 0 | .. index:: | + | | | single: interval; action property | + | | | single: action; property, interval | + | | | | + | | | How frequently (in seconds) to perform the | + | | | operation. A value of 0 means "when needed". | + | | | A positive value defines a *recurring action*, | + | | | which is typically used with | + | | | :ref:`monitor <s-resource-monitoring>`. | + +----------------+-----------------------------------+-----------------------------------------------------+ + | timeout | | .. 
index:: | + | | | single: timeout; action property | + | | | single: action; property, timeout | + | | | | + | | | How long to wait before declaring the action | + | | | has failed | + +----------------+-----------------------------------+-----------------------------------------------------+ + | on-fail | Varies by action: | .. index:: | + | | | single: on-fail; action property | + | | * ``stop``: ``fence`` if | single: action; property, on-fail | + | | ``stonith-enabled`` is true | | + | | or ``block`` otherwise | The action to take if this action ever fails. | + | | * ``demote``: ``on-fail`` of the | Allowed values: | + | | ``monitor`` action with | | + | | ``role`` set to ``Promoted``, | * ``ignore:`` Pretend the resource did not fail. | + | | if present, enabled, and | * ``block:`` Don't perform any further operations | + | | configured to a value other | on the resource. | + | | than ``demote``, or ``restart`` | * ``stop:`` Stop the resource and do not start | + | | otherwise | it elsewhere. | + | | * all other actions: ``restart`` | * ``demote:`` Demote the resource, without a | + | | | full restart. This is valid only for ``promote`` | + | | | actions, and for ``monitor`` actions with both | + | | | a nonzero ``interval`` and ``role`` set to | + | | | ``Promoted``; for any other action, a | + | | | configuration error will be logged, and the | + | | | default behavior will be used. *(since 2.0.5)* | + | | | * ``restart:`` Stop the resource and start it | + | | | again (possibly on a different node). | + | | | * ``fence:`` STONITH the node on which the | + | | | resource failed. | + | | | * ``standby:`` Move *all* resources away from the | + | | | node on which the resource failed. | + +----------------+-----------------------------------+-----------------------------------------------------+ + | enabled | TRUE | .. _op_enabled: | + | | | | + | | | .. 
index:: | + | | | single: enabled; action property | + | | | single: action; property, enabled | + | | | | + | | | If ``false``, ignore this operation definition. | + | | | This does not suppress all actions of this type, | + | | | but is typically used to pause a recurring monitor. | + | | | This can complement the resource being unmanaged | + | | | (:ref:`is-managed <is_managed>` set to ``false``), | + | | | which does not stop recurring operations. | + | | | Maintenance mode, which does stop configured | + | | | monitors, overrides this setting. Allowed values: | + | | | ``true``, ``false``. | + +----------------+-----------------------------------+-----------------------------------------------------+ + | record-pending | TRUE | .. index:: | + | | | single: record-pending; action property | + | | | single: action; property, record-pending | + | | | | + | | | If ``true``, the intention to perform the operation | + | | | is recorded so that GUIs and CLI tools can indicate | + | | | that an operation is in progress. This is best set | + | | | as an *operation default* | + | | | (see :ref:`s-operation-defaults`). Allowed values: | + | | | ``true``, ``false``. | + +----------------+-----------------------------------+-----------------------------------------------------+ + | role | | .. index:: | + | | | single: role; action property | + | | | single: action; property, role | + | | | | + | | | Run the operation only on node(s) that the cluster | + | | | thinks should be in the specified role. This only | + | | | makes sense for recurring ``monitor`` operations. | + | | | Allowed (case-sensitive) values: ``Stopped``, | + | | | ``Started``, and in the case of :ref:`promotable | + | | | clone resources <s-resource-promotable>`, | + | | | ``Unpromoted`` and ``Promoted``. | + +----------------+-----------------------------------+-----------------------------------------------------+ + +..
note:: + + When ``on-fail`` is set to ``demote``, recovery from failure by a successful + demote causes the cluster to recalculate whether and where a new instance + should be promoted. The node with the failure is eligible, so if promotion + scores have not changed, it will be promoted again. + + There is no direct equivalent of ``migration-threshold`` for the promoted + role, but the same effect can be achieved with a location constraint using a + :ref:`rule <rules>` with a node attribute expression for the resource's fail + count. + + For example, to immediately ban the promoted role from a node with any + failed promote or promoted instance monitor: + + .. code-block:: xml + + <rsc_location id="loc1" rsc="my_primitive"> + <rule id="rule1" score="-INFINITY" role="Promoted" boolean-op="or"> + <expression id="expr1" attribute="fail-count-my_primitive#promote_0" + operation="gte" value="1"/> + <expression id="expr2" attribute="fail-count-my_primitive#monitor_10000" + operation="gte" value="1"/> + </rule> + </rsc_location> + + This example assumes that there is a promotable clone of the ``my_primitive`` + resource (note that the primitive name, not the clone name, is used in the + rule), and that there is a recurring 10-second-interval monitor configured for + the promoted role (fail count attributes specify the interval in + milliseconds). + +.. _s-resource-monitoring: + +Monitoring Resources for Failure +################################ + +When Pacemaker first starts a resource, it runs one-time ``monitor`` operations +(referred to as *probes*) to ensure the resource is running where it's +supposed to be, and not running where it's not supposed to be. (This behavior +can be affected by the ``resource-discovery`` location constraint property.) + +Other than those initial probes, Pacemaker will *not* (by default) check that +the resource continues to stay healthy [#]_. You must configure ``monitor`` +operations explicitly to perform these checks. + +.. 
topic:: An OCF resource with a recurring health check + + .. code-block:: xml + + <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> + <operations> + <op id="Public-IP-start" name="start" timeout="60s"/> + <op id="Public-IP-monitor" name="monitor" interval="60s"/> + </operations> + <instance_attributes id="params-public-ip"> + <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> + </instance_attributes> + </primitive> + +By default, a ``monitor`` operation will ensure that the resource is running +where it is supposed to. The operation's ``role`` property can be used for further +checking. + +For example, if a resource has one ``monitor`` operation with +``interval=10 role=Started`` and a second ``monitor`` operation with +``interval=11 role=Stopped``, the cluster will run the first monitor on any nodes +it thinks *should* be running the resource, and the second monitor on any nodes +that it thinks *should not* be running the resource (for the truly paranoid, +who want to know when an administrator manually starts a service by mistake). + +.. note:: + + Currently, monitors with ``role=Stopped`` are not implemented for + :ref:`clone <s-resource-clone>` resources. + + +.. _s-operation-defaults: + +Setting Global Defaults for Operations +###################################### + +You can change the global default values for operation properties +in a given cluster. These are defined in an ``op_defaults`` section +of the CIB's ``configuration`` section, and can be set with +``crm_attribute``. For example, + +.. code-block:: none + + # crm_attribute --type op_defaults --name timeout --update 20s + +would default each operation's ``timeout`` to 20 seconds. If an +operation's definition also includes a value for ``timeout``, then that +value would be used for that operation instead.
+ +When Implicit Operations Take a Long Time +######################################### + +The cluster will always perform a number of implicit operations: ``start``, +``stop`` and a non-recurring ``monitor`` operation used at startup to check +whether the resource is already active. If one of these is taking too long, +then you can create an entry for them and specify a longer timeout. + +.. topic:: An OCF resource with custom timeouts for its implicit actions + + .. code-block:: xml + + <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> + <operations> + <op id="public-ip-startup" name="monitor" interval="0" timeout="90s"/> + <op id="public-ip-start" name="start" interval="0" timeout="180s"/> + <op id="public-ip-stop" name="stop" interval="0" timeout="15min"/> + </operations> + <instance_attributes id="params-public-ip"> + <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> + </instance_attributes> + </primitive> + +Multiple Monitor Operations +########################### + +Provided no two operations (for a single resource) have the same name +and interval, you can have as many ``monitor`` operations as you like. +In this way, you can do a superficial health check every minute and +progressively more intense ones at higher intervals. + +To tell the resource agent what kind of check to perform, you need to +provide each monitor with a different value for a common parameter. +The OCF standard creates a special parameter called ``OCF_CHECK_LEVEL`` +for this purpose and dictates that it is "made available to the +resource agent without the normal ``OCF_RESKEY`` prefix". + +Whatever name you choose, you can specify it by adding an +``instance_attributes`` block to the ``op`` tag. It is up to each +resource agent to look for the parameter and decide how to use it. + +.. topic:: An OCF resource with two recurring health checks, performing + different levels of checks specified via ``OCF_CHECK_LEVEL``. + + .. 
code-block:: xml + + <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> + <operations> + <op id="public-ip-health-60" name="monitor" interval="60"> + <instance_attributes id="params-public-ip-depth-60"> + <nvpair id="public-ip-depth-60" name="OCF_CHECK_LEVEL" value="10"/> + </instance_attributes> + </op> + <op id="public-ip-health-300" name="monitor" interval="300"> + <instance_attributes id="params-public-ip-depth-300"> + <nvpair id="public-ip-depth-300" name="OCF_CHECK_LEVEL" value="20"/> + </instance_attributes> + </op> + </operations> + <instance_attributes id="params-public-ip"> + <nvpair id="public-ip-level" name="ip" value="192.0.2.2"/> + </instance_attributes> + </primitive> + +Disabling a Monitor Operation +############################# + +The easiest way to stop a recurring monitor is to just delete it. +However, there can be times when you only want to disable it +temporarily. In such cases, simply add ``enabled=false`` to the +operation's definition. + +.. topic:: Example of an OCF resource with a disabled health check + + .. code-block:: xml + + <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> + <operations> + <op id="public-ip-check" name="monitor" interval="60s" enabled="false"/> + </operations> + <instance_attributes id="params-public-ip"> + <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> + </instance_attributes> + </primitive> + +This can be achieved from the command line by executing: + +.. code-block:: none + + # cibadmin --modify --xml-text '<op id="public-ip-check" enabled="false"/>' + +Once you've done whatever you needed to do, you can then re-enable it with + +.. code-block:: none + + # cibadmin --modify --xml-text '<op id="public-ip-check" enabled="true"/>' + + +.. 
index:: + single: start-delay; operation attribute + single: interval-origin; operation attribute + single: interval; interval-origin + single: operation; interval-origin + single: operation; start-delay + +Specifying When Recurring Actions are Performed +############################################### + +By default, recurring actions are scheduled relative to when the resource +started. In some cases, you might prefer that a recurring action start relative +to a specific date and time. For example, you might schedule an in-depth +monitor to run once every 24 hours, and want it to run outside business hours. + +To do this, set the operation's ``interval-origin``. The cluster uses this point +to calculate the correct ``start-delay`` such that the operation will occur +at ``interval-origin`` plus a multiple of the operation interval. + +For example, if the recurring operation's interval is 24h, its +``interval-origin`` is set to 02:00, and it is currently 14:32, then the +cluster would initiate the operation after 11 hours and 28 minutes. + +The value specified for ``interval`` and ``interval-origin`` can be any +date/time conforming to the +`ISO8601 standard <https://en.wikipedia.org/wiki/ISO_8601>`_. By way of +example, to specify an operation that would run on the first Monday of +2021 and every Monday after that, you would add: + +.. topic:: Example recurring action that runs relative to base date/time + + .. code-block:: xml + + <op id="intensive-monitor" name="monitor" interval="P7D" interval-origin="2021-W01-1"/> + + +.. index:: + single: resource; failure recovery + single: operation; failure recovery + +.. _failure-handling: + +Handling Resource Failure +######################### + +By default, Pacemaker will attempt to recover failed resources by restarting +them. However, failure recovery is highly configurable. + +.. 
index:: + single: resource; failure count + single: operation; failure count + +Failure Counts +______________ + +Pacemaker tracks resource failures for each combination of node, resource, and +operation (start, stop, monitor, etc.). + +You can query the fail count for a particular node, resource, and/or operation +using the ``crm_failcount`` command. For example, to see how many times the +10-second monitor for ``myrsc`` has failed on ``node1``, run: + +.. code-block:: none + + # crm_failcount --query -r myrsc -N node1 -n monitor -I 10s + +If you omit the node, ``crm_failcount`` will use the local node. If you omit +the operation and interval, ``crm_failcount`` will display the sum of the fail +counts for all operations on the resource. + +You can use ``crm_resource --cleanup`` or ``crm_failcount --delete`` to clear +fail counts. For example, to clear the above monitor failures, run: + +.. code-block:: none + + # crm_resource --cleanup -r myrsc -N node1 -n monitor -I 10s + +If you omit the resource, ``crm_resource --cleanup`` will clear failures for +all resources. If you omit the node, it will clear failures on all nodes. If +you omit the operation and interval, it will clear the failures for all +operations on the resource. + +.. note:: + + Even when cleaning up only a single operation, all failed operations will + disappear from the status display. This allows us to trigger a re-check of + the resource's current status. + +Higher-level tools may provide other commands for querying and clearing +fail counts. + +The ``crm_mon`` tool shows the current cluster status, including any failed +operations. To see the current fail counts for any failed resources, call +``crm_mon`` with the ``--failcounts`` option. This shows the fail counts per +resource (that is, the sum of any operation fail counts for the resource). + +.. 
index:: + single: migration-threshold; resource meta-attribute + single: resource; migration-threshold + +Failure Response +________________ + +Normally, if a running resource fails, pacemaker will try to stop it and start +it again. Pacemaker will choose the best location to start it each time, which +may be the same node that it failed on. + +However, if a resource fails repeatedly, it is possible that there is an +underlying problem on that node, and you might desire trying a different node +in such a case. Pacemaker allows you to set your preference via the +``migration-threshold`` resource meta-attribute. [#]_ + +If you define ``migration-threshold`` to *N* for a resource, it will be banned +from the original node after *N* failures there. + +.. note:: + + The ``migration-threshold`` is per *resource*, even though fail counts are + tracked per *operation*. The operation fail counts are added together + to compare against the ``migration-threshold``. + +By default, fail counts remain until manually cleared by an administrator +using ``crm_resource --cleanup`` or ``crm_failcount --delete`` (hopefully after +first fixing the failure's cause). It is possible to have fail counts expire +automatically by setting the ``failure-timeout`` resource meta-attribute. + +.. important:: + + A successful operation does not clear past failures. If a recurring monitor + operation fails once, succeeds many times, then fails again days later, its + fail count is 2. Fail counts are cleared only by manual intervention or + failure timeout. + +For example, setting ``migration-threshold`` to 2 and ``failure-timeout`` to +``60s`` would cause the resource to move to a new node after 2 failures, and +allow it to move back (depending on stickiness and constraint scores) after one +minute. + +.. note:: + + ``failure-timeout`` is measured since the most recent failure. That is, older + failures do not individually time out and lower the fail count. 
Instead, all + failures are timed out simultaneously (and the fail count is reset to 0) if + there is no new failure for the timeout period. + +There are two exceptions to the migration threshold: when a resource either +fails to start or fails to stop. + +If the cluster property ``start-failure-is-fatal`` is set to ``true`` (which is +the default), start failures cause the fail count to be set to ``INFINITY`` and +thus always cause the resource to move immediately. + +Stop failures are slightly different and crucial. If a resource fails to stop +and fencing is enabled, then the cluster will fence the node in order to be +able to start the resource elsewhere. If fencing is disabled, then the cluster +has no way to continue and will not try to start the resource elsewhere, but +will try to stop it again after any failure timeout or clearing. + + +.. index:: + single: reload + single: reload-agent + +Reloading an Agent After a Definition Change +############################################ + +The cluster automatically detects changes to the configuration of active +resources. The cluster's normal response is to stop the service (using the old +definition) and start it again (with the new definition). This works, but some +resource agents are smarter and can be told to use a new set of options without +restarting. + +To take advantage of this capability, the resource agent must: + +* Implement the ``reload-agent`` action. What it should do depends completely + on your application! + + .. note:: + + Resource agents may also implement a ``reload`` action to make the managed + service reload its own *native* configuration. This is different from + ``reload-agent``, which makes effective changes in the resource's + *Pacemaker* configuration (specifically, the values of the agent's + reloadable parameters). + +* Advertise the ``reload-agent`` operation in the ``actions`` section of its + meta-data. 
+ +* Set the ``reloadable`` attribute to 1 in the ``parameters`` section of + its meta-data for any parameters eligible to be reloaded after a change. + +Once these requirements are satisfied, the cluster will automatically know to +reload the resource (instead of restarting) when a reloadable parameter +changes. + +.. note:: + + Metadata will not be re-read unless the resource needs to be started. If you + edit the agent of an already active resource to set a parameter reloadable, + the resource may restart the first time the parameter value changes. + +.. note:: + + If both a reloadable and non-reloadable parameter are changed + simultaneously, the resource will be restarted. + + + +.. _live-migration: + +Migrating Resources +################### + +Normally, when the cluster needs to move a resource, it fully restarts the +resource (that is, it stops the resource on the current node and starts it on +the new node). + +However, some types of resources, such as many virtual machines, are able to +move to another location without loss of state (often referred to as live +migration or hot migration). In pacemaker, this is called live migration. +Pacemaker can be configured to migrate a resource when moving it, rather than +restarting it. + +Not all resources are able to migrate; see the +:ref:`migration checklist <migration_checklist>` below. Even those that can, +won't do so in all situations. Conceptually, there are two requirements from +which the other prerequisites follow: + +* The resource must be active and healthy at the old location; and +* everything required for the resource to run must be available on both the old + and new locations. + +The cluster is able to accommodate both *push* and *pull* migration models by +requiring the resource agent to support two special actions: ``migrate_to`` +(performed on the current location) and ``migrate_from`` (performed on the +destination). 
+ +In push migration, the process on the current location transfers the resource +to the new location where it is later activated. In this scenario, most of the +work would be done in the ``migrate_to`` action and, if anything, the +activation would occur during ``migrate_from``. + +Conversely for pull, the ``migrate_to`` action is practically empty and +``migrate_from`` does most of the work, extracting the relevant resource state +from the old location and activating it. + +There is no wrong or right way for a resource agent to implement migration, as +long as it works. + +.. _migration_checklist: + +.. topic:: Migration Checklist + + * The resource may not be a clone. + * The resource agent standard must be OCF. + * The resource must not be in a failed or degraded state. + * The resource agent must support ``migrate_to`` and ``migrate_from`` + actions, and advertise them in its meta-data. + * The resource must have the ``allow-migrate`` meta-attribute set to + ``true`` (which is not the default). + +If an otherwise migratable resource depends on another resource via an ordering +constraint, there are special situations in which it will be restarted rather +than migrated. + +For example, if the resource depends on a clone, and at the time the resource +needs to be moved, the clone has instances that are stopping and instances that +are starting, then the resource will be restarted. The scheduler is not yet +able to model this situation correctly and so takes the safer (if less optimal) +path. + +Also, if a migratable resource depends on a non-migratable resource, and both +need to be moved, the migratable resource will be restarted. +.. rubric:: Footnotes + +.. [#] Currently, anyway. Automatic monitoring operations may be added in a future + version of Pacemaker. + +.. [#] The naming of this option was perhaps unfortunate as it is easily + confused with live migration, the process of moving a resource from one + node to another without stopping it. 
Xen virtual guests are the most + common example of resources that can be migrated in this manner. diff --git a/doc/sphinx/Pacemaker_Explained/options.rst b/doc/sphinx/Pacemaker_Explained/options.rst deleted file mode 100644 index ee0511c..0000000 --- a/doc/sphinx/Pacemaker_Explained/options.rst +++ /dev/null @@ -1,622 +0,0 @@ -Cluster-Wide Configuration --------------------------- - -.. index:: - pair: XML element; cib - pair: XML element; configuration - -Configuration Layout -#################### - -The cluster is defined by the Cluster Information Base (CIB), which uses XML -notation. The simplest CIB, an empty one, looks like this: - -.. topic:: An empty configuration - - .. code-block:: xml - - <cib crm_feature_set="3.6.0" validate-with="pacemaker-3.5" epoch="1" num_updates="0" admin_epoch="0"> - <configuration> - <crm_config/> - <nodes/> - <resources/> - <constraints/> - </configuration> - <status/> - </cib> - -The empty configuration above contains the major sections that make up a CIB: - -* ``cib``: The entire CIB is enclosed with a ``cib`` element. Certain - fundamental settings are defined as attributes of this element. - - * ``configuration``: This section -- the primary focus of this document -- - contains traditional configuration information such as what resources the - cluster serves and the relationships among them. - - * ``crm_config``: cluster-wide configuration options - - * ``nodes``: the machines that host the cluster - - * ``resources``: the services run by the cluster - - * ``constraints``: indications of how resources should be placed - - * ``status``: This section contains the history of each resource on each - node. Based on this data, the cluster can construct the complete current - state of the cluster. The authoritative source for this section is the - local executor (pacemaker-execd process) on each cluster node, and the - cluster will occasionally repopulate the entire section. 
For this reason, - it is never written to disk, and administrators are advised against - modifying it in any way. - -In this document, configuration settings will be described as properties or -options based on how they are defined in the CIB: - -* Properties are XML attributes of an XML element. - -* Options are name-value pairs expressed as ``nvpair`` child elements of an XML - element. - -Normally, you will use command-line tools that abstract the XML, so the -distinction will be unimportant; both properties and options are cluster -settings you can tweak. - -CIB Properties -############## - -Certain settings are defined by CIB properties (that is, attributes of the -``cib`` tag) rather than with the rest of the cluster configuration in the -``configuration`` section. - -The reason is simply a matter of parsing. These options are used by the -configuration database which is, by design, mostly ignorant of the content it -holds. So the decision was made to place them in an easy-to-find location. - -.. table:: **CIB Properties** - :class: longtable - :widths: 1 3 - - +------------------+-----------------------------------------------------------+ - | Attribute | Description | - +==================+===========================================================+ - | admin_epoch | .. index:: | - | | pair: admin_epoch; cib | - | | | - | | When a node joins the cluster, the cluster performs a | - | | check to see which node has the best configuration. It | - | | asks the node with the highest (``admin_epoch``, | - | | ``epoch``, ``num_updates``) tuple to replace the | - | | configuration on all the nodes -- which makes setting | - | | them, and setting them correctly, very important. | - | | ``admin_epoch`` is never modified by the cluster; you can | - | | use this to make the configurations on any inactive nodes | - | | obsolete. | - | | | - | | **Warning:** Never set this value to zero. 
In such cases, | - | | the cluster cannot tell the difference between your | - | | configuration and the "empty" one used when nothing is | - | | found on disk. | - +------------------+-----------------------------------------------------------+ - | epoch | .. index:: | - | | pair: epoch; cib | - | | | - | | The cluster increments this every time the configuration | - | | is updated (usually by the administrator). | - +------------------+-----------------------------------------------------------+ - | num_updates | .. index:: | - | | pair: num_updates; cib | - | | | - | | The cluster increments this every time the configuration | - | | or status is updated (usually by the cluster) and resets | - | | it to 0 when epoch changes. | - +------------------+-----------------------------------------------------------+ - | validate-with | .. index:: | - | | pair: validate-with; cib | - | | | - | | Determines the type of XML validation that will be done | - | | on the configuration. If set to ``none``, the cluster | - | | will not verify that updates conform to the DTD (nor | - | | reject ones that don't). | - +------------------+-----------------------------------------------------------+ - | cib-last-written | .. index:: | - | | pair: cib-last-written; cib | - | | | - | | Indicates when the configuration was last written to | - | | disk. Maintained by the cluster; for informational | - | | purposes only. | - +------------------+-----------------------------------------------------------+ - | have-quorum | .. index:: | - | | pair: have-quorum; cib | - | | | - | | Indicates if the cluster has quorum. If false, this may | - | | mean that the cluster cannot start resources or fence | - | | other nodes (see ``no-quorum-policy`` below). Maintained | - | | by the cluster. | - +------------------+-----------------------------------------------------------+ - | dc-uuid | .. index:: | - | | pair: dc-uuid; cib | - | | | - | | Indicates which cluster node is the current leader. 
Used | - | | | by the cluster when placing resources and determining the | - | | | order of some events. Maintained by the cluster. | - +------------------+-----------------------------------------------------------+ - -.. _cluster_options: - -Cluster Options -############### - -Cluster options, as you might expect, control how the cluster behaves when -confronted with various situations. - -They are grouped into sets within the ``crm_config`` section. In advanced -configurations, there may be more than one set. (This will be described later -in the chapter on :ref:`rules` where we will show how to have the cluster use -different sets of options during working hours than during weekends.) For now, -we will describe the simple case where each option is present at most once. - -You can obtain an up-to-date list of cluster options, including their default -values, by running the ``man pacemaker-schedulerd`` and -``man pacemaker-controld`` commands. - -.. table:: **Cluster Options** - :class: longtable - :widths: 2 1 4 - - +---------------------------+---------+----------------------------------------------------+ - | Option | Default | Description | - +===========================+=========+====================================================+ - | cluster-name | | .. index:: | - | | | pair: cluster option; cluster-name | - | | | | - | | | An (optional) name for the cluster as a whole. | - | | | This is mostly for users' convenience for use | - | | | as desired in administration, but this can be | - | | | used in the Pacemaker configuration in | - | | | :ref:`rules` (as the ``#cluster-name`` | - | | | :ref:`node attribute | - | | | <node-attribute-expressions-special>`). It may | - | | | also be used by higher-level tools when | - | | | displaying cluster information, and by | - | | | certain resource agents (for example, the | - | | | ``ocf:heartbeat:GFS2`` agent stores the | - | | | cluster name in filesystem meta-data). 
| - +---------------------------+---------+----------------------------------------------------+ - | dc-version | | .. index:: | - | | | pair: cluster option; dc-version | - | | | | - | | | Version of Pacemaker on the cluster's DC. | - | | | Determined automatically by the cluster. Often | - | | | includes the hash which identifies the exact | - | | | Git changeset it was built from. Used for | - | | | diagnostic purposes. | - +---------------------------+---------+----------------------------------------------------+ - | cluster-infrastructure | | .. index:: | - | | | pair: cluster option; cluster-infrastructure | - | | | | - | | | The messaging stack on which Pacemaker is | - | | | currently running. Determined automatically by | - | | | the cluster. Used for informational and | - | | | diagnostic purposes. | - +---------------------------+---------+----------------------------------------------------+ - | no-quorum-policy | stop | .. index:: | - | | | pair: cluster option; no-quorum-policy | - | | | | - | | | What to do when the cluster does not have | - | | | quorum. Allowed values: | - | | | | - | | | * ``ignore:`` continue all resource management | - | | | * ``freeze:`` continue resource management, but | - | | | don't recover resources from nodes not in the | - | | | affected partition | - | | | * ``stop:`` stop all resources in the affected | - | | | cluster partition | - | | | * ``demote:`` demote promotable resources and | - | | | stop all other resources in the affected | - | | | cluster partition *(since 2.0.5)* | - | | | * ``suicide:`` fence all nodes in the affected | - | | | cluster partition | - +---------------------------+---------+----------------------------------------------------+ - | batch-limit | 0 | .. index:: | - | | | pair: cluster option; batch-limit | - | | | | - | | | The maximum number of actions that the cluster | - | | | may execute in parallel across all nodes. 
The | - | | | "correct" value will depend on the speed and | - | | | load of your network and cluster nodes. If zero, | - | | | the cluster will impose a dynamically calculated | - | | | limit only when any node has high load. If -1, the | - | | | cluster will not impose any limit. | - +---------------------------+---------+----------------------------------------------------+ - | migration-limit | -1 | .. index:: | - | | | pair: cluster option; migration-limit | - | | | | - | | | The number of | - | | | :ref:`live migration <live-migration>` actions | - | | | that the cluster is allowed to execute in | - | | | parallel on a node. A value of -1 means | - | | | unlimited. | - +---------------------------+---------+----------------------------------------------------+ - | symmetric-cluster | true | .. index:: | - | | | pair: cluster option; symmetric-cluster | - | | | | - | | | Whether resources can run on any node by default | - | | | (if false, a resource is allowed to run on a | - | | | node only if a | - | | | :ref:`location constraint <location-constraint>` | - | | | enables it) | - +---------------------------+---------+----------------------------------------------------+ - | stop-all-resources | false | .. index:: | - | | | pair: cluster option; stop-all-resources | - | | | | - | | | Whether all resources should be disallowed from | - | | | running (can be useful during maintenance) | - +---------------------------+---------+----------------------------------------------------+ - | stop-orphan-resources | true | .. index:: | - | | | pair: cluster option; stop-orphan-resources | - | | | | - | | | Whether resources that have been deleted from | - | | | the configuration should be stopped. 
This value | - | | | takes precedence over ``is-managed`` (that is, | - | | | even unmanaged resources will be stopped when | - | | | orphaned if this value is ``true``) | - +---------------------------+---------+----------------------------------------------------+ - | stop-orphan-actions | true | .. index:: | - | | | pair: cluster option; stop-orphan-actions | - | | | | - | | | Whether recurring :ref:`operations <operation>` | - | | | that have been deleted from the configuration | - | | | should be cancelled | - +---------------------------+---------+----------------------------------------------------+ - | start-failure-is-fatal | true | .. index:: | - | | | pair: cluster option; start-failure-is-fatal | - | | | | - | | | Whether a failure to start a resource on a | - | | | particular node prevents further start attempts | - | | | on that node. If ``false``, the cluster will | - | | | decide whether the node is still eligible based | - | | | on the resource's current failure count and | - | | | :ref:`migration-threshold <failure-handling>`. | - +---------------------------+---------+----------------------------------------------------+ - | enable-startup-probes | true | .. index:: | - | | | pair: cluster option; enable-startup-probes | - | | | | - | | | Whether the cluster should check the | - | | | pre-existing state of resources when the cluster | - | | | starts | - +---------------------------+---------+----------------------------------------------------+ - | maintenance-mode | false | .. index:: | - | | | pair: cluster option; maintenance-mode | - | | | | - | | | Whether the cluster should refrain from | - | | | monitoring, starting and stopping resources | - +---------------------------+---------+----------------------------------------------------+ - | stonith-enabled | true | .. 
index:: | - | | | pair: cluster option; stonith-enabled | - | | | | - | | | Whether the cluster is allowed to fence nodes | - | | | (for example, failed nodes and nodes with | - | | | resources that can't be stopped). | - | | | | - | | | If true, at least one fence device must be | - | | | configured before resources are allowed to run. | - | | | | - | | | If false, unresponsive nodes are immediately | - | | | assumed to be running no resources, and resource | - | | | recovery on online nodes starts without any | - | | | further protection (which can mean *data loss* | - | | | if the unresponsive node still accesses shared | - | | | storage, for example). See also the | - | | | :ref:`requires <requires>` resource | - | | | meta-attribute. | - +---------------------------+---------+----------------------------------------------------+ - | stonith-action | reboot | .. index:: | - | | | pair: cluster option; stonith-action | - | | | | - | | | Action the cluster should send to the fence agent | - | | | when a node must be fenced. Allowed values are | - | | | ``reboot``, ``off``, and (for legacy agents only) | - | | | ``poweroff``. | - +---------------------------+---------+----------------------------------------------------+ - | stonith-timeout | 60s | .. index:: | - | | | pair: cluster option; stonith-timeout | - | | | | - | | | How long to wait for ``on``, ``off``, and | - | | | ``reboot`` fence actions to complete by default. | - +---------------------------+---------+----------------------------------------------------+ - | stonith-max-attempts | 10 | .. index:: | - | | | pair: cluster option; stonith-max-attempts | - | | | | - | | | How many times fencing can fail for a target | - | | | before the cluster will no longer immediately | - | | | re-attempt it. | - +---------------------------+---------+----------------------------------------------------+ - | stonith-watchdog-timeout | 0 | .. 
index:: | - | | | pair: cluster option; stonith-watchdog-timeout | - | | | | - | | | If nonzero, and the cluster detects | - | | | ``have-watchdog`` as ``true``, then watchdog-based | - | | | self-fencing will be performed via SBD when | - | | | fencing is required, without requiring a fencing | - | | | resource explicitly configured. | - | | | | - | | | If this is set to a positive value, unseen nodes | - | | | are assumed to self-fence within this much time. | - | | | | - | | | **Warning:** It must be ensured that this value is | - | | | larger than the ``SBD_WATCHDOG_TIMEOUT`` | - | | | environment variable on all nodes. Pacemaker | - | | | verifies the settings individually on all nodes | - | | | and prevents startup or shuts down if configured | - | | | wrongly on the fly. It is strongly recommended | - | | | that ``SBD_WATCHDOG_TIMEOUT`` be set to the same | - | | | value on all nodes. | - | | | | - | | | If this is set to a negative value, and | - | | | ``SBD_WATCHDOG_TIMEOUT`` is set, twice that value | - | | | will be used. | - | | | | - | | | **Warning:** In this case, it is essential (and | - | | | currently not verified by pacemaker) that | - | | | ``SBD_WATCHDOG_TIMEOUT`` is set to the same | - | | | value on all nodes. | - +---------------------------+---------+----------------------------------------------------+ - | concurrent-fencing | false | .. index:: | - | | | pair: cluster option; concurrent-fencing | - | | | | - | | | Whether the cluster is allowed to initiate | - | | | multiple fence actions concurrently. Fence actions | - | | | initiated externally, such as via the | - | | | ``stonith_admin`` tool or an application such as | - | | | DLM, or by the fencer itself such as recurring | - | | | device monitors and ``status`` and ``list`` | - | | | commands, are not limited by this option. | - +---------------------------+---------+----------------------------------------------------+ - | fence-reaction | stop | .. 
index:: | - | | | pair: cluster option; fence-reaction | - | | | | - | | | How should a cluster node react if notified of its | - | | | own fencing? A cluster node may receive | - | | | notification of its own fencing if fencing is | - | | | misconfigured, or if fabric fencing is in use that | - | | | doesn't cut cluster communication. Allowed values | - | | | are ``stop`` to attempt to immediately stop | - | | | pacemaker and stay stopped, or ``panic`` to | - | | | attempt to immediately reboot the local node, | - | | | falling back to stop on failure. The default is | - | | | likely to be changed to ``panic`` in a future | - | | | release. *(since 2.0.3)* | - +---------------------------+---------+----------------------------------------------------+ - | priority-fencing-delay | 0 | .. index:: | - | | | pair: cluster option; priority-fencing-delay | - | | | | - | | | Apply this delay to any fencing targeting the lost | - | | | nodes with the highest total resource priority in | - | | | case we don't have the majority of the nodes in | - | | | our cluster partition, so that the more | - | | | significant nodes potentially win any fencing | - | | | match (especially meaningful in a split-brain of a | - | | | 2-node cluster). A promoted resource instance | - | | | takes the resource's priority plus 1 if the | - | | | resource's priority is not 0. Any static or random | - | | | delays introduced by ``pcmk_delay_base`` and | - | | | ``pcmk_delay_max`` configured for the | - | | | corresponding fencing resources will be added to | - | | | this delay. This delay should be significantly | - | | | greater than (safely twice) the maximum delay from | - | | | those parameters. *(since 2.0.4)* | - +---------------------------+---------+----------------------------------------------------+ - | cluster-delay | 60s | .. 
index:: | - | | | pair: cluster option; cluster-delay | - | | | | - | | | Estimated maximum round-trip delay over the | - | | | network (excluding action execution). If the DC | - | | | requires an action to be executed on another node, | - | | | it will consider the action failed if it does not | - | | | get a response from the other node in this time | - | | | (after considering the action's own timeout). The | - | | | "correct" value will depend on the speed and load | - | | | of your network and cluster nodes. | - +---------------------------+---------+----------------------------------------------------+ - | dc-deadtime | 20s | .. index:: | - | | | pair: cluster option; dc-deadtime | - | | | | - | | | How long to wait for a response from other nodes | - | | | during startup. The "correct" value will depend on | - | | | the speed/load of your network and the type of | - | | | switches used. | - +---------------------------+---------+----------------------------------------------------+ - | cluster-ipc-limit | 500 | .. index:: | - | | | pair: cluster option; cluster-ipc-limit | - | | | | - | | | The maximum IPC message backlog before one cluster | - | | | daemon will disconnect another. This is of use in | - | | | large clusters, for which a good value is the | - | | | number of resources in the cluster multiplied by | - | | | the number of nodes. The default of 500 is also | - | | | the minimum. Raise this if you see | - | | | "Evicting client" messages for cluster daemon PIDs | - | | | in the logs. | - +---------------------------+---------+----------------------------------------------------+ - | pe-error-series-max | -1 | .. index:: | - | | | pair: cluster option; pe-error-series-max | - | | | | - | | | The number of scheduler inputs resulting in errors | - | | | to save. Used when reporting problems. A value of | - | | | -1 means unlimited (report all), and 0 means none. 
| - +---------------------------+---------+----------------------------------------------------+ - | pe-warn-series-max | 5000 | .. index:: | - | | | pair: cluster option; pe-warn-series-max | - | | | | - | | | The number of scheduler inputs resulting in | - | | | warnings to save. Used when reporting problems. A | - | | | value of -1 means unlimited (report all), and 0 | - | | | means none. | - +---------------------------+---------+----------------------------------------------------+ - | pe-input-series-max | 4000 | .. index:: | - | | | pair: cluster option; pe-input-series-max | - | | | | - | | | The number of "normal" scheduler inputs to save. | - | | | Used when reporting problems. A value of -1 means | - | | | unlimited (report all), and 0 means none. | - +---------------------------+---------+----------------------------------------------------+ - | enable-acl | false | .. index:: | - | | | pair: cluster option; enable-acl | - | | | | - | | | Whether :ref:`acl` should be used to authorize | - | | | modifications to the CIB | - +---------------------------+---------+----------------------------------------------------+ - | placement-strategy | default | .. index:: | - | | | pair: cluster option; placement-strategy | - | | | | - | | | How the cluster should allocate resources to nodes | - | | | (see :ref:`utilization`). Allowed values are | - | | | ``default``, ``utilization``, ``balanced``, and | - | | | ``minimal``. | - +---------------------------+---------+----------------------------------------------------+ - | node-health-strategy | none | .. index:: | - | | | pair: cluster option; node-health-strategy | - | | | | - | | | How the cluster should react to node health | - | | | attributes (see :ref:`node-health`). Allowed values| - | | | are ``none``, ``migrate-on-red``, ``only-green``, | - | | | ``progressive``, and ``custom``. | - +---------------------------+---------+----------------------------------------------------+ - | node-health-base | 0 | .. 
index:: | - | | | pair: cluster option; node-health-base | - | | | | - | | | The base health score assigned to a node. Only | - | | | used when ``node-health-strategy`` is | - | | | ``progressive``. | - +---------------------------+---------+----------------------------------------------------+ - | node-health-green | 0 | .. index:: | - | | | pair: cluster option; node-health-green | - | | | | - | | | The score to use for a node health attribute whose | - | | | value is ``green``. Only used when | - | | | ``node-health-strategy`` is ``progressive`` or | - | | | ``custom``. | - +---------------------------+---------+----------------------------------------------------+ - | node-health-yellow | 0 | .. index:: | - | | | pair: cluster option; node-health-yellow | - | | | | - | | | The score to use for a node health attribute whose | - | | | value is ``yellow``. Only used when | - | | | ``node-health-strategy`` is ``progressive`` or | - | | | ``custom``. | - +---------------------------+---------+----------------------------------------------------+ - | node-health-red | 0 | .. index:: | - | | | pair: cluster option; node-health-red | - | | | | - | | | The score to use for a node health attribute whose | - | | | value is ``red``. Only used when | - | | | ``node-health-strategy`` is ``progressive`` or | - | | | ``custom``. | - +---------------------------+---------+----------------------------------------------------+ - | cluster-recheck-interval | 15min | .. index:: | - | | | pair: cluster option; cluster-recheck-interval | - | | | | - | | | Pacemaker is primarily event-driven, and looks | - | | | ahead to know when to recheck the cluster for | - | | | failure timeouts and most time-based rules | - | | | *(since 2.0.3)*. However, it will also recheck the | - | | | cluster after this amount of inactivity. 
This has | - | | | two goals: rules with ``date_spec`` are only | - | | | guaranteed to be checked this often, and it also | - | | | serves as a fail-safe for some kinds of scheduler | - | | | bugs. A value of 0 disables this polling; positive | - | | | values are a time interval. | - +---------------------------+---------+----------------------------------------------------+ - | shutdown-lock | false | .. index:: | - | | | pair: cluster option; shutdown-lock | - | | | | - | | | The default of false allows active resources to be | - | | | recovered elsewhere when their node is cleanly | - | | | shut down, which is what the vast majority of | - | | | users will want. However, some users prefer to | - | | | make resources highly available only for failures, | - | | | with no recovery for clean shutdowns. If this | - | | | option is true, resources active on a node when it | - | | | is cleanly shut down are kept "locked" to that | - | | | node (not allowed to run elsewhere) until they | - | | | start again on that node after it rejoins (or for | - | | | at most ``shutdown-lock-limit``, if set). Stonith | - | | | resources and Pacemaker Remote connections are | - | | | never locked. Clone and bundle instances and the | - | | | promoted role of promotable clones are currently | - | | | never locked, though support could be added in a | - | | | future release. Locks may be manually cleared | - | | | using the ``--refresh`` option of ``crm_resource`` | - | | | (both the resource and node must be specified; | - | | | this works with remote nodes if their connection | - | | | resource's ``target-role`` is set to ``Stopped``, | - | | | but not if Pacemaker Remote is stopped on the | - | | | remote node without disabling the connection | - | | | resource). *(since 2.0.4)* | - +---------------------------+---------+----------------------------------------------------+ - | shutdown-lock-limit | 0 | .. 
index:: | - | | | pair: cluster option; shutdown-lock-limit | - | | | | - | | | If ``shutdown-lock`` is true, and this is set to a | - | | | nonzero time duration, locked resources will be | - | | | allowed to start after this much time has passed | - | | | since the node shutdown was initiated, even if the | - | | | node has not rejoined. (This works with remote | - | | | nodes only if their connection resource's | - | | | ``target-role`` is set to ``Stopped``.) | - | | | *(since 2.0.4)* | - +---------------------------+---------+----------------------------------------------------+ - | remove-after-stop | false | .. index:: | - | | | pair: cluster option; remove-after-stop | - | | | | - | | | *Deprecated* Should the cluster remove | - | | | resources from Pacemaker's executor after they are | - | | | stopped? Values other than the default are, at | - | | | best, poorly tested and potentially dangerous. | - | | | This option is deprecated and will be removed in a | - | | | future release. | - +---------------------------+---------+----------------------------------------------------+ - | startup-fencing | true | .. index:: | - | | | pair: cluster option; startup-fencing | - | | | | - | | | *Advanced Use Only:* Should the cluster fence | - | | | unseen nodes at start-up? Setting this to false is | - | | | unsafe, because the unseen nodes could be active | - | | | and running resources but unreachable. | - +---------------------------+---------+----------------------------------------------------+ - | election-timeout | 2min | .. index:: | - | | | pair: cluster option; election-timeout | - | | | | - | | | *Advanced Use Only:* If you need to adjust this | - | | | value, it probably indicates the presence of a bug.| - +---------------------------+---------+----------------------------------------------------+ - | shutdown-escalation | 20min | .. 
index:: | - | | | pair: cluster option; shutdown-escalation | - | | | | - | | | *Advanced Use Only:* If you need to adjust this | - | | | value, it probably indicates the presence of a bug.| - +---------------------------+---------+----------------------------------------------------+ - | join-integration-timeout | 3min | .. index:: | - | | | pair: cluster option; join-integration-timeout | - | | | | - | | | *Advanced Use Only:* If you need to adjust this | - | | | value, it probably indicates the presence of a bug.| - +---------------------------+---------+----------------------------------------------------+ - | join-finalization-timeout | 30min | .. index:: | - | | | pair: cluster option; join-finalization-timeout | - | | | | - | | | *Advanced Use Only:* If you need to adjust this | - | | | value, it probably indicates the presence of a bug.| - +---------------------------+---------+----------------------------------------------------+ - | transition-delay | 0s | .. index:: | - | | | pair: cluster option; transition-delay | - | | | | - | | | *Advanced Use Only:* Delay cluster recovery for | - | | | the configured interval to allow for additional or | - | | | related events to occur. This can be useful if | - | | | your configuration is sensitive to the order in | - | | | which ping updates arrive. Enabling this option | - | | | will slow down cluster recovery under all | - | | | conditions. 
| - +---------------------------+---------+----------------------------------------------------+ diff --git a/doc/sphinx/Pacemaker_Explained/resources.rst b/doc/sphinx/Pacemaker_Explained/resources.rst index 3b7520f..a971c44 100644 --- a/doc/sphinx/Pacemaker_Explained/resources.rst +++ b/doc/sphinx/Pacemaker_Explained/resources.rst @@ -362,8 +362,8 @@ behave and can be easily set using the ``--meta`` option of the | | | all :ref:`colocation constraints | | | | <s-resource-colocation>` involving this resource, | | | | as well as the implicit colocation constraints | - | | | created if this resource is in a :ref:`group | - | | | <group-resources>`. For details, see | + | | | created if this resource is in a | + | | | :ref:`group <group-resources>`. For details, see | | | | :ref:`s-coloc-influence`. *(since 2.1.0)* | +----------------------------+----------------------------------+------------------------------------------------------+ | target-role | Started | .. index:: | @@ -375,31 +375,39 @@ behave and can be easily set using the ``--meta`` option of the | | | | | | | * ``Stopped:`` Force the resource to be stopped | | | | * ``Started:`` Allow the resource to be started | - | | | (and in the case of :ref:`promotable clone | - | | | resources <s-resource-promotable>`, promoted | - | | | if appropriate) | + | | | (and in the case of | + | | | :ref:`promotable <s-resource-promotable>` clone | + | | | resources, promoted if appropriate) | | | | * ``Unpromoted:`` Allow the resource to be started, | | | | but only in the unpromoted role if the resource is | | | | :ref:`promotable <s-resource-promotable>` | | | | * ``Promoted:`` Equivalent to ``Started`` | +----------------------------+----------------------------------+------------------------------------------------------+ - | is-managed | TRUE | .. index:: | + | is-managed | TRUE | .. _is_managed: | + | | | | + | | | .. 
index:: | | | | single: is-managed; resource option | | | | single: resource; option, is-managed | | | | | - | | | Is the cluster allowed to start and stop | - | | | the resource? Allowed values: ``true``, ``false`` | + | | | If false, the cluster will not start or stop the | + | | | resource on any node. Recurring actions for the | + | | | resource are unaffected. Maintenance mode overrides | + | | | this setting. Allowed values: ``true``, ``false`` | +----------------------------+----------------------------------+------------------------------------------------------+ - | maintenance | FALSE | .. index:: | + | maintenance | FALSE | .. _rsc_maintenance: | + | | | | + | | | .. index:: | | | | single: maintenance; resource option | | | | single: resource; option, maintenance | | | | | - | | | Similar to the ``maintenance-mode`` | - | | | :ref:`cluster option <cluster_options>`, but for | - | | | a single resource. If true, the resource will not | - | | | be started, stopped, or monitored on any node. This | - | | | differs from ``is-managed`` in that monitors will | - | | | not be run. Allowed values: ``true``, ``false`` | + | | | If true, the cluster will not start or stop the | + | | | resource on any node, and will pause any recurring | + | | | monitors (except those specifying ``role`` as | + | | | ``Stopped``). If true, the | + | | | :ref:`maintenance-mode <maintenance_mode>` cluster | + | | | option or :ref:`maintenance <node_maintenance>` | + | | | node attribute override this. Allowed values: | + | | | ``true``, ``false`` | +----------------------------+----------------------------------+------------------------------------------------------+ | resource-stickiness | 1 for individual clone | .. _resource-stickiness: | | | instances, 0 for all | | @@ -686,389 +694,3 @@ attributes, their purpose and default values. <action name="meta-data" timeout="5s" /> </actions> </resource-agent> - -.. index:: - single: resource; action - single: resource; operation - -.. 
_operation: - -Resource Operations -################### - -*Operations* are actions the cluster can perform on a resource by calling the -resource agent. Resource agents must support certain common operations such as -start, stop, and monitor, and may implement any others. - -Operations may be explicitly configured for two purposes: to override defaults -for options (such as timeout) that the cluster will use whenever it initiates -the operation, and to run an operation on a recurring basis (for example, to -monitor the resource for failure). - -.. topic:: An OCF resource with a non-default start timeout - - .. code-block:: xml - - <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> - <operations> - <op id="Public-IP-start" name="start" timeout="60s"/> - </operations> - <instance_attributes id="params-public-ip"> - <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> - </instance_attributes> - </primitive> - -Pacemaker identifies operations by a combination of name and interval, so this -combination must be unique for each resource. That is, you should not configure -two operations for the same resource with the same name and interval. - -.. _operation_properties: - -Operation Properties -____________________ - -Operation properties may be specified directly in the ``op`` element as -XML attributes, or in a separate ``meta_attributes`` block as ``nvpair`` elements. -XML attributes take precedence over ``nvpair`` elements if both are specified. - -.. table:: **Properties of an Operation** - :class: longtable - :widths: 1 2 3 - - +----------------+-----------------------------------+-----------------------------------------------------+ - | Field | Default | Description | - +================+===================================+=====================================================+ - | id | | .. index:: | - | | | single: id; action property | - | | | single: action; property, id | - | | | | - | | | A unique name for the operation. 
| - +----------------+-----------------------------------+-----------------------------------------------------+ - | name | | .. index:: | - | | | single: name; action property | - | | | single: action; property, name | - | | | | - | | | The action to perform. This can be any action | - | | | supported by the agent; common values include | - | | | ``monitor``, ``start``, and ``stop``. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | interval | 0 | .. index:: | - | | | single: interval; action property | - | | | single: action; property, interval | - | | | | - | | | How frequently (in seconds) to perform the | - | | | operation. A value of 0 means "when needed". | - | | | A positive value defines a *recurring action*, | - | | | which is typically used with | - | | | :ref:`monitor <s-resource-monitoring>`. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | timeout | | .. index:: | - | | | single: timeout; action property | - | | | single: action; property, timeout | - | | | | - | | | How long to wait before declaring the action | - | | | has failed | - +----------------+-----------------------------------+-----------------------------------------------------+ - | on-fail | Varies by action: | .. index:: | - | | | single: on-fail; action property | - | | * ``stop``: ``fence`` if | single: action; property, on-fail | - | | ``stonith-enabled`` is true | | - | | or ``block`` otherwise | The action to take if this action ever fails. | - | | * ``demote``: ``on-fail`` of the | Allowed values: | - | | ``monitor`` action with | | - | | ``role`` set to ``Promoted``, | * ``ignore:`` Pretend the resource did not fail. | - | | if present, enabled, and | * ``block:`` Don't perform any further operations | - | | configured to a value other | on the resource. 
| - | | than ``demote``, or ``restart`` | * ``stop:`` Stop the resource and do not start | - | | otherwise | it elsewhere. | - | | * all other actions: ``restart`` | * ``demote:`` Demote the resource, without a | - | | | full restart. This is valid only for ``promote`` | - | | | actions, and for ``monitor`` actions with both | - | | | a nonzero ``interval`` and ``role`` set to | - | | | ``Promoted``; for any other action, a | - | | | configuration error will be logged, and the | - | | | default behavior will be used. *(since 2.0.5)* | - | | | * ``restart:`` Stop the resource and start it | - | | | again (possibly on a different node). | - | | | * ``fence:`` STONITH the node on which the | - | | | resource failed. | - | | | * ``standby:`` Move *all* resources away from the | - | | | node on which the resource failed. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | enabled | TRUE | .. index:: | - | | | single: enabled; action property | - | | | single: action; property, enabled | - | | | | - | | | If ``false``, ignore this operation definition. | - | | | This is typically used to pause a particular | - | | | recurring ``monitor`` operation; for instance, it | - | | | can complement the respective resource being | - | | | unmanaged (``is-managed=false``), as this alone | - | | | will :ref:`not block any configured monitoring | - | | | <s-monitoring-unmanaged>`. Disabling the operation | - | | | does not suppress all actions of the given type. | - | | | Allowed values: ``true``, ``false``. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | record-pending | TRUE | .. 
index:: | - | | | single: record-pending; action property | - | | | single: action; property, record-pending | - | | | | - | | | If ``true``, the intention to perform the operation | - | | | is recorded so that GUIs and CLI tools can indicate | - | | | that an operation is in progress. This is best set | - | | | as an *operation default* | - | | | (see :ref:`s-operation-defaults`). Allowed values: | - | | | ``true``, ``false``. | - +----------------+-----------------------------------+-----------------------------------------------------+ - | role | | .. index:: | - | | | single: role; action property | - | | | single: action; property, role | - | | | | - | | | Run the operation only on node(s) that the cluster | - | | | thinks should be in the specified role. This only | - | | | makes sense for recurring ``monitor`` operations. | - | | | Allowed (case-sensitive) values: ``Stopped``, | - | | | ``Started``, and in the case of :ref:`promotable | - | | | clone resources <s-resource-promotable>`, | - | | | ``Unpromoted`` and ``Promoted``. | - +----------------+-----------------------------------+-----------------------------------------------------+ - -.. note:: - - When ``on-fail`` is set to ``demote``, recovery from failure by a successful - demote causes the cluster to recalculate whether and where a new instance - should be promoted. The node with the failure is eligible, so if promotion - scores have not changed, it will be promoted again. - - There is no direct equivalent of ``migration-threshold`` for the promoted - role, but the same effect can be achieved with a location constraint using a - :ref:`rule <rules>` with a node attribute expression for the resource's fail - count. - - For example, to immediately ban the promoted role from a node with any - failed promote or promoted instance monitor: - - .. 
code-block:: xml - - <rsc_location id="loc1" rsc="my_primitive"> - <rule id="rule1" score="-INFINITY" role="Promoted" boolean-op="or"> - <expression id="expr1" attribute="fail-count-my_primitive#promote_0" - operation="gte" value="1"/> - <expression id="expr2" attribute="fail-count-my_primitive#monitor_10000" - operation="gte" value="1"/> - </rule> - </rsc_location> - - This example assumes that there is a promotable clone of the ``my_primitive`` - resource (note that the primitive name, not the clone name, is used in the - rule), and that there is a recurring 10-second-interval monitor configured for - the promoted role (fail count attributes specify the interval in - milliseconds). - -.. _s-resource-monitoring: - -Monitoring Resources for Failure -________________________________ - -When Pacemaker first starts a resource, it runs one-time ``monitor`` operations -(referred to as *probes*) to ensure the resource is running where it's -supposed to be, and not running where it's not supposed to be. (This behavior -can be affected by the ``resource-discovery`` location constraint property.) - -Other than those initial probes, Pacemaker will *not* (by default) check that -the resource continues to stay healthy [#]_. You must configure ``monitor`` -operations explicitly to perform these checks. - -.. topic:: An OCF resource with a recurring health check - - .. code-block:: xml - - <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> - <operations> - <op id="Public-IP-start" name="start" timeout="60s"/> - <op id="Public-IP-monitor" name="monitor" interval="60s"/> - </operations> - <instance_attributes id="params-public-ip"> - <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> - </instance_attributes> - </primitive> - -By default, a ``monitor`` operation will ensure that the resource is running -where it is supposed to. The ``target-role`` property can be used for further -checking. 
- -For example, if a resource has one ``monitor`` operation with -``interval=10 role=Started`` and a second ``monitor`` operation with -``interval=11 role=Stopped``, the cluster will run the first monitor on any nodes -it thinks *should* be running the resource, and the second monitor on any nodes -that it thinks *should not* be running the resource (for the truly paranoid, -who want to know when an administrator manually starts a service by mistake). - -.. note:: - - Currently, monitors with ``role=Stopped`` are not implemented for - :ref:`clone <s-resource-clone>` resources. - -.. _s-monitoring-unmanaged: - -Monitoring Resources When Administration is Disabled -____________________________________________________ - -Recurring ``monitor`` operations behave differently under various administrative -settings: - -* When a resource is unmanaged (by setting ``is-managed=false``): No monitors - will be stopped. - - If the unmanaged resource is stopped on a node where the cluster thinks it - should be running, the cluster will detect and report that it is not, but it - will not consider the monitor failed, and will not try to start the resource - until it is managed again. - - Starting the unmanaged resource on a different node is strongly discouraged - and will at least cause the cluster to consider the resource failed, and - may require the resource's ``target-role`` to be set to ``Stopped`` then - ``Started`` to be recovered. - -* When a resource is put into maintenance mode (by setting - ``maintenance=true``): The resource will be marked as unmanaged. (This - overrides ``is-managed=true``.) - - Additionally, all monitor operations will be stopped, except those specifying - ``role`` as ``Stopped`` (which will be newly initiated if appropriate). As - with unmanaged resources in general, starting a resource on a node other than - where the cluster expects it to be will cause problems. 
- -* When a node is put into standby: All resources will be moved away from the - node, and all ``monitor`` operations will be stopped on the node, except those - specifying ``role`` as ``Stopped`` (which will be newly initiated if - appropriate). - -* When a node is put into maintenance mode: All resources that are active on the - node will be marked as in maintenance mode. See above for more details. - -* When the cluster is put into maintenance mode: All resources in the cluster - will be marked as in maintenance mode. See above for more details. - -A resource is in maintenance mode if the cluster, the node where the resource -is active, or the resource itself is configured to be in maintenance mode. If a -resource is in maintenance mode, then it is also unmanaged. However, if a -resource is unmanaged, it is not necessarily in maintenance mode. - -.. _s-operation-defaults: - -Setting Global Defaults for Operations -______________________________________ - -You can change the global default values for operation properties -in a given cluster. These are defined in an ``op_defaults`` section -of the CIB's ``configuration`` section, and can be set with -``crm_attribute``. For example, - -.. code-block:: none - - # crm_attribute --type op_defaults --name timeout --update 20s - -would default each operation's ``timeout`` to 20 seconds. If an -operation's definition also includes a value for ``timeout``, then that -value would be used for that operation instead. - -When Implicit Operations Take a Long Time -_________________________________________ - -The cluster will always perform a number of implicit operations: ``start``, -``stop`` and a non-recurring ``monitor`` operation used at startup to check -whether the resource is already active. If one of these is taking too long, -then you can create an entry for them and specify a longer timeout. - -.. topic:: An OCF resource with custom timeouts for its implicit actions - - .. 
code-block:: xml - - <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> - <operations> - <op id="public-ip-startup" name="monitor" interval="0" timeout="90s"/> - <op id="public-ip-start" name="start" interval="0" timeout="180s"/> - <op id="public-ip-stop" name="stop" interval="0" timeout="15min"/> - </operations> - <instance_attributes id="params-public-ip"> - <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> - </instance_attributes> - </primitive> - -Multiple Monitor Operations -___________________________ - -Provided no two operations (for a single resource) have the same name -and interval, you can have as many ``monitor`` operations as you like. -In this way, you can do a superficial health check every minute and -progressively more intense ones at higher intervals. - -To tell the resource agent what kind of check to perform, you need to -provide each monitor with a different value for a common parameter. -The OCF standard creates a special parameter called ``OCF_CHECK_LEVEL`` -for this purpose and dictates that it is "made available to the -resource agent without the normal ``OCF_RESKEY`` prefix". - -Whatever name you choose, you can specify it by adding an -``instance_attributes`` block to the ``op`` tag. It is up to each -resource agent to look for the parameter and decide how to use it. - -.. topic:: An OCF resource with two recurring health checks, performing - different levels of checks specified via ``OCF_CHECK_LEVEL``. - - .. 
code-block:: xml - - <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> - <operations> - <op id="public-ip-health-60" name="monitor" interval="60"> - <instance_attributes id="params-public-ip-depth-60"> - <nvpair id="public-ip-depth-60" name="OCF_CHECK_LEVEL" value="10"/> - </instance_attributes> - </op> - <op id="public-ip-health-300" name="monitor" interval="300"> - <instance_attributes id="params-public-ip-depth-300"> - <nvpair id="public-ip-depth-300" name="OCF_CHECK_LEVEL" value="20"/> - </instance_attributes> - </op> - </operations> - <instance_attributes id="params-public-ip"> - <nvpair id="public-ip-level" name="ip" value="192.0.2.2"/> - </instance_attributes> - </primitive> - -Disabling a Monitor Operation -_____________________________ - -The easiest way to stop a recurring monitor is to just delete it. -However, there can be times when you only want to disable it -temporarily. In such cases, simply add ``enabled=false`` to the -operation's definition. - -.. topic:: Example of an OCF resource with a disabled health check - - .. code-block:: xml - - <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"> - <operations> - <op id="public-ip-check" name="monitor" interval="60s" enabled="false"/> - </operations> - <instance_attributes id="params-public-ip"> - <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/> - </instance_attributes> - </primitive> - -This can be achieved from the command line by executing: - -.. code-block:: none - - # cibadmin --modify --xml-text '<op id="public-ip-check" enabled="false"/>' - -Once you've done whatever you needed to do, you can then re-enable it with - -.. code-block:: none - - # cibadmin --modify --xml-text '<op id="public-ip-check" enabled="true"/>' - -.. [#] Currently, anyway. Automatic monitoring operations may be added in a future - version of Pacemaker. 
diff --git a/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst b/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst index 0f34f84..06c00f0 100644 --- a/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst +++ b/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst @@ -330,6 +330,11 @@ resources. A single configuration element can be listed in any number of tags. +.. important:: + + If listing nodes in a tag, you must list the node's ``id``, not name. + + Using Tags in Constraints and Resource Sets ___________________________________________ diff --git a/doc/sphinx/Pacemaker_Explained/status.rst b/doc/sphinx/Pacemaker_Explained/status.rst index 2d7dd7e..6384eda 100644 --- a/doc/sphinx/Pacemaker_Explained/status.rst +++ b/doc/sphinx/Pacemaker_Explained/status.rst @@ -33,7 +33,7 @@ Users are highly recommended *not* to modify any part of a node's state *directly*. The cluster will periodically regenerate the entire section from authoritative sources, so any changes should be done with the tools appropriate to those sources. - + .. table:: **Authoritative Sources for State Information** :widths: 1 1 @@ -48,9 +48,7 @@ with the tools appropriate to those sources. +----------------------+----------------------+ The fields used in the ``node_state`` objects are named as they are -largely for historical reasons and are rooted in Pacemaker's origins -as the resource manager for the older Heartbeat project. They have remained -unchanged to preserve compatibility with older versions. +largely for historical reasons, to maintain compatibility with older versions. .. table:: **Node Status Fields** :widths: 1 3 @@ -147,8 +145,8 @@ all known resources have been checked for on this machine (``probe_complete``). Operation History ################# -A node's resource history is held in the ``lrm_resources`` tag (a child -of the ``lrm`` tag). 
The information stored here includes enough +A node's resource history is held in the ``lrm_resources`` element (a child +of the ``lrm`` element). The information stored here includes enough information for the cluster to stop the resource safely if it is removed from the ``configuration`` section. Specifically, the resource's ``id``, ``class``, ``type`` and ``provider`` are stored. @@ -159,11 +157,9 @@ removed from the ``configuration`` section. Specifically, the resource's <lrm_resource id="apcstonith" type="fence_apc_snmp" class="stonith"/> -Additionally, we store the last job for every combination of -``resource``, ``action`` and ``interval``. The concatenation of the values in -this tuple are used to create the id of the ``lrm_rsc_op`` object. +Additionally, we store history entries for certain actions. -.. table:: **Contents of an lrm_rsc_op job** +.. table:: **Attributes of an lrm_rsc_op element** :class: longtable :widths: 1 3 @@ -174,78 +170,78 @@ this tuple are used to create the id of the ``lrm_rsc_op`` object. | | single: id; action status | | | single: action; status, id | | | | - | | Identifier for the job constructed from the resource's | - | | ``operation`` and ``interval``. | + | | Identifier for the history entry constructed from the | + | | resource ID, action name, and operation interval. | +------------------+----------------------------------------------------------+ | call-id | .. index:: | | | single: call-id; action status | | | single: action; status, call-id | | | | - | | The job's ticket number. Used as a sort key to determine | - | | the order in which the jobs were executed. | + | | A node-specific counter used to determine the order in | + | | which actions were executed. | +------------------+----------------------------------------------------------+ | operation | .. index:: | | | single: operation; action status | | | single: action; status, operation | | | | - | | The action the resource agent was invoked with. 
| + | | The action name the resource agent was invoked with. | +------------------+----------------------------------------------------------+ | interval | .. index:: | | | single: interval; action status | | | single: action; status, interval | | | | | | The frequency, in milliseconds, at which the operation | - | | will be repeated. A one-off job is indicated by 0. | + | | will be repeated. One-time execution is indicated by 0. | +------------------+----------------------------------------------------------+ | op-status | .. index:: | | | single: op-status; action status | | | single: action; status, op-status | | | | - | | The job's status. Generally this will be either 0 (done) | - | | or -1 (pending). Rarely used in favor of ``rc-code``. | + | | The execution status of this action. The meanings of | + | | these codes are internal to Pacemaker. | +------------------+----------------------------------------------------------+ | rc-code | .. index:: | | | single: rc-code; action status | | | single: action; status, rc-code | | | | - | | The job's result. Refer to the *Resource Agents* chapter | - | | of *Pacemaker Administration* for details on what the | - | | values here mean and how they are interpreted. | + | | The resource agent's exit status for this action. Refer | + | | to the *Resource Agents* chapter of | + | | *Pacemaker Administration* for how these values are | + | | interpreted. | +------------------+----------------------------------------------------------+ | last-rc-change | .. index:: | | | single: last-rc-change; action status | | | single: action; status, last-rc-change | | | | | | Machine-local date/time, in seconds since epoch, at | - | | which the job first returned the current value of | + | | which the action first returned the current value of | | | ``rc-code``. For diagnostic purposes. | +------------------+----------------------------------------------------------+ | exec-time | .. 
index:: | | | single: exec-time; action status | | | single: action; status, exec-time | | | | - | | Time, in milliseconds, that the job was running for. | + | | Time, in milliseconds, that the action was running for. | | | For diagnostic purposes. | +------------------+----------------------------------------------------------+ | queue-time | .. index:: | | | single: queue-time; action status | | | single: action; status, queue-time | | | | - | | Time, in seconds, that the job was queued for in the | + | | Time, in seconds, that the action was queued for in the | | | local executor. For diagnostic purposes. | +------------------+----------------------------------------------------------+ | crm_feature_set | .. index:: | | | single: crm_feature_set; action status | | | single: action; status, crm_feature_set | | | | - | | The version which this job description conforms to. Used | - | | when processing ``op-digest``. | + | | The Pacemaker feature set used to record this entry. | +------------------+----------------------------------------------------------+ | transition-key | .. index:: | | | single: transition-key; action status | | | single: action; status, transition-key | | | | - | | A concatenation of the job's graph action number, the | + | | A concatenation of the action's graph action number, the | | | graph number, the expected result and the UUID of the | | | controller instance that scheduled it. This is used to | | | construct ``transition-magic`` (below). | @@ -254,13 +250,13 @@ this tuple are used to create the id of the ``lrm_rsc_op`` object. | | single: transition-magic; action status | | | single: action; status, transition-magic | | | | - | | A concatenation of the job's ``op-status``, ``rc-code`` | + | | A concatenation of ``op-status``, ``rc-code`` | | | and ``transition-key``. 
Guaranteed to be unique for the | | | life of the cluster (which ensures it is part of CIB | | | update notifications) and contains all the information | | | needed for the controller to correctly analyze and | - | | process the completed job. Most importantly, the | - | | decomposed elements tell the controller if the job | + | | process the completed action. Most importantly, the | + | | decomposed elements tell the controller if the history | | | entry was expected and whether it failed. | +------------------+----------------------------------------------------------+ | op-digest | .. index:: | @@ -268,7 +264,7 @@ this tuple are used to create the id of the ``lrm_rsc_op`` object. | | single: action; status, op-digest | | | | | | An MD5 sum representing the parameters passed to the | - | | job. Used to detect changes to the configuration, to | + | | action. Used to detect changes to the configuration, to | | | restart resources if necessary. | +------------------+----------------------------------------------------------+ | crm-debug-origin | .. index:: | @@ -296,7 +292,7 @@ ________________________________ last-rc-change="1239008085" exec-time="10" queue-time="0"/> </lrm_resource> -In the above example, the job is a non-recurring monitor operation +In the above example, the action is a non-recurring monitor operation often referred to as a "probe" for the ``apcstonith`` resource. The cluster schedules probes for every configured resource on a node when @@ -308,16 +304,16 @@ the 2nd graph produced by this instance of the controller (2668bbeb-06d5-40f9-936d-24cb7f87006a). The third field of the ``transition-key`` contains a 7, which indicates -that the job expects to find the resource inactive. By looking at the ``rc-code`` -property, we see that this was the case. +that the cluster expects to find the resource inactive. By looking at the +``rc-code`` property, we see that this was the case. 
-As that is the only job recorded for this node, we can conclude that +As that is the only action recorded for this node, we can conclude that the cluster started the resource elsewhere. Complex Operation History Example _________________________________ -.. topic:: Resource history of a ``pingd`` clone with multiple jobs +.. topic:: Resource history of a ``pingd`` clone with multiple entries .. code-block:: xml @@ -344,7 +340,7 @@ _________________________________ last-rc-change="1239008085" exec-time="20" queue-time="0"/> </lrm_resource> -When more than one job record exists, it is important to first sort +When more than one history entry exists, it is important to first sort them by ``call-id`` before interpreting them. Once sorted, the above example can be summarized as: @@ -354,7 +350,7 @@ Once sorted, the above example can be summarized as: #. A start operation returning 0 (success), with a ``call-id`` of 33 #. A recurring monitor returning 0 (success), with a ``call-id`` of 34 -The cluster processes each job record to build up a picture of the +The cluster processes each history entry to build up a picture of the resource's state. After the first and second entries, it is considered stopped, and after the third it is considered active. diff --git a/doc/sphinx/Pacemaker_Explained/utilization.rst b/doc/sphinx/Pacemaker_Explained/utilization.rst index 93c67cd..87eef60 100644 --- a/doc/sphinx/Pacemaker_Explained/utilization.rst +++ b/doc/sphinx/Pacemaker_Explained/utilization.rst @@ -4,19 +4,19 @@ Utilization and Placement Strategy ---------------------------------- Pacemaker decides where to place a resource according to the resource -allocation scores on every node. The resource will be allocated to the +assignment scores on every node. The resource will be assigned to the node where the resource has the highest score.
-If the resource allocation scores on all the nodes are equal, by the default +If the resource assignment scores on all the nodes are equal, by the default placement strategy, Pacemaker will choose a node with the least number of -allocated resources for balancing the load. If the number of resources on each +assigned resources for balancing the load. If the number of resources on each node is equal, the first eligible node listed in the CIB will be chosen to run the resource. Often, in real-world situations, different resources use significantly different proportions of a node's capacities (memory, I/O, etc.). We cannot balance the load ideally just according to the number of resources -allocated to a node. Besides, if resources are placed such that their combined +assigned to a node. Besides, if resources are placed such that their combined requirements exceed the provided capacity, they may fail to start completely or run with degraded performance. @@ -119,7 +119,7 @@ Four values are available for the ``placement-strategy``: * **default** Utilization values are not taken into account at all. - Resources are allocated according to allocation scores. If scores are equal, + Resources are assigned according to assignment scores. If scores are equal, resources are evenly distributed across nodes. * **utilization** @@ -127,7 +127,7 @@ Four values are available for the ``placement-strategy``: Utilization values are taken into account *only* when deciding whether a node is considered eligible (i.e. whether it has sufficient free capacity to satisfy the resource's requirements). Load-balancing is still done based on the - number of resources allocated to a node. + number of resources assigned to a node. * **balanced** @@ -152,11 +152,11 @@ Now Pacemaker will ensure the load from your resources will be distributed evenly throughout the cluster, without the need for convoluted sets of colocation constraints. 
-Allocation Details +Assignment Details ################## -Which node is preferred to get consumed first when allocating resources? -________________________________________________________________________ +Which node is preferred to get consumed first when assigning resources? +_______________________________________________________________________ * The node with the highest node weight gets consumed first. Node weight is a score maintained by the cluster to represent node health. @@ -164,18 +164,18 @@ ________________________________________________________________________ * If multiple nodes have the same node weight: * If ``placement-strategy`` is ``default`` or ``utilization``, - the node that has the least number of allocated resources gets consumed first. + the node that has the least number of assigned resources gets consumed first. - * If their numbers of allocated resources are equal, + * If their numbers of assigned resources are equal, the first eligible node listed in the CIB gets consumed first. * If ``placement-strategy`` is ``balanced``, the node that has the most free capacity gets consumed first. * If the free capacities of the nodes are equal, - the node that has the least number of allocated resources gets consumed first. + the node that has the least number of assigned resources gets consumed first. - * If their numbers of allocated resources are equal, + * If their numbers of assigned resources are equal, the first eligible node listed in the CIB gets consumed first. * If ``placement-strategy`` is ``minimal``, @@ -201,17 +201,17 @@ Which resource is preferred to be assigned first? _________________________________________________ * The resource that has the highest ``priority`` (see :ref:`resource_options`) gets - allocated first. + assigned first. * If their priorities are equal, check whether they are already running. 
The - resource that has the highest score on the node where it's running gets allocated + resource that has the highest score on the node where it's running gets assigned first, to prevent resource shuffling. * If the scores above are equal or the resources are not running, the resource that has - the highest score on the preferred node gets allocated first. + the highest score on the preferred node gets assigned first. * If the scores above are equal, the first runnable resource listed in the CIB - gets allocated first. + gets assigned first. Limitations and Workarounds ########################### @@ -233,9 +233,9 @@ services stopped. In the contrived example at the start of this chapter: -* ``rsc-small`` would be allocated to ``node1`` +* ``rsc-small`` would be assigned to ``node1`` -* ``rsc-medium`` would be allocated to ``node2`` +* ``rsc-medium`` would be assigned to ``node2`` * ``rsc-large`` would remain inactive diff --git a/doc/sphinx/Pacemaker_Remote/alternatives.rst b/doc/sphinx/Pacemaker_Remote/alternatives.rst index 83ed67c..adbdc99 100644 --- a/doc/sphinx/Pacemaker_Remote/alternatives.rst +++ b/doc/sphinx/Pacemaker_Remote/alternatives.rst @@ -78,13 +78,8 @@ using virtual machines. Key differences: technology -- for example, the ``libvirt-daemon-lxc`` package to get the `libvirt-lxc <http://libvirt.org/drvlxc.html>`_ driver for LXC containers. -* Libvirt XML definitions must be generated for the containers. The - ``pacemaker-cts`` package includes a script for this purpose, - ``/usr/share/pacemaker/tests/cts/lxc_autogen.sh``. Run it with the - ``--help`` option for details on how to use it. It is intended for testing - purposes only, and hardcodes various parameters that would need to be set - appropriately in real usage. Of course, you can create XML definitions - manually, following the appropriate libvirt driver documentation. +* Libvirt XML definitions must be generated for the containers.
You can create + XML definitions manually, following the appropriate libvirt driver documentation. * To share the authentication key, either share the host's ``/etc/pacemaker`` directory with the container, or copy the key into the container's diff --git a/doc/sphinx/Pacemaker_Remote/baremetal-tutorial.rst b/doc/sphinx/Pacemaker_Remote/baremetal-tutorial.rst index a3c0fbe..7c23bd6 100644 --- a/doc/sphinx/Pacemaker_Remote/baremetal-tutorial.rst +++ b/doc/sphinx/Pacemaker_Remote/baremetal-tutorial.rst @@ -109,7 +109,7 @@ Start and enable the ``pcsd`` daemon on the remote node. [root@remote1 ~]# systemctl enable pcsd Created symlink /etc/systemd/system/multi-user.target.wants/pcsd.service → /usr/lib/systemd/system/pcsd.service. -Next, set a password for the ``hacluster`` user on the remote node +Next, set a password for the |CRM_DAEMON_USER| user on the remote node .. code-block:: none diff --git a/doc/sphinx/Pacemaker_Remote/kvm-tutorial.rst b/doc/sphinx/Pacemaker_Remote/kvm-tutorial.rst index 253149e..ef09882 100644 --- a/doc/sphinx/Pacemaker_Remote/kvm-tutorial.rst +++ b/doc/sphinx/Pacemaker_Remote/kvm-tutorial.rst @@ -254,7 +254,7 @@ Start and enable the ``pcsd`` daemon on the guest. [root@guest1 ~]# systemctl enable pcsd Created symlink /etc/systemd/system/multi-user.target.wants/pcsd.service → /usr/lib/systemd/system/pcsd.service. -Next, set a password for the ``hacluster`` user on the guest. +Next, set a password for the |CRM_DAEMON_USER| user on the guest. .. code-block:: none diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in index 7d843d8..556eb72 100644 --- a/doc/sphinx/conf.py.in +++ b/doc/sphinx/conf.py.in @@ -30,6 +30,16 @@ doc_license += " version 4.0 or later (CC-BY-SA v4.0+)" rst_prolog=""" .. |CFS_DISTRO| replace:: AlmaLinux .. |CFS_DISTRO_VER| replace:: 9 +.. |CRM_BLACKBOX_DIR| replace:: ``%CRM_BLACKBOX_DIR%`` +.. |CRM_DAEMON_GROUP| replace:: ``%CRM_DAEMON_GROUP%`` +.. |CRM_DAEMON_USER| replace:: ``%CRM_DAEMON_USER%`` +.. 
|CRM_DAEMON_USER_RAW| replace:: %CRM_DAEMON_USER% +.. |CRM_SCHEMA_DIRECTORY| replace:: %CRM_SCHEMA_DIRECTORY% +.. |PCMK_AUTHKEY_FILE| replace:: %PACEMAKER_CONFIG_DIR%/authkey +.. |PCMK_CONFIG_FILE| replace:: ``%CONFIGDIR%/pacemaker`` +.. |PCMK_INIT_ENV_FILE| replace:: ``%PACEMAKER_CONFIG_DIR%/pcmk-init.env`` +.. |PCMK_LOG_FILE| replace:: %CRM_LOG_DIR%/pacemaker.log +.. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES% .. |REMOTE_DISTRO| replace:: AlmaLinux .. |REMOTE_DISTRO_VER| replace:: 9 """ |