commit    e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (tag: upstream/2.1.6)
tree      a6716c9275b4b413f6c9194798b34b91affb3cc7 /lib/pacemaker
parent    Initial commit.
author    Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 06:53:20 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 06:53:20 +0000

    Adding upstream version 2.1.6.

    Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/pacemaker')
-rw-r--r--  lib/pacemaker/Makefile.am                |   69
-rw-r--r--  lib/pacemaker/libpacemaker_private.h     |  986
-rw-r--r--  lib/pacemaker/pcmk_acl.c                 |  379
-rw-r--r--  lib/pacemaker/pcmk_cluster_queries.c     |  900
-rw-r--r--  lib/pacemaker/pcmk_fence.c               |  626
-rw-r--r--  lib/pacemaker/pcmk_graph_consumer.c      |  874
-rw-r--r--  lib/pacemaker/pcmk_graph_logging.c       |  234
-rw-r--r--  lib/pacemaker/pcmk_graph_producer.c      | 1078
-rw-r--r--  lib/pacemaker/pcmk_injections.c          |  784
-rw-r--r--  lib/pacemaker/pcmk_output.c              | 2331
-rw-r--r--  lib/pacemaker/pcmk_resource.c            |  173
-rw-r--r--  lib/pacemaker/pcmk_result_code.c         |  167
-rw-r--r--  lib/pacemaker/pcmk_rule.c                |  295
-rw-r--r--  lib/pacemaker/pcmk_sched_actions.c       | 1919
-rw-r--r--  lib/pacemaker/pcmk_sched_bundle.c        |  876
-rw-r--r--  lib/pacemaker/pcmk_sched_clone.c         |  643
-rw-r--r--  lib/pacemaker/pcmk_sched_colocation.c    | 1595
-rw-r--r--  lib/pacemaker/pcmk_sched_constraints.c   |  421
-rw-r--r--  lib/pacemaker/pcmk_sched_fencing.c       |  493
-rw-r--r--  lib/pacemaker/pcmk_sched_group.c         |  865
-rw-r--r--  lib/pacemaker/pcmk_sched_instances.c     | 1659
-rw-r--r--  lib/pacemaker/pcmk_sched_location.c      |  678
-rw-r--r--  lib/pacemaker/pcmk_sched_migration.c     |  386
-rw-r--r--  lib/pacemaker/pcmk_sched_nodes.c         |  351
-rw-r--r--  lib/pacemaker/pcmk_sched_ordering.c      | 1463
-rw-r--r--  lib/pacemaker/pcmk_sched_primitive.c     | 1573
-rw-r--r--  lib/pacemaker/pcmk_sched_probes.c        |  896
-rw-r--r--  lib/pacemaker/pcmk_sched_promotable.c    | 1286
-rw-r--r--  lib/pacemaker/pcmk_sched_recurring.c     |  716
-rw-r--r--  lib/pacemaker/pcmk_sched_remote.c        |  729
-rw-r--r--  lib/pacemaker/pcmk_sched_resource.c      |  722
-rw-r--r--  lib/pacemaker/pcmk_sched_tickets.c       |  531
-rw-r--r--  lib/pacemaker/pcmk_sched_utilization.c   |  469
-rw-r--r--  lib/pacemaker/pcmk_scheduler.c           |  811
-rw-r--r--  lib/pacemaker/pcmk_simulate.c            |  999
-rw-r--r--  lib/pacemaker/pcmk_status.c              |  368
36 files changed, 29345 insertions(+), 0 deletions(-)
diff --git a/lib/pacemaker/Makefile.am b/lib/pacemaker/Makefile.am
new file mode 100644
index 0000000..ebf3b6d
--- /dev/null
+++ b/lib/pacemaker/Makefile.am
@@ -0,0 +1,69 @@
+#
+# Copyright 2004-2023 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+
+AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
+
+noinst_HEADERS = libpacemaker_private.h
+
+## libraries
+lib_LTLIBRARIES = libpacemaker.la
+
+## SOURCES
+
+libpacemaker_la_LDFLAGS = -version-info 7:0:6
+
+libpacemaker_la_CFLAGS = $(CFLAGS_HARDENED_LIB)
+libpacemaker_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB)
+
+libpacemaker_la_LIBADD = $(top_builddir)/lib/pengine/libpe_status.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/lrmd/liblrmd.la \
+ $(top_builddir)/lib/fencing/libstonithd.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/common/libcrmcommon.la
+
+# -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version
+# Use += rather than backslashed continuation lines for parsing by bumplibs
+libpacemaker_la_SOURCES =
+libpacemaker_la_SOURCES += pcmk_acl.c
+libpacemaker_la_SOURCES += pcmk_cluster_queries.c
+libpacemaker_la_SOURCES += pcmk_fence.c
+libpacemaker_la_SOURCES += pcmk_graph_consumer.c
+libpacemaker_la_SOURCES += pcmk_graph_logging.c
+libpacemaker_la_SOURCES += pcmk_graph_producer.c
+libpacemaker_la_SOURCES += pcmk_injections.c
+libpacemaker_la_SOURCES += pcmk_output.c
+libpacemaker_la_SOURCES += pcmk_resource.c
+libpacemaker_la_SOURCES += pcmk_result_code.c
+libpacemaker_la_SOURCES += pcmk_rule.c
+libpacemaker_la_SOURCES += pcmk_sched_actions.c
+libpacemaker_la_SOURCES += pcmk_sched_bundle.c
+libpacemaker_la_SOURCES += pcmk_sched_clone.c
+libpacemaker_la_SOURCES += pcmk_sched_colocation.c
+libpacemaker_la_SOURCES += pcmk_sched_constraints.c
+libpacemaker_la_SOURCES += pcmk_sched_fencing.c
+libpacemaker_la_SOURCES += pcmk_sched_group.c
+libpacemaker_la_SOURCES += pcmk_sched_instances.c
+libpacemaker_la_SOURCES += pcmk_sched_location.c
+libpacemaker_la_SOURCES += pcmk_sched_migration.c
+libpacemaker_la_SOURCES += pcmk_sched_nodes.c
+libpacemaker_la_SOURCES += pcmk_sched_ordering.c
+libpacemaker_la_SOURCES += pcmk_sched_primitive.c
+libpacemaker_la_SOURCES += pcmk_sched_probes.c
+libpacemaker_la_SOURCES += pcmk_sched_promotable.c
+libpacemaker_la_SOURCES += pcmk_sched_recurring.c
+libpacemaker_la_SOURCES += pcmk_sched_remote.c
+libpacemaker_la_SOURCES += pcmk_sched_resource.c
+libpacemaker_la_SOURCES += pcmk_sched_tickets.c
+libpacemaker_la_SOURCES += pcmk_sched_utilization.c
+libpacemaker_la_SOURCES += pcmk_scheduler.c
+libpacemaker_la_SOURCES += pcmk_simulate.c
+libpacemaker_la_SOURCES += pcmk_status.c
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
new file mode 100644
index 0000000..192d5a7
--- /dev/null
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -0,0 +1,986 @@
+/*
+ * Copyright 2021-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
+# define PCMK__LIBPACEMAKER_PRIVATE__H
+
+/* This header is for the sole use of libpacemaker, so that functions can be
+ * declared with G_GNUC_INTERNAL for efficiency.
+ */
+
+#include <crm/pengine/pe_types.h> // pe_action_t, pe_node_t, pe_working_set_t
+
+// Flags to modify the behavior of add_colocated_node_scores()
+enum pcmk__coloc_select {
+ // With no other flags, apply all "with this" colocations
+ pcmk__coloc_select_default = 0,
+
+ // Apply "this with" colocations instead of "with this" colocations
+ pcmk__coloc_select_this_with = (1 << 0),
+
+ // Apply only colocations with non-negative scores
+ pcmk__coloc_select_nonnegative = (1 << 1),
+
+ // Apply only colocations with at least one matching node
+ pcmk__coloc_select_active = (1 << 2),
+};
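+
+/* Example (sketch): these are bit flags, intended to be combined into the
+ * uint32_t flags argument of add_colocated_node_scores(), e.g.
+ *
+ *   uint32_t flags = pcmk__coloc_select_this_with
+ *                    |pcmk__coloc_select_nonnegative;
+ */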
+
+// Flags the update_ordered_actions() method can return
+enum pcmk__updated {
+ pcmk__updated_none = 0, // Nothing changed
+ pcmk__updated_first = (1 << 0), // First action was updated
+ pcmk__updated_then = (1 << 1), // Then action was updated
+};
+
+#define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \
+ au_flags = pcmk__set_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Action update", \
+ (action)->uuid, au_flags, \
+ (flags_to_set), #flags_to_set); \
+ } while (0)
+
+#define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \
+ au_flags = pcmk__clear_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Action update", \
+ (action)->uuid, au_flags, \
+ (flags_to_clear), #flags_to_clear); \
+ } while (0)
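+
+/* Example (sketch): an update_ordered_actions() implementation that changed
+ * the 'first' action might record and report that as
+ *
+ *   uint32_t changed = pcmk__updated_none;
+ *
+ *   pcmk__set_updated_flags(changed, first, pcmk__updated_first);
+ *   return changed;
+ */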
+
+// Resource allocation methods
+struct resource_alloc_functions_s {
+ /*!
+ * \internal
+ * \brief Assign a resource to a node
+ *
+ * \param[in,out] rsc Resource to assign to a node
+ * \param[in] prefer Node to prefer, if all else is equal
+ *
+ * \return Node that \p rsc is assigned to, if assigned entirely to one node
+ */
+ pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer);
+
+ /*!
+ * \internal
+ * \brief Create all actions needed for a given resource
+ *
+ * \param[in,out] rsc Resource to create actions for
+ */
+ void (*create_actions)(pe_resource_t *rsc);
+
+ /*!
+ * \internal
+ * \brief Schedule any probes needed for a resource on a node
+ *
+ * \param[in,out] rsc Resource to create probe for
+ * \param[in,out] node Node to create probe on
+ *
+ * \return true if any probe was created, otherwise false
+ */
+ bool (*create_probe)(pe_resource_t *rsc, pe_node_t *node);
+
+ /*!
+ * \internal
+ * \brief Create implicit constraints needed for a resource
+ *
+ * \param[in,out] rsc Resource to create implicit constraints for
+ */
+ void (*internal_constraints)(pe_resource_t *rsc);
+
+ /*!
+ * \internal
+ * \brief Apply a colocation's score to node weights or resource priority
+ *
+ * Given a colocation constraint, apply its score to the dependent's
+ * allowed node weights (if we are still placing resources) or priority (if
+ * we are choosing promotable clone instance roles).
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint to apply
+ * \param[in] for_dependent true if called on behalf of dependent
+ */
+ void (*apply_coloc_score) (pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent);
+
+ /*!
+ * \internal
+ * \brief Create list of all resources in colocations with a given resource
+ *
+ * Given a resource, create a list of all resources involved in mandatory
+ * colocations with it, whether directly or indirectly via chained colocations.
+ *
+ * \param[in] rsc Resource to add to colocated list
+ * \param[in] orig_rsc Resource originally requested
+ * \param[in,out] colocated_rscs Existing list
+ *
+ * \return List of given resource and all resources involved in colocations
+ *
+ * \note This function is recursive; top-level callers should pass NULL as
+ * \p colocated_rscs and \p orig_rsc, and the desired resource as
+ * \p rsc. The recursive calls will use other values.
+ */
+ GList *(*colocated_resources)(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc,
+ GList *colocated_rscs);
+
+ /*!
+ * \internal
+ * \brief Add colocations affecting a resource as primary to a list
+ *
+ * Given a resource being assigned (\p orig_rsc) and a resource somewhere in
+ * its chain of ancestors (\p rsc, which may be \p orig_rsc), get
+ * colocations that affect the ancestor as primary and should affect the
+ * resource, and add them to a given list.
+ *
+ * \param[in] rsc Resource whose colocations should be added
+ * \param[in] orig_rsc Affected resource (\p rsc or a descendant)
+ * \param[in,out] list List of colocations to add to
+ *
+ * \note All arguments should be non-NULL.
+ * \note The pcmk__with_this_colocations() wrapper should usually be used
+ * instead of using this method directly.
+ */
+ void (*with_this_colocations)(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+ /*!
+ * \internal
+ * \brief Add colocations affecting a resource as dependent to a list
+ *
+ * Given a resource being assigned (\p orig_rsc) and a resource somewhere in
+ * its chain of ancestors (\p rsc, which may be \p orig_rsc), get
+ * colocations that affect the ancestor as dependent and should affect the
+ * resource, and add them to a given list.
+     *
+ * \param[in] rsc Resource whose colocations should be added
+ * \param[in] orig_rsc Affected resource (\p rsc or a descendant)
+ * \param[in,out] list List of colocations to add to
+ *
+ * \note All arguments should be non-NULL.
+ * \note The pcmk__this_with_colocations() wrapper should usually be used
+ * instead of using this method directly.
+ */
+ void (*this_with_colocations)(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+ /*!
+ * \internal
+ * \brief Update nodes with scores of colocated resources' nodes
+ *
+ * Given a table of nodes and a resource, update the nodes' scores with the
+ * scores of the best nodes matching the attribute used for each of the
+ * resource's relevant colocations.
+ *
+ * \param[in,out] rsc Resource to check colocations for
+ * \param[in] log_id Resource ID to use in logs (if NULL, use \p rsc ID)
+ * \param[in,out] nodes Nodes to update
+ * \param[in] attr Colocation attribute (NULL to use default)
+ * \param[in] factor Incorporate scores multiplied by this factor
+ * \param[in] flags Bitmask of enum pcmk__coloc_select values
+ *
+ * \note The caller remains responsible for freeing \p *nodes.
+ */
+ void (*add_colocated_node_scores)(pe_resource_t *rsc, const char *log_id,
+ GHashTable **nodes, const char *attr,
+ float factor, uint32_t flags);
+
+ /*!
+ * \internal
+ * \brief Apply a location constraint to a resource's allowed node scores
+ *
+ * \param[in,out] rsc Resource to apply constraint to
+ * \param[in,out] location Location constraint to apply
+ */
+ void (*apply_location)(pe_resource_t *rsc, pe__location_t *location);
+
+ /*!
+ * \internal
+ * \brief Return action flags for a given resource action
+ *
+ * \param[in,out] action Action to get flags for
+ * \param[in] node If not NULL, limit effects to this node
+ *
+ * \return Flags appropriate to \p action on \p node
+ * \note For primitives, this will be the same as action->flags regardless
+ * of node. For collective resources, the flags can differ due to
+ * multiple instances possibly being involved.
+ */
+ enum pe_action_flags (*action_flags)(pe_action_t *action,
+ const pe_node_t *node);
+
+ /*!
+ * \internal
+ * \brief Update two actions according to an ordering between them
+ *
+     * Given information about an ordering of two actions, update the actions'
+     * flags (and runnable_before members if appropriate) to reflect the
+     * ordering. Effects may cascade to other orderings involving the actions.
+ *
+ * \param[in,out] first 'First' action in an ordering
+ * \param[in,out] then 'Then' action in an ordering
+ * \param[in] node If not NULL, limit scope of ordering to this
+ * node (only used when interleaving instances)
+ * \param[in] flags Action flags for \p first for ordering purposes
+ * \param[in] filter Action flags to limit scope of certain updates
+ * (may include pe_action_optional to affect only
+ * mandatory actions, and pe_action_runnable to
+ * affect only runnable actions)
+ * \param[in] type Group of enum pe_ordering flags to apply
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Group of enum pcmk__updated flags indicating what was updated
+ */
+ uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then,
+ const pe_node_t *node, uint32_t flags,
+ uint32_t filter, uint32_t type,
+ pe_working_set_t *data_set);
+
+    /*!
+     * \internal
+     * \brief Output a summary of scheduled actions for a resource
+     *
+     * \param[in,out] rsc  Resource to output actions for
+     */
+    void (*output_actions)(pe_resource_t *rsc);
+
+ /*!
+ * \internal
+ * \brief Add a resource's actions to the transition graph
+ *
+ * \param[in,out] rsc Resource whose actions should be added
+ */
+ void (*add_actions_to_graph)(pe_resource_t *rsc);
+
+ /*!
+ * \internal
+ * \brief Add meta-attributes relevant to transition graph actions to XML
+ *
+ * If a given resource supports variant-specific meta-attributes that are
+ * needed for transition graph actions, add them to a given XML element.
+ *
+ * \param[in] rsc Resource whose meta-attributes should be added
+ * \param[in,out] xml Transition graph action attributes XML to add to
+ */
+ void (*add_graph_meta)(const pe_resource_t *rsc, xmlNode *xml);
+
+ /*!
+ * \internal
+ * \brief Add a resource's utilization to a table of utilization values
+ *
+ * This function is used when summing the utilization of a resource and all
+ * resources colocated with it, to determine whether a node has sufficient
+ * capacity. Given a resource and a table of utilization values, it will add
+ * the resource's utilization to the existing values, if the resource has
+ * not yet been allocated to a node.
+ *
+ * \param[in] rsc Resource with utilization to add
+ * \param[in] orig_rsc Resource being allocated (for logging only)
+ * \param[in] all_rscs List of all resources that will be summed
+ * \param[in,out] utilization Table of utilization values to add to
+ */
+ void (*add_utilization)(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList *all_rscs,
+ GHashTable *utilization);
+
+ /*!
+ * \internal
+ * \brief Apply a shutdown lock for a resource, if appropriate
+ *
+ * \param[in,out] rsc Resource to check for shutdown lock
+ */
+ void (*shutdown_lock)(pe_resource_t *rsc);
+};
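+
+/* Example (sketch): the scheduler dispatches through this method table so
+ * that each variant (primitive, group, clone, bundle) can supply its own
+ * implementation. Once pcmk__set_allocation_methods() has assigned a table
+ * to each resource, a call might look like
+ *
+ *   pe_node_t *chosen = rsc->cmds->assign(rsc, prefer);
+ */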
+
+// Actions (pcmk_sched_actions.c)
+
+G_GNUC_INTERNAL
+void pcmk__update_action_for_orderings(pe_action_t *action,
+ pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
+ const pe_node_t *node, uint32_t flags,
+ uint32_t filter, uint32_t type,
+ pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__log_action(const char *pre_text, const pe_action_t *action,
+ bool details);
+
+G_GNUC_INTERNAL
+pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name,
+ guint interval_ms, const pe_node_t *node);
+
+G_GNUC_INTERNAL
+pe_action_t *pcmk__new_shutdown_action(pe_node_t *node);
+
+G_GNUC_INTERNAL
+bool pcmk__action_locks_rsc_to_node(const pe_action_t *action);
+
+G_GNUC_INTERNAL
+void pcmk__deduplicate_action_inputs(pe_action_t *action);
+
+G_GNUC_INTERNAL
+void pcmk__output_actions(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
+ const xmlNode *xml_op);
+
+G_GNUC_INTERNAL
+void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set);
+
+
+// Recurring actions (pcmk_sched_recurring.c)
+
+G_GNUC_INTERNAL
+void pcmk__create_recurring_actions(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id,
+ const char *task, guint interval_ms,
+ const pe_node_t *node, const char *reason);
+
+G_GNUC_INTERNAL
+void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task,
+ guint interval_ms, pe_node_t *node);
+
+G_GNUC_INTERNAL
+bool pcmk__action_is_recurring(const pe_action_t *action);
+
+
+// Producing transition graphs (pcmk_graph_producer.c)
+
+G_GNUC_INTERNAL
+bool pcmk__graph_has_loop(const pe_action_t *init_action,
+ const pe_action_t *action,
+ pe_action_wrapper_t *input);
+
+G_GNUC_INTERNAL
+void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__create_graph(pe_working_set_t *data_set);
+
+
+// Fencing (pcmk_sched_fencing.c)
+
+G_GNUC_INTERNAL
+void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node,
+ pe_action_t *action, enum pe_ordering order);
+
+G_GNUC_INTERNAL
+void pcmk__fence_guest(pe_node_t *node);
+
+G_GNUC_INTERNAL
+bool pcmk__node_unfenced(const pe_node_t *node);
+
+G_GNUC_INTERNAL
+void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data);
+
+
+// Injected scheduler inputs (pcmk_injections.c)
+
+void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
+ const pcmk_injections_t *injections);
+
+
+// Constraints of any type (pcmk_sched_constraints.c)
+
+G_GNUC_INTERNAL
+pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);
+
+G_GNUC_INTERNAL
+xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
+ const pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+bool pcmk__valid_resource_or_tag(const pe_working_set_t *data_set,
+ const char *id, pe_resource_t **rsc,
+ pe_tag_t **tag);
+
+G_GNUC_INTERNAL
+bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
+ bool convert_rsc, const pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__create_internal_constraints(pe_working_set_t *data_set);
+
+
+// Location constraints (pcmk_sched_location.c)
+
+G_GNUC_INTERNAL
+void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
+ int node_weight, const char *discover_mode,
+ pe_node_t *foo_node,
+ pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__apply_locations(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__apply_location(pe_resource_t *rsc, pe__location_t *constraint);
+
+
+// Colocation constraints (pcmk_sched_colocation.c)
+
+enum pcmk__coloc_affects {
+ pcmk__coloc_affects_nothing = 0,
+ pcmk__coloc_affects_location,
+ pcmk__coloc_affects_role,
+};
+
+G_GNUC_INTERNAL
+enum pcmk__coloc_affects pcmk__colocation_affects(const pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool preview);
+
+G_GNUC_INTERNAL
+void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation);
+
+G_GNUC_INTERNAL
+void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation);
+
+G_GNUC_INTERNAL
+void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
+ GHashTable **nodes, const char *attr,
+ float factor, uint32_t flags);
+
+G_GNUC_INTERNAL
+void pcmk__add_dependent_scores(gpointer data, gpointer user_data);
+
+G_GNUC_INTERNAL
+void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation);
+
+G_GNUC_INTERNAL
+void pcmk__add_this_with_list(GList **list, GList *addition);
+
+G_GNUC_INTERNAL
+void pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation);
+
+G_GNUC_INTERNAL
+void pcmk__add_with_this_list(GList **list, GList *addition);
+
+G_GNUC_INTERNAL
+void pcmk__new_colocation(const char *id, const char *node_attr, int score,
+ pe_resource_t *dependent, pe_resource_t *primary,
+ const char *dependent_role, const char *primary_role,
+ bool influence, pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__block_colocation_dependents(pe_action_t *action,
+ pe_working_set_t *data_set);
+
+/*!
+ * \internal
+ * \brief Check whether colocation's dependent preferences should be considered
+ *
+ * \param[in] colocation Colocation constraint
+ * \param[in] rsc         Primary instance (if NULL, \p colocation->primary
+ *                        is used; for clones or bundles with multiple
+ *                        instances, this can be a particular instance)
+ *
+ * \return true if colocation influence should be effective, otherwise false
+ */
+static inline bool
+pcmk__colocation_has_influence(const pcmk__colocation_t *colocation,
+ const pe_resource_t *rsc)
+{
+ if (rsc == NULL) {
+ rsc = colocation->primary;
+ }
+
+ /* A bundle replica colocates its remote connection with its container,
+ * using a finite score so that the container can run on Pacemaker Remote
+ * nodes.
+ *
+ * Moving a connection is lightweight and does not interrupt the service,
+ * while moving a container is heavyweight and does interrupt the service,
+ * so don't move a clean, active container based solely on the preferences
+ * of its connection.
+ *
+ * This also avoids problematic scenarios where two containers want to
+ * perpetually swap places.
+ */
+ if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes)
+ && !pcmk_is_set(rsc->flags, pe_rsc_failed)
+ && pcmk__list_of_1(rsc->running_on)) {
+ return false;
+ }
+
+ /* The dependent in a colocation influences the primary's location
+ * if the influence option is true or the primary is not yet active.
+ */
+ return colocation->influence || (rsc->running_on == NULL);
+}
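+
+/* Example (sketch): a caller iterating a primary's "with this" colocations
+ * might skip dependents whose preferences should not be considered:
+ *
+ *   if (!pcmk__colocation_has_influence(colocation, NULL)) {
+ *       continue;
+ *   }
+ */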
+
+
+// Ordering constraints (pcmk_sched_ordering.c)
+
+G_GNUC_INTERNAL
+void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task,
+ pe_action_t *first_action, pe_resource_t *then_rsc,
+ char *then_task, pe_action_t *then_action,
+ uint32_t flags, pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__order_stops_before_shutdown(pe_node_t *node,
+ pe_action_t *shutdown_op);
+
+G_GNUC_INTERNAL
+void pcmk__apply_orderings(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+void pcmk__order_after_each(pe_action_t *after, GList *list);
+
+
+/*!
+ * \internal
+ * \brief Create a new ordering between two resource actions
+ *
+ * \param[in,out] first_rsc Resource for 'first' action
+ * \param[in,out] first_task Action key for 'first' action
+ * \param[in] then_rsc Resource for 'then' action
+ * \param[in,out] then_task Action key for 'then' action
+ * \param[in] flags Bitmask of enum pe_ordering flags
+ */
+#define pcmk__order_resource_actions(first_rsc, first_task, \
+ then_rsc, then_task, flags) \
+ pcmk__new_ordering((first_rsc), \
+ pcmk__op_key((first_rsc)->id, (first_task), 0), \
+ NULL, \
+ (then_rsc), \
+ pcmk__op_key((then_rsc)->id, (then_task), 0), \
+ NULL, (flags), (first_rsc)->cluster)
+
+#define pcmk__order_starts(rsc1, rsc2, flags) \
+ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \
+ (rsc2), CRMD_ACTION_START, (flags))
+
+#define pcmk__order_stops(rsc1, rsc2, flags) \
+ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \
+ (rsc2), CRMD_ACTION_STOP, (flags))
+
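+/* Example (sketch): ordering rsc1's stop before rsc2's stop, with
+ * pe_order_optional making the ordering advisory (applied only when both
+ * actions are already scheduled):
+ *
+ *   pcmk__order_stops(rsc1, rsc2, pe_order_optional);
+ */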
+
+// Ticket constraints (pcmk_sched_tickets.c)
+
+G_GNUC_INTERNAL
+void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);
+
+
+// Promotable clone resources (pcmk_sched_promotable.c)
+
+G_GNUC_INTERNAL
+void pcmk__add_promotion_scores(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__require_promotion_tickets(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__set_instance_roles(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__create_promotable_actions(pe_resource_t *clone);
+
+G_GNUC_INTERNAL
+void pcmk__promotable_restart_ordering(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__order_promotable_instances(pe_resource_t *clone);
+
+G_GNUC_INTERNAL
+void pcmk__update_dependent_with_promotable(const pe_resource_t *primary,
+ pe_resource_t *dependent,
+ const pcmk__colocation_t *colocation);
+
+G_GNUC_INTERNAL
+void pcmk__update_promotable_dependent_priority(const pe_resource_t *primary,
+ pe_resource_t *dependent,
+ const pcmk__colocation_t *colocation);
+
+
+// Pacemaker Remote nodes (pcmk_sched_remote.c)
+
+G_GNUC_INTERNAL
+bool pcmk__is_failed_remote_node(const pe_node_t *node);
+
+G_GNUC_INTERNAL
+void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+bool pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc,
+ const pe_node_t *node);
+
+G_GNUC_INTERNAL
+pe_node_t *pcmk__connection_host_for_action(const pe_action_t *action);
+
+G_GNUC_INTERNAL
+void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params);
+
+G_GNUC_INTERNAL
+void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action);
+
+
+// Primitives (pcmk_sched_primitive.c)
+
+G_GNUC_INTERNAL
+pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer);
+
+G_GNUC_INTERNAL
+void pcmk__primitive_create_actions(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__primitive_internal_constraints(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+enum pe_action_flags pcmk__primitive_action_flags(pe_action_t *action,
+ const pe_node_t *node);
+
+G_GNUC_INTERNAL
+void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent);
+
+G_GNUC_INTERNAL
+void pcmk__with_primitive_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc,
+ GList **list);
+
+G_GNUC_INTERNAL
+void pcmk__primitive_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc,
+ GList **list);
+
+G_GNUC_INTERNAL
+void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node,
+ bool optional);
+
+G_GNUC_INTERNAL
+void pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml);
+
+G_GNUC_INTERNAL
+void pcmk__primitive_add_utilization(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc,
+ GList *all_rscs, GHashTable *utilization);
+
+G_GNUC_INTERNAL
+void pcmk__primitive_shutdown_lock(pe_resource_t *rsc);
+
+
+// Groups (pcmk_sched_group.c)
+
+G_GNUC_INTERNAL
+pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer);
+
+G_GNUC_INTERNAL
+void pcmk__group_create_actions(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__group_internal_constraints(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__group_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent);
+
+G_GNUC_INTERNAL
+void pcmk__with_group_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+G_GNUC_INTERNAL
+void pcmk__group_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+G_GNUC_INTERNAL
+void pcmk__group_add_colocated_node_scores(pe_resource_t *rsc,
+ const char *log_id,
+ GHashTable **nodes, const char *attr,
+ float factor, uint32_t flags);
+
+G_GNUC_INTERNAL
+void pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location);
+
+G_GNUC_INTERNAL
+enum pe_action_flags pcmk__group_action_flags(pe_action_t *action,
+ const pe_node_t *node);
+
+G_GNUC_INTERNAL
+uint32_t pcmk__group_update_ordered_actions(pe_action_t *first,
+ pe_action_t *then,
+ const pe_node_t *node,
+ uint32_t flags, uint32_t filter,
+ uint32_t type,
+ pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+GList *pcmk__group_colocated_resources(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc,
+ GList *colocated_rscs);
+
+G_GNUC_INTERNAL
+void pcmk__group_add_utilization(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList *all_rscs,
+ GHashTable *utilization);
+
+G_GNUC_INTERNAL
+void pcmk__group_shutdown_lock(pe_resource_t *rsc);
+
+
+// Clones (pcmk_sched_clone.c)
+
+G_GNUC_INTERNAL
+pe_node_t *pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer);
+
+G_GNUC_INTERNAL
+void pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent);
+
+G_GNUC_INTERNAL
+void pcmk__with_clone_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+G_GNUC_INTERNAL
+void pcmk__clone_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+// Bundles (pcmk_sched_bundle.c)
+
+G_GNUC_INTERNAL
+const pe_resource_t *pcmk__get_rsc_in_container(const pe_resource_t *instance);
+
+G_GNUC_INTERNAL
+void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent);
+
+G_GNUC_INTERNAL
+void pcmk__with_bundle_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+G_GNUC_INTERNAL
+void pcmk__bundle_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list);
+
+G_GNUC_INTERNAL
+void pcmk__output_bundle_actions(pe_resource_t *rsc);
+
+
+// Clone instances or bundle replica containers (pcmk_sched_instances.c)
+
+G_GNUC_INTERNAL
+void pcmk__assign_instances(pe_resource_t *collective, GList *instances,
+ int max_total, int max_per_node);
+
+G_GNUC_INTERNAL
+void pcmk__create_instance_actions(pe_resource_t *rsc, GList *instances);
+
+G_GNUC_INTERNAL
+bool pcmk__instance_matches(const pe_resource_t *instance,
+ const pe_node_t *node, enum rsc_role_e role,
+ bool current);
+
+G_GNUC_INTERNAL
+pe_resource_t *pcmk__find_compatible_instance(const pe_resource_t *match_rsc,
+ const pe_resource_t *rsc,
+ enum rsc_role_e role,
+ bool current);
+
+G_GNUC_INTERNAL
+uint32_t pcmk__instance_update_ordered_actions(pe_action_t *first,
+ pe_action_t *then,
+ const pe_node_t *node,
+ uint32_t flags, uint32_t filter,
+ uint32_t type,
+ pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+enum pe_action_flags pcmk__collective_action_flags(pe_action_t *action,
+ const GList *instances,
+ const pe_node_t *node);
+
+G_GNUC_INTERNAL
+void pcmk__add_collective_constraints(GList **list,
+ const pe_resource_t *instance,
+ const pe_resource_t *collective,
+ bool with_this);
+
+
+// Injections (pcmk_injections.c)
+
+G_GNUC_INTERNAL
+xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid);
+
+G_GNUC_INTERNAL
+xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node,
+ bool up);
+
+G_GNUC_INTERNAL
+xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
+ const char *resource,
+ const char *lrm_name,
+ const char *rclass,
+ const char *rtype,
+ const char *rprovider);
+
+G_GNUC_INTERNAL
+void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node,
+ const char *resource, const char *task,
+ guint interval_ms, int rc);
+
+G_GNUC_INTERNAL
+xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,
+ lrmd_event_data_t *op, int target_rc);
+
+
+// Nodes (pcmk_sched_nodes.c)
+
+G_GNUC_INTERNAL
+bool pcmk__node_available(const pe_node_t *node, bool consider_score,
+ bool consider_guest);
+
+G_GNUC_INTERNAL
+bool pcmk__any_node_available(GHashTable *nodes);
+
+G_GNUC_INTERNAL
+GHashTable *pcmk__copy_node_table(GHashTable *nodes);
+
+G_GNUC_INTERNAL
+GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node);
+
+G_GNUC_INTERNAL
+void pcmk__apply_node_health(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc,
+ const pe_node_t *node);
+
+
+// Functions applying to more than one variant (pcmk_sched_resource.c)
+
+G_GNUC_INTERNAL
+void pcmk__set_allocation_methods(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
+ const xmlNode *rsc_entry, bool active_on_node);
+
+G_GNUC_INTERNAL
+GList *pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+GList *pcmk__colocated_resources(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc,
+ GList *colocated_rscs);
+
+G_GNUC_INTERNAL
+void pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml);
+
+G_GNUC_INTERNAL
+void pcmk__output_resource_actions(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+bool pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen,
+ bool force);
+
+G_GNUC_INTERNAL
+bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);
+
+G_GNUC_INTERNAL
+void pcmk__unassign_resource(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+bool pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node,
+ pe_resource_t **failed);
+
+G_GNUC_INTERNAL
+void pcmk__sort_resources(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+gint pcmk__cmp_instance(gconstpointer a, gconstpointer b);
+
+G_GNUC_INTERNAL
+gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b);
+
+
+// Functions related to probes (pcmk_sched_probes.c)
+
+G_GNUC_INTERNAL
+bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node);
+
+G_GNUC_INTERNAL
+void pcmk__order_probes(pe_working_set_t *data_set);
+
+G_GNUC_INTERNAL
+bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node);
+
+G_GNUC_INTERNAL
+void pcmk__schedule_probes(pe_working_set_t *data_set);
+
+
+// Functions related to live migration (pcmk_sched_migration.c)
+
+void pcmk__create_migration_actions(pe_resource_t *rsc,
+ const pe_node_t *current);
+
+void pcmk__abort_dangling_migration(void *data, void *user_data);
+
+bool pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current);
+
+void pcmk__order_migration_equivalents(pe__ordering_t *order);
+
+
+// Functions related to node utilization (pcmk_sched_utilization.c)
+
+G_GNUC_INTERNAL
+int pcmk__compare_node_capacities(const pe_node_t *node1,
+ const pe_node_t *node2);
+
+G_GNUC_INTERNAL
+void pcmk__consume_node_capacity(GHashTable *current_utilization,
+ const pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__release_node_capacity(GHashTable *current_utilization,
+ const pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+const pe_node_t *pcmk__ban_insufficient_capacity(pe_resource_t *rsc);
+
+G_GNUC_INTERNAL
+void pcmk__create_utilization_constraints(pe_resource_t *rsc,
+ const GList *allowed_nodes);
+
+G_GNUC_INTERNAL
+void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set);
+
+#endif // PCMK__LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_acl.c b/lib/pacemaker/pcmk_acl.c
new file mode 100644
index 0000000..c2072dc
--- /dev/null
+++ b/lib/pacemaker/pcmk_acl.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <pwd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xpath.h>
+#include <libxslt/transform.h>
+#include <libxslt/variables.h>
+#include <libxslt/xsltutils.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/common/internal.h>
+
+#include <pacemaker-internal.h>
+
+#define ACL_NS_PREFIX "http://clusterlabs.org/ns/pacemaker/access/"
+#define ACL_NS_Q_PREFIX "pcmk-access-"
+#define ACL_NS_Q_WRITABLE (const xmlChar *) ACL_NS_Q_PREFIX "writable"
+#define ACL_NS_Q_READABLE (const xmlChar *) ACL_NS_Q_PREFIX "readable"
+#define ACL_NS_Q_DENIED (const xmlChar *) ACL_NS_Q_PREFIX "denied"
+
+static const xmlChar *NS_WRITABLE = (const xmlChar *) ACL_NS_PREFIX "writable";
+static const xmlChar *NS_READABLE = (const xmlChar *) ACL_NS_PREFIX "readable";
+static const xmlChar *NS_DENIED = (const xmlChar *) ACL_NS_PREFIX "denied";
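+
+/* Example (sketch): annotation binds each XML node to one of these
+ * namespaces, so a fully writable element serializes with a qualified name
+ * along the lines of
+ *
+ *   <pcmk-access-writable:nvpair id="..." .../>
+ *
+ * which the XSLT used by pcmk__acl_evaled_render() can then match on.
+ */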
+
+/*!
+ * \internal
+ * \brief Mark an XML node with the ACL namespace given in \p ns
+ *
+ * \param[in,out] i_node               XML node to mark
+ * \param[in]     ns                   Namespace to apply (one of NS_WRITABLE,
+ *                                     NS_READABLE, or NS_DENIED)
+ * \param[in,out] ret                  Set to pcmk_rc_ok when a namespace is
+ *                                     applied
+ * \param[in,out] ns_recycle_writable  Cached "writable" namespace (created on
+ *                                     first use)
+ * \param[in,out] ns_recycle_readable  Cached "readable" namespace (created on
+ *                                     first use)
+ * \param[in,out] ns_recycle_denied    Cached "denied" namespace (created on
+ *                                     first use)
+ */
+static void
+pcmk__acl_mark_node_with_namespace(xmlNode *i_node, const xmlChar *ns,
+                                   int *ret, xmlNs **ns_recycle_writable,
+                                   xmlNs **ns_recycle_readable,
+                                   xmlNs **ns_recycle_denied)
+{
+ if (ns == NS_WRITABLE)
+ {
+ if (*ns_recycle_writable == NULL)
+ {
+ *ns_recycle_writable = xmlNewNs(xmlDocGetRootElement(i_node->doc),
+ NS_WRITABLE, ACL_NS_Q_WRITABLE);
+ }
+ xmlSetNs(i_node, *ns_recycle_writable);
+ *ret = pcmk_rc_ok;
+ }
+ else if (ns == NS_READABLE)
+ {
+ if (*ns_recycle_readable == NULL)
+ {
+ *ns_recycle_readable = xmlNewNs(xmlDocGetRootElement(i_node->doc),
+ NS_READABLE, ACL_NS_Q_READABLE);
+ }
+ xmlSetNs(i_node, *ns_recycle_readable);
+ *ret = pcmk_rc_ok;
+ }
+ else if (ns == NS_DENIED)
+ {
+ if (*ns_recycle_denied == NULL)
+ {
+ *ns_recycle_denied = xmlNewNs(xmlDocGetRootElement(i_node->doc),
+ NS_DENIED, ACL_NS_Q_DENIED);
+            }
+ xmlSetNs(i_node, *ns_recycle_denied);
+ *ret = pcmk_rc_ok;
+ }
+}
+
+/*!
+ * \brief Annotate a subtree of XML (elements, attributes, and comments)
+ *        with XML namespaces indicating the ACL permissions
+ *
+ * \param[in,out] xml_modify  XML to annotate
+ *
+ * \return A standard Pacemaker return code
+ * Namely:
+ * - pcmk_rc_ok upon success,
+ * - pcmk_rc_already if ACLs were not applicable,
+ * - pcmk_rc_schema_validation if the validation schema version
+ * is unsupported (see note), or
+ * - EINVAL or ENOMEM as appropriate;
+ *
+ * \note This function is recursive
+ */
+static int
+pcmk__acl_annotate_permissions_recursive(xmlNode *xml_modify)
+{
+
+ static xmlNs *ns_recycle_writable = NULL,
+ *ns_recycle_readable = NULL,
+ *ns_recycle_denied = NULL;
+ static const xmlDoc *prev_doc = NULL;
+
+ xmlNode *i_node = NULL;
+ const xmlChar *ns;
+ int ret = EINVAL; // nodes have not been processed yet
+
+ if (prev_doc == NULL || prev_doc != xml_modify->doc) {
+ prev_doc = xml_modify->doc;
+ ns_recycle_writable = ns_recycle_readable = ns_recycle_denied = NULL;
+ }
+
+ for (i_node = xml_modify; i_node != NULL; i_node = i_node->next) {
+ switch (i_node->type) {
+ case XML_ELEMENT_NODE:
+ pcmk__set_xml_doc_flag(i_node, pcmk__xf_tracking);
+
+ if (!pcmk__check_acl(i_node, NULL, pcmk__xf_acl_read)) {
+ ns = NS_DENIED;
+ } else if (!pcmk__check_acl(i_node, NULL, pcmk__xf_acl_write)) {
+ ns = NS_READABLE;
+ } else {
+ ns = NS_WRITABLE;
+ }
+ pcmk__acl_mark_node_with_namespace(i_node, ns, &ret, &ns_recycle_writable, &ns_recycle_readable, &ns_recycle_denied);
+ /* XXX recursion can be turned into plain iteration to save stack */
+ if (i_node->properties != NULL) {
+ /* this is not entirely clear, but relies on the very same
+ class-hierarchy emulation that libxml2 has firmly baked in
+ its API/ABI */
+ ret |= pcmk__acl_annotate_permissions_recursive((xmlNodePtr) i_node->properties);
+ }
+ if (i_node->children != NULL) {
+ ret |= pcmk__acl_annotate_permissions_recursive(i_node->children);
+ }
+ break;
+ case XML_ATTRIBUTE_NODE:
+ /* we can utilize that parent has already been assigned the ns */
+ if (!pcmk__check_acl(i_node->parent,
+ (const char *) i_node->name,
+ pcmk__xf_acl_read)) {
+ ns = NS_DENIED;
+ } else if (!pcmk__check_acl(i_node,
+ (const char *) i_node->name,
+ pcmk__xf_acl_write)) {
+ ns = NS_READABLE;
+ } else {
+ ns = NS_WRITABLE;
+ }
+ pcmk__acl_mark_node_with_namespace(i_node, ns, &ret, &ns_recycle_writable, &ns_recycle_readable, &ns_recycle_denied);
+ break;
+ case XML_COMMENT_NODE:
+ /* we can utilize that parent has already been assigned the ns */
+ if (!pcmk__check_acl(i_node->parent, (const char *) i_node->name, pcmk__xf_acl_read))
+ {
+ ns = NS_DENIED;
+ }
+ else if (!pcmk__check_acl(i_node->parent, (const char *) i_node->name, pcmk__xf_acl_write))
+ {
+ ns = NS_READABLE;
+ }
+ else
+ {
+ ns = NS_WRITABLE;
+ }
+ pcmk__acl_mark_node_with_namespace(i_node, ns, &ret, &ns_recycle_writable, &ns_recycle_readable, &ns_recycle_denied);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int
+pcmk__acl_annotate_permissions(const char *cred, const xmlDoc *cib_doc,
+ xmlDoc **acl_evaled_doc)
+{
+ int ret, version;
+ xmlNode *target, *comment;
+ const char *validation;
+
+ CRM_CHECK(cred != NULL, return EINVAL);
+ CRM_CHECK(cib_doc != NULL, return EINVAL);
+ CRM_CHECK(acl_evaled_doc != NULL, return EINVAL);
+
+ /* avoid trivial accidental XML injection */
+ if (strpbrk(cred, "<>&") != NULL) {
+ return EINVAL;
+ }
+
+ if (!pcmk_acl_required(cred)) {
+ /* nothing to evaluate */
+ return pcmk_rc_already;
+ }
+
+ // @COMPAT xmlDocGetRootElement() requires non-const in libxml2 < 2.9.2
+
+ validation = crm_element_value(xmlDocGetRootElement((xmlDoc *) cib_doc),
+ XML_ATTR_VALIDATION);
+ version = get_schema_version(validation);
+ if (get_schema_version(PCMK__COMPAT_ACL_2_MIN_INCL) > version) {
+ return pcmk_rc_schema_validation;
+ }
+
+ target = copy_xml(xmlDocGetRootElement((xmlDoc *) cib_doc));
+ if (target == NULL) {
+ return EINVAL;
+ }
+
+ pcmk__enable_acl(target, target, cred);
+
+ ret = pcmk__acl_annotate_permissions_recursive(target);
+
+ if (ret == pcmk_rc_ok) {
+ char* credentials = crm_strdup_printf("ACLs as evaluated for user %s", cred);
+ comment = xmlNewDocComment(target->doc, (pcmkXmlStr) credentials);
+ free(credentials);
+ if (comment == NULL) {
+ xmlFreeNode(target);
+ return EINVAL;
+ }
+ xmlAddPrevSibling(xmlDocGetRootElement(target->doc), comment);
+ *acl_evaled_doc = target->doc;
+ return pcmk_rc_ok;
+ } else {
+ xmlFreeNode(target);
+ return ret; //for now, it should be some kind of error
+ }
+}
+
+int
+pcmk__acl_evaled_render(xmlDoc *annotated_doc, enum pcmk__acl_render_how how,
+ xmlChar **doc_txt_ptr)
+{
+ xmlDoc *xslt_doc;
+ xsltStylesheet *xslt;
+ xsltTransformContext *xslt_ctxt;
+ xmlDoc *res;
+ char *sfile;
+ static const char *params_namespace[] = {
+ "accessrendercfg:c-writable", ACL_NS_Q_PREFIX "writable:",
+ "accessrendercfg:c-readable", ACL_NS_Q_PREFIX "readable:",
+ "accessrendercfg:c-denied", ACL_NS_Q_PREFIX "denied:",
+ "accessrendercfg:c-reset", "",
+ "accessrender:extra-spacing", "no",
+ "accessrender:self-reproducing-prefix", ACL_NS_Q_PREFIX,
+ NULL
+ }, *params_useansi[] = {
+ /* start with hard-coded defaults, then adapt per the template ones */
+ "accessrendercfg:c-writable", "\x1b[32m",
+ "accessrendercfg:c-readable", "\x1b[34m",
+ "accessrendercfg:c-denied", "\x1b[31m",
+ "accessrendercfg:c-reset", "\x1b[0m",
+ "accessrender:extra-spacing", "no",
+ "accessrender:self-reproducing-prefix", ACL_NS_Q_PREFIX,
+ NULL
+ }, *params_noansi[] = {
+ "accessrendercfg:c-writable", "vvv---[ WRITABLE ]---vvv",
+ "accessrendercfg:c-readable", "vvv---[ READABLE ]---vvv",
+ "accessrendercfg:c-denied", "vvv---[ ~DENIED~ ]---vvv",
+ "accessrendercfg:c-reset", "",
+ "accessrender:extra-spacing", "yes",
+ "accessrender:self-reproducing-prefix", "",
+ NULL
+ };
+ const char **params;
+ int ret;
+ xmlParserCtxtPtr parser_ctxt;
+
+ /* unfortunately, the input (coming from CIB originally) was parsed with
+ blanks ignored, and since the output is a conversion of XML to text
+ format (we would be covered otherwise thanks to implicit
+ pretty-printing), we need to dump the tree to string output first,
+ only to subsequently reparse it -- this time with blanks honoured */
+ xmlChar *annotated_dump;
+ int dump_size;
+
+ CRM_ASSERT(how != pcmk__acl_render_none);
+
+ // Color is the default render mode for terminals; text is default otherwise
+ if (how == pcmk__acl_render_default) {
+ if (isatty(STDOUT_FILENO)) {
+ how = pcmk__acl_render_color;
+ } else {
+ how = pcmk__acl_render_text;
+ }
+ }
+
+ xmlDocDumpFormatMemory(annotated_doc, &annotated_dump, &dump_size, 1);
+ res = xmlReadDoc(annotated_dump, "on-the-fly-access-render", NULL,
+ XML_PARSE_NONET);
+ CRM_ASSERT(res != NULL);
+ xmlFree(annotated_dump);
+ xmlFreeDoc(annotated_doc);
+ annotated_doc = res;
+
+ sfile = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_base_xslt,
+ "access-render-2");
+ parser_ctxt = xmlNewParserCtxt();
+
+ CRM_ASSERT(sfile != NULL);
+ CRM_ASSERT(parser_ctxt != NULL);
+
+ xslt_doc = xmlCtxtReadFile(parser_ctxt, sfile, NULL, XML_PARSE_NONET);
+
+ xslt = xsltParseStylesheetDoc(xslt_doc); /* acquires xslt_doc! */
+ if (xslt == NULL) {
+ crm_crit("Problem in parsing %s", sfile);
+ return EINVAL;
+ }
+ free(sfile);
+ sfile = NULL;
+ xmlFreeParserCtxt(parser_ctxt);
+
+ xslt_ctxt = xsltNewTransformContext(xslt, annotated_doc);
+ CRM_ASSERT(xslt_ctxt != NULL);
+
+ switch (how) {
+ case pcmk__acl_render_namespace:
+ params = params_namespace;
+ break;
+ case pcmk__acl_render_text:
+ params = params_noansi;
+ break;
+ default:
+ /* pcmk__acl_render_color is the only remaining option.
+ * The compiler complains about params possibly uninitialized if we
+ * don't use default here.
+ */
+ params = params_useansi;
+ break;
+ }
+
+ xsltQuoteUserParams(xslt_ctxt, params);
+
+ res = xsltApplyStylesheetUser(xslt, annotated_doc, NULL,
+ NULL, NULL, xslt_ctxt);
+
+ xmlFreeDoc(annotated_doc);
+ annotated_doc = NULL;
+ xsltFreeTransformContext(xslt_ctxt);
+ xslt_ctxt = NULL;
+
+ if (how == pcmk__acl_render_color && params != params_useansi) {
+ char **param_i = (char **) params;
+ do {
+ free(*param_i);
+ } while (*param_i++ != NULL);
+ free(params);
+ }
+
+ if (res == NULL) {
+ ret = EINVAL;
+ } else {
+ int doc_txt_len;
+ int temp = xsltSaveResultToString(doc_txt_ptr, &doc_txt_len, res, xslt);
+ xmlFreeDoc(res);
+ if (temp == 0) {
+ ret = pcmk_rc_ok;
+ } else {
+ ret = EINVAL;
+ }
+ }
+ xsltFreeStylesheet(xslt);
+ return ret;
+}
diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c
new file mode 100644
index 0000000..6002cd4
--- /dev/null
+++ b/lib/pacemaker/pcmk_cluster_queries.c
@@ -0,0 +1,900 @@
+/*
+ * Copyright 2020-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h> // gboolean, GMainLoop, etc.
+#include <libxml/tree.h> // xmlNode
+
+#include <pacemaker.h>
+#include <pacemaker-internal.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/cib/internal.h>
+#include <crm/msg_xml.h>
+#include <crm/common/output_internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/common/iso8601.h>
+#include <crm/common/ipc_controld.h>
+#include <crm/common/ipc_pacemakerd.h>
+
+//! Object to store node info from the controller API
+typedef struct {
+ /* Adapted from pcmk_controld_api_reply_t:data:node_info.
+ * (char **) are convenient here for use within callbacks: we can skip
+ * copying strings unless the caller passes a non-NULL value.
+ */
+ uint32_t id;
+ char **node_name;
+ char **uuid;
+ char **state;
+ bool have_quorum;
+ bool is_remote;
+} node_info_t;
+
+//! Object to store API results, a timeout, and an output object
+typedef struct {
+ pcmk__output_t *out;
+ bool show_output;
+ int rc;
+ unsigned int message_timeout_ms;
+ enum pcmk_pacemakerd_state pcmkd_state;
+ node_info_t node_info;
+} data_t;
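+
+/* Example (sketch): thanks to the (char **) members of node_info_t, a caller
+ * interested only in the node name can supply storage for just that field
+ * and leave the others NULL:
+ *
+ *   char *name = NULL;
+ *   data_t data = { .rc = EAGAIN, .node_info = { .node_name = &name } };
+ */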
+
+/*!
+ * \internal
+ * \brief Validate that an IPC API event is a good reply
+ *
+ * \param[in,out] data API results and options
+ * \param[in] api IPC API connection
+ * \param[in] event_type Type of event that occurred
+ * \param[in] status Event status
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+validate_reply_event(data_t *data, const pcmk_ipc_api_t *api,
+ enum pcmk_ipc_event event_type, crm_exit_t status)
+{
+ pcmk__output_t *out = data->out;
+
+ switch (event_type) {
+ case pcmk_ipc_event_reply:
+ break;
+
+ case pcmk_ipc_event_disconnect:
+ if (data->rc == ECONNRESET) { // Unexpected
+ out->err(out, "error: Lost connection to %s",
+ pcmk_ipc_name(api, true));
+ }
+ // Nothing bad but not the reply we're looking for
+ return ENOTSUP;
+
+ default:
+ // Ditto
+ return ENOTSUP;
+ }
+
+ if (status != CRM_EX_OK) {
+ out->err(out, "error: Bad reply from %s: %s",
+ pcmk_ipc_name(api, true), crm_exit_str(status));
+ data->rc = EBADMSG;
+ return data->rc;
+ }
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Validate that a controller API event is a good reply of expected type
+ *
+ * \param[in,out] data API results and options
+ * \param[in] api Controller connection
+ * \param[in] event_type Type of event that occurred
+ * \param[in] status Event status
+ * \param[in] event_data Event-specific data
+ * \param[in] expected_type Expected reply type
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+validate_controld_reply(data_t *data, const pcmk_ipc_api_t *api,
+ enum pcmk_ipc_event event_type, crm_exit_t status,
+ const void *event_data,
+ enum pcmk_controld_api_reply expected_type)
+{
+ pcmk__output_t *out = data->out;
+ int rc = pcmk_rc_ok;
+ const pcmk_controld_api_reply_t *reply = NULL;
+
+ rc = validate_reply_event(data, api, event_type, status);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ reply = (const pcmk_controld_api_reply_t *) event_data;
+
+ if (reply->reply_type != expected_type) {
+ out->err(out, "error: Unexpected reply type '%s' from controller",
+ pcmk__controld_api_reply2str(reply->reply_type));
+ data->rc = EBADMSG;
+ return data->rc;
+ }
+
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Validate that a \p pacemakerd API event is a good reply of expected
+ * type
+ *
+ * \param[in,out] data API results and options
+ * \param[in] api \p pacemakerd connection
+ * \param[in] event_type Type of event that occurred
+ * \param[in] status Event status
+ * \param[in] event_data Event-specific data
+ * \param[in] expected_type Expected reply type
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+validate_pcmkd_reply(data_t *data, const pcmk_ipc_api_t *api,
+ enum pcmk_ipc_event event_type, crm_exit_t status,
+ const void *event_data,
+ enum pcmk_pacemakerd_api_reply expected_type)
+{
+ pcmk__output_t *out = data->out;
+ const pcmk_pacemakerd_api_reply_t *reply = NULL;
+ int rc = validate_reply_event(data, api, event_type, status);
+
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ reply = (const pcmk_pacemakerd_api_reply_t *) event_data;
+
+ if (reply->reply_type != expected_type) {
+ out->err(out, "error: Unexpected reply type '%s' from pacemakerd",
+ pcmk__pcmkd_api_reply2str(reply->reply_type));
+ data->rc = EBADMSG;
+ return data->rc;
+ }
+
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Process a controller status IPC event
+ *
+ * \param[in,out] controld_api Controller connection
+ * \param[in] event_type Type of event that occurred
+ * \param[in] status Event status
+ * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing
+ * event-specific data
+ * \param[in,out] user_data \p data_t object for API results and options
+ */
+static void
+controller_status_event_cb(pcmk_ipc_api_t *controld_api,
+ enum pcmk_ipc_event event_type, crm_exit_t status,
+ void *event_data, void *user_data)
+{
+ data_t *data = (data_t *) user_data;
+ pcmk__output_t *out = data->out;
+ const pcmk_controld_api_reply_t *reply = NULL;
+
+ int rc = validate_controld_reply(data, controld_api, event_type, status,
+ event_data, pcmk_controld_reply_ping);
+
+ if (rc != pcmk_rc_ok) {
+ return;
+ }
+
+ reply = (const pcmk_controld_api_reply_t *) event_data;
+ out->message(out, "health",
+ reply->data.ping.sys_from, reply->host_from,
+ reply->data.ping.fsa_state, reply->data.ping.result);
+ data->rc = pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Process a designated controller IPC event
+ *
+ * \param[in,out] controld_api Controller connection
+ * \param[in] event_type Type of event that occurred
+ * \param[in] status Event status
+ * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing
+ * event-specific data
+ * \param[in,out] user_data \p data_t object for API results and options
+ */
+static void
+designated_controller_event_cb(pcmk_ipc_api_t *controld_api,
+ enum pcmk_ipc_event event_type,
+ crm_exit_t status, void *event_data,
+ void *user_data)
+{
+ data_t *data = (data_t *) user_data;
+ pcmk__output_t *out = data->out;
+ const pcmk_controld_api_reply_t *reply = NULL;
+
+ int rc = validate_controld_reply(data, controld_api, event_type, status,
+ event_data, pcmk_controld_reply_ping);
+
+ if (rc != pcmk_rc_ok) {
+ return;
+ }
+
+ reply = (const pcmk_controld_api_reply_t *) event_data;
+ out->message(out, "dc", reply->host_from);
+ data->rc = pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Process a node info IPC event
+ *
+ * \param[in,out] controld_api Controller connection
+ * \param[in] event_type Type of event that occurred
+ * \param[in] status Event status
+ * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing
+ * event-specific data
+ * \param[in,out] user_data \p data_t object for API results and options
+ */
+static void
+node_info_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type,
+ crm_exit_t status, void *event_data, void *user_data)
+{
+ data_t *data = (data_t *) user_data;
+ pcmk__output_t *out = data->out;
+
+ const pcmk_controld_api_reply_t *reply = NULL;
+
+ int rc = validate_controld_reply(data, controld_api, event_type, status,
+ event_data, pcmk_controld_reply_info);
+
+ if (rc != pcmk_rc_ok) {
+ return;
+ }
+
+ reply = (const pcmk_controld_api_reply_t *) event_data;
+
+ if (reply->data.node_info.uname == NULL) {
+ out->err(out, "Node is not known to cluster");
+ data->rc = pcmk_rc_node_unknown;
+ return;
+ }
+
+ data->node_info.have_quorum = reply->data.node_info.have_quorum;
+ data->node_info.is_remote = reply->data.node_info.is_remote;
+ data->node_info.id = (uint32_t) reply->data.node_info.id;
+
+ pcmk__str_update(data->node_info.node_name, reply->data.node_info.uname);
+ pcmk__str_update(data->node_info.uuid, reply->data.node_info.uuid);
+ pcmk__str_update(data->node_info.state, reply->data.node_info.state);
+
+ if (data->show_output) {
+ out->message(out, "node-info",
+ reply->data.node_info.id, reply->data.node_info.uname,
+ reply->data.node_info.uuid, reply->data.node_info.state,
+ reply->data.node_info.have_quorum,
+ reply->data.node_info.is_remote);
+ }
+
+ data->rc = pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Process a \p pacemakerd status IPC event
+ *
+ * \param[in,out] pacemakerd_api \p pacemakerd connection
+ * \param[in] event_type Type of event that occurred
+ * \param[in] status Event status
+ * \param[in,out] event_data \p pcmk_pacemakerd_api_reply_t object
+ * containing event-specific data
+ * \param[in,out] user_data \p data_t object for API results and options
+ */
+static void
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
+ enum pcmk_ipc_event event_type, crm_exit_t status,
+ void *event_data, void *user_data)
+{
+ data_t *data = user_data;
+ pcmk__output_t *out = data->out;
+ const pcmk_pacemakerd_api_reply_t *reply = NULL;
+
+ int rc = validate_pcmkd_reply(data, pacemakerd_api, event_type, status,
+ event_data, pcmk_pacemakerd_reply_ping);
+
+ if (rc != pcmk_rc_ok) {
+ return;
+ }
+
+ // Parse desired information from reply
+ reply = (const pcmk_pacemakerd_api_reply_t *) event_data;
+
+ data->pcmkd_state = reply->data.ping.state;
+ data->rc = pcmk_rc_ok;
+
+ if (!data->show_output) {
+ return;
+ }
+
+ if (reply->data.ping.status == pcmk_rc_ok) {
+ out->message(out, "pacemakerd-health",
+ reply->data.ping.sys_from, reply->data.ping.state, NULL,
+ reply->data.ping.last_good);
+ } else {
+ out->message(out, "pacemakerd-health",
+ reply->data.ping.sys_from, reply->data.ping.state,
+ "query failed", time(NULL));
+ }
+}
+
+static pcmk_ipc_api_t *
+ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb,
+ enum pcmk_ipc_dispatch dispatch_type, bool eremoteio_ok)
+{
+ int rc;
+ pcmk__output_t *out = data->out;
+ pcmk_ipc_api_t *api = NULL;
+
+ rc = pcmk_new_ipc_api(&api, server);
+ if (api == NULL) {
+ out->err(out, "error: Could not connect to %s: %s",
+ pcmk_ipc_name(api, true),
+ pcmk_rc_str(rc));
+ data->rc = rc;
+ return NULL;
+ }
+ if (cb != NULL) {
+ pcmk_register_ipc_callback(api, cb, data);
+ }
+
+ rc = pcmk_connect_ipc(api, dispatch_type);
+
+ if (rc != pcmk_rc_ok) {
+ if (rc == EREMOTEIO) {
+ data->pcmkd_state = pcmk_pacemakerd_state_remote;
+ if (eremoteio_ok) {
+ /* EREMOTEIO may be expected and acceptable for some callers
+ * on a Pacemaker Remote node
+ */
+ rc = pcmk_rc_ok;
+ } else {
+ out->err(out, "error: Could not connect to %s: %s",
+ pcmk_ipc_name(api, true), pcmk_rc_str(rc));
+ }
+ }
+ data->rc = rc;
+ pcmk_free_ipc_api(api);
+ return NULL;
+ }
+
+ return api;
+}
+
+/*!
+ * \internal
+ * \brief Poll an IPC API connection until timeout or a reply is received
+ *
+ * \param[in,out] data API results and options
+ * \param[in,out] api IPC API connection
+ * \param[in] on_node If not \p NULL, name of the node to poll (used only
+ * for logging)
+ *
+ * \note Sets the \p rc member of \p data on error
+ */
+static void
+poll_until_reply(data_t *data, pcmk_ipc_api_t *api, const char *on_node)
+{
+ pcmk__output_t *out = data->out;
+
+ uint64_t start_nsec = qb_util_nano_current_get();
+ uint64_t end_nsec = start_nsec;
+ uint64_t elapsed_ms = 0;
+ uint64_t remaining_ms = data->message_timeout_ms;
+
+ while (remaining_ms > 0) {
+ int rc = pcmk_poll_ipc(api, remaining_ms);
+
+ if (rc == EAGAIN) {
+ // Poll timed out
+ break;
+ }
+
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "error: Failed to poll %s API%s%s: %s",
+ pcmk_ipc_name(api, true), (on_node != NULL)? " on " : "",
+ pcmk__s(on_node, ""), pcmk_rc_str(rc));
+ data->rc = rc;
+ return;
+ }
+
+ pcmk_dispatch_ipc(api);
+
+ if (data->rc != EAGAIN) {
+ // Received a reply
+ return;
+ }
+ end_nsec = qb_util_nano_current_get();
+ elapsed_ms = (end_nsec - start_nsec) / QB_TIME_NS_IN_MSEC;
+ remaining_ms = data->message_timeout_ms - elapsed_ms;
+ }
+
+ out->err(out,
+ "error: Timed out after %ums waiting for reply from %s API%s%s",
+ data->message_timeout_ms, pcmk_ipc_name(api, true),
+ (on_node != NULL)? " on " : "", pcmk__s(on_node, ""));
+ data->rc = EAGAIN;
+}
+
+/*!
+ * \internal
+ * \brief Get and output controller status
+ *
+ * \param[in,out] out Output object
+ * \param[in] node_name Name of node whose status is desired
+ * (\p NULL for DC)
+ * \param[in] message_timeout_ms How long to wait for a reply from the
+ * \p pacemaker-controld API. If 0,
+ * \p pcmk_ipc_dispatch_sync will be used.
+ * Otherwise, \p pcmk_ipc_dispatch_poll will
+ * be used.
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__controller_status(pcmk__output_t *out, const char *node_name,
+ unsigned int message_timeout_ms)
+{
+ data_t data = {
+ .out = out,
+ .rc = EAGAIN,
+ .message_timeout_ms = message_timeout_ms,
+ };
+ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll;
+ pcmk_ipc_api_t *controld_api = NULL;
+
+ if (message_timeout_ms == 0) {
+ dispatch_type = pcmk_ipc_dispatch_sync;
+ }
+ controld_api = ipc_connect(&data, pcmk_ipc_controld,
+ controller_status_event_cb, dispatch_type,
+ false);
+
+ if (controld_api != NULL) {
+ int rc = pcmk_controld_api_ping(controld_api, node_name);
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "error: Could not ping controller API on %s: %s",
+ pcmk__s(node_name, "DC"), pcmk_rc_str(rc));
+ data.rc = rc;
+ }
+
+ if (dispatch_type == pcmk_ipc_dispatch_poll) {
+ poll_until_reply(&data, controld_api, pcmk__s(node_name, "DC"));
+ }
+ pcmk_free_ipc_api(controld_api);
+ }
+
+ return data.rc;
+}
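+
+/* A minimal usage sketch (illustrative only, not part of the library): a
+ * caller with an initialized pcmk__output_t could ping the DC, waiting up
+ * to five seconds for a reply:
+ *
+ *     int rc = pcmk__controller_status(out, NULL, 5000);
+ *
+ *     if (rc != pcmk_rc_ok) {
+ *         out->err(out, "error: Controller check failed: %s",
+ *                  pcmk_rc_str(rc));
+ *     }
+ */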
+
+// Documented in header
+int
+pcmk_controller_status(xmlNodePtr *xml, const char *node_name,
+ unsigned int message_timeout_ms)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__controller_status(out, node_name, message_timeout_ms);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Get and output designated controller node name
+ *
+ * \param[in,out] out Output object
+ * \param[in] message_timeout_ms How long to wait for a reply from the
+ * \p pacemaker-controld API. If 0,
+ * \p pcmk_ipc_dispatch_sync will be used.
+ * Otherwise, \p pcmk_ipc_dispatch_poll will
+ * be used.
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__designated_controller(pcmk__output_t *out,
+ unsigned int message_timeout_ms)
+{
+ data_t data = {
+ .out = out,
+ .rc = EAGAIN,
+ .message_timeout_ms = message_timeout_ms,
+ };
+ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll;
+ pcmk_ipc_api_t *controld_api = NULL;
+
+ if (message_timeout_ms == 0) {
+ dispatch_type = pcmk_ipc_dispatch_sync;
+ }
+ controld_api = ipc_connect(&data, pcmk_ipc_controld,
+ designated_controller_event_cb, dispatch_type,
+ false);
+
+ if (controld_api != NULL) {
+ int rc = pcmk_controld_api_ping(controld_api, NULL);
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "error: Could not ping controller API on DC: %s",
+ pcmk_rc_str(rc));
+ data.rc = rc;
+ }
+
+ if (dispatch_type == pcmk_ipc_dispatch_poll) {
+ poll_until_reply(&data, controld_api, "DC");
+ }
+ pcmk_free_ipc_api(controld_api);
+ }
+
+ return data.rc;
+}
+
+// Documented in header
+int
+pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__designated_controller(out, message_timeout_ms);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
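+
+/* Sketch of the public XML variant above (illustrative): the caller owns the
+ * returned XML and frees it with free_xml() when done:
+ *
+ *     xmlNodePtr xml = NULL;
+ *     int rc = pcmk_designated_controller(&xml, 5000);
+ *
+ *     if (rc == pcmk_rc_ok) {
+ *         // inspect the result document in xml ...
+ *     }
+ *     free_xml(xml);
+ */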
+
+/*!
+ * \internal
+ * \brief Get and optionally output node info corresponding to a node ID from
+ * the controller
+ *
+ * \param[in,out] out Output object
+ * \param[in,out] node_id ID of node whose name to get. If \p NULL
+ * or 0, get the local node name. If not
+ * \p NULL, store the true node ID here on
+ * success.
+ * \param[out] node_name If not \p NULL, where to store the node
+ * name
+ * \param[out] uuid If not \p NULL, where to store the node
+ * UUID
+ * \param[out] state If not \p NULL, where to store the
+ * membership state
+ * \param[out] is_remote If not \p NULL, where to store whether the
+ * node is a Pacemaker Remote node
+ * \param[out] have_quorum If not \p NULL, where to store whether the
+ * node has quorum
+ * \param[in] show_output Whether to show the node info
+ * \param[in] message_timeout_ms How long to wait for a reply from the
+ * \p pacemaker-controld API. If 0,
+ * \p pcmk_ipc_dispatch_sync will be used.
+ * Otherwise, \p pcmk_ipc_dispatch_poll will
+ * be used.
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note The caller is responsible for freeing \p *node_name, \p *uuid, and
+ * \p *state using \p free().
+ */
+int
+pcmk__query_node_info(pcmk__output_t *out, uint32_t *node_id, char **node_name,
+ char **uuid, char **state, bool *have_quorum,
+ bool *is_remote, bool show_output,
+ unsigned int message_timeout_ms)
+{
+ data_t data = {
+ .out = out,
+ .show_output = show_output,
+ .rc = EAGAIN,
+ .message_timeout_ms = message_timeout_ms,
+ .node_info = {
+ .id = (node_id == NULL)? 0 : *node_id,
+ .node_name = node_name,
+ .uuid = uuid,
+ .state = state,
+ },
+ };
+ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll;
+ pcmk_ipc_api_t *controld_api = NULL;
+
+ if (node_name != NULL) {
+ *node_name = NULL;
+ }
+ if (uuid != NULL) {
+ *uuid = NULL;
+ }
+ if (state != NULL) {
+ *state = NULL;
+ }
+
+ if (message_timeout_ms == 0) {
+ dispatch_type = pcmk_ipc_dispatch_sync;
+ }
+ controld_api = ipc_connect(&data, pcmk_ipc_controld, node_info_event_cb,
+ dispatch_type, false);
+
+ if (controld_api != NULL) {
+ int rc = pcmk_controld_api_node_info(controld_api,
+ (node_id != NULL)? *node_id : 0);
+
+ if (rc != pcmk_rc_ok) {
+ out->err(out,
+ "error: Could not send request to controller API on local "
+ "node: %s", pcmk_rc_str(rc));
+ data.rc = rc;
+ }
+
+ if (dispatch_type == pcmk_ipc_dispatch_poll) {
+ poll_until_reply(&data, controld_api, "local node");
+ }
+ pcmk_free_ipc_api(controld_api);
+ }
+
+ if (data.rc != pcmk_rc_ok) {
+ return data.rc;
+ }
+
+ // String outputs are set in callback
+ if (node_id != NULL) {
+ *node_id = data.node_info.id;
+ }
+ if (have_quorum != NULL) {
+ *have_quorum = data.node_info.have_quorum;
+ }
+ if (is_remote != NULL) {
+ *is_remote = data.node_info.is_remote;
+ }
+
+ return data.rc;
+}
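+
+/* Sketch of a typical call (illustrative): query the local node and release
+ * the strings duplicated by the callback:
+ *
+ *     uint32_t id = 0;
+ *     char *name = NULL, *uuid = NULL, *state = NULL;
+ *     bool quorum = false, remote = false;
+ *     int rc = pcmk__query_node_info(out, &id, &name, &uuid, &state,
+ *                                    &quorum, &remote, false, 5000);
+ *
+ *     if (rc == pcmk_rc_ok) {
+ *         // use name, uuid, and state ...
+ *     }
+ *     free(name);
+ *     free(uuid);
+ *     free(state);
+ */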
+
+// Documented in header
+int
+pcmk_query_node_info(xmlNodePtr *xml, uint32_t *node_id, char **node_name,
+ char **uuid, char **state, bool *have_quorum,
+ bool *is_remote, bool show_output,
+ unsigned int message_timeout_ms)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ CRM_ASSERT(node_name != NULL);
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__query_node_info(out, node_id, node_name, uuid, state,
+ have_quorum, is_remote, show_output,
+ message_timeout_ms);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Get and optionally output \p pacemakerd status
+ *
+ * \param[in,out] out Output object
+ * \param[in] ipc_name IPC name for request
+ * \param[in] message_timeout_ms How long to wait for a reply from the
+ * \p pacemakerd API. If 0,
+ * \p pcmk_ipc_dispatch_sync will be used.
+ * Otherwise, \p pcmk_ipc_dispatch_poll will
+ * be used.
+ * \param[in] show_output Whether to output the \p pacemakerd state
+ * \param[out] state Where to store the \p pacemakerd state, if
+ * not \p NULL
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note This function sets \p state to \p pcmk_pacemakerd_state_remote and
+ * returns \p pcmk_rc_ok if the IPC connection attempt returns
+ * \p EREMOTEIO. That code indicates that this is a Pacemaker Remote node
+ * with \p pacemaker-remoted running. The node may be connected to the
+ * cluster.
+ */
+int
+pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name,
+ unsigned int message_timeout_ms, bool show_output,
+ enum pcmk_pacemakerd_state *state)
+{
+ data_t data = {
+ .out = out,
+ .show_output = show_output,
+ .rc = EAGAIN,
+ .message_timeout_ms = message_timeout_ms,
+ .pcmkd_state = pcmk_pacemakerd_state_invalid,
+ };
+ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll;
+ pcmk_ipc_api_t *pacemakerd_api = NULL;
+
+ if (message_timeout_ms == 0) {
+ dispatch_type = pcmk_ipc_dispatch_sync;
+ }
+ pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd,
+ pacemakerd_event_cb, dispatch_type, true);
+
+ if (pacemakerd_api != NULL) {
+ int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name);
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "error: Could not ping launcher API: %s",
+ pcmk_rc_str(rc));
+ data.rc = rc;
+ }
+
+ if (dispatch_type == pcmk_ipc_dispatch_poll) {
+ poll_until_reply(&data, pacemakerd_api, NULL);
+ }
+ pcmk_free_ipc_api(pacemakerd_api);
+
+ } else if ((data.pcmkd_state == pcmk_pacemakerd_state_remote)
+ && show_output) {
+ // No API connection so the callback wasn't run
+ out->message(out, "pacemakerd-health",
+ NULL, data.pcmkd_state, NULL, time(NULL));
+ }
+
+ if (state != NULL) {
+ *state = data.pcmkd_state;
+ }
+ return data.rc;
+}
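+
+/* Illustrative sketch: probe pacemakerd without printing, then branch on the
+ * reported state (per the note above, a Pacemaker Remote node yields
+ * pcmk_pacemakerd_state_remote with pcmk_rc_ok):
+ *
+ *     enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid;
+ *     int rc = pcmk__pacemakerd_status(out, crm_system_name, 5000, false,
+ *                                      &state);
+ *
+ *     if ((rc == pcmk_rc_ok)
+ *         && (state == pcmk_pacemakerd_state_running)) {
+ *         // safe to contact the subsidiary daemons
+ *     }
+ */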
+
+// Documented in header
+int
+pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name,
+ unsigned int message_timeout_ms)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__pacemakerd_status(out, ipc_name, message_timeout_ms, true, NULL);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+
+/* User data for node XPath searches (used for cluster, guest, and remote
+ * nodes alike)
+ */
+struct node_data {
+    pcmk__output_t *out;
+    int found;
+    const char *field;      /* XML attribute that holds the node ID */
+    const char *type;       /* Node type being listed */
+    gboolean bash_export;
+};
+
+static void
+remote_node_print_helper(xmlNode *result, void *user_data)
+{
+ struct node_data *data = user_data;
+ pcmk__output_t *out = data->out;
+ const char *name = crm_element_value(result, XML_ATTR_UNAME);
+ const char *id = crm_element_value(result, data->field);
+
+ // node name and node id are the same for remote/guest nodes
+ out->message(out, "crmadmin-node", data->type,
+ name ? name : id,
+ id,
+ data->bash_export);
+ data->found++;
+}
+
+// \return Standard Pacemaker return code
+int
+pcmk__list_nodes(pcmk__output_t *out, const char *node_types,
+ gboolean bash_export)
+{
+ xmlNode *xml_node = NULL;
+ int rc;
+
+ rc = cib__signon_query(out, NULL, &xml_node);
+
+ if (rc == pcmk_rc_ok) {
+ struct node_data data = {
+ .out = out,
+ .found = 0,
+ .bash_export = bash_export
+ };
+
+ out->begin_list(out, NULL, NULL, "nodes");
+
+ if (!pcmk__str_empty(node_types) && strstr(node_types, "all")) {
+ node_types = NULL;
+ }
+
+ if (pcmk__str_empty(node_types) || strstr(node_types, "cluster")) {
+ data.field = "id";
+ data.type = "cluster";
+ crm_foreach_xpath_result(xml_node, PCMK__XP_MEMBER_NODE_CONFIG,
+ remote_node_print_helper, &data);
+ }
+
+ if (pcmk__str_empty(node_types) || strstr(node_types, "guest")) {
+ data.field = "value";
+ data.type = "guest";
+ crm_foreach_xpath_result(xml_node, PCMK__XP_GUEST_NODE_CONFIG,
+ remote_node_print_helper, &data);
+ }
+
+        if (pcmk__str_empty(node_types)
+            || !pcmk__strcmp(node_types, ",|^remote", pcmk__str_regex)) {
+ data.field = "id";
+ data.type = "remote";
+ crm_foreach_xpath_result(xml_node, PCMK__XP_REMOTE_NODE_CONFIG,
+ remote_node_print_helper, &data);
+ }
+
+ out->end_list(out);
+
+ if (data.found == 0) {
+ out->info(out, "No nodes configured");
+ }
+
+ free_xml(xml_node);
+ }
+
+ return rc;
+}
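+
+/* Usage notes (illustrative): an empty/NULL node_types or "all" selects every
+ * section; "cluster" and "guest" are matched as substrings, while the remote
+ * section is selected when node_types contains a comma or begins with
+ * "remote" (the regex above). For example:
+ *
+ *     pcmk__list_nodes(out, "cluster,remote", FALSE); // skip guest nodes
+ *     pcmk__list_nodes(out, NULL, TRUE);              // all, bash-exported
+ */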
+
+int
+pcmk_list_nodes(xmlNodePtr *xml, const char *node_types)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__list_nodes(out, node_types, FALSE);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
new file mode 100644
index 0000000..7a0490f
--- /dev/null
+++ b/lib/pacemaker/pcmk_fence.c
@@ -0,0 +1,626 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/results.h>
+#include <crm/common/output.h>
+#include <crm/common/output_internal.h>
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+
+#include <glib.h>
+#include <libxml/tree.h>
+#include <pacemaker.h>
+#include <pacemaker-internal.h>
+
+static const int st_opts = st_opt_sync_call | st_opt_allow_suicide;
+
+static GMainLoop *mainloop = NULL;
+
+static struct {
+ stonith_t *st;
+ const char *target;
+ const char *action;
+ char *name;
+ unsigned int timeout;
+ unsigned int tolerance;
+ int delay;
+ pcmk__action_result_t result;
+} async_fence_data = { NULL, };
+
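+/*!
+ * \internal
+ * \brief Register or unregister a fencing topology level
+ *
+ * The target may take one of three forms: a plain node name, a node name
+ * pattern prefixed with '@', or an attribute specification of the form
+ * "name=value".
+ *
+ * \param[in,out] st           Fencer connection to use
+ * \param[in]     target       Node name, \@pattern, or name=value to target
+ * \param[in]     fence_level  Topology level to register or unregister
+ * \param[in]     devices      Devices to use at this level (registration only)
+ * \param[in]     added        Whether to register (true) or unregister (false)
+ *
+ * \return Standard Pacemaker return code
+ *
+ * \note Despite the const qualifier, an attribute-style \p target is modified
+ *       in place (the '=' is overwritten with a terminator), so \p target
+ *       must point to writable memory.
+ */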
+static int
+handle_level(stonith_t *st, const char *target, int fence_level,
+ const stonith_key_value_t *devices, bool added)
+{
+ const char *node = NULL;
+ const char *pattern = NULL;
+ const char *name = NULL;
+ char *value = NULL;
+ int rc = pcmk_rc_ok;
+
+ if (target == NULL) {
+ // Not really possible, but makes static analysis happy
+ return EINVAL;
+ }
+
+ /* Determine if targeting by attribute, node name pattern or node name */
+ value = strchr(target, '=');
+ if (value != NULL) {
+ name = target;
+ *value++ = '\0';
+ } else if (*target == '@') {
+ pattern = target + 1;
+ } else {
+ node = target;
+ }
+
+ /* Register or unregister level as appropriate */
+ if (added) {
+ rc = st->cmds->register_level_full(st, st_opts, node, pattern,
+ name, value, fence_level,
+ devices);
+ } else {
+ rc = st->cmds->remove_level_full(st, st_opts, node, pattern,
+ name, value, fence_level);
+ }
+
+ return pcmk_legacy2rc(rc);
+}
+
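+/*!
+ * \internal
+ * \brief Reduce a fencing history list to one entry per unique event
+ *
+ * For each combination of target, action, and state (plus delegate for
+ * failed events), keep only the first entry encountered, freeing any
+ * subsequent completed duplicates. Entries for actions still in progress
+ * are always kept.
+ *
+ * \param[in,out] history  Fencing history to reduce (consumed)
+ *
+ * \return Head of the reduced history list
+ */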
+static stonith_history_t *
+reduce_fence_history(stonith_history_t *history)
+{
+ stonith_history_t *new, *hp, *np;
+
+ if (!history) {
+ return history;
+ }
+
+ new = history;
+ hp = new->next;
+ new->next = NULL;
+
+ while (hp) {
+ stonith_history_t *hp_next = hp->next;
+
+ hp->next = NULL;
+
+ for (np = new; ; np = np->next) {
+ if ((hp->state == st_done) || (hp->state == st_failed)) {
+ /* action not in progress */
+ if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) &&
+ pcmk__str_eq(hp->action, np->action, pcmk__str_none) &&
+ (hp->state == np->state) &&
+ ((hp->state == st_done) ||
+ pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) {
+ /* purge older hp */
+ stonith_history_free(hp);
+ break;
+ }
+ }
+
+ if (!np->next) {
+ np->next = hp;
+ break;
+ }
+ }
+ hp = hp_next;
+ }
+
+ return new;
+}
+
+static void
+notify_callback(stonith_t * st, stonith_event_t * e)
+{
+ if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
+ && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) {
+
+ pcmk__set_result(&async_fence_data.result,
+ stonith__event_exit_status(e),
+ stonith__event_execution_status(e),
+ stonith__event_exit_reason(e));
+ g_main_loop_quit(mainloop);
+ }
+}
+
+static void
+fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
+{
+ pcmk__set_result(&async_fence_data.result, stonith__exit_status(data),
+ stonith__execution_status(data),
+ stonith__exit_reason(data));
+ g_main_loop_quit(mainloop);
+}
+
+static gboolean
+async_fence_helper(gpointer user_data)
+{
+ stonith_t *st = async_fence_data.st;
+ int call_id = 0;
+ int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
+
+ if (rc != pcmk_ok) {
+ g_main_loop_quit(mainloop);
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
+ PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc));
+ return TRUE;
+ }
+
+ st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback);
+
+ call_id = st->cmds->fence_with_delay(st,
+ st_opt_allow_suicide,
+ async_fence_data.target,
+ async_fence_data.action,
+ async_fence_data.timeout/1000,
+ async_fence_data.tolerance/1000,
+ async_fence_data.delay);
+
+ if (call_id < 0) {
+ g_main_loop_quit(mainloop);
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
+ PCMK_EXEC_ERROR, pcmk_strerror(call_id));
+ return TRUE;
+ }
+
+ st->cmds->register_callback(st,
+ call_id,
+ (async_fence_data.timeout/1000
+ + (async_fence_data.delay > 0 ? async_fence_data.delay : 0)),
+ st_opt_timeout_updates, NULL, "callback", fence_callback);
+
+ return TRUE;
+}
+
+int
+pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
+ const char *name, unsigned int timeout,
+ unsigned int tolerance, int delay, char **reason)
+{
+ crm_trigger_t *trig;
+ int rc = pcmk_rc_ok;
+
+ async_fence_data.st = st;
+ async_fence_data.name = strdup(name);
+ async_fence_data.target = target;
+ async_fence_data.action = action;
+ async_fence_data.timeout = timeout;
+ async_fence_data.tolerance = tolerance;
+ async_fence_data.delay = delay;
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN,
+ NULL);
+
+ trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
+ mainloop_set_trigger(trig);
+
+ mainloop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(mainloop);
+
+ free(async_fence_data.name);
+
+ if (reason != NULL) {
+ // Give the caller ownership of the exit reason
+ *reason = async_fence_data.result.exit_reason;
+ async_fence_data.result.exit_reason = NULL;
+ }
+ rc = stonith__result2rc(&async_fence_data.result);
+ pcmk__reset_result(&async_fence_data.result);
+ return rc;
+}
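+
+/* Illustrative sketch (the node and client names are placeholders): reboot a
+ * node through a connected fencer API object, with a two-minute timeout and
+ * no tolerance window or extra delay:
+ *
+ *     char *reason = NULL;
+ *     int rc = pcmk__request_fencing(st, "node1", "reboot", "my-tool",
+ *                                    120000, 0, 0, &reason);
+ *
+ *     if (rc != pcmk_rc_ok) {
+ *         crm_err("Fencing failed: %s", pcmk__s(reason, "unknown reason"));
+ *     }
+ *     free(reason);
+ */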
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
+ const char *name, unsigned int timeout,
+ unsigned int tolerance, int delay, char **reason)
+{
+ return pcmk__request_fencing(st, target, action, name, timeout, tolerance,
+ delay, reason);
+}
+#endif
+
+int
+pcmk__fence_history(pcmk__output_t *out, stonith_t *st, const char *target,
+ unsigned int timeout, int verbose, bool broadcast,
+ bool cleanup)
+{
+ stonith_history_t *history = NULL, *hp, *latest = NULL;
+ int rc = pcmk_rc_ok;
+ int opts = 0;
+
+ if (cleanup) {
+ out->info(out, "cleaning up fencing-history%s%s",
+ target ? " for node " : "", target ? target : "");
+ }
+ if (broadcast) {
+ out->info(out, "gather fencing-history from all nodes");
+ }
+
+ stonith__set_call_options(opts, target, st_opts);
+ if (cleanup) {
+ stonith__set_call_options(opts, target, st_opt_cleanup);
+ }
+ if (broadcast) {
+ stonith__set_call_options(opts, target, st_opt_broadcast);
+ }
+ rc = st->cmds->history(st, opts,
+ pcmk__str_eq(target, "*", pcmk__str_none)? NULL : target,
+ &history, timeout/1000);
+
+ if (cleanup) {
+ // Cleanup doesn't return a history list
+ stonith_history_free(history);
+ return pcmk_legacy2rc(rc);
+ }
+
+ out->begin_list(out, "event", "events", "Fencing history");
+
+ history = stonith__sort_history(history);
+ for (hp = history; hp; hp = hp->next) {
+ if (hp->state == st_done) {
+ latest = hp;
+ }
+
+ if (out->is_quiet(out) || !verbose) {
+ continue;
+ }
+
+ out->message(out, "stonith-event", hp, true, false,
+ stonith__later_succeeded(hp, history),
+ (uint32_t) pcmk_show_failed_detail);
+ out->increment_list(out);
+ }
+
+ if (latest) {
+ if (out->is_quiet(out)) {
+ out->message(out, "stonith-event", latest, false, true, NULL,
+ (uint32_t) pcmk_show_failed_detail);
+ } else if (!verbose) { // already printed if verbose
+ out->message(out, "stonith-event", latest, false, false, NULL,
+ (uint32_t) pcmk_show_failed_detail);
+ out->increment_list(out);
+ }
+ }
+
+ out->end_list(out);
+
+ stonith_history_free(history);
+ return pcmk_legacy2rc(rc);
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_history(xmlNodePtr *xml, stonith_t *st, const char *target,
+ unsigned int timeout, bool quiet, int verbose,
+ bool broadcast, bool cleanup)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ stonith__register_messages(out);
+
+ out->quiet = quiet;
+
+ rc = pcmk__fence_history(out, st, target, timeout, verbose, broadcast, cleanup);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+#endif
+
+int
+pcmk__fence_installed(pcmk__output_t *out, stonith_t *st, unsigned int timeout)
+{
+ stonith_key_value_t *devices = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout/1000);
+ /* list_agents returns a negative error code or a positive number of agents. */
+ if (rc < 0) {
+ return pcmk_legacy2rc(rc);
+ }
+
+ out->begin_list(out, "fence device", "fence devices", "Installed fence devices");
+ for (stonith_key_value_t *dIter = devices; dIter; dIter = dIter->next) {
+ out->list_item(out, "device", "%s", dIter->value);
+ }
+ out->end_list(out);
+
+ stonith_key_value_freeall(devices, 1, 1);
+ return pcmk_rc_ok;
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_installed(xmlNodePtr *xml, stonith_t *st, unsigned int timeout)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ stonith__register_messages(out);
+
+ rc = pcmk__fence_installed(out, st, timeout);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+#endif
+
+int
+pcmk__fence_last(pcmk__output_t *out, const char *target, bool as_nodeid)
+{
+ time_t when = 0;
+
+ if (target == NULL) {
+ return pcmk_rc_ok;
+ }
+
+ if (as_nodeid) {
+ when = stonith_api_time(atol(target), NULL, FALSE);
+ } else {
+ when = stonith_api_time(0, target, FALSE);
+ }
+
+ return out->message(out, "last-fenced", target, when);
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_last(xmlNodePtr *xml, const char *target, bool as_nodeid)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ stonith__register_messages(out);
+
+ rc = pcmk__fence_last(out, target, as_nodeid);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+#endif
+
+int
+pcmk__fence_list_targets(pcmk__output_t *out, stonith_t *st,
+ const char *device_id, unsigned int timeout)
+{
+ GList *targets = NULL;
+ char *lists = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = st->cmds->list(st, st_opts, device_id, &lists, timeout/1000);
+ if (rc != pcmk_rc_ok) {
+ return pcmk_legacy2rc(rc);
+ }
+
+ targets = stonith__parse_targets(lists);
+
+ out->begin_list(out, "fence target", "fence targets", "Fence Targets");
+ while (targets != NULL) {
+ out->list_item(out, NULL, "%s", (const char *) targets->data);
+ targets = targets->next;
+ }
+ out->end_list(out);
+
+ free(lists);
+ return rc;
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_list_targets(xmlNodePtr *xml, stonith_t *st, const char *device_id,
+ unsigned int timeout)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ stonith__register_messages(out);
+
+ rc = pcmk__fence_list_targets(out, st, device_id, timeout);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+#endif
+
+int
+pcmk__fence_metadata(pcmk__output_t *out, stonith_t *st, const char *agent,
+ unsigned int timeout)
+{
+ char *buffer = NULL;
+ int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer,
+ timeout/1000);
+
+ if (rc != pcmk_rc_ok) {
+ return pcmk_legacy2rc(rc);
+ }
+
+ out->output_xml(out, "metadata", buffer);
+ free(buffer);
+ return rc;
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_metadata(xmlNodePtr *xml, stonith_t *st, const char *agent,
+ unsigned int timeout)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ stonith__register_messages(out);
+
+ rc = pcmk__fence_metadata(out, st, agent, timeout);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+#endif
+
+int
+pcmk__fence_registered(pcmk__output_t *out, stonith_t *st, const char *target,
+ unsigned int timeout)
+{
+ stonith_key_value_t *devices = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = st->cmds->query(st, st_opts, target, &devices, timeout/1000);
+ /* query returns a negative error code or a positive number of results. */
+ if (rc < 0) {
+ return pcmk_legacy2rc(rc);
+ }
+
+ out->begin_list(out, "fence device", "fence devices", "Registered fence devices");
+ for (stonith_key_value_t *dIter = devices; dIter; dIter = dIter->next) {
+ out->list_item(out, "device", "%s", dIter->value);
+ }
+ out->end_list(out);
+
+ stonith_key_value_freeall(devices, 1, 1);
+
+ /* Return pcmk_rc_ok here, not the number of results. Callers probably
+ * don't care.
+ */
+ return pcmk_rc_ok;
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_registered(xmlNodePtr *xml, stonith_t *st, const char *target,
+ unsigned int timeout)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ stonith__register_messages(out);
+
+ rc = pcmk__fence_registered(out, st, target, timeout);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+#endif
+
+int
+pcmk__fence_register_level(stonith_t *st, const char *target, int fence_level,
+ const stonith_key_value_t *devices)
+{
+ return handle_level(st, target, fence_level, devices, true);
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_register_level(stonith_t *st, const char *target, int fence_level,
+ const stonith_key_value_t *devices)
+{
+ return pcmk__fence_register_level(st, target, fence_level, devices);
+}
+#endif
+
+int
+pcmk__fence_unregister_level(stonith_t *st, const char *target, int fence_level)
+{
+ return handle_level(st, target, fence_level, NULL, false);
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_unregister_level(stonith_t *st, const char *target, int fence_level)
+{
+ return pcmk__fence_unregister_level(st, target, fence_level);
+}
+#endif
+
+int
+pcmk__fence_validate(pcmk__output_t *out, stonith_t *st, const char *agent,
+ const char *id, const stonith_key_value_t *params,
+ unsigned int timeout)
+{
+ char *output = NULL;
+ char *error_output = NULL;
+ int rc;
+
+ rc = st->cmds->validate(st, st_opt_sync_call, id, NULL, agent, params,
+ timeout/1000, &output, &error_output);
+ out->message(out, "validate", agent, id, output, error_output, rc);
+ return pcmk_legacy2rc(rc);
+}
+
+#ifdef BUILD_PUBLIC_LIBPACEMAKER
+int
+pcmk_fence_validate(xmlNodePtr *xml, stonith_t *st, const char *agent,
+ const char *id, const stonith_key_value_t *params,
+ unsigned int timeout)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ stonith__register_messages(out);
+
+ rc = pcmk__fence_validate(out, st, agent, id, params, timeout);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+#endif
+
+int
+pcmk__get_fencing_history(stonith_t *st, stonith_history_t **stonith_history,
+ enum pcmk__fence_history fence_history)
+{
+ int rc = pcmk_rc_ok;
+
+ if ((st == NULL) || (st->state == stonith_disconnected)) {
+ rc = ENOTCONN;
+ } else if (fence_history != pcmk__fence_history_none) {
+ rc = st->cmds->history(st, st_opt_sync_call, NULL, stonith_history, 120);
+
+ rc = pcmk_legacy2rc(rc);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ *stonith_history = stonith__sort_history(*stonith_history);
+ if (fence_history == pcmk__fence_history_reduced) {
+ *stonith_history = reduce_fence_history(*stonith_history);
+ }
+ }
+
+ return rc;
+}
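+
+/* Sketch of typical use (illustrative): fetch a reduced history for status
+ * display, then free it:
+ *
+ *     stonith_history_t *history = NULL;
+ *     int rc = pcmk__get_fencing_history(st, &history,
+ *                                        pcmk__fence_history_reduced);
+ *
+ *     if (rc == pcmk_rc_ok) {
+ *         for (stonith_history_t *hp = history; hp; hp = hp->next) {
+ *             // display hp->target, hp->action, hp->state ...
+ *         }
+ *     }
+ *     stonith_history_free(history);
+ */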
diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c
new file mode 100644
index 0000000..f2f172e
--- /dev/null
+++ b/lib/pacemaker/pcmk_graph_consumer.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/lrmd_internal.h>
+#include <pacemaker-internal.h>
+
+
+/*
+ * Functions for updating graph
+ */
+
+/*!
+ * \internal
+ * \brief Update synapse after completed prerequisite
+ *
+ * A synapse is ready to be executed once all its prerequisite actions (inputs)
+ * complete. Given a completed action, check whether it is an input for a given
+ * synapse, and if so, mark the input as confirmed, and mark the synapse as
+ * ready if appropriate.
+ *
+ * \param[in,out] synapse Transition graph synapse to update
+ * \param[in] action_id ID of an action that completed
+ *
+ * \note The only substantial effect here is confirming synapse inputs.
+ *       should_fire_synapse() will recalculate pcmk__synapse_ready, so the
+ *       only consumer of the value set here is synapse_state_str().
+ */
+static void
+update_synapse_ready(pcmk__graph_synapse_t *synapse, int action_id)
+{
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
+ return; // All inputs have already been confirmed
+ }
+    // Presume ready until proven otherwise
+    pcmk__set_synapse_flags(synapse, pcmk__synapse_ready);
+ for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data;
+
+ if (prereq->id == action_id) {
+ crm_trace("Confirming input %d of synapse %d",
+ action_id, synapse->id);
+ pcmk__set_graph_action_flags(prereq, pcmk__graph_action_confirmed);
+
+ } else if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
+ pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
+ crm_trace("Synapse %d still not ready after action %d",
+ synapse->id, action_id);
+ }
+ }
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
+ crm_trace("Synapse %d is now ready to execute", synapse->id);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update action and synapse confirmation after action completion
+ *
+ * \param[in,out] synapse Transition graph synapse that action belongs to
+ * \param[in] action_id ID of action that completed
+ */
+static void
+update_synapse_confirmed(pcmk__graph_synapse_t *synapse, int action_id)
+{
+ bool all_confirmed = true;
+
+ for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data;
+
+ if (action->id == action_id) {
+ crm_trace("Confirmed action %d of synapse %d",
+ action_id, synapse->id);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+
+        } else if (all_confirmed
+                   && !pcmk_is_set(action->flags,
+                                   pcmk__graph_action_confirmed)) {
+ all_confirmed = false;
+ crm_trace("Synapse %d still not confirmed after action %d",
+ synapse->id, action_id);
+ }
+ }
+
+    if (all_confirmed
+        && !pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
+ crm_trace("Confirmed synapse %d", synapse->id);
+ pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update the transition graph with a completed action result
+ *
+ * \param[in,out] graph Transition graph to update
+ * \param[in] action Action that completed
+ */
+void
+pcmk__update_graph(pcmk__graph_t *graph, const pcmk__graph_action_t *action)
+{
+ for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
+
+        if (pcmk_any_flags_set(synapse->flags,
+                               pcmk__synapse_confirmed|pcmk__synapse_failed)) {
+            continue; // This synapse already completed
+
+        } else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
+            update_synapse_confirmed(synapse, action->id);
+
+        } else if (!pcmk_is_set(action->flags, pcmk__graph_action_failed)
+                   || (synapse->priority == INFINITY)) {
+            update_synapse_ready(synapse, action->id);
+        }
+ }
+}
+
+
+/*
+ * Functions for executing graph
+ */
+
+/* A transition graph consists of various types of actions. The library caller
+ * registers execution functions for each action type, which will be stored
+ * here.
+ */
+static pcmk__graph_functions_t *graph_fns = NULL;
+
+/*!
+ * \internal
+ * \brief Set transition graph execution functions
+ *
+ * \param[in]  fns  Execution functions to use
+ */
+void
+pcmk__set_graph_functions(pcmk__graph_functions_t *fns)
+{
+ crm_debug("Setting custom functions for executing transition graphs");
+ graph_fns = fns;
+
+ CRM_ASSERT(graph_fns != NULL);
+ CRM_ASSERT(graph_fns->rsc != NULL);
+ CRM_ASSERT(graph_fns->cluster != NULL);
+ CRM_ASSERT(graph_fns->pseudo != NULL);
+ CRM_ASSERT(graph_fns->fence != NULL);
+}
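+
+/* Sketch of registering custom executors (the handler names are
+ * placeholders). Each handler initiates its action and must eventually
+ * confirm the result via pcmk__update_graph():
+ *
+ *     static pcmk__graph_functions_t my_fns = {
+ *         .pseudo  = my_pseudo_handler,
+ *         .rsc     = my_resource_handler,
+ *         .cluster = my_cluster_handler,
+ *         .fence   = my_fencing_handler,
+ *     };
+ *
+ *     pcmk__set_graph_functions(&my_fns);
+ */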
+
+/*!
+ * \internal
+ * \brief Check whether a graph synapse is ready to be executed
+ *
+ * \param[in,out] graph Transition graph that synapse is part of
+ * \param[in,out] synapse Synapse to check
+ *
+ * \return true if synapse is ready, false otherwise
+ */
+static bool
+should_fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse)
+{
+ GList *lpc = NULL;
+
+ pcmk__set_synapse_flags(synapse, pcmk__synapse_ready);
+ for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data;
+
+ if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
+ crm_trace("Input %d for synapse %d not yet confirmed",
+ prereq->id, synapse->id);
+ pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
+ break;
+
+        } else if (pcmk_is_set(prereq->flags, pcmk__graph_action_failed)
+                   && !pcmk_is_set(prereq->flags,
+                                   pcmk__graph_action_can_fail)) {
+ crm_trace("Input %d for synapse %d confirmed but failed",
+ prereq->id, synapse->id);
+ pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
+ break;
+ }
+ }
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
+ crm_trace("Synapse %d is ready to execute", synapse->id);
+ } else {
+ return false;
+ }
+
+ for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_action_t *a = (pcmk__graph_action_t *) lpc->data;
+
+ if (a->type == pcmk__pseudo_graph_action) {
+ /* None of the below applies to pseudo ops */
+
+ } else if (synapse->priority < graph->abort_priority) {
+ crm_trace("Skipping synapse %d: priority %d is less than "
+ "abort priority %d",
+ synapse->id, synapse->priority, graph->abort_priority);
+ graph->skipped++;
+ return false;
+
+ } else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) {
+ crm_trace("Deferring synapse %d: not allowed", synapse->id);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Initiate an action from a transition graph
+ *
+ * \param[in,out] graph Transition graph containing action
+ * \param[in,out] action Action to execute
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+initiate_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ const char *id = ID(action->xml);
+
+ CRM_CHECK(id != NULL, return EINVAL);
+ CRM_CHECK(!pcmk_is_set(action->flags, pcmk__graph_action_executed),
+ return pcmk_rc_already);
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
+ switch (action->type) {
+ case pcmk__pseudo_graph_action:
+ crm_trace("Executing pseudo-action %d (%s)", action->id, id);
+ return graph_fns->pseudo(graph, action);
+
+ case pcmk__rsc_graph_action:
+ crm_trace("Executing resource action %d (%s)", action->id, id);
+ return graph_fns->rsc(graph, action);
+
+ case pcmk__cluster_graph_action:
+ if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK),
+ CRM_OP_FENCE, pcmk__str_casei)) {
+ crm_trace("Executing fencing action %d (%s)",
+ action->id, id);
+ return graph_fns->fence(graph, action);
+ }
+ crm_trace("Executing cluster action %d (%s)", action->id, id);
+ return graph_fns->cluster(graph, action);
+
+ default:
+ crm_err("Unsupported graph action type <%s " XML_ATTR_ID "='%s'> "
+ "(bug?)",
+ crm_element_name(action->xml), id);
+ return EINVAL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Execute a graph synapse
+ *
+ * \param[in,out] graph Transition graph with synapse to execute
+ * \param[in,out] synapse Synapse to execute
+ *
+ * \return Standard Pacemaker return value
+ */
+static int
+fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse)
+{
+ pcmk__set_synapse_flags(synapse, pcmk__synapse_executed);
+ for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data;
+ int rc = initiate_action(graph, action);
+
+ if (rc != pcmk_rc_ok) {
+ crm_err("Failed initiating <%s " XML_ATTR_ID "=%d> in synapse %d: "
+ "%s",
+ crm_element_name(action->xml), action->id, synapse->id,
+ pcmk_rc_str(rc));
+ pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
+ pcmk__set_graph_action_flags(action,
+ pcmk__graph_action_confirmed
+ |pcmk__graph_action_failed);
+ return pcmk_rc_error;
+ }
+ }
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Dummy graph method that can be used with simulations
+ *
+ * \param[in,out] graph Transition graph containing action
+ * \param[in,out] action Graph action to be initiated
+ *
+ * \return Standard Pacemaker return code
+ * \note If the PE_fail environment variable is set to the action ID,
+ * then the graph action will be marked as failed.
+ */
+static int
+pseudo_action_dummy(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ static int fail = -1;
+
+ if (fail < 0) {
+ long long fail_ll;
+
+ if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok)
+ && (fail_ll > 0LL) && (fail_ll <= INT_MAX)) {
+ fail = (int) fail_ll;
+ } else {
+ fail = 0;
+ }
+ }
+
+ if (action->id == fail) {
+ crm_err("Dummy event handler: pretending action %d failed", action->id);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ graph->abort_priority = INFINITY;
+ } else {
+ crm_trace("Dummy event handler: action %d initiated", action->id);
+ }
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__update_graph(graph, action);
+ return pcmk_rc_ok;
+}
+
+static pcmk__graph_functions_t default_fns = {
+ pseudo_action_dummy,
+ pseudo_action_dummy,
+ pseudo_action_dummy,
+ pseudo_action_dummy
+};
+
+/*!
+ * \internal
+ * \brief Execute all actions in a transition graph
+ *
+ * \param[in,out] graph Transition graph to execute
+ *
+ * \return Status of transition after execution
+ */
+enum pcmk__graph_status
+pcmk__execute_graph(pcmk__graph_t *graph)
+{
+ GList *lpc = NULL;
+ int log_level = LOG_DEBUG;
+ enum pcmk__graph_status pass_result = pcmk__graph_active;
+ const char *status = "In progress";
+
+ if (graph_fns == NULL) {
+ graph_fns = &default_fns;
+ }
+ if (graph == NULL) {
+ return pcmk__graph_complete;
+ }
+
+ graph->fired = 0;
+ graph->pending = 0;
+ graph->skipped = 0;
+ graph->completed = 0;
+ graph->incomplete = 0;
+
+ // Count completed and in-flight synapses
+ for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
+
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
+ graph->completed++;
+
+        } else if (!pcmk_is_set(synapse->flags, pcmk__synapse_failed)
+                   && pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
+ graph->pending++;
+ }
+ }
+ crm_trace("Executing graph %d (%d synapses already completed, %d pending)",
+ graph->id, graph->completed, graph->pending);
+
+ // Execute any synapses that are ready
+ for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
+
+ if ((graph->batch_limit > 0)
+ && (graph->pending >= graph->batch_limit)) {
+
+ crm_debug("Throttling graph execution: batch limit (%d) reached",
+ graph->batch_limit);
+ break;
+
+ } else if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
+ graph->skipped++;
+ continue;
+
+        } else if (pcmk_any_flags_set(synapse->flags,
+                                      pcmk__synapse_confirmed
+                                      |pcmk__synapse_executed)) {
+ continue; // Already handled
+
+ } else if (should_fire_synapse(graph, synapse)) {
+ graph->fired++;
+ if (fire_synapse(graph, synapse) != pcmk_rc_ok) {
+ crm_err("Synapse %d failed to fire", synapse->id);
+ log_level = LOG_ERR;
+ graph->abort_priority = INFINITY;
+ graph->incomplete++;
+ graph->fired--;
+ }
+
+ if (!(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) {
+ graph->pending++;
+ }
+
+ } else {
+ crm_trace("Synapse %d cannot fire", synapse->id);
+ graph->incomplete++;
+ }
+ }
+
+ if ((graph->pending == 0) && (graph->fired == 0)) {
+ graph->complete = true;
+
+ if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) {
+ log_level = LOG_WARNING;
+ pass_result = pcmk__graph_terminated;
+ status = "Terminated";
+
+ } else if (graph->skipped != 0) {
+ log_level = LOG_NOTICE;
+ pass_result = pcmk__graph_complete;
+ status = "Stopped";
+
+ } else {
+ log_level = LOG_NOTICE;
+ pass_result = pcmk__graph_complete;
+ status = "Complete";
+ }
+
+ } else if (graph->fired == 0) {
+ pass_result = pcmk__graph_pending;
+ }
+
+ do_crm_log(log_level,
+ "Transition %d (Complete=%d, Pending=%d,"
+ " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s",
+ graph->id, graph->completed, graph->pending, graph->fired,
+ graph->skipped, graph->incomplete, graph->source, status);
+
+ return pass_result;
+}
+
+
+/*
+ * Functions for unpacking transition graph XML into structs
+ */
+
+/*!
+ * \internal
+ * \brief Unpack a transition graph action from XML
+ *
+ * \param[in] parent Synapse that action is part of
+ * \param[in]  xml_action  Action XML to unpack
+ *
+ * \return Newly allocated action on success, or NULL otherwise
+ */
+static pcmk__graph_action_t *
+unpack_action(pcmk__graph_synapse_t *parent, xmlNode *xml_action)
+{
+ enum pcmk__graph_action_type action_type;
+ pcmk__graph_action_t *action = NULL;
+ const char *element = TYPE(xml_action);
+ const char *value = ID(xml_action);
+
+ if (value == NULL) {
+ crm_err("Ignoring transition graph action without id (bug?)");
+ crm_log_xml_trace(xml_action, "invalid");
+ return NULL;
+ }
+
+ if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) {
+ action_type = pcmk__rsc_graph_action;
+
+ } else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT,
+ pcmk__str_casei)) {
+ action_type = pcmk__pseudo_graph_action;
+
+ } else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT,
+ pcmk__str_casei)) {
+ action_type = pcmk__cluster_graph_action;
+
+ } else {
+ crm_err("Ignoring transition graph action of unknown type '%s' (bug?)",
+ element);
+ crm_log_xml_trace(xml_action, "invalid");
+ return NULL;
+ }
+
+ action = calloc(1, sizeof(pcmk__graph_action_t));
+ if (action == NULL) {
+ crm_perror(LOG_CRIT, "Cannot unpack transition graph action");
+ crm_log_xml_trace(xml_action, "lost");
+ return NULL;
+ }
+
+ pcmk__scan_min_int(value, &(action->id), -1);
+    action->type = action_type;
+    action->xml = copy_xml(xml_action);
+    action->synapse = parent;
+ action->params = xml2list(action->xml);
+
+ value = g_hash_table_lookup(action->params, "CRM_meta_timeout");
+ pcmk__scan_min_int(value, &(action->timeout), 0);
+
+ /* Take start-delay into account for the timeout of the action timer */
+ value = g_hash_table_lookup(action->params, "CRM_meta_start_delay");
+ {
+ int start_delay;
+
+ pcmk__scan_min_int(value, &start_delay, 0);
+ action->timeout += start_delay;
+ }
+
+ if (pcmk__guint_from_hash(action->params,
+ CRM_META "_" XML_LRM_ATTR_INTERVAL, 0,
+ &(action->interval_ms)) != pcmk_rc_ok) {
+ action->interval_ms = 0;
+ }
+
+ value = g_hash_table_lookup(action->params, "CRM_meta_can_fail");
+    if (value != NULL) {
+        gboolean can_fail = FALSE;
+
+        crm_str_to_boolean(value, &can_fail);
+ if (can_fail) {
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_can_fail);
+ } else {
+ pcmk__clear_graph_action_flags(action, pcmk__graph_action_can_fail);
+ }
+
+#ifndef PCMK__COMPAT_2_0
+ if (pcmk_is_set(action->flags, pcmk__graph_action_can_fail)) {
+ crm_warn("Support for the can_fail meta-attribute is deprecated"
+ " and will be removed in a future release");
+ }
+#endif
+ }
+
+ crm_trace("Action %d has timer set to %dms", action->id, action->timeout);
+
+ return action;
+}
+
+/*!
+ * \internal
+ * \brief Unpack transition graph synapse from XML
+ *
+ * \param[in,out] new_graph Transition graph that synapse is part of
+ * \param[in] xml_synapse Synapse XML
+ *
+ * \return Newly allocated synapse on success, or NULL otherwise
+ */
+static pcmk__graph_synapse_t *
+unpack_synapse(pcmk__graph_t *new_graph, const xmlNode *xml_synapse)
+{
+ const char *value = NULL;
+ xmlNode *action_set = NULL;
+ pcmk__graph_synapse_t *new_synapse = NULL;
+
+ crm_trace("Unpacking synapse %s", ID(xml_synapse));
+
+ new_synapse = calloc(1, sizeof(pcmk__graph_synapse_t));
+ if (new_synapse == NULL) {
+ return NULL;
+ }
+
+ pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0);
+
+ value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY);
+ pcmk__scan_min_int(value, &(new_synapse->priority), 0);
+
+ CRM_CHECK(new_synapse->id >= 0, free(new_synapse);
+ return NULL);
+
+ new_graph->num_synapses++;
+
+ crm_trace("Unpacking synapse %s action sets",
+ crm_element_value(xml_synapse, XML_ATTR_ID));
+
+ for (action_set = first_named_child(xml_synapse, "action_set");
+ action_set != NULL; action_set = crm_next_same_xml(action_set)) {
+
+ for (xmlNode *action = pcmk__xml_first_child(action_set);
+ action != NULL; action = pcmk__xml_next(action)) {
+
+ pcmk__graph_action_t *new_action = unpack_action(new_synapse,
+ action);
+
+ if (new_action == NULL) {
+ continue;
+ }
+
+ crm_trace("Adding action %d to synapse %d",
+ new_action->id, new_synapse->id);
+ new_graph->num_actions++;
+ new_synapse->actions = g_list_append(new_synapse->actions,
+ new_action);
+ }
+ }
+
+ crm_trace("Unpacking synapse %s inputs", ID(xml_synapse));
+
+ for (xmlNode *inputs = first_named_child(xml_synapse, "inputs");
+ inputs != NULL; inputs = crm_next_same_xml(inputs)) {
+
+ for (xmlNode *trigger = first_named_child(inputs, "trigger");
+ trigger != NULL; trigger = crm_next_same_xml(trigger)) {
+
+ for (xmlNode *input = pcmk__xml_first_child(trigger);
+ input != NULL; input = pcmk__xml_next(input)) {
+
+ pcmk__graph_action_t *new_input = unpack_action(new_synapse,
+ input);
+
+ if (new_input == NULL) {
+ continue;
+ }
+
+ crm_trace("Adding input %d to synapse %d",
+ new_input->id, new_synapse->id);
+
+ new_synapse->inputs = g_list_append(new_synapse->inputs,
+ new_input);
+ }
+ }
+ }
+
+ return new_synapse;
+}
+
+/*!
+ * \internal
+ * \brief Unpack transition graph XML
+ *
+ * \param[in] xml_graph Transition graph XML to unpack
+ * \param[in] reference Where the XML came from (for logging)
+ *
+ * \return Newly allocated transition graph on success, NULL otherwise
+ * \note The caller is responsible for freeing the return value using
+ * pcmk__free_graph().
+ * \note The XML is expected to be structured like:
+ <transition_graph ...>
+ <synapse id="0">
+ <action_set>
+ <rsc_op id="2" ...>
+ ...
+ </action_set>
+ <inputs>
+ <rsc_op id="1" ...
+ ...
+ </inputs>
+ </synapse>
+ ...
+ </transition_graph>
+ */
+pcmk__graph_t *
+pcmk__unpack_graph(const xmlNode *xml_graph, const char *reference)
+{
+ pcmk__graph_t *new_graph = NULL;
+
+ new_graph = calloc(1, sizeof(pcmk__graph_t));
+ if (new_graph == NULL) {
+ return NULL;
+ }
+
+ new_graph->source = strdup((reference == NULL)? "unknown" : reference);
+ if (new_graph->source == NULL) {
+ free(new_graph);
+ return NULL;
+ }
+
+ new_graph->id = -1;
+ new_graph->abort_priority = 0;
+ new_graph->network_delay = 0;
+ new_graph->stonith_timeout = 0;
+ new_graph->completion_action = pcmk__graph_done;
+
+ // Parse top-level attributes from <transition_graph>
+ if (xml_graph != NULL) {
+ const char *buf = crm_element_value(xml_graph, "transition_id");
+
+        CRM_CHECK(buf != NULL,
+                  free(new_graph->source); free(new_graph);
+                  return NULL);
+ pcmk__scan_min_int(buf, &(new_graph->id), -1);
+
+ buf = crm_element_value(xml_graph, "cluster-delay");
+        CRM_CHECK(buf != NULL,
+                  free(new_graph->source); free(new_graph);
+                  return NULL);
+ new_graph->network_delay = crm_parse_interval_spec(buf);
+
+ buf = crm_element_value(xml_graph, "stonith-timeout");
+ if (buf == NULL) {
+ new_graph->stonith_timeout = new_graph->network_delay;
+ } else {
+ new_graph->stonith_timeout = crm_parse_interval_spec(buf);
+ }
+
+ // Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum
+ buf = crm_element_value(xml_graph, "batch-limit");
+ if ((buf == NULL)
+ || (pcmk__scan_min_int(buf, &(new_graph->batch_limit),
+ -1) != pcmk_rc_ok)) {
+ new_graph->batch_limit = 0;
+ }
+
+ buf = crm_element_value(xml_graph, "migration-limit");
+ pcmk__scan_min_int(buf, &(new_graph->migration_limit), -1);
+
+ pcmk__str_update(&(new_graph->failed_stop_offset),
+ crm_element_value(xml_graph, "failed-stop-offset"));
+ pcmk__str_update(&(new_graph->failed_start_offset),
+ crm_element_value(xml_graph, "failed-start-offset"));
+
+ if (crm_element_value_epoch(xml_graph, "recheck-by",
+ &(new_graph->recheck_by)) != pcmk_ok) {
+ new_graph->recheck_by = 0;
+ }
+ }
+
+ // Unpack each child <synapse> element
+ for (const xmlNode *synapse_xml = first_named_child(xml_graph, "synapse");
+ synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) {
+
+ pcmk__graph_synapse_t *new_synapse = unpack_synapse(new_graph,
+ synapse_xml);
+
+ if (new_synapse != NULL) {
+ new_graph->synapses = g_list_append(new_graph->synapses,
+ new_synapse);
+ }
+ }
+
+ crm_debug("Unpacked transition %d from %s: %d actions in %d synapses",
+ new_graph->id, new_graph->source, new_graph->num_actions,
+ new_graph->num_synapses);
+
+ return new_graph;
+}
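+
+/* Lifecycle sketch (illustrative): with the default dummy handlers, a graph
+ * can be unpacked from XML and driven to completion in a loop, as simulation
+ * callers do:
+ *
+ *     pcmk__graph_t *graph = pcmk__unpack_graph(xml, "example");
+ *     enum pcmk__graph_status status;
+ *
+ *     do {
+ *         status = pcmk__execute_graph(graph);
+ *     } while (status == pcmk__graph_active);
+ *
+ *     pcmk__free_graph(graph);
+ */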
+
+
+/*
+ * Functions for freeing transition graph objects
+ */
+
+/*!
+ * \internal
+ * \brief Free a transition graph action object
+ *
+ * \param[in,out] user_data Action to free
+ */
+static void
+free_graph_action(gpointer user_data)
+{
+ pcmk__graph_action_t *action = user_data;
+
+ if (action->timer != 0) {
+ crm_warn("Cancelling timer for graph action %d", action->id);
+ g_source_remove(action->timer);
+ }
+ if (action->params != NULL) {
+ g_hash_table_destroy(action->params);
+ }
+ free_xml(action->xml);
+ free(action);
+}
+
+/*!
+ * \internal
+ * \brief Free a transition graph synapse object
+ *
+ * \param[in,out] user_data Synapse to free
+ */
+static void
+free_graph_synapse(gpointer user_data)
+{
+ pcmk__graph_synapse_t *synapse = user_data;
+
+ g_list_free_full(synapse->actions, free_graph_action);
+ g_list_free_full(synapse->inputs, free_graph_action);
+ free(synapse);
+}
+
+/*!
+ * \internal
+ * \brief Free a transition graph object
+ *
+ * \param[in,out] graph Transition graph to free
+ */
+void
+pcmk__free_graph(pcmk__graph_t *graph)
+{
+ if (graph != NULL) {
+ g_list_free_full(graph->synapses, free_graph_synapse);
+ free(graph->source);
+ free(graph->failed_stop_offset);
+ free(graph->failed_start_offset);
+ free(graph);
+ }
+}
+
+
+/*
+ * Other transition graph utilities
+ */
+
+/*!
+ * \internal
+ * \brief Synthesize an executor event from a graph action
+ *
+ * \param[in] resource     If not NULL, use a call ID greater than any
+ *                         recorded in this XML
+ * \param[in] action Graph action
+ * \param[in] status What to use as event execution status
+ * \param[in] rc What to use as event exit status
+ * \param[in] exit_reason What to use as event exit reason
+ *
+ * \return Newly allocated executor event on success, or NULL otherwise
+ */
+lrmd_event_data_t *
+pcmk__event_from_graph_action(const xmlNode *resource,
+ const pcmk__graph_action_t *action,
+ int status, int rc, const char *exit_reason)
+{
+ lrmd_event_data_t *op = NULL;
+ GHashTableIter iter;
+ const char *name = NULL;
+ const char *value = NULL;
+ xmlNode *action_resource = NULL;
+
+ CRM_CHECK(action != NULL, return NULL);
+ CRM_CHECK(action->type == pcmk__rsc_graph_action, return NULL);
+
+ action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
+ CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid");
+ return NULL);
+
+ op = lrmd_new_event(ID(action_resource),
+ crm_element_value(action->xml, XML_LRM_ATTR_TASK),
+ action->interval_ms);
+ lrmd__set_result(op, rc, status, exit_reason);
+ op->t_run = time(NULL);
+ op->t_rcchange = op->t_run;
+ op->params = pcmk__strkey_table(free, free);
+
+ g_hash_table_iter_init(&iter, action->params);
+ while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) {
+ g_hash_table_insert(op->params, strdup(name), strdup(value));
+ }
+
+ for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL;
+ xop = pcmk__xml_next(xop)) {
+ int tmp = 0;
+
+ crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
+ crm_debug("Got call_id=%d for %s", tmp, ID(resource));
+ if (tmp > op->call_id) {
+ op->call_id = tmp;
+ }
+ }
+
+ op->call_id++;
+ return op;
+}
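+
+/* Illustrative sketch (action is an assumed resource graph action):
+ * synthesize a failure event, hand it to the normal result-processing path,
+ * and free it:
+ *
+ *     lrmd_event_data_t *op =
+ *         pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_ERROR,
+ *                                       PCMK_OCF_UNKNOWN_ERROR,
+ *                                       "simulated failure");
+ *
+ *     if (op != NULL) {
+ *         // process op as if the executor had reported it ...
+ *         lrmd_free_event(op);
+ *     }
+ */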
diff --git a/lib/pacemaker/pcmk_graph_logging.c b/lib/pacemaker/pcmk_graph_logging.c
new file mode 100644
index 0000000..b922a3e
--- /dev/null
+++ b/lib/pacemaker/pcmk_graph_logging.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <pacemaker-internal.h>
+
+/*!
+ * \internal
+ * \brief Return text equivalent of an enum pcmk__graph_status for logging
+ *
+ * \param[in] state Transition status
+ *
+ * \return Human-readable text equivalent of \p state
+ */
+const char *
+pcmk__graph_status2text(enum pcmk__graph_status state)
+{
+ switch (state) {
+ case pcmk__graph_active:
+ return "active";
+ case pcmk__graph_pending:
+ return "pending";
+ case pcmk__graph_complete:
+ return "complete";
+ case pcmk__graph_terminated:
+ return "terminated";
+ }
+ return "unknown";
+}
+
+static const char *
+actiontype2text(enum pcmk__graph_action_type type)
+{
+ switch (type) {
+ case pcmk__pseudo_graph_action:
+ return "pseudo";
+ case pcmk__rsc_graph_action:
+ return "resource";
+ case pcmk__cluster_graph_action:
+ return "cluster";
+ }
+ return "invalid";
+}
+
+/*!
+ * \internal
+ * \brief Find a transition graph action by ID
+ *
+ * \param[in] graph Transition graph to search
+ * \param[in] id Action ID to search for
+ *
+ * \return Transition graph action corresponding to \p id, or NULL if none
+ */
+static const pcmk__graph_action_t *
+find_graph_action_by_id(const pcmk__graph_t *graph, int id)
+{
+ if (graph == NULL) {
+ return NULL;
+ }
+
+ for (const GList *sIter = graph->synapses; sIter != NULL;
+ sIter = sIter->next) {
+
+ const pcmk__graph_synapse_t *synapse = NULL;
+
+ synapse = (const pcmk__graph_synapse_t *) sIter->data;
+ for (const GList *aIter = synapse->actions; aIter != NULL;
+ aIter = aIter->next) {
+
+ const pcmk__graph_action_t *action = NULL;
+
+ action = (const pcmk__graph_action_t *) aIter->data;
+ if (action->id == id) {
+ return action;
+ }
+ }
+ }
+ return NULL;
+}
+
+static const char *
+synapse_state_str(pcmk__graph_synapse_t *synapse)
+{
+ if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
+ return "Failed";
+
+ } else if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
+ return "Completed";
+
+ } else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
+ return "In-flight";
+
+ } else if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
+ return "Ready";
+ }
+ return "Pending";
+}
+
+/*!
+ * \internal
+ * \brief List the action IDs of pending inputs to a transition graph synapse
+ *
+ * \param[in] graph Transition graph to which \p synapse belongs
+ * \param[in] synapse Synapse whose inputs to check
+ *
+ * \return A \p GString containing the space-delimited action IDs of inputs to
+ * \p synapse that haven't completed successfully
+ *
+ * \note The caller is responsible for freeing the return value using
+ * \p g_string_free().
+ */
+static GString *
+synapse_pending_inputs(const pcmk__graph_t *graph,
+ const pcmk__graph_synapse_t *synapse)
+{
+ GString *pending = NULL;
+
+ for (const GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
+ const pcmk__graph_action_t *input = (pcmk__graph_action_t *) lpc->data;
+
+ if (pcmk_is_set(input->flags, pcmk__graph_action_failed)) {
+ pcmk__add_word(&pending, 1024, ID(input->xml));
+
+ } else if (pcmk_is_set(input->flags, pcmk__graph_action_confirmed)) {
+ // Confirmed successful inputs are not pending
+
+ } else if (find_graph_action_by_id(graph, input->id) != NULL) {
+ // In-flight or pending
+ pcmk__add_word(&pending, 1024, ID(input->xml));
+ }
+ }
+ return pending;
+}
+
+// Log synapse inputs that aren't in graph
+static void
+log_unresolved_inputs(unsigned int log_level, pcmk__graph_t *graph,
+ pcmk__graph_synapse_t *synapse)
+{
+ for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
+ pcmk__graph_action_t *input = (pcmk__graph_action_t *) lpc->data;
+ const char *key = crm_element_value(input->xml, XML_LRM_ATTR_TASK_KEY);
+ const char *host = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
+
+ if (find_graph_action_by_id(graph, input->id) == NULL) {
+ do_crm_log(log_level,
+ " * [Input %2d]: Unresolved dependency %s op %s%s%s",
+ input->id, actiontype2text(input->type), key,
+ (host? " on " : ""), (host? host : ""));
+ }
+ }
+}
+
+static void
+log_synapse_action(unsigned int log_level, pcmk__graph_synapse_t *synapse,
+ pcmk__graph_action_t *action, const char *pending_inputs)
+{
+ const char *key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+ const char *host = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ char *desc = crm_strdup_printf("%s %s op %s",
+ synapse_state_str(synapse),
+ actiontype2text(action->type), key);
+
+ do_crm_log(log_level,
+ "[Action %4d]: %-50s%s%s (priority: %d, waiting: %s)",
+ action->id, desc, (host? " on " : ""), (host? host : ""),
+ synapse->priority, pending_inputs);
+ free(desc);
+}
+
+static void
+log_synapse(unsigned int log_level, pcmk__graph_t *graph,
+ pcmk__graph_synapse_t *synapse)
+{
+ GString *g_pending = NULL;
+ const char *pending = "none";
+
+ if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
+ g_pending = synapse_pending_inputs(graph, synapse);
+
+ if (g_pending != NULL) {
+ pending = (const char *) g_pending->str;
+ }
+ }
+
+ for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
+ log_synapse_action(log_level, synapse,
+ (pcmk__graph_action_t *) lpc->data, pending);
+ }
+
+ if (g_pending != NULL) {
+ g_string_free(g_pending, TRUE);
+ }
+
+ if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
+ log_unresolved_inputs(log_level, graph, synapse);
+ }
+}
+
+void
+pcmk__log_graph_action(int log_level, pcmk__graph_action_t *action)
+{
+ log_synapse(log_level, NULL, action->synapse);
+}
+
+void
+pcmk__log_graph(unsigned int log_level, pcmk__graph_t *graph)
+{
+ if ((graph == NULL) || (graph->num_actions == 0)) {
+ if (log_level == LOG_TRACE) {
+ crm_debug("Empty transition graph");
+ }
+ return;
+ }
+
+ do_crm_log(log_level, "Graph %d with %d actions:"
+ " batch-limit=%d jobs, network-delay=%ums",
+ graph->id, graph->num_actions,
+ graph->batch_limit, graph->network_delay);
+
+ for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
+ log_synapse(log_level, graph, (pcmk__graph_synapse_t *) lpc->data);
+ }
+}
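+
+/* Illustrative only: a caller holding an unpacked transition graph (for
+ * example, from pcmk__unpack_graph() in pcmk_graph_consumer.c, assumed here)
+ * could dump it at a chosen severity with:
+ *
+ *     pcmk__log_graph(LOG_DEBUG, graph);
+ *
+ * Note that an empty graph produces no output unless called at LOG_TRACE.
+ */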
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
new file mode 100644
index 0000000..5484e8b
--- /dev/null
+++ b/lib/pacemaker/pcmk_graph_producer.c
@@ -0,0 +1,1078 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+#include <glib.h>
+
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+// Convenience macros for logging action properties
+
+#define action_type_str(flags) \
+ (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
+
+#define action_optional_str(flags) \
+ (pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
+
+#define action_runnable_str(flags) \
+ (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
+
+#define action_node_str(a) \
+ (((a)->node == NULL)? "no node" : (a)->node->details->uname)
+
+/*!
+ * \internal
+ * \brief Add an XML node tag for a specified ID
+ *
+ * \param[in] id Node UUID to add
+ * \param[in,out] xml Parent XML tag to add to
+ */
+static xmlNode*
+add_node_to_xml_by_id(const char *id, xmlNode *xml)
+{
+ xmlNode *node_xml;
+
+ node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
+ crm_xml_add(node_xml, XML_ATTR_ID, id);
+
+ return node_xml;
+}
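+
+/* For reference, assuming the usual expansions of XML_CIB_TAG_NODE and
+ * XML_ATTR_ID, the element created above is simply:
+ *
+ *     <node id="NODE-UUID"/>
+ */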
+
+/*!
+ * \internal
+ * \brief Add an XML node tag for a specified node
+ *
+ * \param[in] node Node to add
+ * \param[in,out] xml XML to add node to
+ */
+static void
+add_node_to_xml(const pe_node_t *node, void *xml)
+{
+ add_node_to_xml_by_id(node->details->id, (xmlNode *) xml);
+}
+
+/*!
+ * \internal
+ * \brief Add XML with nodes that need an update of their maintenance state
+ *
+ * \param[in,out] xml Parent XML tag to add to
+ * \param[in] data_set Working set for cluster
+ */
+static int
+add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set)
+{
+ GList *gIter = NULL;
+    xmlNode *maintenance =
+        (xml == NULL)? NULL : create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE);
+ int count = 0;
+
+ for (gIter = data_set->nodes; gIter != NULL;
+ gIter = gIter->next) {
+ pe_node_t *node = (pe_node_t *) gIter->data;
+ struct pe_node_shared_s *details = node->details;
+
+ if (!pe__is_guest_or_remote_node(node)) {
+            continue; /* only remote and guest nodes need to know for now */
+ }
+
+ if (details->maintenance != details->remote_maintenance) {
+ if (maintenance) {
+ crm_xml_add(
+ add_node_to_xml_by_id(node->details->id, maintenance),
+ XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0");
+ }
+ count++;
+ }
+ }
+    crm_trace("%s %d nodes that need a maintenance-mode update "
+              "in the transition", maintenance? "Added" : "Counted", count);
+ return count;
+}
+
+/*!
+ * \internal
+ * \brief Add pseudo action with nodes needing maintenance state update
+ *
+ * \param[in,out] data_set Working set for cluster
+ */
+static void
+add_maintenance_update(pe_working_set_t *data_set)
+{
+ pe_action_t *action = NULL;
+
+ if (add_maintenance_nodes(NULL, data_set)) {
+ crm_trace("adding maintenance state update pseudo action");
+ action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set);
+ pe__set_action_flags(action, pe_action_print_always);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add XML with nodes that an action is expected to bring down
+ *
+ * If a specified action is expected to bring any nodes down, add an XML block
+ * with their UUIDs. When a node is lost, this allows the controller to
+ * determine whether it was expected.
+ *
+ * \param[in,out] xml Parent XML tag to add to
+ * \param[in] action Action to check for downed nodes
+ * \param[in] data_set Working set for cluster
+ */
+static void
+add_downed_nodes(xmlNode *xml, const pe_action_t *action,
+ const pe_working_set_t *data_set)
+{
+ CRM_CHECK(xml && action && action->node && data_set, return);
+
+ if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
+
+ /* Shutdown makes the action's node down */
+ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
+ add_node_to_xml_by_id(action->node->details->id, downed);
+
+ } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
+
+ /* Fencing makes the action's node and any hosted guest nodes down */
+ const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
+
+ if (pcmk__is_fencing_action(fence)) {
+ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
+ add_node_to_xml_by_id(action->node->details->id, downed);
+ pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed);
+ }
+
+ } else if (action->rsc && action->rsc->is_remote_node
+ && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) {
+
+        /* Stopping a remote connection resource makes the connected node go
+         * down, unless the stop is part of a migration
+         */
+ GList *iter;
+ pe_action_t *input;
+ gboolean migrating = FALSE;
+
+ for (iter = action->actions_before; iter != NULL; iter = iter->next) {
+ input = ((pe_action_wrapper_t *) iter->data)->action;
+ if (input->rsc && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_casei)
+ && pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
+ migrating = TRUE;
+ break;
+ }
+ }
+ if (!migrating) {
+ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
+ add_node_to_xml_by_id(action->rsc->id, downed);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create a transition graph operation key for a clone action
+ *
+ * \param[in] action Clone action
+ * \param[in] interval_ms Action interval in milliseconds
+ *
+ * \return Newly allocated string with transition graph operation key
+ */
+static char *
+clone_op_key(const pe_action_t *action, guint interval_ms)
+{
+ if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_none)) {
+ const char *n_type = g_hash_table_lookup(action->meta, "notify_type");
+ const char *n_task = g_hash_table_lookup(action->meta,
+ "notify_operation");
+
+ CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL));
+ return pcmk__notify_key(action->rsc->clone_name, n_type, n_task);
+
+ } else if (action->cancel_task != NULL) {
+ return pcmk__op_key(action->rsc->clone_name, action->cancel_task,
+ interval_ms);
+ } else {
+ return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms);
+ }
+}
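+
+/* Illustrative examples of the keys produced above (formats assumed from
+ * pcmk__op_key() and pcmk__notify_key(); the resource name and interval are
+ * hypothetical):
+ *
+ *     pcmk__op_key("myclone", "monitor", 10000)   -> "myclone_monitor_10000"
+ *     pcmk__notify_key("myclone", "pre", "start") -> "myclone_pre_notify_start_0"
+ */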
+
+/*!
+ * \internal
+ * \brief Add node details to transition graph action XML
+ *
+ * \param[in] action Scheduled action
+ * \param[in,out] xml Transition graph action XML for \p action
+ */
+static void
+add_node_details(const pe_action_t *action, xmlNode *xml)
+{
+ pe_node_t *router_node = pcmk__connection_host_for_action(action);
+
+ crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname);
+ crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id);
+ if (router_node != NULL) {
+ crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add resource details to transition graph action XML
+ *
+ * \param[in] action Scheduled action
+ * \param[in,out] action_xml Transition graph action XML for \p action
+ */
+static void
+add_resource_details(const pe_action_t *action, xmlNode *action_xml)
+{
+ xmlNode *rsc_xml = NULL;
+ const char *attr_list[] = {
+ XML_AGENT_ATTR_CLASS,
+ XML_AGENT_ATTR_PROVIDER,
+ XML_ATTR_TYPE
+ };
+
+ /* If a resource is locked to a node via shutdown-lock, mark its actions
+ * so the controller can preserve the lock when the action completes.
+ */
+ if (pcmk__action_locks_rsc_to_node(action)) {
+ crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ (long long) action->rsc->lock_time);
+ }
+
+ // List affected resource
+
+ rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml));
+ if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan)
+ && (action->rsc->clone_name != NULL)) {
+ /* Use the numbered instance name here, because if there is more
+ * than one instance on a node, we need to make sure the command
+ * goes to the right one.
+ *
+ * This is important even for anonymous clones, because the clone's
+ * unique meta-attribute might have just been toggled from on to
+ * off.
+ */
+ crm_debug("Using orphan clone name %s instead of %s",
+ action->rsc->id, action->rsc->clone_name);
+ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name);
+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
+
+ } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) {
+ const char *xml_id = ID(action->rsc->xml);
+
+ crm_debug("Using anonymous clone name %s for %s (aka %s)",
+ xml_id, action->rsc->id, action->rsc->clone_name);
+
+ /* ID is what we'd like client to use
+ * ID_LONG is what they might know it as instead
+ *
+ * ID_LONG is only strictly needed /here/ during the
+ * transition period until all nodes in the cluster
+ * are running the new software /and/ have rebooted
+ * once (meaning that they've only ever spoken to a DC
+ * supporting this feature).
+ *
+ * If anyone toggles the unique flag to 'on', the
+ * 'instance free' name will correspond to an orphan
+ * and fall into the clause above instead
+ */
+ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id);
+ if ((action->rsc->clone_name != NULL)
+ && !pcmk__str_eq(xml_id, action->rsc->clone_name,
+ pcmk__str_none)) {
+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name);
+ } else {
+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
+ }
+
+ } else {
+ CRM_ASSERT(action->rsc->clone_name == NULL);
+ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id);
+ }
+
+ for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) {
+ crm_xml_add(rsc_xml, attr_list[lpc],
+ g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add action attributes to transition graph action XML
+ *
+ * \param[in,out] action Scheduled action
+ * \param[in,out] action_xml Transition graph action XML for \p action
+ */
+static void
+add_action_attributes(pe_action_t *action, xmlNode *action_xml)
+{
+ xmlNode *args_xml = NULL;
+
+ /* We create free-standing XML to start, so we can sort the attributes
+ * before adding it to action_xml, which keeps the scheduler regression
+ * test graphs comparable.
+ */
+ args_xml = create_xml_node(NULL, XML_TAG_ATTRS);
+
+ crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
+ g_hash_table_foreach(action->extra, hash2field, args_xml);
+
+ if ((action->rsc != NULL) && (action->node != NULL)) {
+ // Get the resource instance attributes, evaluated properly for node
+ GHashTable *params = pe_rsc_params(action->rsc, action->node,
+ action->rsc->cluster);
+
+ pcmk__substitute_remote_addr(action->rsc, params);
+
+ g_hash_table_foreach(params, hash2smartfield, args_xml);
+
+ } else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) {
+ GHashTable *params = pe_rsc_params(action->rsc, NULL,
+ action->rsc->cluster);
+
+ g_hash_table_foreach(params, hash2smartfield, args_xml);
+ }
+
+ g_hash_table_foreach(action->meta, hash2metafield, args_xml);
+ if (action->rsc != NULL) {
+ pe_resource_t *parent = action->rsc;
+
+ while (parent != NULL) {
+ parent->cmds->add_graph_meta(parent, args_xml);
+ parent = parent->parent;
+ }
+
+ pcmk__add_bundle_meta_to_xml(args_xml, action);
+
+ } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none)
+ && (action->node != NULL)) {
+ /* Pass the node's attributes as meta-attributes.
+ *
+ * @TODO: Determine whether it is still necessary to do this. It was
+ * added in 33d99707, probably for the libfence-based implementation in
+ * c9a90bd, which is no longer used.
+ */
+ g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml);
+ }
+
+ sorted_xml(args_xml, action_xml, FALSE);
+ free_xml(args_xml);
+}
+
+/*!
+ * \internal
+ * \brief Create the transition graph XML for a scheduled action
+ *
+ * \param[in,out] parent Parent XML element to add action to
+ * \param[in,out] action Scheduled action
+ * \param[in] skip_details If false, add action details as sub-elements
+ * \param[in] data_set Cluster working set
+ */
+static void
+create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details,
+ const pe_working_set_t *data_set)
+{
+ bool needs_node_info = true;
+ bool needs_maintenance_info = false;
+ xmlNode *action_xml = NULL;
+
+ if ((action == NULL) || (data_set == NULL)) {
+ return;
+ }
+
+ // Create the top-level element based on task
+
+ if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
+ /* All fences need node info; guest node fences are pseudo-events */
+ action_xml = create_xml_node(parent,
+ pcmk_is_set(action->flags, pe_action_pseudo)?
+ XML_GRAPH_TAG_PSEUDO_EVENT :
+ XML_GRAPH_TAG_CRM_EVENT);
+
+ } else if (pcmk__str_any_of(action->task,
+ CRM_OP_SHUTDOWN,
+ CRM_OP_CLEAR_FAILCOUNT, NULL)) {
+ action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
+
+ } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
+ // CIB-only clean-up for shutdown locks
+ action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
+ crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB);
+
+ } else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
+ if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES,
+ pcmk__str_none)) {
+ needs_maintenance_info = true;
+ }
+ action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
+ needs_node_info = false;
+
+ } else {
+ action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP);
+ }
+
+ crm_xml_add_int(action_xml, XML_ATTR_ID, action->id);
+ crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task);
+
+ if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) {
+ char *clone_key = NULL;
+ guint interval_ms;
+
+ if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0,
+ &interval_ms) != pcmk_rc_ok) {
+ interval_ms = 0;
+ }
+ clone_key = clone_op_key(action, interval_ms);
+ crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key);
+ crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid);
+ free(clone_key);
+ } else {
+ crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid);
+ }
+
+ if (needs_node_info && (action->node != NULL)) {
+ add_node_details(action, action_xml);
+ g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET),
+ strdup(action->node->details->uname));
+ g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID),
+ strdup(action->node->details->id));
+ }
+
+ if (skip_details) {
+ return;
+ }
+
+ if ((action->rsc != NULL)
+ && !pcmk_is_set(action->flags, pe_action_pseudo)) {
+
+ // This is a real resource action, so add resource details
+ add_resource_details(action, action_xml);
+ }
+
+ /* List any attributes in effect */
+ add_action_attributes(action, action_xml);
+
+ /* List any nodes this action is expected to make down */
+ if (needs_node_info && (action->node != NULL)) {
+ add_downed_nodes(action_xml, action, data_set);
+ }
+
+ if (needs_maintenance_info) {
+ add_maintenance_nodes(action_xml, data_set);
+ }
+}
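+
+/* A sketch of the element this produces for a typical primitive start
+ * (attribute values are hypothetical, and attribute names assume the usual
+ * expansions of the XML_* constants used above):
+ *
+ *     <rsc_op id="42" operation="start" operation_key="myrsc_start_0"
+ *             on_node="node1" on_node_uuid="1">
+ *       <primitive id="myrsc" class="ocf" provider="heartbeat" type="Dummy"/>
+ *       <attributes CRM_meta_timeout="20000" crm_feature_set="..."/>
+ *     </rsc_op>
+ */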
+
+/*!
+ * \internal
+ * \brief Check whether an action should be added to the transition graph
+ *
+ * \param[in] action Action to check
+ *
+ * \return true if action should be added to graph, otherwise false
+ */
+static bool
+should_add_action_to_graph(const pe_action_t *action)
+{
+ if (!pcmk_is_set(action->flags, pe_action_runnable)) {
+ crm_trace("Ignoring action %s (%d): unrunnable",
+ action->uuid, action->id);
+ return false;
+ }
+
+ if (pcmk_is_set(action->flags, pe_action_optional)
+ && !pcmk_is_set(action->flags, pe_action_print_always)) {
+ crm_trace("Ignoring action %s (%d): optional",
+ action->uuid, action->id);
+ return false;
+ }
+
+ /* Actions for unmanaged resources should be excluded from the graph,
+ * with the exception of monitors and cancellation of recurring monitors.
+ */
+ if ((action->rsc != NULL)
+ && !pcmk_is_set(action->rsc->flags, pe_rsc_managed)
+ && !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_none)) {
+ const char *interval_ms_s;
+
+ /* A cancellation of a recurring monitor will get here because the task
+ * is cancel rather than monitor, but the interval can still be used to
+ * recognize it. The interval has been normalized to milliseconds by
+ * this point, so a string comparison is sufficient.
+ */
+ interval_ms_s = g_hash_table_lookup(action->meta,
+ XML_LRM_ATTR_INTERVAL_MS);
+ if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) {
+ crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
+ action->uuid, action->id, action->rsc->id);
+ return false;
+ }
+ }
+
+ /* Always add pseudo-actions, fence actions, and shutdown actions (already
+ * determined to be required and runnable by this point)
+ */
+ if (pcmk_is_set(action->flags, pe_action_pseudo)
+ || pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN,
+ NULL)) {
+ return true;
+ }
+
+ if (action->node == NULL) {
+ pe_err("Skipping action %s (%d) "
+ "because it was not allocated to a node (bug?)",
+ action->uuid, action->id);
+ pcmk__log_action("Unallocated", action, false);
+ return false;
+ }
+
+ if (pcmk_is_set(action->flags, pe_action_dc)) {
+ crm_trace("Action %s (%d) should be dumped: "
+ "can run on DC instead of %s",
+ action->uuid, action->id, pe__node_name(action->node));
+
+ } else if (pe__is_guest_node(action->node)
+ && !action->node->details->remote_requires_reset) {
+ crm_trace("Action %s (%d) should be dumped: "
+ "assuming will be runnable on guest %s",
+ action->uuid, action->id, pe__node_name(action->node));
+
+ } else if (!action->node->details->online) {
+ pe_err("Skipping action %s (%d) "
+ "because it was scheduled for offline node (bug?)",
+ action->uuid, action->id);
+ pcmk__log_action("Offline node", action, false);
+ return false;
+
+ } else if (action->node->details->unclean) {
+ pe_err("Skipping action %s (%d) "
+ "because it was scheduled for unclean node (bug?)",
+ action->uuid, action->id);
+ pcmk__log_action("Unclean node", action, false);
+ return false;
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an ordering's flags can change an action
+ *
+ * \param[in] ordering Ordering to check
+ *
+ * \return true if ordering has flags that can change an action, false otherwise
+ */
+static bool
+ordering_can_change_actions(const pe_action_wrapper_t *ordering)
+{
+ return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed
+ |pe_order_implies_then_printed
+ |pe_order_optional));
+}
+
+/*!
+ * \internal
+ * \brief Check whether an action input should be in the transition graph
+ *
+ * \param[in] action Action to check
+ * \param[in,out] input Action input to check
+ *
+ * \return true if input should be in graph, false otherwise
+ * \note In addition to checking the input, this function may disable it under
+ *       certain circumstances (load or anti-colocation orderings that are not
+ *       needed).
+ */
+static bool
+should_add_input_to_graph(const pe_action_t *action, pe_action_wrapper_t *input)
+{
+ if (input->state == pe_link_dumped) {
+ return true;
+ }
+
+ if (input->type == pe_order_none) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "ordering disabled",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ return false;
+
+ } else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
+ && !ordering_can_change_actions(input)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "optional and input unrunnable",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ return false;
+
+ } else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
+ && pcmk_is_set(input->type, pe_order_one_or_more)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "one-or-more and input unrunnable",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ return false;
+
+ } else if (pcmk_is_set(input->type, pe_order_implies_first_migratable)
+ && !pcmk_is_set(input->action->flags, pe_action_runnable)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "implies input migratable but input unrunnable",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ return false;
+
+ } else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable)
+ && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "ordering applies only if input is non-migratable, "
+                  "but input is migratable",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ return false;
+
+ } else if ((input->type == pe_order_optional)
+ && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)
+ && pcmk__ends_with(input->action->uuid, "_stop_0")) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "optional but stop in migration",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ return false;
+
+ } else if (input->type == pe_order_load) {
+ pe_node_t *input_node = input->action->node;
+
+        // Load orderings are relevant only if actions are on the same node
+
+ if (action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)) {
+ pe_node_t *allocated = action->rsc->allocated_to;
+
+ /* For load_stopped -> migrate_to orderings, we care about where it
+ * has been allocated to, not where it will be executed.
+ */
+ if ((input_node == NULL) || (allocated == NULL)
+ || (input_node->details != allocated->details)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "load ordering node mismatch %s vs %s",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id,
+ (allocated? allocated->details->uname : "<none>"),
+ (input_node? input_node->details->uname : "<none>"));
+ input->type = pe_order_none;
+ return false;
+ }
+
+ } else if ((input_node == NULL) || (action->node == NULL)
+ || (input_node->details != action->node->details)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "load ordering node mismatch %s vs %s",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id,
+ (action->node? action->node->details->uname : "<none>"),
+ (input_node? input_node->details->uname : "<none>"));
+ input->type = pe_order_none;
+ return false;
+
+ } else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "load ordering input optional",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ input->type = pe_order_none;
+ return false;
+ }
+
+ } else if (input->type == pe_order_anti_colocation) {
+ if (input->action->node && action->node
+ && (input->action->node->details != action->node->details)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "anti-colocation node mismatch %s vs %s",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id,
+ pe__node_name(action->node),
+ pe__node_name(input->action->node));
+ input->type = pe_order_none;
+ return false;
+
+ } else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "anti-colocation input optional",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ input->type = pe_order_none;
+ return false;
+ }
+
+ } else if (input->action->rsc
+ && input->action->rsc != action->rsc
+ && pcmk_is_set(input->action->rsc->flags, pe_rsc_failed)
+ && !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed)
+ && pcmk__ends_with(input->action->uuid, "_stop_0")
+ && action->rsc && pe_rsc_is_clone(action->rsc)) {
+ crm_warn("Ignoring requirement that %s complete before %s:"
+ " unmanaged failed resources cannot prevent clone shutdown",
+ input->action->uuid, action->uuid);
+ return false;
+
+ } else if (pcmk_is_set(input->action->flags, pe_action_optional)
+ && !pcmk_any_flags_set(input->action->flags,
+ pe_action_print_always|pe_action_dumped)
+ && !should_add_action_to_graph(input->action)) {
+ crm_trace("Ignoring %s (%d) input %s (%d): "
+ "input optional",
+ action->uuid, action->id,
+ input->action->uuid, input->action->id);
+ return false;
+ }
+
+ crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x",
+ action->uuid, action->id, action_type_str(input->action->flags),
+ input->action->uuid, input->action->id,
+ action_node_str(input->action),
+ action_runnable_str(input->action->flags),
+ action_optional_str(input->action->flags), input->type);
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an ordering creates an ordering loop
+ *
+ * \param[in] init_action "First" action in ordering
+ * \param[in]     action       Callers should pass the same value as
+ *                             \p init_action (recursive calls within this
+ *                             function pass a different value)
+ * \param[in,out] input Action wrapper for "then" action in ordering
+ *
+ * \return true if the ordering creates a loop, otherwise false
+ */
+bool
+pcmk__graph_has_loop(const pe_action_t *init_action, const pe_action_t *action,
+ pe_action_wrapper_t *input)
+{
+ bool has_loop = false;
+
+ if (pcmk_is_set(input->action->flags, pe_action_tracking)) {
+ crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)",
+ input->action->uuid,
+ input->action->node? input->action->node->details->uname : "",
+ action->uuid,
+ action->node? action->node->details->uname : "",
+ input->type);
+ return false;
+ }
+
+ // Don't need to check inputs that won't be used
+ if (!should_add_input_to_graph(action, input)) {
+ return false;
+ }
+
+ if (input->action == init_action) {
+ crm_debug("Input loop found in %s@%s ->...-> %s@%s",
+ action->uuid,
+ action->node? action->node->details->uname : "",
+ init_action->uuid,
+ init_action->node? init_action->node->details->uname : "");
+ return true;
+ }
+
+ pe__set_action_flags(input->action, pe_action_tracking);
+
+    crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x) "
+ "for graph loop with %s@%s ",
+ action->uuid,
+ action->node? action->node->details->uname : "",
+ input->action->uuid,
+ input->action->node? input->action->node->details->uname : "",
+ input->type,
+ init_action->uuid,
+ init_action->node? init_action->node->details->uname : "");
+
+ // Recursively check input itself for loops
+ for (GList *iter = input->action->actions_before;
+ iter != NULL; iter = iter->next) {
+
+ if (pcmk__graph_has_loop(init_action, input->action,
+ (pe_action_wrapper_t *) iter->data)) {
+ // Recursive call already logged a debug message
+ has_loop = true;
+ break;
+ }
+ }
+
+ pe__clear_action_flags(input->action, pe_action_tracking);
+
+ if (!has_loop) {
+ crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)",
+ input->action->uuid,
+ input->action->node? input->action->node->details->uname : "",
+ action->uuid,
+ action->node? action->node->details->uname : "",
+ input->type);
+ }
+ return has_loop;
+}
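+
+/* Illustrative only: ordering code can use this to reject an ordering that
+ * would make the graph cyclic, by checking each input of the "then" action
+ * (variable names hypothetical):
+ *
+ *     for (GList *iter = then_action->actions_before; iter != NULL;
+ *          iter = iter->next) {
+ *         pe_action_wrapper_t *input = (pe_action_wrapper_t *) iter->data;
+ *
+ *         if (pcmk__graph_has_loop(then_action, then_action, input)) {
+ *             // ordering would create a loop; treat it as invalid
+ *         }
+ *     }
+ */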
+
+/*!
+ * \internal
+ * \brief Create a synapse XML element for a transition graph
+ *
+ * \param[in] action Action that synapse is for
+ * \param[in,out] data_set Cluster working set containing graph
+ *
+ * \return Newly added XML element for new graph synapse
+ */
+static xmlNode *
+create_graph_synapse(const pe_action_t *action, pe_working_set_t *data_set)
+{
+ int synapse_priority = 0;
+ xmlNode *syn = create_xml_node(data_set->graph, "synapse");
+
+ crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse);
+ data_set->num_synapse++;
+
+ if (action->rsc != NULL) {
+ synapse_priority = action->rsc->priority;
+ }
+ if (action->priority > synapse_priority) {
+ synapse_priority = action->priority;
+ }
+ if (synapse_priority > 0) {
+ crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority);
+ }
+ return syn;
+}
+
+/*!
+ * \internal
+ * \brief Add an action to the transition graph XML if appropriate
+ *
+ * \param[in,out] data Action to possibly add
+ * \param[in,out] user_data Cluster working set
+ *
+ * \note This will de-duplicate the action inputs, meaning that the
+ * pe_action_wrapper_t:type flags can no longer be relied on to retain
+ * their original settings. That means this MUST be called after
+ * pcmk__apply_orderings() is complete, and nothing after this should rely
+ * on those type flags. (For example, some code looks for type equal to
+ * some flag rather than whether the flag is set, and some code looks for
+ * particular combinations of flags -- such code must be done before
+ * pcmk__create_graph().)
+ */
+static void
+add_action_to_graph(gpointer data, gpointer user_data)
+{
+ pe_action_t *action = (pe_action_t *) data;
+ pe_working_set_t *data_set = (pe_working_set_t *) user_data;
+
+ xmlNode *syn = NULL;
+ xmlNode *set = NULL;
+ xmlNode *in = NULL;
+
+ /* If we haven't already, de-duplicate inputs (even if we won't be adding
+ * the action to the graph, so that crm_simulate's dot graphs don't have
+ * duplicates).
+ */
+ if (!pcmk_is_set(action->flags, pe_action_dedup)) {
+ pcmk__deduplicate_action_inputs(action);
+ pe__set_action_flags(action, pe_action_dedup);
+ }
+
+ if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or
+ || !should_add_action_to_graph(action)) { // shouldn't be added
+ return;
+ }
+ pe__set_action_flags(action, pe_action_dumped);
+
+ crm_trace("Adding action %d (%s%s%s) to graph",
+ action->id, action->uuid,
+ ((action->node == NULL)? "" : " on "),
+ ((action->node == NULL)? "" : action->node->details->uname));
+
+ syn = create_graph_synapse(action, data_set);
+ set = create_xml_node(syn, "action_set");
+ in = create_xml_node(syn, "inputs");
+
+ create_graph_action(set, action, false, data_set);
+
+ for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) {
+ pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data;
+
+ if (should_add_input_to_graph(action, input)) {
+ xmlNode *input_xml = create_xml_node(in, "trigger");
+
+ input->state = pe_link_dumped;
+ create_graph_action(input_xml, input->action, true, data_set);
+ }
+ }
+}
+
+static int transition_id = -1;
+
+/*!
+ * \internal
+ * \brief Log a message after calculating a transition
+ *
+ * \param[in] filename Where transition input is stored
+ */
+void
+pcmk__log_transition_summary(const char *filename)
+{
+ if (was_processing_error) {
+ crm_err("Calculated transition %d (with errors)%s%s",
+ transition_id,
+ (filename == NULL)? "" : ", saving inputs in ",
+ (filename == NULL)? "" : filename);
+
+ } else if (was_processing_warning) {
+ crm_warn("Calculated transition %d (with warnings)%s%s",
+ transition_id,
+ (filename == NULL)? "" : ", saving inputs in ",
+ (filename == NULL)? "" : filename);
+
+ } else {
+ crm_notice("Calculated transition %d%s%s",
+ transition_id,
+ (filename == NULL)? "" : ", saving inputs in ",
+ (filename == NULL)? "" : filename);
+ }
+ if (crm_config_error) {
+ crm_notice("Configuration errors found during scheduler processing,"
+ " please run \"crm_verify -L\" to identify issues");
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add a resource's actions to the transition graph
+ *
+ * \param[in,out] rsc Resource whose actions should be added
+ */
+void
+pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc)
+{
+ GList *iter = NULL;
+
+ CRM_ASSERT(rsc != NULL);
+ pe_rsc_trace(rsc, "Adding actions for %s to graph", rsc->id);
+
+ // First add the resource's own actions
+ g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster);
+
+ // Then recursively add its children's actions (appropriate to variant)
+ for (iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
+
+ child_rsc->cmds->add_actions_to_graph(child_rsc);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create a transition graph with all cluster actions needed
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__create_graph(pe_working_set_t *data_set)
+{
+ GList *iter = NULL;
+ const char *value = NULL;
+ long long limit = 0LL;
+
+ transition_id++;
+ crm_trace("Creating transition graph %d", transition_id);
+
+ data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
+
+ value = pe_pref(data_set->config_hash, "cluster-delay");
+ crm_xml_add(data_set->graph, "cluster-delay", value);
+
+ value = pe_pref(data_set->config_hash, "stonith-timeout");
+ crm_xml_add(data_set->graph, "stonith-timeout", value);
+
+ crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
+
+ if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) {
+ crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
+ } else {
+ crm_xml_add(data_set->graph, "failed-start-offset", "1");
+ }
+
+ value = pe_pref(data_set->config_hash, "batch-limit");
+ crm_xml_add(data_set->graph, "batch-limit", value);
+
+ crm_xml_add_int(data_set->graph, "transition_id", transition_id);
+
+ value = pe_pref(data_set->config_hash, "migration-limit");
+ if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) {
+ crm_xml_add(data_set->graph, "migration-limit", value);
+ }
+
+ if (data_set->recheck_by > 0) {
+ char *recheck_epoch = NULL;
+
+ recheck_epoch = crm_strdup_printf("%llu",
+ (long long) data_set->recheck_by);
+ crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
+ free(recheck_epoch);
+ }
+
+ /* The following code will de-duplicate action inputs, so nothing past this
+ * should rely on the action input type flags retaining their original
+ * values.
+ */
+
+ // Add resource actions to graph
+ for (iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ pe_rsc_trace(rsc, "Processing actions for %s", rsc->id);
+ rsc->cmds->add_actions_to_graph(rsc);
+ }
+
+ // Add pseudo-action for list of nodes with maintenance state update
+ add_maintenance_update(data_set);
+
+ // Add non-resource (node) actions
+ for (iter = data_set->actions; iter != NULL; iter = iter->next) {
+ pe_action_t *action = (pe_action_t *) iter->data;
+
+ if ((action->rsc != NULL)
+ && (action->node != NULL)
+ && action->node->details->shutdown
+ && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)
+ && !pcmk_any_flags_set(action->flags,
+ pe_action_optional|pe_action_runnable)
+ && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)) {
+ /* Eventually we should just ignore the 'fence' case, but for now
+ * it's the best way to detect (in CTS) when CIB resource updates
+ * are being lost.
+ */
+ if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)
+ || (data_set->no_quorum_policy == no_quorum_ignore)) {
+ crm_crit("Cannot %s %s because of %s:%s%s (%s)",
+ action->node->details->unclean? "fence" : "shut down",
+ pe__node_name(action->node), action->rsc->id,
+ pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged",
+ pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "",
+ action->uuid);
+ }
+ }
+
+ add_action_to_graph((gpointer) action, (gpointer) data_set);
+ }
+
+ crm_log_xml_trace(data_set->graph, "graph");
+}
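+
+/* For orientation, the graph assembled above has this overall shape (a
+ * sketch; attribute values are hypothetical):
+ *
+ *     <transition_graph cluster-delay="60s" stonith-timeout="60s"
+ *                       failed-stop-offset="INFINITY" failed-start-offset="1"
+ *                       batch-limit="0" transition_id="0">
+ *       <synapse id="0" priority="1000000">
+ *         <action_set> ...one or more actions... </action_set>
+ *         <inputs> <trigger> ...prerequisite actions... </trigger> </inputs>
+ *       </synapse>
+ *     </transition_graph>
+ */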
diff --git a/lib/pacemaker/pcmk_injections.c b/lib/pacemaker/pcmk_injections.c
new file mode 100644
index 0000000..ea8fc17
--- /dev/null
+++ b/lib/pacemaker/pcmk_injections.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <crm/crm.h>
+#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
+#include <crm/cib.h>
+#include <crm/cib/internal.h>
+#include <crm/common/util.h>
+#include <crm/common/iso8601.h>
+#include <crm/common/xml_internal.h>
+#include <crm/lrmd_internal.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+bool pcmk__simulate_node_config = false;
+
+#define XPATH_NODE_CONFIG "//" XML_CIB_TAG_NODE "[@" XML_ATTR_UNAME "='%s']"
+#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"
+#define XPATH_RSC_HISTORY XPATH_NODE_STATE \
+ "//" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']"
+
+
+/*!
+ * \internal
+ * \brief Inject a fictitious transient node attribute into scheduler input
+ *
+ * \param[in,out] out Output object for displaying error messages
+ * \param[in,out] cib_node node_state XML to inject attribute into
+ * \param[in] name Transient node attribute name to inject
+ * \param[in] value Transient node attribute value to inject
+ */
+static void
+inject_transient_attr(pcmk__output_t *out, xmlNode *cib_node,
+ const char *name, const char *value)
+{
+ xmlNode *attrs = NULL;
+ xmlNode *instance_attrs = NULL;
+ const char *node_uuid = ID(cib_node);
+
+ out->message(out, "inject-attr", name, value, cib_node);
+
+ attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
+ if (attrs == NULL) {
+ attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
+ crm_xml_add(attrs, XML_ATTR_ID, node_uuid);
+ }
+
+ instance_attrs = first_named_child(attrs, XML_TAG_ATTR_SETS);
+ if (instance_attrs == NULL) {
+ instance_attrs = create_xml_node(attrs, XML_TAG_ATTR_SETS);
+ crm_xml_add(instance_attrs, XML_ATTR_ID, node_uuid);
+ }
+
+ crm_create_nvpair_xml(instance_attrs, NULL, name, value);
+}
+
+/*!
+ * \internal
+ * \brief Inject a fictitious fail count into a scheduler input
+ *
+ * \param[in,out] out Output object for displaying error messages
+ * \param[in,out] cib_node Node state XML to inject into
+ * \param[in] resource ID of resource for fail count to inject
+ * \param[in] task Action name for fail count to inject
+ * \param[in] interval_ms Action interval (in milliseconds) for fail count
+ * \param[in]     rc           Action result for fail count to inject (if
+ *                             \p rc is 0, or is 7 [not running] when
+ *                             \p interval_ms is 0, nothing is injected)
+ */
+void
+pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node,
+ const char *resource, const char *task,
+ guint interval_ms, int rc)
+{
+ if (rc == 0) {
+ return;
+
+ } else if ((rc == 7) && (interval_ms == 0)) {
+ return;
+
+ } else {
+ char *name = NULL;
+ char *now = pcmk__ttoa(time(NULL));
+
+ name = pcmk__failcount_name(resource, task, interval_ms);
+ inject_transient_attr(out, cib_node, name, "value++");
+ free(name);
+
+ name = pcmk__lastfailure_name(resource, task, interval_ms);
+ inject_transient_attr(out, cib_node, name, now);
+ free(name);
+
+ free(now);
+ }
+}
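+
+/* The attribute names injected above follow the usual fail-count conventions
+ * (formats assumed from pcmk__failcount_name() and pcmk__lastfailure_name();
+ * the resource and operation are hypothetical):
+ *
+ *     fail-count-myrsc#monitor_10000   = "value++"
+ *     last-failure-myrsc#monitor_10000 = <current epoch as a string>
+ */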
+
+/*!
+ * \internal
+ * \brief Create a CIB configuration entry for a fictitious node
+ *
+ * \param[in,out] cib_conn CIB object to use
+ * \param[in] node Node name to use
+ */
+static void
+create_node_entry(cib_t *cib_conn, const char *node)
+{
+ int rc = pcmk_ok;
+ char *xpath = crm_strdup_printf(XPATH_NODE_CONFIG, node);
+
+ rc = cib_conn->cmds->query(cib_conn, xpath, NULL,
+ cib_xpath|cib_sync_call|cib_scope_local);
+
+ if (rc == -ENXIO) { // Only add if not already existing
+ xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE);
+
+ crm_xml_add(cib_object, XML_ATTR_ID, node); // Use node name as ID
+ crm_xml_add(cib_object, XML_ATTR_UNAME, node);
+ cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object,
+ cib_sync_call|cib_scope_local);
+        /* Not bothering with a subsequent query to check that it exists;
+         * we'll fail later in the call to query_node_uuid() if it doesn't
+         */
+
+ free_xml(cib_object);
+ }
+
+ free(xpath);
+}
+
+/*!
+ * \internal
+ * \brief Synthesize a fake executor event for an action
+ *
+ * \param[in] cib_resource XML for any existing resource action history
+ * \param[in] task Name of action to synthesize
+ * \param[in] interval_ms Interval of action to synthesize
+ * \param[in] outcome Result of action to synthesize
+ *
+ * \return Newly allocated executor event
+ * \note It is the caller's responsibility to free the result with
+ * lrmd_free_event().
+ */
+static lrmd_event_data_t *
+create_op(const xmlNode *cib_resource, const char *task, guint interval_ms,
+ int outcome)
+{
+ lrmd_event_data_t *op = NULL;
+ xmlNode *xop = NULL;
+
+ op = lrmd_new_event(ID(cib_resource), task, interval_ms);
+ lrmd__set_result(op, outcome, PCMK_EXEC_DONE, "Simulated action result");
+ op->params = NULL; // Not needed for simulation purposes
+ op->t_run = (unsigned int) time(NULL);
+ op->t_rcchange = op->t_run;
+
+ // Use a call ID higher than any existing history entries
+ op->call_id = 0;
+ for (xop = pcmk__xe_first_child(cib_resource); xop != NULL;
+ xop = pcmk__xe_next(xop)) {
+
+ int tmp = 0;
+
+ crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
+ if (tmp > op->call_id) {
+ op->call_id = tmp;
+ }
+ }
+ op->call_id++;
+
+ return op;
+}
+
+/*!
+ * \internal
+ * \brief Inject a fictitious resource history entry into a scheduler input
+ *
+ * \param[in,out] cib_resource Resource history XML to inject entry into
+ * \param[in,out] op Action result to inject
+ * \param[in] target_rc Expected result for action to inject
+ *
+ * \return XML of injected resource history entry
+ */
+xmlNode *
+pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t *op,
+ int target_rc)
+{
+ return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
+ target_rc, NULL, crm_system_name);
+}
+
+/*!
+ * \internal
+ * \brief Inject a fictitious node into a scheduler input
+ *
+ * \param[in,out] cib_conn Scheduler input CIB to inject node into
+ * \param[in] node Name of node to inject
+ * \param[in] uuid UUID of node to inject
+ *
+ * \return XML of node_state entry for new node
+ * \note If the global pcmk__simulate_node_config has been set to true, a
+ * node entry in the configuration section will be added, as well as a
+ * node state entry in the status section.
+ */
+xmlNode *
+pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid)
+{
+ int rc = pcmk_ok;
+ xmlNode *cib_object = NULL;
+ char *xpath = crm_strdup_printf(XPATH_NODE_STATE, node);
+ bool duplicate = false;
+ char *found_uuid = NULL;
+
+ if (pcmk__simulate_node_config) {
+ create_node_entry(cib_conn, node);
+ }
+
+ rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
+ cib_xpath|cib_sync_call|cib_scope_local);
+
+ if ((cib_object != NULL) && (ID(cib_object) == NULL)) {
+ crm_err("Detected multiple node_state entries for xpath=%s, bailing",
+ xpath);
+ duplicate = true;
+ goto done;
+ }
+
+ if (rc == -ENXIO) {
+ if (uuid == NULL) {
+ query_node_uuid(cib_conn, node, &found_uuid, NULL);
+ } else {
+ found_uuid = strdup(uuid);
+ }
+
+ if (found_uuid) {
+ char *xpath_by_uuid = crm_strdup_printf("//" XML_CIB_TAG_STATE "[@" XML_ATTR_ID "='%s']",
+ found_uuid);
+
+ // It's possible that a node_state entry doesn't have an uname yet.
+ rc = cib_conn->cmds->query(cib_conn, xpath_by_uuid, &cib_object,
+ cib_xpath|cib_sync_call|cib_scope_local);
+
+ if ((cib_object != NULL) && (ID(cib_object) == NULL)) {
+ crm_err("Detected multiple node_state entries for xpath=%s, bailing",
+ xpath_by_uuid);
+ duplicate = true;
+ free(xpath_by_uuid);
+ goto done;
+
+ } else if (cib_object != NULL) {
+ crm_xml_add(cib_object, XML_ATTR_UNAME, node);
+
+ rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, cib_object,
+ cib_sync_call|cib_scope_local);
+ }
+
+ free(xpath_by_uuid);
+ }
+ }
+
+ if (rc == -ENXIO) {
+ cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE);
+ crm_xml_add(cib_object, XML_ATTR_ID, found_uuid);
+ crm_xml_add(cib_object, XML_ATTR_UNAME, node);
+ cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object,
+ cib_sync_call|cib_scope_local);
+ free_xml(cib_object);
+
+ rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
+ cib_xpath|cib_sync_call|cib_scope_local);
+ crm_trace("Injecting node state for %s (rc=%d)", node, rc);
+ }
+
+done:
+ free(found_uuid);
+ free(xpath);
+
+ if (duplicate) {
+ crm_log_xml_warn(cib_object, "Duplicates");
+ crm_exit(CRM_EX_SOFTWARE);
+ return NULL; // not reached, but makes static analysis happy
+ }
+
+ CRM_ASSERT(rc == pcmk_ok);
+ return cib_object;
+}
+
+/*!
+ * \internal
+ * \brief Inject a fictitious node state change into a scheduler input
+ *
+ * \param[in,out] cib_conn Scheduler input CIB to inject into
+ * \param[in] node Name of node to inject change for
+ * \param[in] up If true, change state to online, otherwise offline
+ *
+ * \return XML of changed (or added) node state entry
+ */
+xmlNode *
+pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up)
+{
+ xmlNode *cib_node = pcmk__inject_node(cib_conn, node, NULL);
+
+ if (up) {
+ pcmk__xe_set_props(cib_node,
+ XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES,
+ XML_NODE_IS_PEER, ONLINESTATUS,
+ XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER,
+ XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER,
+ NULL);
+ } else {
+ pcmk__xe_set_props(cib_node,
+ XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO,
+ XML_NODE_IS_PEER, OFFLINESTATUS,
+ XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN,
+ XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN,
+ NULL);
+ }
+ crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name);
+ return cib_node;
+}
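+
+/* Illustrative sketch of a node_state entry after marking "node1" online,
+ * assuming the usual expansions of the XML constants set above (the id and
+ * origin values are hypothetical):
+ *
+ *     <node_state id="1" uname="node1" in_ccm="true" crmd="online"
+ *                 join="member" expected="member"
+ *                 crm-debug-origin="crm_simulate"/>
+ */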
+
+/*!
+ * \internal
+ * \brief Check whether a node has history for a given resource
+ *
+ * \param[in,out] cib_node Node state XML to check
+ * \param[in] resource Resource name to check for
+ *
+ * \return Resource's lrm_resource XML entry beneath \p cib_node if found,
+ * otherwise NULL
+ */
+static xmlNode *
+find_resource_xml(xmlNode *cib_node, const char *resource)
+{
+ const char *node = crm_element_value(cib_node, XML_ATTR_UNAME);
+ char *xpath = crm_strdup_printf(XPATH_RSC_HISTORY, node, resource);
+ xmlNode *match = get_xpath_object(xpath, cib_node, LOG_TRACE);
+
+ free(xpath);
+ return match;
+}
+
+/*!
+ * \internal
+ * \brief Inject a resource history element into a scheduler input
+ *
+ * \param[in,out] out Output object for displaying error messages
+ * \param[in,out] cib_node Node state XML to inject resource history entry into
+ * \param[in] resource ID (in configuration) of resource to inject
+ * \param[in] lrm_name ID as used in history (could be clone instance)
+ * \param[in] rclass Resource agent class of resource to inject
+ * \param[in] rtype Resource agent type of resource to inject
+ * \param[in] rprovider Resource agent provider of resource to inject
+ *
+ * \return XML of injected resource history element
+ * \note If a history element already exists under either \p resource or
+ * \p lrm_name, this will return it rather than injecting a new one.
+ */
+xmlNode *
+pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
+ const char *resource, const char *lrm_name,
+ const char *rclass, const char *rtype,
+ const char *rprovider)
+{
+ xmlNode *lrm = NULL;
+ xmlNode *container = NULL;
+ xmlNode *cib_resource = NULL;
+
+ cib_resource = find_resource_xml(cib_node, resource);
+ if (cib_resource != NULL) {
+ /* If an existing LRM history entry uses the resource name,
+ * continue using it, even if lrm_name is different.
+ */
+ return cib_resource;
+ }
+
+ // Check for history entry under preferred name
+ if (strcmp(resource, lrm_name) != 0) {
+ cib_resource = find_resource_xml(cib_node, lrm_name);
+ if (cib_resource != NULL) {
+ return cib_resource;
+ }
+ }
+
+ if ((rclass == NULL) || (rtype == NULL)) {
+ // @TODO query configuration for class, provider, type
+ out->err(out, "Resource %s not found in the status section of %s."
+ " Please supply the class and type to continue", resource, ID(cib_node));
+ return NULL;
+
+ } else if (!pcmk__strcase_any_of(rclass,
+ PCMK_RESOURCE_CLASS_OCF,
+ PCMK_RESOURCE_CLASS_STONITH,
+ PCMK_RESOURCE_CLASS_SERVICE,
+ PCMK_RESOURCE_CLASS_UPSTART,
+ PCMK_RESOURCE_CLASS_SYSTEMD,
+ PCMK_RESOURCE_CLASS_LSB, NULL)) {
+ out->err(out, "Invalid class for %s: %s", resource, rclass);
+ return NULL;
+
+ } else if (pcmk_is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider)
+ && (rprovider == NULL)) {
+ // @TODO query configuration for provider
+ out->err(out, "Please specify the provider for resource %s", resource);
+ return NULL;
+ }
+
+ crm_info("Injecting new resource %s into node state '%s'",
+ lrm_name, ID(cib_node));
+
+ lrm = first_named_child(cib_node, XML_CIB_TAG_LRM);
+ if (lrm == NULL) {
+ const char *node_uuid = ID(cib_node);
+
+ lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM);
+ crm_xml_add(lrm, XML_ATTR_ID, node_uuid);
+ }
+
+ container = first_named_child(lrm, XML_LRM_TAG_RESOURCES);
+ if (container == NULL) {
+ container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES);
+ }
+
+ cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE);
+
+ // If we're creating a new entry, use the preferred name
+ crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name);
+
+ crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass);
+ crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider);
+ crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype);
+
+ return cib_resource;
+}
+
+static int
+find_ticket_state(pcmk__output_t *out, cib_t *the_cib, const char *ticket_id,
+ xmlNode **ticket_state_xml)
+{
+ int rc = pcmk_ok;
+ xmlNode *xml_search = NULL;
+
+ GString *xpath = g_string_sized_new(256);
+
+ CRM_ASSERT(ticket_state_xml != NULL);
+ *ticket_state_xml = NULL;
+
+ g_string_append(xpath,
+ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS
+ "/" XML_CIB_TAG_TICKETS);
+
+ if (ticket_id) {
+ pcmk__g_strcat(xpath,
+ "/" XML_CIB_TAG_TICKET_STATE
+ "[@" XML_ATTR_ID "=\"", ticket_id, "\"]", NULL);
+ }
+ rc = the_cib->cmds->query(the_cib, (const char *) xpath->str, &xml_search,
+ cib_sync_call|cib_scope_local|cib_xpath);
+ g_string_free(xpath, TRUE);
+
+ if (rc != pcmk_ok) {
+ return rc;
+ }
+
+ crm_log_xml_debug(xml_search, "Match");
+ if (xml_has_children(xml_search) && (ticket_id != NULL)) {
+ out->err(out, "Multiple ticket_states match ticket_id=%s", ticket_id);
+ }
+ *ticket_state_xml = xml_search;
+
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Inject a ticket attribute into ticket state
+ *
+ * \param[in,out] out Output object for displaying error messages
+ * \param[in] ticket_id Ticket whose state should be changed
+ * \param[in] attr_name Ticket attribute name to inject
+ * \param[in] attr_value Boolean value of ticket attribute to inject
+ * \param[in,out] cib CIB object to use
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+set_ticket_state_attr(pcmk__output_t *out, const char *ticket_id,
+ const char *attr_name, bool attr_value, cib_t *cib)
+{
+ int rc = pcmk_rc_ok;
+ xmlNode *xml_top = NULL;
+ xmlNode *ticket_state_xml = NULL;
+
+ // Check for an existing ticket state entry
+ rc = find_ticket_state(out, cib, ticket_id, &ticket_state_xml);
+ rc = pcmk_legacy2rc(rc);
+
+ if (rc == pcmk_rc_ok) { // Ticket state found, use it
+ crm_debug("Injecting attribute into existing ticket state %s",
+ ticket_id);
+ xml_top = ticket_state_xml;
+
+ } else if (rc == ENXIO) { // No ticket state, create it
+ xmlNode *xml_obj = NULL;
+
+ xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS);
+ ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE);
+ crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id);
+
+ } else { // Error
+ return rc;
+ }
+
+ // Add the attribute to the ticket state
+ pcmk__xe_set_bool_attr(ticket_state_xml, attr_name, attr_value);
+ crm_log_xml_debug(xml_top, "Update");
+
+ // Commit the change to the CIB
+ rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top,
+ cib_sync_call|cib_scope_local);
+ rc = pcmk_legacy2rc(rc);
+
+ free_xml(xml_top);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Inject a fictitious action into the cluster
+ *
+ * \param[in,out] out Output object for displaying error messages
+ * \param[in] spec Action specification to inject, in the form
+ * "<resource>_<task>_<interval-in-ms>@<node>=<outcome>"
+ * (for example, "rsc1_monitor_10000@node1=7")
+ * \param[in,out] cib CIB object for scheduler input
+ * \param[in] data_set Cluster working set
+ */
+static void
+inject_action(pcmk__output_t *out, const char *spec, cib_t *cib,
+ const pe_working_set_t *data_set)
+{
+ int rc;
+ int outcome = PCMK_OCF_OK;
+ guint interval_ms = 0;
+
+ char *key = NULL;
+ char *node = NULL;
+ char *task = NULL;
+ char *resource = NULL;
+
+ const char *rtype = NULL;
+ const char *rclass = NULL;
+ const char *rprovider = NULL;
+
+ xmlNode *cib_op = NULL;
+ xmlNode *cib_node = NULL;
+ xmlNode *cib_resource = NULL;
+ const pe_resource_t *rsc = NULL;
+ lrmd_event_data_t *op = NULL;
+
+ out->message(out, "inject-spec", spec);
+
+ key = calloc(1, strlen(spec) + 1);
+ node = calloc(1, strlen(spec) + 1);
+ CRM_ASSERT((key != NULL) && (node != NULL));
+ rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome);
+ if (rc != 3) {
+ out->err(out, "Invalid operation spec: %s. Only found %d fields",
+ spec, rc);
+ goto done;
+ }
+
+ parse_op_key(key, &resource, &task, &interval_ms);
+
+ rsc = pe_find_resource(data_set->resources, resource);
+ if (rsc == NULL) {
+ out->err(out, "Invalid resource name: %s", resource);
+ goto done;
+ }
+
+ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+ rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
+ rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
+
+ cib_node = pcmk__inject_node(cib, node, NULL);
+ CRM_ASSERT(cib_node != NULL);
+
+ pcmk__inject_failcount(out, cib_node, resource, task, interval_ms, outcome);
+
+ cib_resource = pcmk__inject_resource_history(out, cib_node,
+ resource, resource,
+ rclass, rtype, rprovider);
+ CRM_ASSERT(cib_resource != NULL);
+
+ op = create_op(cib_resource, task, interval_ms, outcome);
+ CRM_ASSERT(op != NULL);
+
+ cib_op = pcmk__inject_action_result(cib_resource, op, 0);
+ CRM_ASSERT(cib_op != NULL);
+ lrmd_free_event(op);
+
+ rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+ cib_sync_call|cib_scope_local);
+ CRM_ASSERT(rc == pcmk_ok);
+
+done:
+ free(task);
+ free(resource);
+ free(node);
+ free(key);
+}
+
+/*!
+ * \internal
+ * \brief Inject fictitious scheduler inputs
+ *
+ * \param[in,out] data_set Cluster working set
+ * \param[in,out] cib CIB object for scheduler input to modify
+ * \param[in] injections Injections to apply
+ */
+void
+pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
+ const pcmk_injections_t *injections)
+{
+ int rc = pcmk_ok;
+ const GList *iter = NULL;
+ xmlNode *cib_node = NULL;
+ pcmk__output_t *out = data_set->priv;
+
+ out->message(out, "inject-modify-config", injections->quorum,
+ injections->watchdog);
+ if (injections->quorum != NULL) {
+ xmlNode *top = create_xml_node(NULL, XML_TAG_CIB);
+
+ /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */
+ crm_xml_add(top, XML_ATTR_HAVE_QUORUM, injections->quorum);
+
+ rc = cib->cmds->modify(cib, NULL, top, cib_sync_call|cib_scope_local);
+ CRM_ASSERT(rc == pcmk_ok);
+ }
+
+ if (injections->watchdog != NULL) {
+ rc = cib__update_node_attr(out, cib, cib_sync_call|cib_scope_local,
+ XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
+ XML_ATTR_HAVE_WATCHDOG, injections->watchdog,
+ NULL, NULL);
+ CRM_ASSERT(rc == pcmk_rc_ok);
+ }
+
+ for (iter = injections->node_up; iter != NULL; iter = iter->next) {
+ const char *node = (const char *) iter->data;
+
+ out->message(out, "inject-modify-node", "Online", node);
+
+ cib_node = pcmk__inject_node_state_change(cib, node, true);
+ CRM_ASSERT(cib_node != NULL);
+
+ rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+ cib_sync_call|cib_scope_local);
+ CRM_ASSERT(rc == pcmk_ok);
+ free_xml(cib_node);
+ }
+
+ for (iter = injections->node_down; iter != NULL; iter = iter->next) {
+ const char *node = (const char *) iter->data;
+ char *xpath = NULL;
+
+ out->message(out, "inject-modify-node", "Offline", node);
+
+ cib_node = pcmk__inject_node_state_change(cib, node, false);
+ CRM_ASSERT(cib_node != NULL);
+
+ rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+ cib_sync_call|cib_scope_local);
+ CRM_ASSERT(rc == pcmk_ok);
+ free_xml(cib_node);
+
+ xpath = crm_strdup_printf("//node_state[@uname='%s']/%s",
+ node, XML_CIB_TAG_LRM);
+ cib->cmds->remove(cib, xpath, NULL,
+ cib_xpath|cib_sync_call|cib_scope_local);
+ free(xpath);
+
+ xpath = crm_strdup_printf("//node_state[@uname='%s']/%s",
+ node, XML_TAG_TRANSIENT_NODEATTRS);
+ cib->cmds->remove(cib, xpath, NULL,
+ cib_xpath|cib_sync_call|cib_scope_local);
+ free(xpath);
+ }
+
+ for (iter = injections->node_fail; iter != NULL; iter = iter->next) {
+ const char *node = (const char *) iter->data;
+
+ out->message(out, "inject-modify-node", "Failing", node);
+
+ cib_node = pcmk__inject_node_state_change(cib, node, true);
+ CRM_ASSERT(cib_node != NULL);
+ crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
+
+ rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+ cib_sync_call|cib_scope_local);
+ CRM_ASSERT(rc == pcmk_ok);
+ free_xml(cib_node);
+ }
+
+ for (iter = injections->ticket_grant; iter != NULL; iter = iter->next) {
+ const char *ticket_id = (const char *) iter->data;
+
+ out->message(out, "inject-modify-ticket", "Granting", ticket_id);
+
+ rc = set_ticket_state_attr(out, ticket_id, "granted", true, cib);
+ CRM_ASSERT(rc == pcmk_rc_ok);
+ }
+
+ for (iter = injections->ticket_revoke; iter != NULL; iter = iter->next) {
+ const char *ticket_id = (const char *) iter->data;
+
+ out->message(out, "inject-modify-ticket", "Revoking", ticket_id);
+
+ rc = set_ticket_state_attr(out, ticket_id, "granted", false, cib);
+ CRM_ASSERT(rc == pcmk_rc_ok);
+ }
+
+ for (iter = injections->ticket_standby; iter != NULL; iter = iter->next) {
+ const char *ticket_id = (const char *) iter->data;
+
+ out->message(out, "inject-modify-ticket", "Standby", ticket_id);
+
+ rc = set_ticket_state_attr(out, ticket_id, "standby", true, cib);
+ CRM_ASSERT(rc == pcmk_rc_ok);
+ }
+
+ for (iter = injections->ticket_activate; iter != NULL; iter = iter->next) {
+ const char *ticket_id = (const char *) iter->data;
+
+ out->message(out, "inject-modify-ticket", "Activating", ticket_id);
+
+ rc = set_ticket_state_attr(out, ticket_id, "standby", false, cib);
+ CRM_ASSERT(rc == pcmk_rc_ok);
+ }
+
+ for (iter = injections->op_inject; iter != NULL; iter = iter->next) {
+ inject_action(out, (const char *) iter->data, cib, data_set);
+ }
+
+ if (!out->is_quiet(out)) {
+ out->end_list(out);
+ }
+}
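+
+/* A minimal usage sketch (illustrative only; crm_simulate is the primary
+ * consumer of this API, and the data_set and cib variables are assumed to
+ * be initialized already):
+ *
+ * pcmk_injections_t *injections = calloc(1, sizeof(pcmk_injections_t));
+ *
+ * CRM_ASSERT(injections != NULL);
+ * injections->quorum = strdup("1");
+ * injections->node_up = g_list_append(injections->node_up, strdup("node1"));
+ * pcmk__inject_scheduler_input(data_set, cib, injections);
+ * pcmk_free_injections(injections);
+ */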
+
+void
+pcmk_free_injections(pcmk_injections_t *injections)
+{
+ if (injections == NULL) {
+ return;
+ }
+
+ g_list_free_full(injections->node_up, g_free);
+ g_list_free_full(injections->node_down, g_free);
+ g_list_free_full(injections->node_fail, g_free);
+ g_list_free_full(injections->op_fail, g_free);
+ g_list_free_full(injections->op_inject, g_free);
+ g_list_free_full(injections->ticket_grant, g_free);
+ g_list_free_full(injections->ticket_revoke, g_free);
+ g_list_free_full(injections->ticket_standby, g_free);
+ g_list_free_full(injections->ticket_activate, g_free);
+ free(injections->quorum);
+ free(injections->watchdog);
+
+ free(injections);
+}
diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c
new file mode 100644
index 0000000..7379516
--- /dev/null
+++ b/lib/pacemaker/pcmk_output.c
@@ -0,0 +1,2331 @@
+/*
+ * Copyright 2019-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/common/output.h>
+#include <crm/common/results.h>
+#include <crm/msg_xml.h>
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/pengine/internal.h>
+#include <libxml/tree.h>
+#include <pacemaker-internal.h>
+
+#include <inttypes.h>
+#include <stdint.h>
+
+static char *
+colocations_header(pe_resource_t *rsc, pcmk__colocation_t *cons,
+ bool dependents) {
+ char *retval = NULL;
+
+ if (cons->primary_role > RSC_ROLE_STARTED) {
+ retval = crm_strdup_printf("%s (score=%s, %s role=%s, id=%s)",
+ rsc->id, pcmk_readable_score(cons->score),
+ (dependents? "needs" : "with"),
+ role2text(cons->primary_role), cons->id);
+ } else {
+ retval = crm_strdup_printf("%s (score=%s, id=%s)",
+ rsc->id, pcmk_readable_score(cons->score),
+ cons->id);
+ }
+ return retval;
+}
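+
+/* Example headers produced above (names, scores, and IDs are illustrative):
+ * "rsc1 (score=INFINITY, with role=Promoted, id=colo-1)"
+ * "rsc1 (score=100, id=colo-1)"
+ * ("with" becomes "needs" when the header describes a dependent resource)
+ */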
+
+static void
+colocations_xml_node(pcmk__output_t *out, pe_resource_t *rsc,
+ pcmk__colocation_t *cons) {
+ xmlNodePtr node = NULL;
+
+ node = pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_DEPEND,
+ "id", cons->id,
+ "rsc", cons->dependent->id,
+ "with-rsc", cons->primary->id,
+ "score", pcmk_readable_score(cons->score),
+ NULL);
+
+ if (cons->node_attribute) {
+ xmlSetProp(node, (pcmkXmlStr) "node-attribute", (pcmkXmlStr) cons->node_attribute);
+ }
+
+ if (cons->dependent_role != RSC_ROLE_UNKNOWN) {
+ xmlSetProp(node, (pcmkXmlStr) "rsc-role",
+ (pcmkXmlStr) role2text(cons->dependent_role));
+ }
+
+ if (cons->primary_role != RSC_ROLE_UNKNOWN) {
+ xmlSetProp(node, (pcmkXmlStr) "with-rsc-role",
+ (pcmkXmlStr) role2text(cons->primary_role));
+ }
+}
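+
+/* Emits, for example (attribute values are illustrative):
+ * <rsc_colocation id="colo-1" rsc="rsc1" with-rsc="rsc2" score="INFINITY"
+ * rsc-role="Promoted"/>
+ */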
+
+static int
+do_locations_list_xml(pcmk__output_t *out, pe_resource_t *rsc, bool add_header)
+{
+ GList *lpc = NULL;
+ GList *list = rsc->rsc_location;
+ int rc = pcmk_rc_no_output;
+
+ for (lpc = list; lpc != NULL; lpc = lpc->next) {
+ pe__location_t *cons = lpc->data;
+
+ GList *lpc2 = NULL;
+
+ for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) {
+ pe_node_t *node = (pe_node_t *) lpc2->data;
+
+ if (add_header) {
+ PCMK__OUTPUT_LIST_HEADER(out, false, rc, "locations");
+ }
+
+ pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_LOCATION,
+ "node", node->details->uname,
+ "rsc", rsc->id,
+ "id", cons->id,
+ "score", pcmk_readable_score(node->weight),
+ NULL);
+ }
+ }
+
+ if (add_header) {
+ PCMK__OUTPUT_LIST_FOOTER(out, rc);
+ }
+
+ return rc;
+}
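+
+/* Each matching node yields an entry such as (values illustrative):
+ * <rsc_location node="node1" rsc="rsc1" id="loc-1" score="INFINITY"/>
+ */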
+
+PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *",
+ "pe_node_t *", "pe_node_t *", "pe_action_t *",
+ "pe_action_t *")
+static int
+rsc_action_item(pcmk__output_t *out, va_list args)
+{
+ const char *change = va_arg(args, const char *);
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ pe_node_t *origin = va_arg(args, pe_node_t *);
+ pe_node_t *destination = va_arg(args, pe_node_t *);
+ pe_action_t *action = va_arg(args, pe_action_t *);
+ pe_action_t *source = va_arg(args, pe_action_t *);
+
+ int len = 0;
+ char *reason = NULL;
+ char *details = NULL;
+ bool same_host = false;
+ bool same_role = false;
+ bool need_role = false;
+
+ static int rsc_width = 5;
+ static int detail_width = 5;
+
+ CRM_ASSERT(action);
+ CRM_ASSERT(destination != NULL || origin != NULL);
+
+ if(source == NULL) {
+ source = action;
+ }
+
+ len = strlen(rsc->id);
+ if(len > rsc_width) {
+ rsc_width = len + 2;
+ }
+
+ if ((rsc->role > RSC_ROLE_STARTED)
+ || (rsc->next_role > RSC_ROLE_UNPROMOTED)) {
+ need_role = true;
+ }
+
+ if(origin != NULL && destination != NULL && origin->details == destination->details) {
+ same_host = true;
+ }
+
+ if(rsc->role == rsc->next_role) {
+ same_role = true;
+ }
+
+ if (need_role && (origin == NULL)) {
+ /* Starting and promoting a promotable clone instance */
+ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role),
+ role2text(rsc->next_role),
+ pe__node_name(destination));
+
+ } else if (origin == NULL) {
+ /* Starting a resource */
+ details = crm_strdup_printf("%s", pe__node_name(destination));
+
+ } else if (need_role && (destination == NULL)) {
+ /* Stopping a promotable clone instance */
+ details = crm_strdup_printf("%s %s", role2text(rsc->role),
+ pe__node_name(origin));
+
+ } else if (destination == NULL) {
+ /* Stopping a resource */
+ details = crm_strdup_printf("%s", pe__node_name(origin));
+
+ } else if (need_role && same_role && same_host) {
+ /* Recovering, restarting or re-promoting a promotable clone instance */
+ details = crm_strdup_printf("%s %s", role2text(rsc->role),
+ pe__node_name(origin));
+
+ } else if (same_role && same_host) {
+ /* Recovering or Restarting a normal resource */
+ details = crm_strdup_printf("%s", pe__node_name(origin));
+
+ } else if (need_role && same_role) {
+ /* Moving a promotable clone instance */
+ details = crm_strdup_printf("%s -> %s %s", pe__node_name(origin),
+ pe__node_name(destination),
+ role2text(rsc->role));
+
+ } else if (same_role) {
+ /* Moving a normal resource */
+ details = crm_strdup_printf("%s -> %s", pe__node_name(origin),
+ pe__node_name(destination));
+
+ } else if (same_host) {
+ /* Promoting or demoting a promotable clone instance */
+ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role),
+ role2text(rsc->next_role),
+ pe__node_name(origin));
+
+ } else {
+ /* Moving and promoting/demoting */
+ details = crm_strdup_printf("%s %s -> %s %s", role2text(rsc->role),
+ pe__node_name(origin),
+ role2text(rsc->next_role),
+ pe__node_name(destination));
+ }
+
+ len = strlen(details);
+ if(len > detail_width) {
+ detail_width = len;
+ }
+
+ if(source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) {
+ reason = crm_strdup_printf("due to %s (blocked)", source->reason);
+
+ } else if(source->reason) {
+ reason = crm_strdup_printf("due to %s", source->reason);
+
+ } else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
+ reason = strdup("blocked");
+
+ }
+
+ out->list_item(out, NULL, "%-8s %-*s ( %*s )%s%s", change, rsc_width,
+ rsc->id, detail_width, details, reason ? " " : "", reason ? reason : "");
+
+ free(details);
+ free(reason);
+ return pcmk_rc_ok;
+}
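+
+/* Renders items such as (resource, nodes, and reason are illustrative; the
+ * column widths grow to fit the longest values seen so far):
+ * * Move rsc1 ( node1 -> node2 ) due to unrunnable stop
+ */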
+
+PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *",
+ "pe_node_t *", "pe_node_t *", "pe_action_t *",
+ "pe_action_t *")
+static int
+rsc_action_item_xml(pcmk__output_t *out, va_list args)
+{
+ const char *change = va_arg(args, const char *);
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ pe_node_t *origin = va_arg(args, pe_node_t *);
+ pe_node_t *destination = va_arg(args, pe_node_t *);
+ pe_action_t *action = va_arg(args, pe_action_t *);
+ pe_action_t *source = va_arg(args, pe_action_t *);
+
+ char *change_str = NULL;
+
+ bool same_host = false;
+ bool same_role = false;
+ bool need_role = false;
+ xmlNode *xml = NULL;
+
+ CRM_ASSERT(action);
+ CRM_ASSERT(destination != NULL || origin != NULL);
+
+ if (source == NULL) {
+ source = action;
+ }
+
+ if ((rsc->role > RSC_ROLE_STARTED)
+ || (rsc->next_role > RSC_ROLE_UNPROMOTED)) {
+ need_role = true;
+ }
+
+ if(origin != NULL && destination != NULL && origin->details == destination->details) {
+ same_host = true;
+ }
+
+ if(rsc->role == rsc->next_role) {
+ same_role = true;
+ }
+
+ change_str = g_ascii_strdown(change, -1);
+ xml = pcmk__output_create_xml_node(out, "rsc_action",
+ "action", change_str,
+ "resource", rsc->id,
+ NULL);
+ g_free(change_str);
+
+ if (need_role && (origin == NULL)) {
+ /* Starting and promoting a promotable clone instance */
+ pcmk__xe_set_props(xml,
+ "role", role2text(rsc->role),
+ "next-role", role2text(rsc->next_role),
+ "dest", destination->details->uname,
+ NULL);
+
+ } else if (origin == NULL) {
+ /* Starting a resource */
+ crm_xml_add(xml, "node", destination->details->uname);
+
+ } else if (need_role && (destination == NULL)) {
+ /* Stopping a promotable clone instance */
+ pcmk__xe_set_props(xml,
+ "role", role2text(rsc->role),
+ "node", origin->details->uname,
+ NULL);
+
+ } else if (destination == NULL) {
+ /* Stopping a resource */
+ crm_xml_add(xml, "node", origin->details->uname);
+
+ } else if (need_role && same_role && same_host) {
+ /* Recovering, restarting or re-promoting a promotable clone instance */
+ pcmk__xe_set_props(xml,
+ "role", role2text(rsc->role),
+ "source", origin->details->uname,
+ NULL);
+
+ } else if (same_role && same_host) {
+ /* Recovering or Restarting a normal resource */
+ crm_xml_add(xml, "source", origin->details->uname);
+
+ } else if (need_role && same_role) {
+ /* Moving a promotable clone instance */
+ pcmk__xe_set_props(xml,
+ "source", origin->details->uname,
+ "dest", destination->details->uname,
+ "role", role2text(rsc->role),
+ NULL);
+
+ } else if (same_role) {
+ /* Moving a normal resource */
+ pcmk__xe_set_props(xml,
+ "source", origin->details->uname,
+ "dest", destination->details->uname,
+ NULL);
+
+ } else if (same_host) {
+ /* Promoting or demoting a promotable clone instance */
+ pcmk__xe_set_props(xml,
+ "role", role2text(rsc->role),
+ "next-role", role2text(rsc->next_role),
+ "source", origin->details->uname,
+ NULL);
+
+ } else {
+ /* Moving and promoting/demoting */
+ pcmk__xe_set_props(xml,
+ "role", role2text(rsc->role),
+ "source", origin->details->uname,
+ "next-role", role2text(rsc->next_role),
+ "dest", destination->details->uname,
+ NULL);
+ }
+
+ if (source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) {
+ pcmk__xe_set_props(xml,
+ "reason", source->reason,
+ "blocked", "true",
+ NULL);
+
+ } else if(source->reason) {
+ crm_xml_add(xml, "reason", source->reason);
+
+ } else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
+ pcmk__xe_set_bool_attr(xml, "blocked", true);
+
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "bool")
+static int
+rsc_is_colocated_with_list(pcmk__output_t *out, va_list args) {
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ bool recursive = va_arg(args, int);
+
+ int rc = pcmk_rc_no_output;
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
+ return rc;
+ }
+
+ /* We're listing constraints explicitly involving rsc, so use rsc->rsc_cons
+ * directly rather than rsc->cmds->this_with_colocations().
+ */
+ pe__set_resource_flags(rsc, pe_rsc_detect_loop);
+ for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) {
+ pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
+ char *hdr = NULL;
+
+ PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources %s is colocated with", rsc->id);
+
+ if (pcmk_is_set(cons->primary->flags, pe_rsc_detect_loop)) {
+ out->list_item(out, NULL, "%s (id=%s - loop)",
+ cons->primary->id, cons->id);
+ continue;
+ }
+
+ hdr = colocations_header(cons->primary, cons, false);
+ out->list_item(out, NULL, "%s", hdr);
+ free(hdr);
+
+ /* Empty list header just for indentation of information about this resource. */
+ out->begin_list(out, NULL, NULL, NULL);
+
+ out->message(out, "locations-list", cons->primary);
+ if (recursive) {
+ out->message(out, "rsc-is-colocated-with-list",
+ cons->primary, recursive);
+ }
+
+ out->end_list(out);
+ }
+
+ PCMK__OUTPUT_LIST_FOOTER(out, rc);
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "bool")
+static int
+rsc_is_colocated_with_list_xml(pcmk__output_t *out, va_list args) {
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ bool recursive = va_arg(args, int);
+
+ int rc = pcmk_rc_no_output;
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
+ return rc;
+ }
+
+ /* We're listing constraints explicitly involving rsc, so use rsc->rsc_cons
+ * directly rather than rsc->cmds->this_with_colocations().
+ */
+ pe__set_resource_flags(rsc, pe_rsc_detect_loop);
+ for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) {
+ pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
+
+ if (pcmk_is_set(cons->primary->flags, pe_rsc_detect_loop)) {
+ colocations_xml_node(out, cons->primary, cons);
+ continue;
+ }
+
+ colocations_xml_node(out, cons->primary, cons);
+ do_locations_list_xml(out, cons->primary, false);
+
+ if (recursive) {
+ out->message(out, "rsc-is-colocated-with-list",
+ cons->primary, recursive);
+ }
+ }
+
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "bool")
+static int
+rscs_colocated_with_list(pcmk__output_t *out, va_list args) {
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ bool recursive = va_arg(args, int);
+
+ int rc = pcmk_rc_no_output;
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
+ return rc;
+ }
+
+ /* We're listing constraints explicitly involving rsc, so use
+ * rsc->rsc_cons_lhs directly rather than
+ * rsc->cmds->with_this_colocations().
+ */
+ pe__set_resource_flags(rsc, pe_rsc_detect_loop);
+ for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) {
+ pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
+ char *hdr = NULL;
+
+ PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources colocated with %s", rsc->id);
+
+ if (pcmk_is_set(cons->dependent->flags, pe_rsc_detect_loop)) {
+ out->list_item(out, NULL, "%s (id=%s - loop)",
+ cons->dependent->id, cons->id);
+ continue;
+ }
+
+ hdr = colocations_header(cons->dependent, cons, true);
+ out->list_item(out, NULL, "%s", hdr);
+ free(hdr);
+
+ /* Empty list header just for indentation of information about this resource. */
+ out->begin_list(out, NULL, NULL, NULL);
+
+ out->message(out, "locations-list", cons->dependent);
+ if (recursive) {
+ out->message(out, "rscs-colocated-with-list",
+ cons->dependent, recursive);
+ }
+
+ out->end_list(out);
+ }
+
+ PCMK__OUTPUT_LIST_FOOTER(out, rc);
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "bool")
+static int
+rscs_colocated_with_list_xml(pcmk__output_t *out, va_list args) {
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ bool recursive = va_arg(args, int);
+
+ int rc = pcmk_rc_no_output;
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
+ return rc;
+ }
+
+ /* We're listing constraints explicitly involving rsc, so use
+ * rsc->rsc_cons_lhs directly rather than
+ * rsc->cmds->with_this_colocations().
+ */
+ pe__set_resource_flags(rsc, pe_rsc_detect_loop);
+ for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) {
+ pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
+
+ if (pcmk_is_set(cons->dependent->flags, pe_rsc_detect_loop)) {
+ colocations_xml_node(out, cons->dependent, cons);
+ continue;
+ }
+
+ colocations_xml_node(out, cons->dependent, cons);
+ do_locations_list_xml(out, cons->dependent, false);
+
+ if (recursive) {
+ out->message(out, "rscs-colocated-with-list",
+ cons->dependent, recursive);
+ }
+ }
+
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *")
+static int
+locations_list(pcmk__output_t *out, va_list args) {
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+
+ GList *lpc = NULL;
+ GList *list = rsc->rsc_location;
+ int rc = pcmk_rc_no_output;
+
+ for (lpc = list; lpc != NULL; lpc = lpc->next) {
+ pe__location_t *cons = lpc->data;
+
+ GList *lpc2 = NULL;
+
+ for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) {
+ pe_node_t *node = (pe_node_t *) lpc2->data;
+
+ PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Locations");
+ out->list_item(out, NULL, "Node %s (score=%s, id=%s, rsc=%s)",
+ pe__node_name(node),
+ pcmk_readable_score(node->weight), cons->id,
+ rsc->id);
+ }
+ }
+
+ PCMK__OUTPUT_LIST_FOOTER(out, rc);
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *")
+static int
+locations_list_xml(pcmk__output_t *out, va_list args) {
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ return do_locations_list_xml(out, rsc, true);
+}
+
+PCMK__OUTPUT_ARGS("locations-and-colocations", "pe_resource_t *",
+ "pe_working_set_t *", "bool", "bool")
+static int
+locations_and_colocations(pcmk__output_t *out, va_list args)
+{
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
+ bool recursive = va_arg(args, int);
+ bool force = va_arg(args, int);
+
+ pcmk__unpack_constraints(data_set);
+
+ // Constraints apply to group/clone, not member/instance
+ if (!force) {
+ rsc = uber_parent(rsc);
+ }
+
+ out->message(out, "locations-list", rsc);
+
+ pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop);
+ out->message(out, "rscs-colocated-with-list", rsc, recursive);
+
+ pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop);
+ out->message(out, "rsc-is-colocated-with-list", rsc, recursive);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("locations-and-colocations", "pe_resource_t *",
+ "pe_working_set_t *", "bool", "bool")
+static int
+locations_and_colocations_xml(pcmk__output_t *out, va_list args)
+{
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
+ bool recursive = va_arg(args, int);
+ bool force = va_arg(args, int);
+
+ pcmk__unpack_constraints(data_set);
+
+ // Constraints apply to group/clone, not member/instance
+ if (!force) {
+ rsc = uber_parent(rsc);
+ }
+
+ pcmk__output_xml_create_parent(out, "constraints", NULL);
+ do_locations_list_xml(out, rsc, false);
+
+ pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop);
+ out->message(out, "rscs-colocated-with-list", rsc, recursive);
+
+ pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop);
+ out->message(out, "rsc-is-colocated-with-list", rsc, recursive);
+
+ pcmk__output_xml_pop_parent(out);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *")
+static int
+health(pcmk__output_t *out, va_list args)
+{
+ const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *);
+ const char *host_from = va_arg(args, const char *);
+ const char *fsa_state = va_arg(args, const char *);
+ const char *result = va_arg(args, const char *);
+
+ return out->info(out, "Controller on %s in state %s: %s",
+ pcmk__s(host_from, "unknown node"),
+ pcmk__s(fsa_state, "unknown"),
+ pcmk__s(result, "unknown result"));
+}
+
+PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *")
+static int
+health_text(pcmk__output_t *out, va_list args)
+{
+ if (!out->is_quiet(out)) {
+ return health(out, args);
+ } else {
+ const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *);
+ const char *host_from G_GNUC_UNUSED = va_arg(args, const char *);
+ const char *fsa_state = va_arg(args, const char *);
+ const char *result G_GNUC_UNUSED = va_arg(args, const char *);
+
+ if (fsa_state != NULL) {
+ pcmk__formatted_printf(out, "%s\n", fsa_state);
+ return pcmk_rc_ok;
+ }
+ }
+
+ return pcmk_rc_no_output;
+}
+
+PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *")
+static int
+health_xml(pcmk__output_t *out, va_list args)
+{
+ const char *sys_from = va_arg(args, const char *);
+ const char *host_from = va_arg(args, const char *);
+ const char *fsa_state = va_arg(args, const char *);
+ const char *result = va_arg(args, const char *);
+
+ pcmk__output_create_xml_node(out, pcmk__s(sys_from, ""),
+ "node_name", pcmk__s(host_from, ""),
+ "state", pcmk__s(fsa_state, ""),
+ "result", pcmk__s(result, ""),
+ NULL);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
+ "enum pcmk_pacemakerd_state", "const char *", "time_t")
+static int
+pacemakerd_health(pcmk__output_t *out, va_list args)
+{
+ const char *sys_from = va_arg(args, const char *);
+ enum pcmk_pacemakerd_state state =
+ (enum pcmk_pacemakerd_state) va_arg(args, int);
+ const char *state_s = va_arg(args, const char *);
+ time_t last_updated = va_arg(args, time_t);
+
+ char *last_updated_s = NULL;
+ int rc = pcmk_rc_ok;
+
+ if (sys_from == NULL) {
+ if (state == pcmk_pacemakerd_state_remote) {
+ sys_from = "pacemaker-remoted";
+ } else {
+ sys_from = CRM_SYSTEM_MCP;
+ }
+ }
+
+ if (state_s == NULL) {
+ state_s = pcmk__pcmkd_state_enum2friendly(state);
+ }
+
+ if (last_updated != 0) {
+ last_updated_s = pcmk__epoch2str(&last_updated,
+ crm_time_log_date
+ |crm_time_log_timeofday
+ |crm_time_log_with_timezone);
+ }
+
+ rc = out->info(out, "Status of %s: '%s' (last updated %s)",
+ sys_from, state_s,
+ pcmk__s(last_updated_s, "at unknown time"));
+
+ free(last_updated_s);
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
+ "enum pcmk_pacemakerd_state", "const char *", "time_t")
+static int
+pacemakerd_health_html(pcmk__output_t *out, va_list args)
+{
+ const char *sys_from = va_arg(args, const char *);
+ enum pcmk_pacemakerd_state state =
+ (enum pcmk_pacemakerd_state) va_arg(args, int);
+ const char *state_s = va_arg(args, const char *);
+ time_t last_updated = va_arg(args, time_t);
+
+ char *last_updated_s = NULL;
+ char *msg = NULL;
+
+ if (sys_from == NULL) {
+ if (state == pcmk_pacemakerd_state_remote) {
+ sys_from = "pacemaker-remoted";
+ } else {
+ sys_from = CRM_SYSTEM_MCP;
+ }
+ }
+
+ if (state_s == NULL) {
+ state_s = pcmk__pcmkd_state_enum2friendly(state);
+ }
+
+ if (last_updated != 0) {
+ last_updated_s = pcmk__epoch2str(&last_updated,
+ crm_time_log_date
+ |crm_time_log_timeofday
+ |crm_time_log_with_timezone);
+ }
+
+ msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)",
+ sys_from, state_s,
+ pcmk__s(last_updated_s, "at unknown time"));
+ pcmk__output_create_html_node(out, "li", NULL, NULL, msg);
+
+ free(msg);
+ free(last_updated_s);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
+ "enum pcmk_pacemakerd_state", "const char *", "time_t")
+static int
+pacemakerd_health_text(pcmk__output_t *out, va_list args)
+{
+ if (!out->is_quiet(out)) {
+ return pacemakerd_health(out, args);
+ } else {
+ const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *);
+ enum pcmk_pacemakerd_state state =
+ (enum pcmk_pacemakerd_state) va_arg(args, int);
+ const char *state_s = va_arg(args, const char *);
+ time_t last_updated G_GNUC_UNUSED = va_arg(args, time_t);
+
+ if (state_s == NULL) {
+ state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state);
+ }
+ pcmk__formatted_printf(out, "%s\n", state_s);
+ return pcmk_rc_ok;
+ }
+}
+
+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
+ "enum pcmk_pacemakerd_state", "const char *", "time_t")
+static int
+pacemakerd_health_xml(pcmk__output_t *out, va_list args)
+{
+ const char *sys_from = va_arg(args, const char *);
+ enum pcmk_pacemakerd_state state =
+ (enum pcmk_pacemakerd_state) va_arg(args, int);
+ const char *state_s = va_arg(args, const char *);
+ time_t last_updated = va_arg(args, time_t);
+
+ char *last_updated_s = NULL;
+
+ if (sys_from == NULL) {
+ if (state == pcmk_pacemakerd_state_remote) {
+ sys_from = "pacemaker-remoted";
+ } else {
+ sys_from = CRM_SYSTEM_MCP;
+ }
+ }
+
+ if (state_s == NULL) {
+ state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state);
+ }
+
+ if (last_updated != 0) {
+ last_updated_s = pcmk__epoch2str(&last_updated,
+ crm_time_log_date
+ |crm_time_log_timeofday
+ |crm_time_log_with_timezone);
+ }
+
+ pcmk__output_create_xml_node(out, "pacemakerd",
+ "sys_from", sys_from,
+ "state", state_s,
+ "last_updated", last_updated_s,
+ NULL);
+ free(last_updated_s);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t")
+static int
+profile_default(pcmk__output_t *out, va_list args) {
+ const char *xml_file = va_arg(args, const char *);
+ clock_t start = va_arg(args, clock_t);
+ clock_t end = va_arg(args, clock_t);
+
+ out->list_item(out, NULL, "Testing %s ... %.2f secs", xml_file,
+ (end - start) / (float) CLOCKS_PER_SEC);
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t")
+static int
+profile_xml(pcmk__output_t *out, va_list args) {
+ const char *xml_file = va_arg(args, const char *);
+ clock_t start = va_arg(args, clock_t);
+ clock_t end = va_arg(args, clock_t);
+
+ char *duration = pcmk__ftoa((end - start) / (float) CLOCKS_PER_SEC);
+
+ pcmk__output_create_xml_node(out, "timing",
+ "file", xml_file,
+ "duration", duration,
+ NULL);
+
+ free(duration);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("dc", "const char *")
+static int
+dc(pcmk__output_t *out, va_list args)
+{
+ const char *dc = va_arg(args, const char *);
+
+ return out->info(out, "Designated Controller is: %s",
+ pcmk__s(dc, "not yet elected"));
+}
+
+PCMK__OUTPUT_ARGS("dc", "const char *")
+static int
+dc_text(pcmk__output_t *out, va_list args)
+{
+ if (!out->is_quiet(out)) {
+ return dc(out, args);
+ } else {
+ const char *dc = va_arg(args, const char *);
+
+ if (dc != NULL) {
+ pcmk__formatted_printf(out, "%s\n", dc);
+ return pcmk_rc_ok;
+ }
+ }
+
+ return pcmk_rc_no_output;
+}
+
+PCMK__OUTPUT_ARGS("dc", "const char *")
+static int
+dc_xml(pcmk__output_t *out, va_list args)
+{
+ const char *dc = va_arg(args, const char *);
+
+ pcmk__output_create_xml_node(out, "dc",
+ "node_name", pcmk__s(dc, ""),
+ NULL);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool")
+static int
+crmadmin_node(pcmk__output_t *out, va_list args)
+{
+ const char *type = va_arg(args, const char *);
+ const char *name = va_arg(args, const char *);
+ const char *id = va_arg(args, const char *);
+ bool bash_export = va_arg(args, int);
+
+ if (bash_export) {
+ return out->info(out, "export %s=%s",
+ pcmk__s(name, "<null>"), pcmk__s(id, ""));
+ } else {
+ return out->info(out, "%s node: %s (%s)", type ? type : "cluster",
+ pcmk__s(name, "<null>"), pcmk__s(id, "<null>"));
+ }
+}
+
+PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool")
+static int
+crmadmin_node_text(pcmk__output_t *out, va_list args)
+{
+ if (!out->is_quiet(out)) {
+ return crmadmin_node(out, args);
+ } else {
+ const char *type G_GNUC_UNUSED = va_arg(args, const char *);
+ const char *name = va_arg(args, const char *);
+ const char *id G_GNUC_UNUSED = va_arg(args, const char *);
+ bool bash_export G_GNUC_UNUSED = va_arg(args, int);
+
+ pcmk__formatted_printf(out, "%s\n", pcmk__s(name, "<null>"));
+ return pcmk_rc_ok;
+ }
+}
+
+PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool")
+static int
+crmadmin_node_xml(pcmk__output_t *out, va_list args)
+{
+ const char *type = va_arg(args, const char *);
+ const char *name = va_arg(args, const char *);
+ const char *id = va_arg(args, const char *);
+ bool bash_export G_GNUC_UNUSED = va_arg(args, int);
+
+ pcmk__output_create_xml_node(out, "node",
+ "type", type ? type : "cluster",
+ "name", pcmk__s(name, ""),
+ "id", pcmk__s(id, ""),
+ NULL);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("digests", "const pe_resource_t *", "const pe_node_t *",
+ "const char *", "guint", "const op_digest_cache_t *")
+static int
+digests_text(pcmk__output_t *out, va_list args)
+{
+ const pe_resource_t *rsc = va_arg(args, const pe_resource_t *);
+ const pe_node_t *node = va_arg(args, const pe_node_t *);
+ const char *task = va_arg(args, const char *);
+ guint interval_ms = va_arg(args, guint);
+ const op_digest_cache_t *digests = va_arg(args, const op_digest_cache_t *);
+
+ char *action_desc = NULL;
+ const char *rsc_desc = "unknown resource";
+ const char *node_desc = "unknown node";
+
+ if (interval_ms != 0) {
+ action_desc = crm_strdup_printf("%ums-interval %s action", interval_ms,
+ ((task == NULL)? "unknown" : task));
+ } else if (pcmk__str_eq(task, "monitor", pcmk__str_none)) {
+ action_desc = strdup("probe action");
+ } else {
+ action_desc = crm_strdup_printf("%s action",
+ ((task == NULL)? "unknown" : task));
+ }
+ if ((rsc != NULL) && (rsc->id != NULL)) {
+ rsc_desc = rsc->id;
+ }
+ if ((node != NULL) && (node->details->uname != NULL)) {
+ node_desc = node->details->uname;
+ }
+ out->begin_list(out, NULL, NULL, "Digests for %s %s on %s",
+ rsc_desc, action_desc, node_desc);
+ free(action_desc);
+
+ if (digests == NULL) {
+ out->list_item(out, NULL, "none");
+ out->end_list(out);
+ return pcmk_rc_ok;
+ }
+ if (digests->digest_all_calc != NULL) {
+ out->list_item(out, NULL, "%s (all parameters)",
+ digests->digest_all_calc);
+ }
+ if (digests->digest_secure_calc != NULL) {
+ out->list_item(out, NULL, "%s (non-private parameters)",
+ digests->digest_secure_calc);
+ }
+ if (digests->digest_restart_calc != NULL) {
+ out->list_item(out, NULL, "%s (non-reloadable parameters)",
+ digests->digest_restart_calc);
+ }
+ out->end_list(out);
+ return pcmk_rc_ok;
+}
+
+static void
+add_digest_xml(xmlNode *parent, const char *type, const char *digest,
+ xmlNode *digest_source)
+{
+ if (digest != NULL) {
+ xmlNodePtr digest_xml = create_xml_node(parent, "digest");
+
+ crm_xml_add(digest_xml, "type", ((type == NULL)? "unspecified" : type));
+ crm_xml_add(digest_xml, "hash", digest);
+ if (digest_source != NULL) {
+ add_node_copy(digest_xml, digest_source);
+ }
+ }
+}
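+
+/* Adds a child element such as (hash value is illustrative):
+ * <digest type="all" hash="0800fabcaa14d0b17b73c4afa0133d22"/>
+ * optionally embedding a copy of the parameter set the hash was computed
+ * from.
+ */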
+
+PCMK__OUTPUT_ARGS("digests", "const pe_resource_t *", "const pe_node_t *",
+ "const char *", "guint", "const op_digest_cache_t *")
+static int
+digests_xml(pcmk__output_t *out, va_list args)
+{
+ const pe_resource_t *rsc = va_arg(args, const pe_resource_t *);
+ const pe_node_t *node = va_arg(args, const pe_node_t *);
+ const char *task = va_arg(args, const char *);
+ guint interval_ms = va_arg(args, guint);
+ const op_digest_cache_t *digests = va_arg(args, const op_digest_cache_t *);
+
+ char *interval_s = crm_strdup_printf("%ums", interval_ms);
+ xmlNode *xml = NULL;
+
+ xml = pcmk__output_create_xml_node(out, "digests",
+ "resource", pcmk__s(rsc->id, ""),
+ "node", pcmk__s(node->details->uname, ""),
+ "task", pcmk__s(task, ""),
+ "interval", interval_s,
+ NULL);
+ free(interval_s);
+ if (digests != NULL) {
+ add_digest_xml(xml, "all", digests->digest_all_calc,
+ digests->params_all);
+ add_digest_xml(xml, "nonprivate", digests->digest_secure_calc,
+ digests->params_secure);
+ add_digest_xml(xml, "nonreloadable", digests->digest_restart_calc,
+ digests->params_restart);
+ }
+ return pcmk_rc_ok;
+}
+
+#define STOP_SANITY_ASSERT(lineno) do { \
+ if(current && current->details->unclean) { \
+ /* It will be a pseudo op */ \
+ } else if(stop == NULL) { \
+ crm_err("%s:%d: No stop action exists for %s", \
+ __func__, lineno, rsc->id); \
+ CRM_ASSERT(stop != NULL); \
+ } else if (pcmk_is_set(stop->flags, pe_action_optional)) { \
+ crm_err("%s:%d: Action %s is still optional", \
+ __func__, lineno, stop->uuid); \
+ CRM_ASSERT(!pcmk_is_set(stop->flags, pe_action_optional)); \
+ } \
+ } while(0)
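+
+/* STOP_SANITY_ASSERT validates scheduler output as it is displayed: any
+ * transition that implies stopping a resource must include a mandatory stop
+ * action, unless the node is unclean (in which case fencing makes the stop
+ * a pseudo-action).
+ */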
+
+PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *")
+static int
+rsc_action_default(pcmk__output_t *out, va_list args)
+{
+ pe_resource_t *rsc = va_arg(args, pe_resource_t *);
+ pe_node_t *current = va_arg(args, pe_node_t *);
+ pe_node_t *next = va_arg(args, pe_node_t *);
+
+ GList *possible_matches = NULL;
+ char *key = NULL;
+ int rc = pcmk_rc_no_output;
+ bool moving = false;
+
+ pe_node_t *start_node = NULL;
+ pe_action_t *start = NULL;
+ pe_action_t *stop = NULL;
+ pe_action_t *promote = NULL;
+ pe_action_t *demote = NULL;
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_managed)
+ || (current == NULL && next == NULL)) {
+ pe_rsc_info(rsc, "Leave %s\t(%s%s)",
+ rsc->id, role2text(rsc->role),
+ !pcmk_is_set(rsc->flags, pe_rsc_managed)? " unmanaged" : "");
+ return rc;
+ }
+
+ moving = (current != NULL) && (next != NULL)
+ && (current->details != next->details);
+
+ possible_matches = pe__resource_actions(rsc, next, RSC_START, false);
+ if (possible_matches) {
+ start = possible_matches->data;
+ g_list_free(possible_matches);
+ }
+
+ if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) {
+ start_node = NULL;
+ } else {
+ start_node = current;
+ }
+ possible_matches = pe__resource_actions(rsc, start_node, RSC_STOP, false);
+ if (possible_matches) {
+ stop = possible_matches->data;
+ g_list_free(possible_matches);
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_stop_unexpected)) {
+ /* The resource is multiply active with multiple-active set to
+ * stop_unexpected, and not stopping on its current node, but it should
+ * be stopping elsewhere.
+ */
+ possible_matches = pe__resource_actions(rsc, NULL, RSC_STOP, false);
+ if (possible_matches != NULL) {
+ stop = possible_matches->data;
+ g_list_free(possible_matches);
+ }
+ }
+
+ possible_matches = pe__resource_actions(rsc, next, RSC_PROMOTE, false);
+ if (possible_matches) {
+ promote = possible_matches->data;
+ g_list_free(possible_matches);
+ }
+
+ possible_matches = pe__resource_actions(rsc, next, RSC_DEMOTE, false);
+ if (possible_matches) {
+ demote = possible_matches->data;
+ g_list_free(possible_matches);
+ }
+
+ if (rsc->role == rsc->next_role) {
+ pe_action_t *migrate_op = NULL;
+
+ CRM_CHECK(next != NULL, return rc);
+
+ possible_matches = pe__resource_actions(rsc, next, RSC_MIGRATED, false);
+ if (possible_matches) {
+ migrate_op = possible_matches->data;
+ }
+
+ if ((migrate_op != NULL) && (current != NULL)
+ && pcmk_is_set(migrate_op->flags, pe_action_runnable)) {
+ rc = out->message(out, "rsc-action-item", "Migrate", rsc, current,
+ next, start, NULL);
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) {
+ rc = out->message(out, "rsc-action-item", "Reload", rsc, current,
+ next, start, NULL);
+
+ } else if (start == NULL || pcmk_is_set(start->flags, pe_action_optional)) {
+ if ((demote != NULL) && (promote != NULL)
+ && !pcmk_is_set(demote->flags, pe_action_optional)
+ && !pcmk_is_set(promote->flags, pe_action_optional)) {
+ rc = out->message(out, "rsc-action-item", "Re-promote", rsc,
+ current, next, promote, demote);
+ } else {
+ pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id,
+ role2text(rsc->role), pe__node_name(next));
+ }
+
+ } else if (!pcmk_is_set(start->flags, pe_action_runnable)) {
+ rc = out->message(out, "rsc-action-item", "Stop", rsc, current,
+ NULL, stop, (stop && stop->reason)? stop : start);
+ STOP_SANITY_ASSERT(__LINE__);
+
+ } else if (moving && current) {
+ rc = out->message(out, "rsc-action-item", pcmk_is_set(rsc->flags, pe_rsc_failed)? "Recover" : "Move",
+ rsc, current, next, stop, NULL);
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ rc = out->message(out, "rsc-action-item", "Recover", rsc, current,
+ NULL, stop, NULL);
+ STOP_SANITY_ASSERT(__LINE__);
+
+ } else {
+ rc = out->message(out, "rsc-action-item", "Restart", rsc, current,
+ next, start, NULL);
+ /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */
+ }
+
+ g_list_free(possible_matches);
+ return rc;
+ }
+
+ if(stop
+ && (rsc->next_role == RSC_ROLE_STOPPED
+ || (start && !pcmk_is_set(start->flags, pe_action_runnable)))) {
+
+ GList *gIter = NULL;
+
+ key = stop_key(rsc);
+ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
+ pe_node_t *node = (pe_node_t *) gIter->data;
+ pe_action_t *stop_op = NULL;
+
+ possible_matches = find_actions(rsc->actions, key, node);
+ if (possible_matches) {
+ stop_op = possible_matches->data;
+ g_list_free(possible_matches);
+ }
+
+ if ((stop_op != NULL) && pcmk_is_set(stop_op->flags, pe_action_runnable)) {
+ STOP_SANITY_ASSERT(__LINE__);
+ }
+
+ if (out->message(out, "rsc-action-item", "Stop", rsc, node, NULL,
+ stop_op, (stop_op && stop_op->reason)? stop_op : start) == pcmk_rc_ok) {
+ rc = pcmk_rc_ok;
+ }
+ }
+
+ free(key);
+
+ } else if ((stop != NULL)
+ && pcmk_all_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_stop)) {
+ /* 'stop' may be NULL if the failure was ignored */
+ rc = out->message(out, "rsc-action-item", "Recover", rsc, current,
+ next, stop, start);
+ STOP_SANITY_ASSERT(__LINE__);
+
+ } else if (moving) {
+ rc = out->message(out, "rsc-action-item", "Move", rsc, current, next,
+ stop, NULL);
+ STOP_SANITY_ASSERT(__LINE__);
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) {
+ rc = out->message(out, "rsc-action-item", "Reload", rsc, current, next,
+ start, NULL);
+
+ } else if (stop != NULL && !pcmk_is_set(stop->flags, pe_action_optional)) {
+ rc = out->message(out, "rsc-action-item", "Restart", rsc, current,
+ next, start, NULL);
+ STOP_SANITY_ASSERT(__LINE__);
+
+ } else if (rsc->role == RSC_ROLE_PROMOTED) {
+ CRM_LOG_ASSERT(current != NULL);
+ rc = out->message(out, "rsc-action-item", "Demote", rsc, current,
+ next, demote, NULL);
+
+ } else if (rsc->next_role == RSC_ROLE_PROMOTED) {
+ CRM_LOG_ASSERT(next);
+ rc = out->message(out, "rsc-action-item", "Promote", rsc, current,
+ next, promote, NULL);
+
+ } else if (rsc->role == RSC_ROLE_STOPPED && rsc->next_role > RSC_ROLE_STOPPED) {
+ rc = out->message(out, "rsc-action-item", "Start", rsc, current, next,
+ start, NULL);
+ }
+
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *")
+static int
+node_action(pcmk__output_t *out, va_list args)
+{
+ const char *task = va_arg(args, const char *);
+ const char *node_name = va_arg(args, const char *);
+ const char *reason = va_arg(args, const char *);
+
+ if (task == NULL) {
+ return pcmk_rc_no_output;
+ } else if (reason) {
+ out->list_item(out, NULL, "%s %s '%s'", task, node_name, reason);
+ } else {
+ crm_notice(" * %s %s", task, node_name);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *")
+static int
+node_action_xml(pcmk__output_t *out, va_list args)
+{
+ const char *task = va_arg(args, const char *);
+ const char *node_name = va_arg(args, const char *);
+ const char *reason = va_arg(args, const char *);
+
+ if (task == NULL) {
+ return pcmk_rc_no_output;
+ } else if (reason) {
+ pcmk__output_create_xml_node(out, "node_action",
+ "task", task,
+ "node", node_name,
+ "reason", reason,
+ NULL);
+ } else {
+ crm_notice(" * %s %s", task, node_name);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("node-info", "int", "const char *", "const char *",
+ "const char *", "bool", "bool")
+static int
+node_info_default(pcmk__output_t *out, va_list args)
+{
+ int node_id = va_arg(args, int);
+ const char *node_name = va_arg(args, const char *);
+ const char *uuid = va_arg(args, const char *);
+ const char *state = va_arg(args, const char *);
+ bool have_quorum = (bool) va_arg(args, int);
+ bool is_remote = (bool) va_arg(args, int);
+
+ return out->info(out,
+ "Node %d: %s "
+ "(uuid=%s, state=%s, have_quorum=%s, is_remote=%s)",
+ node_id, pcmk__s(node_name, "unknown"),
+ pcmk__s(uuid, "unknown"), pcmk__s(state, "unknown"),
+ pcmk__btoa(have_quorum), pcmk__btoa(is_remote));
+}
+
+PCMK__OUTPUT_ARGS("node-info", "int", "const char *", "const char *",
+ "const char *", "bool", "bool")
+static int
+node_info_xml(pcmk__output_t *out, va_list args)
+{
+ int node_id = va_arg(args, int);
+ const char *node_name = va_arg(args, const char *);
+ const char *uuid = va_arg(args, const char *);
+ const char *state = va_arg(args, const char *);
+ bool have_quorum = (bool) va_arg(args, int);
+ bool is_remote = (bool) va_arg(args, int);
+
+ char *id_s = crm_strdup_printf("%d", node_id);
+
+ pcmk__output_create_xml_node(out, "node-info",
+ "nodeid", id_s,
+ XML_ATTR_UNAME, node_name,
+ XML_ATTR_ID, uuid,
+ XML_NODE_IS_PEER, state,
+ XML_ATTR_HAVE_QUORUM, pcmk__btoa(have_quorum),
+ XML_NODE_IS_REMOTE, pcmk__btoa(is_remote),
+ NULL);
+ free(id_s);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNodePtr")
+static int
+inject_cluster_action(pcmk__output_t *out, va_list args)
+{
+ const char *node = va_arg(args, const char *);
+ const char *task = va_arg(args, const char *);
+ xmlNodePtr rsc = va_arg(args, xmlNodePtr);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ if(rsc) {
+ out->list_item(out, NULL, "Cluster action: %s for %s on %s", task, ID(rsc), node);
+ } else {
+ out->list_item(out, NULL, "Cluster action: %s on %s", task, node);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNodePtr")
+static int
+inject_cluster_action_xml(pcmk__output_t *out, va_list args)
+{
+ const char *node = va_arg(args, const char *);
+ const char *task = va_arg(args, const char *);
+ xmlNodePtr rsc = va_arg(args, xmlNodePtr);
+
+ xmlNodePtr xml_node = NULL;
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ xml_node = pcmk__output_create_xml_node(out, "cluster_action",
+ "task", task,
+ "node", node,
+ NULL);
+
+ if (rsc) {
+ crm_xml_add(xml_node, "id", ID(rsc));
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *")
+static int
+inject_fencing_action(pcmk__output_t *out, va_list args)
+{
+ const char *target = va_arg(args, const char *);
+ const char *op = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ out->list_item(out, NULL, "Fencing %s (%s)", target, op);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *")
+static int
+inject_fencing_action_xml(pcmk__output_t *out, va_list args)
+{
+ const char *target = va_arg(args, const char *);
+ const char *op = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ pcmk__output_create_xml_node(out, "fencing_action",
+ "target", target,
+ "op", op,
+ NULL);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr")
+static int
+inject_attr(pcmk__output_t *out, va_list args)
+{
+ const char *name = va_arg(args, const char *);
+ const char *value = va_arg(args, const char *);
+ xmlNodePtr cib_node = va_arg(args, xmlNodePtr);
+
+ xmlChar *node_path = NULL;
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ node_path = xmlGetNodePath(cib_node);
+
+ out->list_item(out, NULL, "Injecting attribute %s=%s into %s '%s'",
+ name, value, node_path, ID(cib_node));
+
+ xmlFree(node_path);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr")
+static int
+inject_attr_xml(pcmk__output_t *out, va_list args)
+{
+ const char *name = va_arg(args, const char *);
+ const char *value = va_arg(args, const char *);
+ xmlNodePtr cib_node = va_arg(args, xmlNodePtr);
+
+ xmlChar *node_path = NULL;
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ node_path = xmlGetNodePath(cib_node);
+
+ pcmk__output_create_xml_node(out, "inject_attr",
+ "name", name,
+ "value", value,
+ "node_path", node_path,
+ "cib_node", ID(cib_node),
+ NULL);
+ xmlFree(node_path);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-spec", "const char *")
+static int
+inject_spec(pcmk__output_t *out, va_list args)
+{
+ const char *spec = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ out->list_item(out, NULL, "Injecting %s into the configuration", spec);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-spec", "const char *")
+static int
+inject_spec_xml(pcmk__output_t *out, va_list args)
+{
+ const char *spec = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ pcmk__output_create_xml_node(out, "inject_spec",
+ "spec", spec,
+ NULL);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *")
+static int
+inject_modify_config(pcmk__output_t *out, va_list args)
+{
+ const char *quorum = va_arg(args, const char *);
+ const char *watchdog = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ out->begin_list(out, NULL, NULL, "Performing Requested Modifications");
+
+ if (quorum) {
+ out->list_item(out, NULL, "Setting quorum: %s", quorum);
+ }
+
+ if (watchdog) {
+ out->list_item(out, NULL, "Setting watchdog: %s", watchdog);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *")
+static int
+inject_modify_config_xml(pcmk__output_t *out, va_list args)
+{
+ const char *quorum = va_arg(args, const char *);
+ const char *watchdog = va_arg(args, const char *);
+
+ xmlNodePtr node = NULL;
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ node = pcmk__output_xml_create_parent(out, "modifications", NULL);
+
+ if (quorum) {
+ crm_xml_add(node, "quorum", quorum);
+ }
+
+ if (watchdog) {
+ crm_xml_add(node, "watchdog", watchdog);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *")
+static int
+inject_modify_node(pcmk__output_t *out, va_list args)
+{
+ const char *action = va_arg(args, const char *);
+ const char *node = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ if (pcmk__str_eq(action, "Online", pcmk__str_none)) {
+ out->list_item(out, NULL, "Bringing node %s online", node);
+ return pcmk_rc_ok;
+ } else if (pcmk__str_eq(action, "Offline", pcmk__str_none)) {
+ out->list_item(out, NULL, "Taking node %s offline", node);
+ return pcmk_rc_ok;
+ } else if (pcmk__str_eq(action, "Failing", pcmk__str_none)) {
+ out->list_item(out, NULL, "Failing node %s", node);
+ return pcmk_rc_ok;
+ }
+
+ return pcmk_rc_no_output;
+}
+
+PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *")
+static int
+inject_modify_node_xml(pcmk__output_t *out, va_list args)
+{
+ const char *action = va_arg(args, const char *);
+ const char *node = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ pcmk__output_create_xml_node(out, "modify_node",
+ "action", action,
+ "node", node,
+ NULL);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *")
+static int
+inject_modify_ticket(pcmk__output_t *out, va_list args)
+{
+ const char *action = va_arg(args, const char *);
+ const char *ticket = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ if (pcmk__str_eq(action, "Standby", pcmk__str_none)) {
+ out->list_item(out, NULL, "Making ticket %s standby", ticket);
+ } else {
+ out->list_item(out, NULL, "%s ticket %s", action, ticket);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *")
+static int
+inject_modify_ticket_xml(pcmk__output_t *out, va_list args)
+{
+ const char *action = va_arg(args, const char *);
+ const char *ticket = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ pcmk__output_create_xml_node(out, "modify_ticket",
+ "action", action,
+ "ticket", ticket,
+ NULL);
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *")
+static int
+inject_pseudo_action(pcmk__output_t *out, va_list args)
+{
+ const char *node = va_arg(args, const char *);
+ const char *task = va_arg(args, const char *);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ out->list_item(out, NULL, "Pseudo action: %s%s%s", task, node ? " on " : "",
+ node ? node : "");
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *")
+static int
+inject_pseudo_action_xml(pcmk__output_t *out, va_list args)
+{
+ const char *node = va_arg(args, const char *);
+ const char *task = va_arg(args, const char *);
+
+ xmlNodePtr xml_node = NULL;
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ xml_node = pcmk__output_create_xml_node(out, "pseudo_action",
+ "task", task,
+ NULL);
+ if (node) {
+ crm_xml_add(xml_node, "node", node);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *",
+ "const char *", "guint")
+static int
+inject_rsc_action(pcmk__output_t *out, va_list args)
+{
+ const char *rsc = va_arg(args, const char *);
+ const char *operation = va_arg(args, const char *);
+ const char *node = va_arg(args, const char *);
+ guint interval_ms = va_arg(args, guint);
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ if (interval_ms) {
+ out->list_item(out, NULL, "Resource action: %-15s %s=%u on %s",
+ rsc, operation, interval_ms, node);
+ } else {
+ out->list_item(out, NULL, "Resource action: %-15s %s on %s",
+ rsc, operation, node);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *",
+ "const char *", "guint")
+static int
+inject_rsc_action_xml(pcmk__output_t *out, va_list args)
+{
+ const char *rsc = va_arg(args, const char *);
+ const char *operation = va_arg(args, const char *);
+ const char *node = va_arg(args, const char *);
+ guint interval_ms = va_arg(args, guint);
+
+ xmlNodePtr xml_node = NULL;
+
+ if (out->is_quiet(out)) {
+ return pcmk_rc_no_output;
+ }
+
+ xml_node = pcmk__output_create_xml_node(out, "rsc_action",
+ "resource", rsc,
+ "op", operation,
+ "node", node,
+ NULL);
+
+ if (interval_ms) {
+ char *interval_s = pcmk__itoa(interval_ms);
+
+ crm_xml_add(xml_node, "interval", interval_s);
+ free(interval_s);
+ }
+
+ return pcmk_rc_ok;
+}
+
+#define CHECK_RC(retcode, retval) \
+ if (retval == pcmk_rc_ok) { \
+ retcode = pcmk_rc_ok; \
+ }
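+
+/* CHECK_RC latches pcmk_rc_ok into retcode once any section has produced
+ * output, so later sections can tell whether to print a leading spacer
+ * (hence the "rc == pcmk_rc_ok" arguments passed below).
+ */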
+
+PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *",
+ "enum pcmk_pacemakerd_state", "crm_exit_t",
+ "stonith_history_t *", "enum pcmk__fence_history", "uint32_t",
+ "uint32_t", "const char *", "GList *", "GList *")
+int
+pcmk__cluster_status_text(pcmk__output_t *out, va_list args)
+{
+ pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
+ enum pcmk_pacemakerd_state pcmkd_state =
+ (enum pcmk_pacemakerd_state) va_arg(args, int);
+ crm_exit_t history_rc = va_arg(args, crm_exit_t);
+ stonith_history_t *stonith_history = va_arg(args, stonith_history_t *);
+ enum pcmk__fence_history fence_history = va_arg(args, int);
+ uint32_t section_opts = va_arg(args, uint32_t);
+ uint32_t show_opts = va_arg(args, uint32_t);
+ const char *prefix = va_arg(args, const char *);
+ GList *unames = va_arg(args, GList *);
+ GList *resources = va_arg(args, GList *);
+
+ int rc = pcmk_rc_no_output;
+ bool already_printed_failure = false;
+
+ CHECK_RC(rc, out->message(out, "cluster-summary", data_set, pcmkd_state,
+ section_opts, show_opts));
+
+ if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) {
+ CHECK_RC(rc, out->message(out, "node-list", data_set->nodes, unames,
+ resources, show_opts, rc == pcmk_rc_ok));
+ }
+
+ /* Print resources section, if needed */
+ if (pcmk_is_set(section_opts, pcmk_section_resources)) {
+ CHECK_RC(rc, out->message(out, "resource-list", data_set, show_opts,
+ true, unames, resources, rc == pcmk_rc_ok));
+ }
+
+ /* print Node Attributes section if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_attributes)) {
+ CHECK_RC(rc, out->message(out, "node-attribute-list", data_set,
+ show_opts, rc == pcmk_rc_ok, unames, resources));
+ }
+
+ /* If requested, print resource operations (which includes failcounts)
+ * or just failcounts
+ */
+ if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) {
+ CHECK_RC(rc, out->message(out, "node-summary", data_set, unames,
+ resources, section_opts, show_opts, rc == pcmk_rc_ok));
+ }
+
+ /* If there were any failed actions, print them */
+ if (pcmk_is_set(section_opts, pcmk_section_failures)
+ && xml_has_children(data_set->failed)) {
+
+ CHECK_RC(rc, out->message(out, "failed-action-list", data_set, unames,
+ resources, show_opts, rc == pcmk_rc_ok));
+ }
+
+ /* Print failed stonith actions */
+ if (pcmk_is_set(section_opts, pcmk_section_fence_failed) &&
+ fence_history != pcmk__fence_history_none) {
+ if (history_rc == 0) {
+ stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
+ GINT_TO_POINTER(st_failed));
+
+ if (hp) {
+ CHECK_RC(rc, out->message(out, "failed-fencing-list",
+ stonith_history, unames, section_opts,
+ show_opts, rc == pcmk_rc_ok));
+ }
+ } else {
+ PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
+ out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
+ out->list_item(out, NULL, "Failed to get fencing history: %s",
+ crm_exit_str(history_rc));
+ out->end_list(out);
+
+ already_printed_failure = true;
+ }
+ }
+
+ /* Print tickets if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_tickets)) {
+ CHECK_RC(rc, out->message(out, "ticket-list", data_set, rc == pcmk_rc_ok));
+ }
+
+ /* Print negative location constraints if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_bans)) {
+ CHECK_RC(rc, out->message(out, "ban-list", data_set, prefix, resources,
+ show_opts, rc == pcmk_rc_ok));
+ }
+
+ /* Print stonith history */
+ if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) &&
+ fence_history != pcmk__fence_history_none) {
+ if (history_rc != 0) {
+ if (!already_printed_failure) {
+ PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
+ out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
+ out->list_item(out, NULL, "Failed to get fencing history: %s",
+ crm_exit_str(history_rc));
+ out->end_list(out);
+ }
+ } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) {
+ stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq,
+ GINT_TO_POINTER(st_failed));
+
+ if (hp) {
+ CHECK_RC(rc, out->message(out, "fencing-list", hp, unames,
+ section_opts, show_opts,
+ rc == pcmk_rc_ok));
+ }
+ } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) {
+ stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL);
+
+ if (hp) {
+ CHECK_RC(rc, out->message(out, "pending-fencing-list", hp,
+ unames, section_opts, show_opts,
+ rc == pcmk_rc_ok));
+ }
+ }
+ }
+
+ return rc;
+}
+
+PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *",
+ "enum pcmk_pacemakerd_state", "crm_exit_t",
+ "stonith_history_t *", "enum pcmk__fence_history", "uint32_t",
+ "uint32_t", "const char *", "GList *", "GList *")
+static int
+cluster_status_xml(pcmk__output_t *out, va_list args)
+{
+ pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
+ enum pcmk_pacemakerd_state pcmkd_state =
+ (enum pcmk_pacemakerd_state) va_arg(args, int);
+ crm_exit_t history_rc = va_arg(args, crm_exit_t);
+ stonith_history_t *stonith_history = va_arg(args, stonith_history_t *);
+ enum pcmk__fence_history fence_history = va_arg(args, int);
+ uint32_t section_opts = va_arg(args, uint32_t);
+ uint32_t show_opts = va_arg(args, uint32_t);
+ const char *prefix = va_arg(args, const char *);
+ GList *unames = va_arg(args, GList *);
+ GList *resources = va_arg(args, GList *);
+
+ out->message(out, "cluster-summary", data_set, pcmkd_state, section_opts,
+ show_opts);
+
+ /*** NODES ***/
+ if (pcmk_is_set(section_opts, pcmk_section_nodes)) {
+ out->message(out, "node-list", data_set->nodes, unames, resources,
+ show_opts, false);
+ }
+
+ /* Print resources section, if needed */
+ if (pcmk_is_set(section_opts, pcmk_section_resources)) {
+ /* XML output always displays full details. */
+ uint32_t full_show_opts = show_opts & ~pcmk_show_brief;
+
+ out->message(out, "resource-list", data_set, full_show_opts,
+ false, unames, resources, false);
+ }
+
+ /* print Node Attributes section if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_attributes)) {
+ out->message(out, "node-attribute-list", data_set, show_opts, false,
+ unames, resources);
+ }
+
+ /* If requested, print resource operations (which includes failcounts)
+ * or just failcounts
+ */
+ if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) {
+ out->message(out, "node-summary", data_set, unames,
+ resources, section_opts, show_opts, false);
+ }
+
+ /* If there were any failed actions, print them */
+ if (pcmk_is_set(section_opts, pcmk_section_failures)
+ && xml_has_children(data_set->failed)) {
+
+ out->message(out, "failed-action-list", data_set, unames, resources,
+ show_opts, false);
+ }
+
+ /* Print stonith history */
+ if (pcmk_is_set(section_opts, pcmk_section_fencing_all) &&
+ fence_history != pcmk__fence_history_none) {
+ out->message(out, "full-fencing-list", history_rc, stonith_history,
+ unames, section_opts, show_opts, false);
+ }
+
+ /* Print tickets if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_tickets)) {
+ out->message(out, "ticket-list", data_set, false);
+ }
+
+ /* Print negative location constraints if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_bans)) {
+ out->message(out, "ban-list", data_set, prefix, resources, show_opts,
+ false);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *",
+ "enum pcmk_pacemakerd_state", "crm_exit_t",
+ "stonith_history_t *", "enum pcmk__fence_history", "uint32_t",
+ "uint32_t", "const char *", "GList *", "GList *")
+static int
+cluster_status_html(pcmk__output_t *out, va_list args)
+{
+ pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
+ enum pcmk_pacemakerd_state pcmkd_state =
+ (enum pcmk_pacemakerd_state) va_arg(args, int);
+ crm_exit_t history_rc = va_arg(args, crm_exit_t);
+ stonith_history_t *stonith_history = va_arg(args, stonith_history_t *);
+ enum pcmk__fence_history fence_history = va_arg(args, int);
+ uint32_t section_opts = va_arg(args, uint32_t);
+ uint32_t show_opts = va_arg(args, uint32_t);
+ const char *prefix = va_arg(args, const char *);
+ GList *unames = va_arg(args, GList *);
+ GList *resources = va_arg(args, GList *);
+ bool already_printed_failure = false;
+
+ out->message(out, "cluster-summary", data_set, pcmkd_state, section_opts,
+ show_opts);
+
+ /*** NODE LIST ***/
+ if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) {
+ out->message(out, "node-list", data_set->nodes, unames, resources,
+ show_opts, false);
+ }
+
+ /* Print resources section, if needed */
+ if (pcmk_is_set(section_opts, pcmk_section_resources)) {
+ out->message(out, "resource-list", data_set, show_opts, true, unames,
+ resources, false);
+ }
+
+ /* print Node Attributes section if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_attributes)) {
+ out->message(out, "node-attribute-list", data_set, show_opts, false,
+ unames, resources);
+ }
+
+ /* If requested, print resource operations (which includes failcounts)
+ * or just failcounts
+ */
+ if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) {
+ out->message(out, "node-summary", data_set, unames,
+ resources, section_opts, show_opts, false);
+ }
+
+ /* If there were any failed actions, print them */
+ if (pcmk_is_set(section_opts, pcmk_section_failures)
+ && xml_has_children(data_set->failed)) {
+
+ out->message(out, "failed-action-list", data_set, unames, resources,
+ show_opts, false);
+ }
+
+ /* Print failed stonith actions */
+ if (pcmk_is_set(section_opts, pcmk_section_fence_failed) &&
+ fence_history != pcmk__fence_history_none) {
+ if (history_rc == 0) {
+ stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
+ GINT_TO_POINTER(st_failed));
+
+ if (hp) {
+ out->message(out, "failed-fencing-list", stonith_history, unames,
+ section_opts, show_opts, false);
+ }
+ } else {
+ out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
+ out->list_item(out, NULL, "Failed to get fencing history: %s",
+ crm_exit_str(history_rc));
+ out->end_list(out);
+
+ already_printed_failure = true;
+ }
+ }
+
+ /* Print stonith history */
+ if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) &&
+ fence_history != pcmk__fence_history_none) {
+ if (history_rc != 0) {
+ if (!already_printed_failure) {
+ out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
+ out->list_item(out, NULL, "Failed to get fencing history: %s",
+ crm_exit_str(history_rc));
+ out->end_list(out);
+ }
+ } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) {
+ stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq,
+ GINT_TO_POINTER(st_failed));
+
+ if (hp) {
+ out->message(out, "fencing-list", hp, unames, section_opts,
+ show_opts, false);
+ }
+ } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) {
+ stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL);
+
+ if (hp) {
+ out->message(out, "pending-fencing-list", hp, unames,
+ section_opts, show_opts, false);
+ }
+ }
+ }
+
+ /* Print tickets if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_tickets)) {
+ out->message(out, "ticket-list", data_set, false);
+ }
+
+ /* Print negative location constraints if requested */
+ if (pcmk_is_set(section_opts, pcmk_section_bans)) {
+ out->message(out, "ban-list", data_set, prefix, resources, show_opts,
+ false);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *",
+ "const char *", "const char *")
+static int
+attribute_default(pcmk__output_t *out, va_list args)
+{
+ const char *scope = va_arg(args, const char *);
+ const char *instance = va_arg(args, const char *);
+ const char *name = va_arg(args, const char *);
+ const char *value = va_arg(args, const char *);
+ const char *host = va_arg(args, const char *);
+
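+ /* Build a line of the form
+ * scope="S" id="I" name="N" host="H" value="V"
+ * omitting scope, id, and host when they are empty
+ */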
+ GString *s = g_string_sized_new(50);
+
+ if (!pcmk__str_empty(scope)) {
+ pcmk__g_strcat(s, "scope=\"", scope, "\" ", NULL);
+ }
+
+ if (!pcmk__str_empty(instance)) {
+ pcmk__g_strcat(s, "id=\"", instance, "\" ", NULL);
+ }
+
+ pcmk__g_strcat(s, "name=\"", pcmk__s(name, ""), "\" ", NULL);
+
+ if (!pcmk__str_empty(host)) {
+ pcmk__g_strcat(s, "host=\"", host, "\" ", NULL);
+ }
+
+ pcmk__g_strcat(s, "value=\"", pcmk__s(value, ""), "\"", NULL);
+
+ out->info(out, "%s", s->str);
+ g_string_free(s, TRUE);
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *",
+ "const char *", "const char *")
+static int
+attribute_xml(pcmk__output_t *out, va_list args)
+{
+ const char *scope = va_arg(args, const char *);
+ const char *instance = va_arg(args, const char *);
+ const char *name = va_arg(args, const char *);
+ const char *value = va_arg(args, const char *);
+ const char *host = va_arg(args, const char *);
+
+ xmlNodePtr node = NULL;
+
+ node = pcmk__output_create_xml_node(out, "attribute",
+ "name", name,
+ "value", value ? value : "",
+ NULL);
+
+ if (!pcmk__str_empty(scope)) {
+ crm_xml_add(node, "scope", scope);
+ }
+
+ if (!pcmk__str_empty(instance)) {
+ crm_xml_add(node, "id", instance);
+ }
+
+ if (!pcmk__str_empty(host)) {
+ crm_xml_add(node, "host", host);
+ }
+
+ return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *")
+static int
+rule_check_default(pcmk__output_t *out, va_list args)
+{
+ const char *rule_id = va_arg(args, const char *);
+ int result = va_arg(args, int);
+ const char *error = va_arg(args, const char *);
+
+ switch (result) {
+ case pcmk_rc_within_range:
+ return out->info(out, "Rule %s is still in effect", rule_id);
+ case pcmk_rc_ok:
+ return out->info(out, "Rule %s satisfies conditions", rule_id);
+ case pcmk_rc_after_range:
+ return out->info(out, "Rule %s is expired", rule_id);
+ case pcmk_rc_before_range:
+ return out->info(out, "Rule %s has not yet taken effect", rule_id);
+ case pcmk_rc_op_unsatisfied:
+ return out->info(out, "Rule %s does not satisfy conditions",
+ rule_id);
+ default:
+ out->err(out,
+ "Could not determine whether rule %s is in effect: %s",
+ rule_id, ((error != NULL)? error : "unexpected error"));
+ return pcmk_rc_ok;
+ }
+}
+
+PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *")
+static int
+rule_check_xml(pcmk__output_t *out, va_list args)
+{
+ const char *rule_id = va_arg(args, const char *);
+ int result = va_arg(args, int);
+ const char *error = va_arg(args, const char *);
+
+ char *rc_str = pcmk__itoa(pcmk_rc2exitc(result));
+
+ pcmk__output_create_xml_node(out, "rule-check",
+ "rule-id", rule_id,
+ "rc", rc_str,
+ NULL);
+ free(rc_str);
+
+ switch (result) {
+ case pcmk_rc_within_range:
+ case pcmk_rc_ok:
+ case pcmk_rc_after_range:
+ case pcmk_rc_before_range:
+ case pcmk_rc_op_unsatisfied:
+ return pcmk_rc_ok;
+ default:
+ out->err(out,
+ "Could not determine whether rule %s is in effect: %s",
+ rule_id, ((error != NULL)? error : "unexpected error"));
+ return pcmk_rc_ok;
+ }
+}
+
+PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *")
+static int
+result_code_none(pcmk__output_t *out, va_list args)
+{
+ return pcmk_rc_no_output;
+}
+
+PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *")
+static int
+result_code_text(pcmk__output_t *out, va_list args)
+{
+ int code = va_arg(args, int);
+ const char *name = va_arg(args, const char *);
+ const char *desc = va_arg(args, const char *);
+
+ static int code_width = 0;
+
+ if (out->is_quiet(out)) {
+ /* If out->is_quiet(), don't print the code. Print name and/or desc in a
+ * compact format for text output, or print nothing at all for none-type
+ * output.
+ */
+ if ((name != NULL) && (desc != NULL)) {
+ pcmk__formatted_printf(out, "%s - %s\n", name, desc);
+
+ } else if ((name != NULL) || (desc != NULL)) {
+ pcmk__formatted_printf(out, "%s\n", ((name != NULL)? name : desc));
+ }
+ return pcmk_rc_ok;
+ }
+
+ /* Get length of longest (most negative) standard Pacemaker return code
+ * This should be longer than all the values of any other type of return
+ * code.
+ */
+ if (code_width == 0) {
+ long long most_negative = pcmk_rc_error - (long long) pcmk__n_rc + 1;
+ code_width = (int) snprintf(NULL, 0, "%lld", most_negative);
+ }
+
+ if ((name != NULL) && (desc != NULL)) {
+ static int name_width = 0;
+
+ if (name_width == 0) {
+ // Get length of longest standard Pacemaker return code name
+ for (int lpc = 0; lpc < pcmk__n_rc; lpc++) {
+ int len = (int) strlen(pcmk_rc_name(pcmk_rc_error - lpc));
+ name_width = QB_MAX(name_width, len);
+ }
+ }
+ return out->info(out, "% *d: %-*s %s", code_width, code, name_width,
+ name, desc);
+ }
+
+ if ((name != NULL) || (desc != NULL)) {
+ return out->info(out, "% *d: %s", code_width, code,
+ ((name != NULL)? name : desc));
+ }
+
+ return out->info(out, "% *d", code_width, code);
+}
+
+PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *")
+static int
+result_code_xml(pcmk__output_t *out, va_list args)
+{
+ int code = va_arg(args, int);
+ const char *name = va_arg(args, const char *);
+ const char *desc = va_arg(args, const char *);
+
+ char *code_str = pcmk__itoa(code);
+
+ pcmk__output_create_xml_node(out, "result-code",
+ "code", code_str,
+ XML_ATTR_NAME, name,
+ XML_ATTR_DESC, desc,
+ NULL);
+ free(code_str);
+ return pcmk_rc_ok;
+}
+
+static pcmk__message_entry_t fmt_functions[] = {
+ { "attribute", "default", attribute_default },
+ { "attribute", "xml", attribute_xml },
+ { "cluster-status", "default", pcmk__cluster_status_text },
+ { "cluster-status", "html", cluster_status_html },
+ { "cluster-status", "xml", cluster_status_xml },
+ { "crmadmin-node", "default", crmadmin_node },
+ { "crmadmin-node", "text", crmadmin_node_text },
+ { "crmadmin-node", "xml", crmadmin_node_xml },
+ { "dc", "default", dc },
+ { "dc", "text", dc_text },
+ { "dc", "xml", dc_xml },
+ { "digests", "default", digests_text },
+ { "digests", "xml", digests_xml },
+ { "health", "default", health },
+ { "health", "text", health_text },
+ { "health", "xml", health_xml },
+ { "inject-attr", "default", inject_attr },
+ { "inject-attr", "xml", inject_attr_xml },
+ { "inject-cluster-action", "default", inject_cluster_action },
+ { "inject-cluster-action", "xml", inject_cluster_action_xml },
+ { "inject-fencing-action", "default", inject_fencing_action },
+ { "inject-fencing-action", "xml", inject_fencing_action_xml },
+ { "inject-modify-config", "default", inject_modify_config },
+ { "inject-modify-config", "xml", inject_modify_config_xml },
+ { "inject-modify-node", "default", inject_modify_node },
+ { "inject-modify-node", "xml", inject_modify_node_xml },
+ { "inject-modify-ticket", "default", inject_modify_ticket },
+ { "inject-modify-ticket", "xml", inject_modify_ticket_xml },
+ { "inject-pseudo-action", "default", inject_pseudo_action },
+ { "inject-pseudo-action", "xml", inject_pseudo_action_xml },
+ { "inject-rsc-action", "default", inject_rsc_action },
+ { "inject-rsc-action", "xml", inject_rsc_action_xml },
+ { "inject-spec", "default", inject_spec },
+ { "inject-spec", "xml", inject_spec_xml },
+ { "locations-list", "default", locations_list },
+ { "locations-list", "xml", locations_list_xml },
+ { "node-action", "default", node_action },
+ { "node-action", "xml", node_action_xml },
+ { "node-info", "default", node_info_default },
+ { "node-info", "xml", node_info_xml },
+ { "pacemakerd-health", "default", pacemakerd_health },
+ { "pacemakerd-health", "html", pacemakerd_health_html },
+ { "pacemakerd-health", "text", pacemakerd_health_text },
+ { "pacemakerd-health", "xml", pacemakerd_health_xml },
+ { "profile", "default", profile_default, },
+ { "profile", "xml", profile_xml },
+ { "result-code", "none", result_code_none },
+ { "result-code", "text", result_code_text },
+ { "result-code", "xml", result_code_xml },
+ { "rsc-action", "default", rsc_action_default },
+ { "rsc-action-item", "default", rsc_action_item },
+ { "rsc-action-item", "xml", rsc_action_item_xml },
+ { "rsc-is-colocated-with-list", "default", rsc_is_colocated_with_list },
+ { "rsc-is-colocated-with-list", "xml", rsc_is_colocated_with_list_xml },
+ { "rscs-colocated-with-list", "default", rscs_colocated_with_list },
+ { "rscs-colocated-with-list", "xml", rscs_colocated_with_list_xml },
+ { "rule-check", "default", rule_check_default },
+ { "rule-check", "xml", rule_check_xml },
+ { "locations-and-colocations", "default", locations_and_colocations },
+ { "locations-and-colocations", "xml", locations_and_colocations_xml },
+
+ { NULL, NULL, NULL }
+};
+
+void
+pcmk__register_lib_messages(pcmk__output_t *out) {
+ pcmk__register_messages(out, fmt_functions);
+}
diff --git a/lib/pacemaker/pcmk_resource.c b/lib/pacemaker/pcmk_resource.c
new file mode 100644
index 0000000..ee4c904
--- /dev/null
+++ b/lib/pacemaker/pcmk_resource.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2021-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <errno.h>
+#include <glib.h>
+#include <libxml/tree.h>
+
+#include <crm/common/mainloop.h>
+#include <crm/common/results.h>
+#include <crm/common/output_internal.h>
+#include <crm/pengine/internal.h>
+
+#include <pacemaker.h>
+#include <pacemaker-internal.h>
+
+// Search path for resource operation history (takes node name and resource ID)
+#define XPATH_OP_HISTORY "//" XML_CIB_TAG_STATUS \
+ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
+ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
+ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']"
+
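+/*!
+ * \internal
+ * \brief Find the most relevant operation history entry for a resource on a node
+ *
+ * Entries are preferred in this order: effective operations (non-recurring
+ * probes, starts, promotes, and migrations) over other operations; recurring
+ * operations over ineffective non-recurring ones; successes over failures;
+ * entries with a restart digest over those without; and newer over older.
+ *
+ * \param[in] rsc Resource whose history should be searched
+ * \param[in] node Node whose history should be searched
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Most relevant operation history entry found, or NULL if none
+ */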
+static xmlNode *
+best_op(const pe_resource_t *rsc, const pe_node_t *node,
+ pe_working_set_t *data_set)
+{
+ char *xpath = NULL;
+ xmlNode *history = NULL;
+ xmlNode *best = NULL;
+ bool best_effective_op = false;
+ guint best_interval = 0;
+ bool best_failure = false;
+ const char *best_digest = NULL;
+
+ // Find node's resource history
+ xpath = crm_strdup_printf(XPATH_OP_HISTORY, node->details->uname, rsc->id);
+ history = get_xpath_object(xpath, data_set->input, LOG_NEVER);
+ free(xpath);
+
+ // Examine each history entry
+ for (xmlNode *lrm_rsc_op = first_named_child(history, XML_LRM_TAG_RSC_OP);
+ lrm_rsc_op != NULL; lrm_rsc_op = crm_next_same_xml(lrm_rsc_op)) {
+
+ const char *digest = crm_element_value(lrm_rsc_op,
+ XML_LRM_ATTR_RESTART_DIGEST);
+ guint interval_ms = 0;
+ const char *task = crm_element_value(lrm_rsc_op, XML_LRM_ATTR_TASK);
+ bool effective_op = false;
+ bool failure = pcmk__ends_with(ID(lrm_rsc_op), "_last_failure_0");
+
+ crm_element_value_ms(lrm_rsc_op, XML_LRM_ATTR_INTERVAL, &interval_ms);
+ effective_op = interval_ms == 0
+ && pcmk__strcase_any_of(task, RSC_STATUS,
+ RSC_START, RSC_PROMOTE,
+ RSC_MIGRATED, NULL);
+
+ if (best == NULL) {
+ goto is_best;
+ }
+
+ if (best_effective_op) {
+ // Do not use an ineffective op if there's an effective one.
+ if (!effective_op) {
+ continue;
+ }
+ // Do not use an ineffective non-recurring op if there's a recurring one.
+ } else if (best_interval != 0
+ && !effective_op
+ && interval_ms == 0) {
+ continue;
+ }
+
+ // Do not use last failure if there's a successful one.
+ if (!best_failure && failure) {
+ continue;
+ }
+
+ // Do not use an op without a restart digest if there's one with.
+ if (best_digest != NULL && digest == NULL) {
+ continue;
+ }
+
+ // Do not use an older op if there's a newer one.
+ if (pe__is_newer_op(best, lrm_rsc_op, true) > 0) {
+ continue;
+ }
+
+is_best:
+ best = lrm_rsc_op;
+ best_effective_op = effective_op;
+ best_interval = interval_ms;
+ best_failure = failure;
+ best_digest = digest;
+ }
+ return best;
+}
+
+/*!
+ * \internal
+ * \brief Calculate and output resource operation digests
+ *
+ * \param[in,out] out Output object
+ * \param[in,out] rsc Resource to calculate digests for
+ * \param[in] node Node whose operation history should be used
+ * \param[in] overrides Hash table of configuration parameters to override
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__resource_digests(pcmk__output_t *out, pe_resource_t *rsc,
+ const pe_node_t *node, GHashTable *overrides)
+{
+ const char *task = NULL;
+ xmlNode *xml_op = NULL;
+ op_digest_cache_t *digests = NULL;
+ guint interval_ms = 0;
+ int rc = pcmk_rc_ok;
+
+ if ((out == NULL) || (rsc == NULL) || (node == NULL)) {
+ return EINVAL;
+ }
+ if (rsc->variant != pe_native) {
+ // Only primitives get operation digests
+ return EOPNOTSUPP;
+ }
+
+ // Find XML of operation history to use
+ xml_op = best_op(rsc, node, rsc->cluster);
+
+ // Generate an operation key
+ if (xml_op != NULL) {
+ task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
+ crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
+ }
+ if (task == NULL) { // Assume start if no history is available
+ task = RSC_START;
+ interval_ms = 0;
+ }
+
+ // Calculate and show digests
+ digests = pe__calculate_digests(rsc, task, &interval_ms, node, xml_op,
+ overrides, true, rsc->cluster);
+ rc = out->message(out, "digests", rsc, node, task, interval_ms, digests);
+
+ pe__free_digests(digests);
+ return rc;
+}
+
+int
+pcmk_resource_digests(xmlNodePtr *xml, pe_resource_t *rsc,
+ const pe_node_t *node, GHashTable *overrides,
+ pe_working_set_t *data_set)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+ pcmk__register_lib_messages(out);
+ rc = pcmk__resource_digests(out, rsc, node, overrides);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
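+
+/* Example usage (an illustrative sketch only; rsc and node would come from an
+ * unpacked working set, for example via pe_find_resource() and
+ * pe_find_node()):
+ *
+ * xmlNodePtr xml = NULL;
+ * int rc = pcmk_resource_digests(&xml, rsc, node, NULL, data_set);
+ */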
diff --git a/lib/pacemaker/pcmk_result_code.c b/lib/pacemaker/pcmk_result_code.c
new file mode 100644
index 0000000..4f50276
--- /dev/null
+++ b/lib/pacemaker/pcmk_result_code.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/cib/internal.h>
+#include <crm/crm.h>
+
+#include <pacemaker.h>
+#include <pacemaker-internal.h>
+
+#include <inttypes.h> // PRIx32
+#include <stdint.h> // uint32_t
+
+/*!
+ * \internal
+ * \brief Display the name and/or description of a result code
+ *
+ * \param[in,out] out Output object
+ * \param[in] code The result code
+ * \param[in] type Interpret \c code as this type of result code.
+ * Supported values: \c pcmk_result_legacy,
+ * \c pcmk_result_rc, \c pcmk_result_exitcode.
+ * \param[in] flags Group of \c pcmk_rc_disp_flags
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__show_result_code(pcmk__output_t *out, int code,
+ enum pcmk_result_type type, uint32_t flags)
+{
+ int rc = pcmk_rc_ok;
+ bool quiet_orig = out->quiet;
+ const char *name = NULL;
+ const char *desc = NULL;
+
+ rc = pcmk_result_get_strings(code, type, &name, &desc);
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "Error looking up result code %d", code);
+ return rc;
+ }
+
+ // out->quiet controls whether the code is shown (if quiet is supported)
+ out->quiet = !pcmk_is_set(flags, pcmk_rc_disp_code);
+
+ out->message(out, "result-code", code,
+ pcmk_is_set(flags, pcmk_rc_disp_name)? name : NULL,
+ pcmk_is_set(flags, pcmk_rc_disp_desc)? desc : NULL);
+ out->quiet = quiet_orig;
+
+ return rc;
+}
+
+// Documented in header
+int
+pcmk_show_result_code(xmlNodePtr *xml, int code, enum pcmk_result_type type,
+ uint32_t flags)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__show_result_code(out, code, type, flags);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
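+
+/* Example usage (an illustrative sketch only):
+ *
+ * xmlNodePtr xml = NULL;
+ *
+ * // Show the name and description of the standard return code ENOMEM
+ * int rc = pcmk_show_result_code(&xml, ENOMEM, pcmk_result_rc,
+ * pcmk_rc_disp_name|pcmk_rc_disp_desc);
+ */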
+
+/*!
+ * \internal
+ * \brief List all valid result codes in a particular family
+ *
+ * \param[in,out] out Output object
+ * \param[in] type The family of result codes to list. Supported
+ * values: \c pcmk_result_legacy, \c pcmk_result_rc,
+ * \c pcmk_result_exitcode.
+ * \param[in] flags Group of \c pcmk_rc_disp_flags
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__list_result_codes(pcmk__output_t *out, enum pcmk_result_type type,
+ uint32_t flags)
+{
+ int rc = pcmk_rc_ok;
+ int start = 0;
+ int end = 0;
+ int code = 0;
+
+ bool quiet_orig = out->quiet;
+ const char *name = NULL;
+ const char *desc = NULL;
+
+ rc = pcmk__result_bounds(type, &start, &end);
+ if (rc != pcmk_rc_ok) {
+ out->err(out,
+ "Failed to get result code bounds for result code type "
+ "%#010" PRIx32, (uint32_t) type);
+ return rc;
+ }
+
+ code = start;
+ while (code <= end) {
+ int local_rc = pcmk_rc_ok;
+
+ if (code == (pcmk_rc_error + 1)) {
+ /* Values between pcmk_rc_error and pcmk_rc_ok are reserved for
+ * callers, so skip them
+ */
+ code = pcmk_rc_ok;
+ continue;
+ }
+
+ // Shouldn't affect the return code of the whole list operation
+ local_rc = pcmk_result_get_strings(code, type, &name, &desc);
+
+ if ((local_rc != pcmk_rc_ok) || (name == NULL)
+ || pcmk__str_any_of(name, "Unknown", "CRM_EX_UNKNOWN", NULL)) {
+
+ code++;
+ continue;
+ }
+
+ // out->quiet controls whether the code is shown (if quiet is supported)
+ out->quiet = !pcmk_is_set(flags, pcmk_rc_disp_code);
+
+ out->message(out, "result-code", code,
+ pcmk_is_set(flags, pcmk_rc_disp_name)? name : NULL,
+ pcmk_is_set(flags, pcmk_rc_disp_desc)? desc : NULL);
+ out->quiet = quiet_orig;
+
+ code++;
+ }
+
+ return rc;
+}
+
+// Documented in header
+int
+pcmk_list_result_codes(xmlNodePtr *xml, enum pcmk_result_type type,
+ uint32_t flags)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__list_result_codes(out, type, flags);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
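+
+/* Example usage (an illustrative sketch only):
+ *
+ * xmlNodePtr xml = NULL;
+ *
+ * // List all standard Pacemaker return codes with names and descriptions
+ * int rc = pcmk_list_result_codes(&xml, pcmk_result_rc,
+ * pcmk_rc_disp_code|pcmk_rc_disp_name
+ * |pcmk_rc_disp_desc);
+ */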
diff --git a/lib/pacemaker/pcmk_rule.c b/lib/pacemaker/pcmk_rule.c
new file mode 100644
index 0000000..b8ca453
--- /dev/null
+++ b/lib/pacemaker/pcmk_rule.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright 2022-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/cib/internal.h>
+#include <crm/common/cib.h>
+#include <crm/common/iso8601.h>
+#include <crm/msg_xml.h>
+#include <crm/pengine/rules_internal.h>
+#include <pacemaker-internal.h>
+
+/*!
+ * \internal
+ * \brief Evaluate a date expression for a specific time
+ *
+ * \param[in] expr date_expression XML
+ * \param[in] now Time for which to evaluate expression
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+eval_date_expression(const xmlNode *expr, crm_time_t *now)
+{
+ pe_rule_eval_data_t rule_data = {
+ .node_hash = NULL,
+ .role = RSC_ROLE_UNKNOWN,
+ .now = now,
+ .match_data = NULL,
+ .rsc_data = NULL,
+ .op_data = NULL
+ };
+
+ return pe__eval_date_expr(expr, &rule_data, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Initialize the cluster working set for checking rules
+ *
+ * Make our own copies of the CIB XML and date/time object, if they're not
+ * \c NULL. This way we don't have to take ownership of the objects passed via
+ * the API.
+ *
+ * \param[in,out] out Output object
+ * \param[in] input The CIB XML to check (if \c NULL, use current CIB)
+ * \param[in] date Check whether the rule is in effect at this date
+ * and time (if \c NULL, use current date and time)
+ * \param[out] data_set Where to store the cluster working set
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+init_rule_check(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date,
+ pe_working_set_t **data_set)
+{
+ // Allows for cleaner syntax than dereferencing the data_set argument
+ pe_working_set_t *new_data_set = NULL;
+
+ new_data_set = pe_new_working_set();
+ if (new_data_set == NULL) {
+ return ENOMEM;
+ }
+
+ pe__set_working_set_flags(new_data_set,
+ pe_flag_no_counts|pe_flag_no_compat);
+
+ // Populate the working set instance
+
+ // Make our own copy of the given input or fetch the CIB and use that
+ if (input != NULL) {
+ new_data_set->input = copy_xml(input);
+ if (new_data_set->input == NULL) {
+ out->err(out, "Failed to copy input XML");
+ pe_free_working_set(new_data_set);
+ return ENOMEM;
+ }
+
+ } else {
+ int rc = cib__signon_query(out, NULL, &(new_data_set->input));
+
+ if (rc != pcmk_rc_ok) {
+ pe_free_working_set(new_data_set);
+ return rc;
+ }
+ }
+
+ // Make our own copy of the given crm_time_t object; otherwise
+ // cluster_status() populates with the current time
+ if (date != NULL) {
+ // pcmk_copy_time() guarantees non-NULL
+ new_data_set->now = pcmk_copy_time(date);
+ }
+
+ // Unpack everything
+ cluster_status(new_data_set);
+ *data_set = new_data_set;
+
+ return pcmk_rc_ok;
+}
+
+#define XPATH_NODE_RULE "//" XML_TAG_RULE "[@" XML_ATTR_ID "='%s']"
+
+/*!
+ * \internal
+ * \brief Check whether a given rule is in effect
+ *
+ * \param[in] data_set Cluster working set
+ * \param[in] rule_id The ID of the rule to check
+ * \param[out] error Where to store a rule evaluation error message
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+eval_rule(pe_working_set_t *data_set, const char *rule_id, const char **error)
+{
+ xmlNodePtr cib_constraints = NULL;
+ xmlNodePtr match = NULL;
+ xmlXPathObjectPtr xpath_obj = NULL;
+ char *xpath = NULL;
+ int rc = pcmk_rc_ok;
+ int num_results = 0;
+
+ *error = NULL;
+
+ /* Rules are under the constraints node in the XML, so first find that. */
+ cib_constraints = pcmk_find_cib_element(data_set->input,
+ XML_CIB_TAG_CONSTRAINTS);
+
+ /* Get all rules matching the given ID that are also simple enough for us
+ * to check. For the moment, these rules must only have a single
+ * date_expression child and:
+ * - Do not have a date_spec operation, or
+ * - Have a date_spec operation that contains years= but does not contain
+ * moon=.
+ *
+ * We do this in steps to provide better error messages. First, check that
+ * there's any rule with the given ID.
+ */
+ xpath = crm_strdup_printf(XPATH_NODE_RULE, rule_id);
+ xpath_obj = xpath_search(cib_constraints, xpath);
+ num_results = numXpathResults(xpath_obj);
+
+ free(xpath);
+ freeXpathObject(xpath_obj);
+
+ if (num_results == 0) {
+ *error = "Rule not found";
+ return ENXIO;
+ }
+
+ if (num_results > 1) {
+ // Should not be possible; schema prevents this
+ *error = "Found more than one rule with matching ID";
+ return pcmk_rc_duplicate_id;
+ }
+
+ /* Next, make sure it has exactly one date_expression. */
+ xpath = crm_strdup_printf(XPATH_NODE_RULE "//date_expression", rule_id);
+ xpath_obj = xpath_search(cib_constraints, xpath);
+ num_results = numXpathResults(xpath_obj);
+
+ free(xpath);
+ freeXpathObject(xpath_obj);
+
+ if (num_results != 1) {
+ if (num_results == 0) {
+ *error = "Rule does not have a date expression";
+ } else {
+ *error = "Rule has more than one date expression";
+ }
+ return EOPNOTSUPP;
+ }
+
+ /* Then, check that it's something we actually support. */
+ xpath = crm_strdup_printf(XPATH_NODE_RULE "//date_expression["
+ "@" XML_EXPR_ATTR_OPERATION "!='date_spec']",
+ rule_id);
+ xpath_obj = xpath_search(cib_constraints, xpath);
+ num_results = numXpathResults(xpath_obj);
+
+ free(xpath);
+
+ if (num_results == 0) {
+ freeXpathObject(xpath_obj);
+
+ xpath = crm_strdup_printf(XPATH_NODE_RULE "//date_expression["
+ "@" XML_EXPR_ATTR_OPERATION "='date_spec' "
+ "and date_spec/@years "
+ "and not(date_spec/@moon)]", rule_id);
+ xpath_obj = xpath_search(cib_constraints, xpath);
+ num_results = numXpathResults(xpath_obj);
+
+ free(xpath);
+
+ if (num_results == 0) {
+ freeXpathObject(xpath_obj);
+ *error = "Rule must either not use date_spec, or use date_spec "
+ "with years= but not moon=";
+ return EOPNOTSUPP;
+ }
+ }
+
+ match = getXpathResult(xpath_obj, 0);
+
+ /* We should have ensured this with the xpath query above, but double-
+ * checking can't hurt.
+ */
+ CRM_ASSERT(match != NULL);
+ CRM_ASSERT(find_expression_type(match) == time_expr);
+
+ rc = eval_date_expression(match, data_set->now);
+ if (rc == pcmk_rc_undetermined) {
+ /* pe__eval_date_expr() should return this only if something is
+ * malformed or missing
+ */
+ *error = "Error parsing rule";
+ }
+
+ freeXpathObject(xpath_obj);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Check whether each rule in a list is in effect
+ *
+ * \param[in,out] out Output object
+ * \param[in] input The CIB XML to check (if \c NULL, use current CIB)
+ * \param[in] date Check whether the rule is in effect at this date and
+ * time (if \c NULL, use current date and time)
+ * \param[in] rule_ids The IDs of the rules to check, as a <tt>NULL</tt>-
+ * terminated list.
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__check_rules(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date,
+ const char **rule_ids)
+{
+ pe_working_set_t *data_set = NULL;
+ int rc = pcmk_rc_ok;
+
+ CRM_ASSERT(out != NULL);
+
+ if (rule_ids == NULL) {
+ // Trivial case; every rule specified is in effect
+ return pcmk_rc_ok;
+ }
+
+ rc = init_rule_check(out, input, date, &data_set);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ for (const char **rule_id = rule_ids; *rule_id != NULL; rule_id++) {
+ const char *error = NULL;
+ int last_rc = eval_rule(data_set, *rule_id, &error);
+
+ out->message(out, "rule-check", *rule_id, last_rc, error);
+
+ if (last_rc != pcmk_rc_ok) {
+ rc = last_rc;
+ }
+ }
+
+ pe_free_working_set(data_set);
+ return rc;
+}
+
+// Documented in pacemaker.h
+int
+pcmk_check_rules(xmlNodePtr *xml, xmlNodePtr input, const crm_time_t *date,
+ const char **rule_ids)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__check_rules(out, input, date, rule_ids);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
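+
+/* Example usage (an illustrative sketch only; "my-rule" is a hypothetical
+ * rule ID):
+ *
+ * xmlNodePtr xml = NULL;
+ * const char *rule_ids[] = { "my-rule", NULL };
+ *
+ * // Check the rule against the current CIB at the current date and time
+ * int rc = pcmk_check_rules(&xml, NULL, NULL, rule_ids);
+ */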
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
new file mode 100644
index 0000000..06d7f00
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_actions.c
@@ -0,0 +1,1919 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <sys/param.h>
+#include <glib.h>
+
+#include <crm/lrmd_internal.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Get the action flags relevant to ordering constraints
+ *
+ * \param[in,out] action Action to check
+ * \param[in] node Node that *other* action in the ordering is on
+ * (used only for clone resource actions)
+ *
+ * \return Action flags that should be used for orderings
+ */
+static enum pe_action_flags
+action_flags_for_ordering(pe_action_t *action, const pe_node_t *node)
+{
+ bool runnable = false;
+ enum pe_action_flags flags;
+
+ // For non-resource actions, return the action flags
+ if (action->rsc == NULL) {
+ return action->flags;
+ }
+
+ /* For non-clone resources, or a clone action not assigned to a node,
+ * return the flags as determined by the resource method without a node
+ * specified.
+ */
+ flags = action->rsc->cmds->action_flags(action, NULL);
+ if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) {
+ return flags;
+ }
+
+ /* Otherwise (i.e., for clone resource actions on a specific node), first
+ * remember whether the non-node-specific action is runnable.
+ */
+ runnable = pcmk_is_set(flags, pe_action_runnable);
+
+ // Then recheck the resource method with the node
+ flags = action->rsc->cmds->action_flags(action, node);
+
+ /* For clones in ordering constraints, the node-specific "runnable" doesn't
+ * matter, just the non-node-specific setting (i.e., is the action runnable
+ * anywhere).
+ *
+ * This applies only to runnable, and only for ordering constraints. This
+ * function shouldn't be used for other types of constraints without
+ * changes. Not very satisfying, but it's logical and appears to work well.
+ */
+ if (runnable && !pcmk_is_set(flags, pe_action_runnable)) {
+ pe__set_raw_action_flags(flags, action->rsc->id,
+ pe_action_runnable);
+ }
+ return flags;
+}
+
+/*!
+ * \internal
+ * \brief Get action UUID that should be used with a resource ordering
+ *
+ * When an action is ordered relative to an action for a collective resource
+ * (clone, group, or bundle), it actually needs to be ordered after all
+ * instances of the collective have completed the relevant action (for example,
+ * given "start CLONE then start RSC", RSC must wait until all instances of
+ * CLONE have started). Given the UUID and resource of the first action in an
+ * ordering, this returns the UUID of the action that should actually be used
+ * for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0").
+ *
+ * \param[in] first_uuid UUID of first action in ordering
+ * \param[in] first_rsc Resource of first action in ordering
+ *
+ * \return Newly allocated copy of UUID to use with ordering
+ * \note It is the caller's responsibility to free the return value.
+ */
+static char *
+action_uuid_for_ordering(const char *first_uuid, const pe_resource_t *first_rsc)
+{
+ guint interval_ms = 0;
+ char *uuid = NULL;
+ char *rid = NULL;
+ char *first_task_str = NULL;
+ enum action_tasks first_task = no_action;
+ enum action_tasks remapped_task = no_action;
+
+ // Only non-notify actions for collective resources need remapping
+ if ((strstr(first_uuid, "notify") != NULL)
+ || (first_rsc->variant < pe_group)) {
+ goto done;
+ }
+
+ // Only non-recurring actions need remapping
+ CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms));
+ if (interval_ms > 0) {
+ goto done;
+ }
+
+ first_task = text2task(first_task_str);
+ switch (first_task) {
+ case stop_rsc:
+ case start_rsc:
+ case action_notify:
+ case action_promote:
+ case action_demote:
+ remapped_task = first_task + 1;
+ break;
+ case stopped_rsc:
+ case started_rsc:
+ case action_notified:
+ case action_promoted:
+ case action_demoted:
+ remapped_task = first_task;
+ break;
+ case monitor_rsc:
+ case shutdown_crm:
+ case stonith_node:
+ break;
+ default:
+ crm_err("Unknown action '%s' in ordering", first_task_str);
+ break;
+ }
+
+ if (remapped_task != no_action) {
+ /* If a (clone) resource has notifications enabled, we want to order
+ * relative to when all notifications have been sent for the remapped
+ * task. Only outermost resources or those in bundles have
+ * notifications.
+ */
+ if (pcmk_is_set(first_rsc->flags, pe_rsc_notify)
+ && ((first_rsc->parent == NULL)
+ || (pe_rsc_is_clone(first_rsc)
+ && (first_rsc->parent->variant == pe_container)))) {
+ uuid = pcmk__notify_key(rid, "confirmed-post",
+ task2text(remapped_task));
+ } else {
+ uuid = pcmk__op_key(rid, task2text(remapped_task), 0);
+ }
+ pe_rsc_trace(first_rsc,
+ "Remapped action UUID %s to %s for ordering purposes",
+ first_uuid, uuid);
+ }
+
+done:
+ if (uuid == NULL) {
+ uuid = strdup(first_uuid);
+ CRM_ASSERT(uuid != NULL);
+ }
+ free(first_task_str);
+ free(rid);
+ return uuid;
+}
+
+/*!
+ * \internal
+ * \brief Get actual action that should be used with an ordering
+ *
+ * When an action is ordered relative to an action for a collective resource
+ * (clone, group, or bundle), it actually needs to be ordered after all
+ * instances of the collective have completed the relevant action (for example,
+ * given "start CLONE then start RSC", RSC must wait until all instances of
+ * CLONE have started). Given the first action in an ordering, this returns
+ * the action that should actually be used for ordering (for example, the
+ * started action instead of the start action).
+ *
+ * \param[in] action First action in an ordering
+ *
+ * \return Actual action that should be used for the ordering
+ */
+static pe_action_t *
+action_for_ordering(pe_action_t *action)
+{
+ pe_action_t *result = action;
+ pe_resource_t *rsc = action->rsc;
+
+ if ((rsc != NULL) && (rsc->variant >= pe_group) && (action->uuid != NULL)) {
+ char *uuid = action_uuid_for_ordering(action->uuid, rsc);
+
+ result = find_first_action(rsc->actions, uuid, NULL, NULL);
+ if (result == NULL) {
+ crm_warn("Not remapping %s to %s because %s does not have "
+ "remapped action", action->uuid, uuid, rsc->id);
+ result = action;
+ }
+ free(uuid);
+ }
+ return result;
+}
+
+/*!
+ * \internal
+ * \brief Update flags of an ordering's actions as appropriate for its flags
+ *
+ * \param[in,out] first First action in an ordering
+ * \param[in,out] then Then action in an ordering
+ * \param[in] first_flags Action flags for \p first for ordering purposes
+ * \param[in] then_flags Action flags for \p then for ordering purposes
+ * \param[in,out] order Action wrapper for \p first in ordering
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Group of enum pcmk__updated flags
+ */
+static uint32_t
+update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then,
+ enum pe_action_flags first_flags,
+ enum pe_action_flags then_flags,
+ pe_action_wrapper_t *order,
+ pe_working_set_t *data_set)
+{
+ uint32_t changed = pcmk__updated_none;
+
+ /* The node will only be used for clones. If interleaved, node will be NULL,
+ * otherwise the ordering scope will be limited to the node. Normally, the
+ * whole 'then' clone should restart if 'first' is restarted, so then->node
+ * is needed.
+ */
+ pe_node_t *node = then->node;
+
+ if (pcmk_is_set(order->type, pe_order_implies_then_on_node)) {
+ /* For unfencing, only instances of 'then' on the same node as 'first'
+ * (the unfencing operation) should restart, so reset node to
+ * first->node, at which point this case is handled like a normal
+ * pe_order_implies_then.
+ */
+ pe__clear_order_flags(order->type, pe_order_implies_then_on_node);
+ pe__set_order_flags(order->type, pe_order_implies_then);
+ node = first->node;
+ pe_rsc_trace(then->rsc,
+ "%s then %s: mapped pe_order_implies_then_on_node to "
+ "pe_order_implies_then on %s",
+ first->uuid, then->uuid, pe__node_name(node));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_implies_then)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags & pe_action_optional,
+ pe_action_optional,
+ pe_order_implies_then,
+ data_set);
+ } else if (!pcmk_is_set(first_flags, pe_action_optional)
+ && pcmk_is_set(then->flags, pe_action_optional)) {
+ pe__clear_action_flags(then, pe_action_optional);
+ pcmk__set_updated_flags(changed, first, pcmk__updated_then);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_then",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_restart) && (then->rsc != NULL)) {
+ enum pe_action_flags restart = pe_action_optional|pe_action_runnable;
+
+ changed |= then->rsc->cmds->update_ordered_actions(first, then, node,
+ first_flags, restart,
+ pe_order_restart,
+ data_set);
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_restart",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_implies_first)) {
+ if (first->rsc != NULL) {
+ changed |= first->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_optional,
+ pe_order_implies_first,
+ data_set);
+ } else if (!pcmk_is_set(first_flags, pe_action_optional)
+ && pcmk_is_set(first->flags, pe_action_runnable)) {
+ pe__clear_action_flags(first, pe_action_runnable);
+ pcmk__set_updated_flags(changed, first, pcmk__updated_first);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_first",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_promoted_implies_first)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags & pe_action_optional,
+ pe_action_optional,
+ pe_order_promoted_implies_first,
+ data_set);
+ }
+ pe_rsc_trace(then->rsc,
+ "%s then %s: %s after pe_order_promoted_implies_first",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_one_or_more)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_runnable,
+ pe_order_one_or_more,
+ data_set);
+
+ } else if (pcmk_is_set(first_flags, pe_action_runnable)) {
+ // We have another runnable instance of "first"
+ then->runnable_before++;
+
+ /* Mark "then" as runnable if it requires a certain number of
+ * "before" instances to be runnable, and they now are.
+ */
+ if ((then->runnable_before >= then->required_runnable_before)
+ && !pcmk_is_set(then->flags, pe_action_runnable)) {
+
+ pe__set_action_flags(then, pe_action_runnable);
+ pcmk__set_updated_flags(changed, first, pcmk__updated_then);
+ }
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_probe) && (then->rsc != NULL)) {
+ if (!pcmk_is_set(first_flags, pe_action_runnable)
+ && (first->rsc->running_on != NULL)) {
+
+ pe_rsc_trace(then->rsc,
+ "%s then %s: ignoring because first is stopping",
+ first->uuid, then->uuid);
+ order->type = pe_order_none;
+ } else {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_runnable,
+ pe_order_runnable_left,
+ data_set);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_probe",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_runnable_left)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_runnable,
+ pe_order_runnable_left,
+ data_set);
+
+ } else if (!pcmk_is_set(first_flags, pe_action_runnable)
+ && pcmk_is_set(then->flags, pe_action_runnable)) {
+
+ pe__clear_action_flags(then, pe_action_runnable);
+ pcmk__set_updated_flags(changed, first, pcmk__updated_then);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_runnable_left",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_implies_first_migratable)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_optional,
+ pe_order_implies_first_migratable,
+ data_set);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after "
+ "pe_order_implies_first_migratable",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_pseudo_left)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_optional,
+ pe_order_pseudo_left,
+ data_set);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_pseudo_left",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_optional)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_runnable,
+ pe_order_optional,
+ data_set);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_optional",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(order->type, pe_order_asymmetrical)) {
+ if (then->rsc != NULL) {
+ changed |= then->rsc->cmds->update_ordered_actions(first, then,
+ node,
+ first_flags,
+ pe_action_runnable,
+ pe_order_asymmetrical,
+ data_set);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ if (pcmk_is_set(first->flags, pe_action_runnable)
+ && pcmk_is_set(order->type, pe_order_implies_then_printed)
+ && !pcmk_is_set(first_flags, pe_action_optional)) {
+
+ pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
+ then->uuid, first->uuid);
+ pe__set_action_flags(then, pe_action_print_always);
+ // Don't bother marking 'then' as changed just for this
+ }
+
+ if (pcmk_is_set(order->type, pe_order_implies_first_printed)
+ && !pcmk_is_set(then_flags, pe_action_optional)) {
+
+ pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
+ first->uuid, then->uuid);
+ pe__set_action_flags(first, pe_action_print_always);
+ // Don't bother marking 'first' as changed just for this
+ }
+
+ if (pcmk_any_flags_set(order->type, pe_order_implies_then
+ |pe_order_implies_first
+ |pe_order_restart)
+ && (first->rsc != NULL)
+ && !pcmk_is_set(first->rsc->flags, pe_rsc_managed)
+ && pcmk_is_set(first->rsc->flags, pe_rsc_block)
+ && !pcmk_is_set(first->flags, pe_action_runnable)
+ && pcmk__str_eq(first->task, RSC_STOP, pcmk__str_casei)) {
+
+ if (pcmk_is_set(then->flags, pe_action_runnable)) {
+ pe__clear_action_flags(then, pe_action_runnable);
+ pcmk__set_updated_flags(changed, first, pcmk__updated_then);
+ }
+ pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first "
+ "is blocked, unmanaged, unrunnable stop",
+ first->uuid, then->uuid,
+ (changed? "changed" : "unchanged"));
+ }
+
+ return changed;
+}
+
+// Convenience macros for logging action properties
+
+#define action_type_str(flags) \
+ (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
+
+#define action_optional_str(flags) \
+ (pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
+
+#define action_runnable_str(flags) \
+ (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
+
+#define action_node_str(a) \
+ (((a)->node == NULL)? "no node" : (a)->node->details->uname)
+
+/*!
+ * \internal
+ * \brief Update an action's flags for all orderings where it is "then"
+ *
+ * \param[in,out] then Action to update
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set)
+{
+ GList *lpc = NULL;
+ uint32_t changed = pcmk__updated_none;
+ int last_flags = then->flags;
+
+ pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s",
+ action_type_str(then->flags), then->uuid,
+ action_optional_str(then->flags),
+ action_runnable_str(then->flags), action_node_str(then));
+
+ if (pcmk_is_set(then->flags, pe_action_requires_any)) {
+ /* Initialize current known "runnable before" actions. As
+ * update_action_for_ordering_flags() is called for each of then's
+ * before actions, this number will increment as runnable 'first'
+ * actions are encountered.
+ */
+ then->runnable_before = 0;
+
+ if (then->required_runnable_before == 0) {
+ /* @COMPAT This ordering constraint uses the deprecated
+ * "require-all=false" attribute. Treat it like "clone-min=1".
+ */
+ then->required_runnable_before = 1;
+ }
+
+ /* The pe_order_one_or_more clause of update_action_for_ordering_flags()
+ * (called below) will reset runnable if appropriate.
+ */
+ pe__clear_action_flags(then, pe_action_runnable);
+ }
+
+ for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
+ pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
+ pe_action_t *first = other->action;
+
+ pe_node_t *then_node = then->node;
+ pe_node_t *first_node = first->node;
+
+ if ((first->rsc != NULL)
+ && (first->rsc->variant == pe_group)
+ && pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) {
+
+ first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
+ if (first_node != NULL) {
+ pe_rsc_trace(first->rsc, "Found %s for 'first' %s",
+ pe__node_name(first_node), first->uuid);
+ }
+ }
+
+ if ((then->rsc != NULL)
+ && (then->rsc->variant == pe_group)
+ && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) {
+
+ then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
+ if (then_node != NULL) {
+ pe_rsc_trace(then->rsc, "Found %s for 'then' %s",
+ pe__node_name(then_node), then->uuid);
+ }
+ }
+
+ // Disable constraint if it only applies when on same node, but isn't
+ if (pcmk_is_set(other->type, pe_order_same_node)
+ && (first_node != NULL) && (then_node != NULL)
+ && (first_node->details != then_node->details)) {
+
+ pe_rsc_trace(then->rsc,
+ "Disabled ordering %s on %s then %s on %s: not same node",
+ other->action->uuid, pe__node_name(first_node),
+ then->uuid, pe__node_name(then_node));
+ other->type = pe_order_none;
+ continue;
+ }
+
+ pcmk__clear_updated_flags(changed, then, pcmk__updated_first);
+
+ if ((first->rsc != NULL)
+ && pcmk_is_set(other->type, pe_order_then_cancels_first)
+ && !pcmk_is_set(then->flags, pe_action_optional)) {
+
+ /* 'then' is required, so we must abandon 'first'
+ * (e.g. a required stop cancels any agent reload).
+ */
+ pe__set_action_flags(other->action, pe_action_optional);
+ if (!strcmp(first->task, CRMD_ACTION_RELOAD_AGENT)) {
+ pe__clear_resource_flags(first->rsc, pe_rsc_reload);
+ }
+ }
+
+ if ((first->rsc != NULL) && (then->rsc != NULL)
+ && (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) {
+ first = action_for_ordering(first);
+ }
+ if (first != other->action) {
+ pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s",
+ then->uuid, first->uuid, other->action->uuid);
+ }
+
+ pe_rsc_trace(then->rsc,
+ "%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s",
+ first->uuid, first->flags, then->uuid, then->flags,
+ other->type, action_node_str(first));
+
+ if (first == other->action) {
+ /* 'first' was not remapped (e.g. from 'start' to 'running'), which
+ * could mean it is a non-resource action, a primitive resource
+ * action, or already expanded.
+ */
+ enum pe_action_flags first_flags, then_flags;
+
+ first_flags = action_flags_for_ordering(first, then_node);
+ then_flags = action_flags_for_ordering(then, first_node);
+
+ changed |= update_action_for_ordering_flags(first, then,
+ first_flags, then_flags,
+ other, data_set);
+
+ /* 'first' was for a complex resource (clone, group, etc.), so
+ * create a new dependency if necessary
+ */
+ } else if (order_actions(first, then, other->type)) {
+ /* This was the first time 'first' and 'then' were associated,
+ * start again to get the new actions_before list
+ */
+ pcmk__set_updated_flags(changed, then, pcmk__updated_then);
+ pe_rsc_trace(then->rsc,
+ "Disabled ordering %s then %s in favor of %s then %s",
+ other->action->uuid, then->uuid, first->uuid,
+ then->uuid);
+ other->type = pe_order_none;
+ }
+
+ if (pcmk_is_set(changed, pcmk__updated_first)) {
+ crm_trace("Re-processing %s and its 'after' actions "
+ "because it changed", first->uuid);
+ for (GList *lpc2 = first->actions_after; lpc2 != NULL;
+ lpc2 = lpc2->next) {
+ pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data;
+
+ pcmk__update_action_for_orderings(other->action, data_set);
+ }
+ pcmk__update_action_for_orderings(first, data_set);
+ }
+ }
+
+ if (pcmk_is_set(then->flags, pe_action_requires_any)) {
+ if (last_flags == then->flags) {
+ pcmk__clear_updated_flags(changed, then, pcmk__updated_then);
+ } else {
+ pcmk__set_updated_flags(changed, then, pcmk__updated_then);
+ }
+ }
+
+ if (pcmk_is_set(changed, pcmk__updated_then)) {
+ crm_trace("Re-processing %s and its 'after' actions because it changed",
+ then->uuid);
+ if (pcmk_is_set(last_flags, pe_action_runnable)
+ && !pcmk_is_set(then->flags, pe_action_runnable)) {
+ pcmk__block_colocation_dependents(then, data_set);
+ }
+ pcmk__update_action_for_orderings(then, data_set);
+ for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
+ pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
+
+ pcmk__update_action_for_orderings(other->action, data_set);
+ }
+ }
+}
+
+static inline bool
+is_primitive_action(const pe_action_t *action)
+{
+ return action && action->rsc && (action->rsc->variant == pe_native);
+}
+
+/*!
+ * \internal
+ * \brief Clear a single action flag and set reason text
+ *
+ * \param[in,out] action Action whose flag should be cleared
+ * \param[in] flag Action flag that should be cleared
+ * \param[in] reason Action that is the reason why flag is being cleared
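+ *
+ * \note The reason text is set on \p action only when \p action and
+ * \p reason are for different resources.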
+ */
+#define clear_action_flag_because(action, flag, reason) do { \
+ if (pcmk_is_set((action)->flags, (flag))) { \
+ pe__clear_action_flags(action, flag); \
+ if ((action)->rsc != (reason)->rsc) { \
+ char *reason_text = pe__action2reason((reason), (flag)); \
+ pe_action_set_reason((action), reason_text, \
+ ((flag) == pe_action_migrate_runnable)); \
+ free(reason_text); \
+ } \
+ } \
+ } while (0)
+
+/*!
+ * \internal
+ * \brief Update actions in an asymmetric ordering
+ *
+ * If the "first" action in an asymmetric ordering is unrunnable, make the
+ * "second" action unrunnable as well, if appropriate.
+ *
+ * \param[in] first 'First' action in an asymmetric ordering
+ * \param[in,out] then 'Then' action in an asymmetric ordering
+ */
+static void
+handle_asymmetric_ordering(const pe_action_t *first, pe_action_t *then)
+{
+ /* Only resource actions after an unrunnable 'first' action need updates for
+ * asymmetric ordering.
+ */
+ if ((then->rsc == NULL) || pcmk_is_set(first->flags, pe_action_runnable)) {
+ return;
+ }
+
+ // Certain optional 'then' actions are unaffected by unrunnable 'first'
+ if (pcmk_is_set(then->flags, pe_action_optional)) {
+ enum rsc_role_e then_rsc_role = then->rsc->fns->state(then->rsc, TRUE);
+
+ if ((then_rsc_role == RSC_ROLE_STOPPED)
+ && pcmk__str_eq(then->task, RSC_STOP, pcmk__str_none)) {
+ /* If 'then' should stop after 'first' but is already stopped, the
+ * ordering is irrelevant.
+ */
+ return;
+ } else if ((then_rsc_role >= RSC_ROLE_STARTED)
+ && pcmk__str_eq(then->task, RSC_START, pcmk__str_none)
+ && pe__rsc_running_on_only(then->rsc, then->node)) {
+ /* Similarly if 'then' should start after 'first' but is already
+ * started on a single node.
+ */
+ return;
+ }
+ }
+
+ // 'First' can't run, so 'then' can't either
+ clear_action_flag_because(then, pe_action_optional, first);
+ clear_action_flag_because(then, pe_action_runnable, first);
+}
+
+/*!
+ * \internal
+ * \brief Set action bits appropriately when pe_restart_order is used
+ *
+ * \param[in,out] first 'First' action in an ordering with pe_restart_order
+ * \param[in,out] then 'Then' action in an ordering with pe_restart_order
+ * \param[in] filter What action flags to care about
+ *
+ * \note pe_restart_order is set for "stop resource before starting it" and
+ * "stop later group member before stopping earlier group member"
+ */
+static void
+handle_restart_ordering(pe_action_t *first, pe_action_t *then, uint32_t filter)
+{
+ const char *reason = NULL;
+
+ CRM_ASSERT(is_primitive_action(first));
+ CRM_ASSERT(is_primitive_action(then));
+
+ // We need to update the action in two cases:
+
+ // ... if 'then' is required
+ if (pcmk_is_set(filter, pe_action_optional)
+ && !pcmk_is_set(then->flags, pe_action_optional)) {
+ reason = "restart";
+ }
+
+ /* ... if 'then' is an unrunnable action on the same resource (if a
+ * resource should restart but can't start, we still want to stop)
+ */
+ if (pcmk_is_set(filter, pe_action_runnable)
+ && !pcmk_is_set(then->flags, pe_action_runnable)
+ && pcmk_is_set(then->rsc->flags, pe_rsc_managed)
+ && (first->rsc == then->rsc)) {
+ reason = "stop";
+ }
+
+ if (reason == NULL) {
+ return;
+ }
+
+ pe_rsc_trace(first->rsc, "Handling %s -> %s for %s",
+ first->uuid, then->uuid, reason);
+
+ // Make 'first' required if it is runnable
+ if (pcmk_is_set(first->flags, pe_action_runnable)) {
+ clear_action_flag_because(first, pe_action_optional, then);
+ }
+
+ // Make 'first' required if 'then' is required
+ if (!pcmk_is_set(then->flags, pe_action_optional)) {
+ clear_action_flag_because(first, pe_action_optional, then);
+ }
+
+ // Make 'first' unmigratable if 'then' is unmigratable
+ if (!pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
+ clear_action_flag_because(first, pe_action_migrate_runnable, then);
+ }
+
+ // Make 'then' unrunnable if 'first' is required but unrunnable
+ if (!pcmk_is_set(first->flags, pe_action_optional)
+ && !pcmk_is_set(first->flags, pe_action_runnable)) {
+ clear_action_flag_because(then, pe_action_runnable, first);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update two actions according to an ordering between them
+ *
+ * Given information about an ordering of two actions, update the actions'
+ * flags (and runnable_before members where relevant) as needed for the
+ * ordering.
+ * Effects may cascade to other orderings involving the actions as well.
+ *
+ * \param[in,out] first 'First' action in an ordering
+ * \param[in,out] then 'Then' action in an ordering
+ * \param[in] node If not NULL, limit scope of ordering to this node
+ * (ignored)
+ * \param[in] flags Action flags for \p first for ordering purposes
+ * \param[in] filter Action flags to limit scope of certain updates (may
+ * include pe_action_optional to affect only mandatory
+ * actions, and pe_action_runnable to affect only
+ * runnable actions)
+ * \param[in] type Group of enum pe_ordering flags to apply
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Group of enum pcmk__updated flags indicating what was updated
+ */
+uint32_t
+pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
+ const pe_node_t *node, uint32_t flags,
+ uint32_t filter, uint32_t type,
+ pe_working_set_t *data_set)
+{
+ uint32_t changed = pcmk__updated_none;
+ uint32_t then_flags = then->flags;
+ uint32_t first_flags = first->flags;
+
+ if (pcmk_is_set(type, pe_order_asymmetrical)) {
+ handle_asymmetric_ordering(first, then);
+ }
+
+ if (pcmk_is_set(type, pe_order_implies_first)
+ && !pcmk_is_set(then_flags, pe_action_optional)) {
+ // Then is required, and implies first should be, too
+
+ if (pcmk_is_set(filter, pe_action_optional)
+ && !pcmk_is_set(flags, pe_action_optional)
+ && pcmk_is_set(first_flags, pe_action_optional)) {
+ clear_action_flag_because(first, pe_action_optional, then);
+ }
+
+ if (pcmk_is_set(flags, pe_action_migrate_runnable)
+ && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
+ clear_action_flag_because(first, pe_action_migrate_runnable, then);
+ }
+ }
+
+ if (pcmk_is_set(type, pe_order_promoted_implies_first)
+ && (then->rsc != NULL) && (then->rsc->role == RSC_ROLE_PROMOTED)
+ && pcmk_is_set(filter, pe_action_optional)
+ && !pcmk_is_set(then->flags, pe_action_optional)) {
+
+ clear_action_flag_because(first, pe_action_optional, then);
+
+ if (pcmk_is_set(first->flags, pe_action_migrate_runnable)
+ && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
+ clear_action_flag_because(first, pe_action_migrate_runnable,
+ then);
+ }
+ }
+
+ if (pcmk_is_set(type, pe_order_implies_first_migratable)
+ && pcmk_is_set(filter, pe_action_optional)) {
+
+ if (!pcmk_all_flags_set(then->flags,
+ pe_action_migrate_runnable|pe_action_runnable)) {
+ clear_action_flag_because(first, pe_action_runnable, then);
+ }
+
+ if (!pcmk_is_set(then->flags, pe_action_optional)) {
+ clear_action_flag_because(first, pe_action_optional, then);
+ }
+ }
+
+ if (pcmk_is_set(type, pe_order_pseudo_left)
+ && pcmk_is_set(filter, pe_action_optional)
+ && !pcmk_is_set(first->flags, pe_action_runnable)) {
+
+ clear_action_flag_because(then, pe_action_migrate_runnable, first);
+ pe__clear_action_flags(then, pe_action_pseudo);
+ }
+
+ if (pcmk_is_set(type, pe_order_runnable_left)
+ && pcmk_is_set(filter, pe_action_runnable)
+ && pcmk_is_set(then->flags, pe_action_runnable)
+ && !pcmk_is_set(flags, pe_action_runnable)) {
+
+ clear_action_flag_because(then, pe_action_runnable, first);
+ clear_action_flag_because(then, pe_action_migrate_runnable, first);
+ }
+
+ if (pcmk_is_set(type, pe_order_implies_then)
+ && pcmk_is_set(filter, pe_action_optional)
+ && pcmk_is_set(then->flags, pe_action_optional)
+ && !pcmk_is_set(flags, pe_action_optional)
+ && !pcmk_is_set(first->flags, pe_action_migrate_runnable)) {
+
+ clear_action_flag_because(then, pe_action_optional, first);
+ }
+
+ if (pcmk_is_set(type, pe_order_restart)) {
+ handle_restart_ordering(first, then, filter);
+ }
+
+ if (then_flags != then->flags) {
+ pcmk__set_updated_flags(changed, first, pcmk__updated_then);
+ pe_rsc_trace(then->rsc,
+ "%s on %s: flags are now %#.6x (was %#.6x) "
+ "because of 'first' %s (%#.6x)",
+ then->uuid, pe__node_name(then->node),
+ then->flags, then_flags, first->uuid, first->flags);
+
+ if ((then->rsc != NULL) && (then->rsc->parent != NULL)) {
+ // Required to handle "X_stop then X_start" for cloned groups
+ pcmk__update_action_for_orderings(then, data_set);
+ }
+ }
+
+ if (first_flags != first->flags) {
+ pcmk__set_updated_flags(changed, first, pcmk__updated_first);
+ pe_rsc_trace(first->rsc,
+ "%s on %s: flags are now %#.6x (was %#.6x) "
+ "because of 'then' %s (%#.6x)",
+ first->uuid, pe__node_name(first->node),
+ first->flags, first_flags, then->uuid, then->flags);
+ }
+
+ return changed;
+}
+
+/*!
+ * \internal
+ * \brief Trace-log an action (optionally with its dependent actions)
+ *
+ * \param[in] pre_text If not NULL, prefix the log with this plus ": "
+ * \param[in] action Action to log
+ * \param[in] details If true, recursively log dependent actions
+ */
+void
+pcmk__log_action(const char *pre_text, const pe_action_t *action, bool details)
+{
+ const char *node_uname = NULL;
+ const char *node_uuid = NULL;
+ const char *desc = NULL;
+
+ CRM_CHECK(action != NULL, return);
+
+ if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
+ if (action->node != NULL) {
+ node_uname = action->node->details->uname;
+ node_uuid = action->node->details->id;
+ } else {
+ node_uname = "<none>";
+ }
+ }
+
+ switch (text2task(action->task)) {
+ case stonith_node:
+ case shutdown_crm:
+ if (pcmk_is_set(action->flags, pe_action_pseudo)) {
+ desc = "Pseudo ";
+ } else if (pcmk_is_set(action->flags, pe_action_optional)) {
+ desc = "Optional ";
+ } else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
+ desc = "!!Non-Startable!! ";
+ } else if (pcmk_is_set(action->flags, pe_action_processed)) {
+ desc = "";
+ } else {
+ desc = "(Provisional) ";
+ }
+ crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
+ ((pre_text == NULL)? "" : pre_text),
+ ((pre_text == NULL)? "" : ": "),
+ desc, action->id, action->uuid,
+ (node_uname? "\ton " : ""), (node_uname? node_uname : ""),
+ (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
+ (node_uuid? ")" : ""));
+ break;
+ default:
+ if (pcmk_is_set(action->flags, pe_action_optional)) {
+ desc = "Optional ";
+ } else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
+ desc = "Pseudo ";
+ } else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
+ desc = "!!Non-Startable!! ";
+ } else if (pcmk_is_set(action->flags, pe_action_processed)) {
+ desc = "";
+ } else {
+ desc = "(Provisional) ";
+ }
+ crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
+ ((pre_text == NULL)? "" : pre_text),
+ ((pre_text == NULL)? "" : ": "),
+ desc, action->id, action->uuid,
+ (action->rsc? action->rsc->id : "<none>"),
+ (node_uname? "\ton " : ""), (node_uname? node_uname : ""),
+ (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
+ (node_uuid? ")" : ""));
+ break;
+ }
+
+ if (details) {
+ const GList *iter = NULL;
+ const pe_action_wrapper_t *other = NULL;
+
+ crm_trace("\t\t====== Preceding Actions");
+ for (iter = action->actions_before; iter != NULL; iter = iter->next) {
+ other = (const pe_action_wrapper_t *) iter->data;
+ pcmk__log_action("\t\t", other->action, false);
+ }
+ crm_trace("\t\t====== Subsequent Actions");
+ for (iter = action->actions_after; iter != NULL; iter = iter->next) {
+ other = (const pe_action_wrapper_t *) iter->data;
+ pcmk__log_action("\t\t", other->action, false);
+ }
+ crm_trace("\t\t====== End");
+
+ } else {
+ crm_trace("\t\t(before=%d, after=%d)",
+ g_list_length(action->actions_before),
+ g_list_length(action->actions_after));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create a new shutdown action for a node
+ *
+ * \param[in,out] node Node being shut down
+ *
+ * \return Newly created shutdown action for \p node
+ */
+pe_action_t *
+pcmk__new_shutdown_action(pe_node_t *node)
+{
+ char *shutdown_id = NULL;
+ pe_action_t *shutdown_op = NULL;
+
+ CRM_ASSERT(node != NULL);
+
+ shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN,
+ node->details->uname);
+
+ shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN, node, FALSE,
+ TRUE, node->details->data_set);
+
+ pcmk__order_stops_before_shutdown(node, shutdown_op);
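+ // Tell the transition engine not to wait for the shutdown to complete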
+ add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
+ return shutdown_op;
+}
+
+/*!
+ * \internal
+ * \brief Calculate and add an operation digest to XML
+ *
+ * Calculate an operation digest, which lets us later determine when a
+ * restart is needed due to the resource's parameters changing, and add it
+ * to the given XML.
+ *
+ * \param[in] op Operation result from executor
+ * \param[in,out] update XML to add digest to
+ */
+static void
+add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update)
+{
+ char *digest = NULL;
+ xmlNode *args_xml = NULL;
+
+ if (op->params == NULL) {
+ return;
+ }
+ args_xml = create_xml_node(NULL, XML_TAG_PARAMS);
+ g_hash_table_foreach(op->params, hash2field, args_xml);
+ pcmk__filter_op_for_digest(args_xml);
+ digest = calculate_operation_digest(args_xml, NULL);
+ crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest);
+ free_xml(args_xml);
+ free(digest);
+}
+
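+// Placeholder transition UUID used when faking a transition key (no user data)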
+#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+
+/*!
+ * \internal
+ * \brief Create XML for resource operation history update
+ *
+ * \param[in,out] parent Parent XML node to add to
+ * \param[in,out] op Operation event data
+ * \param[in] caller_version DC feature set
+ * \param[in] target_rc Expected result of operation
+ * \param[in] node Name of node on which operation was performed
+ * \param[in] origin Arbitrary description of update source
+ *
+ * \return Newly created XML node for history update
+ */
+xmlNode *
+pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
+ const char *caller_version, int target_rc,
+ const char *node, const char *origin)
+{
+ char *key = NULL;
+ char *magic = NULL;
+ char *op_id = NULL;
+ char *op_id_additional = NULL;
+ char *local_user_data = NULL;
+ const char *exit_reason = NULL;
+
+ xmlNode *xml_op = NULL;
+ const char *task = NULL;
+
+ CRM_CHECK(op != NULL, return NULL);
+ crm_trace("Creating history XML for %s-interval %s action for %s on %s "
+ "(DC version: %s, origin: %s)",
+ pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
+ ((node == NULL)? "no node" : node), caller_version, origin);
+
+ task = op->op_type;
+
+ /* Record a successful agent reload as a start, and a failed one as a
+ * monitor, to make life easier for the scheduler when determining the
+ * current state.
+ *
+ * @COMPAT We should check "reload" here only if the operation was for a
+ * pre-OCF-1.1 resource agent, but we don't know that here, and we should
+ * only ever get results for actions scheduled by us, so we can reasonably
+ * assume any "reload" is actually a pre-1.1 agent reload.
+ */
+ if (pcmk__str_any_of(task, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT,
+ NULL)) {
+ if (op->op_status == PCMK_EXEC_DONE) {
+ task = CRMD_ACTION_START;
+ } else {
+ task = CRMD_ACTION_STATUS;
+ }
+ }
+
+ key = pcmk__op_key(op->rsc_id, task, op->interval_ms);
+ if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_none)) {
+ const char *n_type = crm_meta_value(op->params, "notify_type");
+ const char *n_task = crm_meta_value(op->params, "notify_operation");
+
+ CRM_LOG_ASSERT(n_type != NULL);
+ CRM_LOG_ASSERT(n_task != NULL);
+ op_id = pcmk__notify_key(op->rsc_id, n_type, n_task);
+
+ if (op->op_status != PCMK_EXEC_PENDING) {
+ /* Ignore notify errors.
+ *
+ * @TODO It might be better to keep the correct result here, and
+ * ignore it in process_graph_event().
+ */
+ lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
+ }
+
+ /* Migration history is preserved separately, since it usually involves
+ * multiple nodes and matters for future cluster transitions.
+ */
+ } else if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL)) {
+ op_id = strdup(key);
+
+ } else if (did_rsc_op_fail(op, target_rc)) {
+ op_id = pcmk__op_key(op->rsc_id, "last_failure", 0);
+ if (op->interval_ms == 0) {
+ // Ensure 'last' gets updated, in case record-pending is true
+ op_id_additional = pcmk__op_key(op->rsc_id, "last", 0);
+ }
+ exit_reason = op->exit_reason;
+
+ } else if (op->interval_ms > 0) {
+ op_id = strdup(key);
+
+ } else {
+ op_id = pcmk__op_key(op->rsc_id, "last", 0);
+ }
+
+ again:
+ xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, XML_ATTR_ID, op_id);
+ if (xml_op == NULL) {
+ xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
+ }
+
+ if (op->user_data == NULL) {
+ crm_debug("Generating fake transition key for: " PCMK__OP_FMT
+ " %d from %s", op->rsc_id, op->op_type, op->interval_ms,
+ op->call_id, origin);
+ local_user_data = pcmk__transition_key(-1, op->call_id, target_rc,
+ FAKE_TE_ID);
+ op->user_data = local_user_data;
+ }
+
+ if (magic == NULL) {
+ magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc,
+ (const char *) op->user_data);
+ }
+
+ crm_xml_add(xml_op, XML_ATTR_ID, op_id);
+ crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key);
+ crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
+ crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
+ crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
+ crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
+ crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
+ crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason);
+ crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */
+
+ crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
+ crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
+ crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
+ crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms);
+
+ if (compare_version("2.1", caller_version) <= 0) {
+ if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
+ crm_trace("Timing data (" PCMK__OP_FMT
+ "): last=%u change=%u exec=%u queue=%u",
+ op->rsc_id, op->op_type, op->interval_ms,
+ op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
+
+ if ((op->interval_ms != 0) && (op->t_rcchange != 0)) {
+ // Recurring ops may have changed rc after initial run
+ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
+ (long long) op->t_rcchange);
+ } else {
+ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
+ (long long) op->t_run);
+ }
+
+ crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time);
+ crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time);
+ }
+ }
+
+ if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
+ // Always record migrate_source and migrate_target for migration ops
+ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
+
+ crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
+
+ name = XML_LRM_ATTR_MIGRATE_TARGET;
+ crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
+ }
+
+ add_op_digest_to_xml(op, xml_op);
+
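+ // If a failure also needs a "last" entry, repeat with the second op ID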
+ if (op_id_additional) {
+ free(op_id);
+ op_id = op_id_additional;
+ op_id_additional = NULL;
+ goto again;
+ }
+
+ if (local_user_data) {
+ free(local_user_data);
+ op->user_data = NULL;
+ }
+ free(magic);
+ free(op_id);
+ free(key);
+ return xml_op;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an action shutdown-locks a resource to a node
+ *
+ * If the shutdown-lock cluster property is set, resources will not be recovered
+ * on a different node if cleanly stopped, and may start only on that same node.
+ * This function checks whether that applies to a given action, so that the
+ * transition graph can be marked appropriately.
+ *
+ * \param[in] action Action to check
+ *
+ * \return true if \p action locks its resource to the action's node,
+ * otherwise false
+ */
+bool
+pcmk__action_locks_rsc_to_node(const pe_action_t *action)
+{
+ // Only resource actions taking place on resource's lock node are locked
+ if ((action == NULL) || (action->rsc == NULL)
+ || (action->rsc->lock_node == NULL) || (action->node == NULL)
+ || (action->node->details != action->rsc->lock_node->details)) {
+ return false;
+ }
+
+ /* During shutdown, only stops are locked (otherwise, another action such as
+ * a demote would cause the controller to clear the lock)
+ */
+ if (action->node->details->shutdown && (action->task != NULL)
+ && (strcmp(action->task, RSC_STOP) != 0)) {
+ return false;
+ }
+
+ return true;
+}
+
+// Sort action inputs by action ID, lowest to highest (NULL entries last)
+static gint
+sort_action_id(gconstpointer a, gconstpointer b)
+{
+ const pe_action_wrapper_t *action_wrapper2 = (const pe_action_wrapper_t *)a;
+ const pe_action_wrapper_t *action_wrapper1 = (const pe_action_wrapper_t *)b;
+
+ if (a == NULL) {
+ return 1;
+ }
+ if (b == NULL) {
+ return -1;
+ }
+ if (action_wrapper1->action->id < action_wrapper2->action->id) {
+ return 1;
+ }
+ if (action_wrapper1->action->id > action_wrapper2->action->id) {
+ return -1;
+ }
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Remove any duplicate action inputs, merging action flags
+ *
+ * \param[in,out] action Action whose inputs should be checked
+ */
+void
+pcmk__deduplicate_action_inputs(pe_action_t *action)
+{
+ GList *item = NULL;
+ GList *next = NULL;
+ pe_action_wrapper_t *last_input = NULL;
+
+ action->actions_before = g_list_sort(action->actions_before,
+ sort_action_id);
+ for (item = action->actions_before; item != NULL; item = next) {
+ pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data;
+
+ next = item->next;
+ if ((last_input != NULL)
+ && (input->action->id == last_input->action->id)) {
+ crm_trace("Input %s (%d) duplicate skipped for action %s (%d)",
+ input->action->uuid, input->action->id,
+ action->uuid, action->id);
+
+ /* For the purposes of scheduling, the ordering flags no longer
+ * matter, but crm_simulate looks at certain ones when creating a
+ * dot graph. Combining the flags is sufficient for that purpose.
+ */
+ last_input->type |= input->type;
+ if (input->state == pe_link_dumped) {
+ last_input->state = pe_link_dumped;
+ }
+
+ free(item->data);
+ action->actions_before = g_list_delete_link(action->actions_before,
+ item);
+ } else {
+ last_input = input;
+ input->state = pe_link_not_dumped;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Output all scheduled actions
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__output_actions(pe_working_set_t *data_set)
+{
+ pcmk__output_t *out = data_set->priv;
+
+ // Output node (non-resource) actions
+ for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
+ char *node_name = NULL;
+ char *task = NULL;
+ pe_action_t *action = (pe_action_t *) iter->data;
+
+ if (action->rsc != NULL) {
+ continue; // Resource actions will be output later
+
+ } else if (pcmk_is_set(action->flags, pe_action_optional)) {
+ continue; // This action was not scheduled
+ }
+
+ if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
+ task = strdup("Shutdown");
+
+ } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
+ const char *op = g_hash_table_lookup(action->meta, "stonith_action");
+
+ task = crm_strdup_printf("Fence (%s)", op);
+
+ } else {
+ continue; // Don't display other node action types
+ }
+
+ if (pe__is_guest_node(action->node)) {
+ node_name = crm_strdup_printf("%s (resource: %s)",
+ pe__node_name(action->node),
+ action->node->details->remote_rsc->container->id);
+ } else if (action->node != NULL) {
+ node_name = crm_strdup_printf("%s", pe__node_name(action->node));
+ }
+
+ out->message(out, "node-action", task, node_name, action->reason);
+
+ free(node_name);
+ free(task);
+ }
+
+ // Output resource actions
+ for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ rsc->cmds->output_actions(rsc);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether action from resource history is still in configuration
+ *
+ * \param[in] rsc Resource that action is for
+ * \param[in] task Action's name
+ * \param[in] interval_ms Action's interval (in milliseconds)
+ *
+ * \return true if action is still in resource configuration, otherwise false
+ */
+static bool
+action_in_config(const pe_resource_t *rsc, const char *task, guint interval_ms)
+{
+ char *key = pcmk__op_key(rsc->id, task, interval_ms);
+ bool config = (find_rsc_op_entry(rsc, key) != NULL);
+
+ free(key);
+ return config;
+}
+
+/*!
+ * \internal
+ * \brief Get action name needed to compare digest for configuration changes
+ *
+ * \param[in] task Action name from history
+ * \param[in] interval_ms Action interval (in milliseconds)
+ *
+ * \return Action name whose digest should be compared
+ */
+static const char *
+task_for_digest(const char *task, guint interval_ms)
+{
+ /* Certain actions need to be compared against the parameters used to start
+ * the resource.
+ */
+ if ((interval_ms == 0)
+ && pcmk__str_any_of(task, RSC_STATUS, RSC_MIGRATED, RSC_PROMOTE, NULL)) {
+ task = RSC_START;
+ }
+ return task;
+}
+
+/*!
+ * \internal
+ * \brief Check whether only sanitized parameters to an action changed
+ *
+ * When collecting CIB files for troubleshooting, crm_report will mask
+ * sensitive resource parameters. If simulations were run using that output,
+ * affected resources would appear to need a restart, which would complicate
+ * troubleshooting. To avoid that, we save a "secure digest" of non-sensitive
+ * parameters. This function uses that digest to check whether only masked
+ * parameters are different.
+ *
+ * \param[in] xml_op Resource history entry with secure digest
+ * \param[in] digest_data Operation digest information being compared
+ * \param[in] data_set Cluster working set
+ *
+ * \return true if only sanitized parameters changed, otherwise false
+ */
+static bool
+only_sanitized_changed(const xmlNode *xml_op,
+ const op_digest_cache_t *digest_data,
+ const pe_working_set_t *data_set)
+{
+ const char *digest_secure = NULL;
+
+ if (!pcmk_is_set(data_set->flags, pe_flag_sanitized)) {
+ // The scheduler is not being run as a simulation
+ return false;
+ }
+
+ digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
+
+ return (digest_data->rc != RSC_DIGEST_MATCH) && (digest_secure != NULL)
+ && (digest_data->digest_secure_calc != NULL)
+ && (strcmp(digest_data->digest_secure_calc, digest_secure) == 0);
+}
+
+/*!
+ * \internal
+ * \brief Force a restart due to a configuration change
+ *
+ * \param[in,out] rsc Resource that action is for
+ * \param[in] task Name of action whose configuration changed
+ * \param[in] interval_ms Action interval (in milliseconds)
+ * \param[in,out] node Node where resource should be restarted
+ */
+static void
+force_restart(pe_resource_t *rsc, const char *task, guint interval_ms,
+ pe_node_t *node)
+{
+ char *key = pcmk__op_key(rsc->id, task, interval_ms);
+ pe_action_t *required = custom_action(rsc, key, task, NULL, FALSE, TRUE,
+ rsc->cluster);
+
+ pe_action_set_reason(required, "resource definition change", true);
+ trigger_unfencing(rsc, node, "Device parameters changed", NULL,
+ rsc->cluster);
+}
+
+/*!
+ * \internal
+ * \brief Schedule a reload of a resource on a node
+ *
+ * \param[in,out] rsc Resource to reload
+ * \param[in] node Where resource should be reloaded
+ */
+static void
+schedule_reload(pe_resource_t *rsc, const pe_node_t *node)
+{
+ pe_action_t *reload = NULL;
+
+ // For collective resources, just call recursively for children
+ if (rsc->variant > pe_native) {
+ g_list_foreach(rsc->children, (GFunc) schedule_reload, (gpointer) node);
+ return;
+ }
+
+ // Skip the reload in certain situations
+ if ((node == NULL)
+ || !pcmk_is_set(rsc->flags, pe_rsc_managed)
+ || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ pe_rsc_trace(rsc, "Skip reload of %s:%s%s %s",
+ rsc->id,
+ pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " unmanaged",
+ pcmk_is_set(rsc->flags, pe_rsc_failed)? " failed" : "",
+ (node == NULL)? "inactive" : node->details->uname);
+ return;
+ }
+
+ /* If a resource's configuration changed while a start was pending,
+ * force a full restart instead of a reload.
+ */
+ if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
+ pe_rsc_trace(rsc, "%s: preventing agent reload because start pending",
+ rsc->id);
+ custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
+ rsc->cluster);
+ return;
+ }
+
+ // Schedule the reload
+ pe__set_resource_flags(rsc, pe_rsc_reload);
+ reload = custom_action(rsc, reload_key(rsc), CRMD_ACTION_RELOAD_AGENT, node,
+ FALSE, TRUE, rsc->cluster);
+ pe_action_set_reason(reload, "resource definition change", FALSE);
+
+ // Set orderings so that a required stop or demote cancels the reload
+ pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL,
+ pe_order_optional|pe_order_then_cancels_first,
+ rsc->cluster);
+ pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL,
+ pe_order_optional|pe_order_then_cancels_first,
+ rsc->cluster);
+}
+
+/*!
+ * \internal
+ * \brief Handle any configuration change for an action
+ *
+ * Given an action from resource history, if the resource's configuration
+ * changed since the action was done, schedule any actions needed (restart,
+ * reload, unfencing, rescheduling recurring actions, etc.).
+ *
+ * \param[in,out] rsc Resource that action is for
+ * \param[in,out] node Node that action was on
+ * \param[in] xml_op Action XML from resource history
+ *
+ * \return true if action configuration changed, otherwise false
+ */
+bool
+pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
+ const xmlNode *xml_op)
+{
+ guint interval_ms = 0;
+ const char *task = NULL;
+ const op_digest_cache_t *digest_data = NULL;
+
+ CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL),
+ return false);
+
+ task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
+ CRM_CHECK(task != NULL, return false);
+
+ crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
+
+ // If this is a recurring action, check whether it has been orphaned
+ if (interval_ms > 0) {
+ if (action_in_config(rsc, task, interval_ms)) {
+ pe_rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration",
+ pcmk__readable_interval(interval_ms), task, rsc->id,
+ pe__node_name(node));
+ } else if (pcmk_is_set(rsc->cluster->flags,
+ pe_flag_stop_action_orphans)) {
+ pcmk__schedule_cancel(rsc,
+ crm_element_value(xml_op, XML_LRM_ATTR_CALLID),
+ task, interval_ms, node, "orphan");
+ return true;
+ } else {
+ pe_rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned",
+ pcmk__readable_interval(interval_ms), task, rsc->id,
+ pe__node_name(node));
+ return true;
+ }
+ }
+
+ crm_trace("Checking %s-interval %s for %s on %s for configuration changes",
+ pcmk__readable_interval(interval_ms), task, rsc->id,
+ pe__node_name(node));
+ task = task_for_digest(task, interval_ms);
+ digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster);
+
+ if (only_sanitized_changed(xml_op, digest_data, rsc->cluster)) {
+ if (!pcmk__is_daemon && (rsc->cluster->priv != NULL)) {
+ pcmk__output_t *out = rsc->cluster->priv;
+
+ out->info(out,
+ "Only 'private' parameters to %s-interval %s for %s "
+ "on %s changed: %s",
+ pcmk__readable_interval(interval_ms), task, rsc->id,
+ pe__node_name(node),
+ crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
+ }
+ return false;
+ }
+
+ switch (digest_data->rc) {
+ case RSC_DIGEST_RESTART:
+ crm_log_xml_debug(digest_data->params_restart, "params:restart");
+ force_restart(rsc, task, interval_ms, node);
+ return true;
+
+ case RSC_DIGEST_ALL:
+ case RSC_DIGEST_UNKNOWN:
+ // Changes that can potentially be handled by an agent reload
+
+ if (interval_ms > 0) {
+ /* Recurring actions aren't reloaded per se; they are just
+ * re-scheduled so the next run uses the new parameters.
+ * The old instance will be cancelled automatically.
+ */
+ crm_log_xml_debug(digest_data->params_all, "params:reschedule");
+ pcmk__reschedule_recurring(rsc, task, interval_ms, node);
+
+ } else if (crm_element_value(xml_op,
+ XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
+ // Agent supports reload, so use it
+ trigger_unfencing(rsc, node,
+ "Device parameters changed (reload)", NULL,
+ rsc->cluster);
+ crm_log_xml_debug(digest_data->params_all, "params:reload");
+ schedule_reload(rsc, node);
+
+ } else {
+ pe_rsc_trace(rsc,
+ "Restarting %s because agent doesn't support reload",
+ rsc->id);
+ crm_log_xml_debug(digest_data->params_restart,
+ "params:restart");
+ force_restart(rsc, task, interval_ms, node);
+ }
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Create a list of a resource's action history entries, sorted by call ID
+ *
+ * \param[in] rsc_entry Resource's <lrm_resource> status XML
+ * \param[out] start_index Where to store index of start-like action, if any
+ * \param[out] stop_index Where to store index of stop action, if any
+ *
+ * \return Newly allocated list of the resource's <lrm_rsc_op> history entries,
+ * sorted by call ID
+ */
+static GList *
+rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index)
+{
+ GList *ops = NULL;
+
+ for (xmlNode *rsc_op = first_named_child(rsc_entry, XML_LRM_TAG_RSC_OP);
+ rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
+ ops = g_list_prepend(ops, rsc_op);
+ }
+ ops = g_list_sort(ops, sort_op_by_callid);
+ calculate_active_ops(ops, start_index, stop_index);
+ return ops;
+}
+
+/*!
+ * \internal
+ * \brief Process a resource's action history from the CIB status
+ *
+ * Given a resource's action history, if the resource's configuration
+ * changed since the actions were done, schedule any actions needed (restart,
+ * reload, unfencing, rescheduling recurring actions, clean-up, etc.).
+ * (This also cancels recurring actions for maintenance mode, which is not
+ * entirely related but convenient to do here.)
+ *
+ * \param[in] rsc_entry Resource's <lrm_resource> status XML
+ * \param[in,out] rsc Resource whose history is being processed
+ * \param[in,out] node Node whose history is being processed
+ */
+static void
+process_rsc_history(const xmlNode *rsc_entry, pe_resource_t *rsc,
+ pe_node_t *node)
+{
+ int offset = -1;
+ int stop_index = 0;
+ int start_index = 0;
+ GList *sorted_op_list = NULL;
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
+ if (pe_rsc_is_anon_clone(pe__const_top_resource(rsc, false))) {
+ pe_rsc_trace(rsc,
+ "Skipping configuration check "
+ "for orphaned clone instance %s",
+ rsc->id);
+ } else {
+ pe_rsc_trace(rsc,
+ "Skipping configuration check and scheduling clean-up "
+ "for orphaned resource %s", rsc->id);
+ pcmk__schedule_cleanup(rsc, node, false);
+ }
+ return;
+ }
+
+ if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
+ if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) {
+ pcmk__schedule_cleanup(rsc, node, false);
+ }
+ pe_rsc_trace(rsc,
+ "Skipping configuration check for %s "
+ "because no longer active on %s",
+ rsc->id, pe__node_name(node));
+ return;
+ }
+
+ pe_rsc_trace(rsc, "Checking for configuration changes for %s on %s",
+ rsc->id, pe__node_name(node));
+
+ if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) {
+ pcmk__schedule_cleanup(rsc, node, false);
+ }
+
+ sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index);
+ if (start_index < stop_index) {
+ return; // Resource is stopped
+ }
+
+ for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
+ xmlNode *rsc_op = (xmlNode *) iter->data;
+ const char *task = NULL;
+ guint interval_ms = 0;
+
+ if (++offset < start_index) {
+ // Skip actions that happened before a start
+ continue;
+ }
+
+ task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
+ crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
+
+ if ((interval_ms > 0)
+ && (pcmk_is_set(rsc->flags, pe_rsc_maintenance)
+ || node->details->maintenance)) {
+ // Maintenance mode cancels recurring operations
+ pcmk__schedule_cancel(rsc,
+ crm_element_value(rsc_op, XML_LRM_ATTR_CALLID),
+ task, interval_ms, node, "maintenance mode");
+
+ } else if ((interval_ms > 0)
+ || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START,
+ RSC_PROMOTE, RSC_MIGRATED, NULL)) {
+ /* If a resource operation failed and the operation's definition
+ * has changed, clear any fail count so it can be retried fresh.
+ */
+
+ if (pe__bundle_needs_remote_name(rsc)) {
+ /* We haven't allocated resources to nodes yet, so if the
+ * REMOTE_CONTAINER_HACK is used, we may calculate the digest
+ * based on the literal "#uname" value rather than the properly
+ * substituted value. That would mistakenly make the action
+ * definition appear to have been changed. Defer the check until
+ * later in this case.
+ */
+ pe__add_param_check(rsc_op, rsc, node, pe_check_active,
+ rsc->cluster);
+
+ } else if (pcmk__check_action_config(rsc, node, rsc_op)
+ && (pe_get_failcount(node, rsc, NULL, pe_fc_effective,
+ NULL) != 0)) {
+ pe__clear_failcount(rsc, node, "action definition changed",
+ rsc->cluster);
+ }
+ }
+ }
+ g_list_free(sorted_op_list);
+}
+
+/*!
+ * \internal
+ * \brief Process a node's action history from the CIB status
+ *
+ * Given a node's resource history, if the resource's configuration changed
+ * since the actions were done, schedule any actions needed (restart,
+ * reload, unfencing, rescheduling recurring actions, clean-up, etc.).
+ * (This also cancels recurring actions for maintenance mode, which is not
+ * entirely related but convenient to do here.)
+ *
+ * \param[in,out] node Node whose history is being processed
+ * \param[in] lrm_rscs Node's <lrm_resources> from CIB status XML
+ */
+static void
+process_node_history(pe_node_t *node, const xmlNode *lrm_rscs)
+{
+ crm_trace("Processing node history for %s", pe__node_name(node));
+ for (const xmlNode *rsc_entry = first_named_child(lrm_rscs,
+ XML_LRM_TAG_RESOURCE);
+ rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
+
+ if (xml_has_children(rsc_entry)) {
+ GList *result = pcmk__rscs_matching_id(ID(rsc_entry),
+ node->details->data_set);
+
+ for (GList *iter = result; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ if (rsc->variant == pe_native) {
+ process_rsc_history(rsc_entry, rsc, node);
+ }
+ }
+ g_list_free(result);
+ }
+ }
+}
+
+// XPath to find a node's resource history
+#define XPATH_NODE_HISTORY "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
+ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
+ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES
+
+/*!
+ * \internal
+ * \brief Process any resource configuration changes in the CIB status
+ *
+ * Go through all nodes' resource history, and if a resource's configuration
+ * changed since its actions were done, schedule any actions needed (restart,
+ * reload, unfencing, rescheduling recurring actions, clean-up, etc.).
+ * (This also cancels recurring actions for maintenance mode, which is not
+ * entirely related but convenient to do here.)
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__handle_rsc_config_changes(pe_working_set_t *data_set)
+{
+ crm_trace("Check resource and action configuration for changes");
+
+ /* Rather than iterate through the status section, iterate through the nodes
+ * and search for the appropriate status subsection for each. This skips
+ * orphaned nodes and lets us eliminate some cases before searching the XML.
+ */
+ for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *node = (pe_node_t *) iter->data;
+
+ /* Don't bother checking actions for a node that can't run actions ...
+ * unless it's in maintenance mode, in which case we still need to
+ * cancel any existing recurring monitors.
+ */
+ if (node->details->maintenance
+ || pcmk__node_available(node, false, false)) {
+
+ char *xpath = NULL;
+ xmlNode *history = NULL;
+
+ xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->details->uname);
+ history = get_xpath_object(xpath, data_set->input, LOG_NEVER);
+ free(xpath);
+
+ process_node_history(node, history);
+ }
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c
new file mode 100644
index 0000000..5682744
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_bundle.c
@@ -0,0 +1,876 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
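+// Makes <lib/pengine/variant.h> expose the bundle variant's private data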
+#define PE__VARIANT_BUNDLE 1
+#include <lib/pengine/variant.h>
+
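+// Check whether a given node hosts one of the bundle's replicas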
+static bool
+is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node)
+{
+ for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ if (node->details == replica->node->details) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Assign a bundle resource to a node
+ *
+ * \param[in,out] rsc Resource to assign to a node
+ * \param[in] prefer Node to prefer, if all else is equal
+ *
+ * \return Node that \p rsc is assigned to, if assigned entirely to one node
+ */
+pe_node_t *
+pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer)
+{
+ GList *containers = NULL;
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ CRM_CHECK(rsc != NULL, return NULL);
+
+ get_bundle_variant_data(bundle_data, rsc);
+
+ pe__set_resource_flags(rsc, pe_rsc_allocating);
+ containers = pe__bundle_containers(rsc);
+
+ pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
+ rsc, __func__, rsc->allowed_nodes, rsc->cluster);
+
+ containers = g_list_sort(containers, pcmk__cmp_instance);
+ pcmk__assign_instances(rsc, containers, bundle_data->nreplicas,
+ bundle_data->nreplicas_per_host);
+ g_list_free(containers);
+
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+ pe_node_t *container_host = NULL;
+
+ CRM_ASSERT(replica);
+ if (replica->ip) {
+ pe_rsc_trace(rsc, "Allocating bundle %s IP %s",
+ rsc->id, replica->ip->id);
+ replica->ip->cmds->assign(replica->ip, prefer);
+ }
+
+ container_host = replica->container->allocated_to;
+ if (replica->remote && pe__is_guest_or_remote_node(container_host)) {
+ /* We need 'nested' connection resources to be on the same
+ * host because pacemaker-remoted only supports a single
+ * active connection
+ */
+ pcmk__new_colocation("child-remote-with-docker-remote", NULL,
+ INFINITY, replica->remote,
+ container_host->details->remote_rsc, NULL,
+ NULL, true, rsc->cluster);
+ }
+
+ if (replica->remote) {
+ pe_rsc_trace(rsc, "Allocating bundle %s connection %s",
+ rsc->id, replica->remote->id);
+ replica->remote->cmds->assign(replica->remote, prefer);
+ }
+
+ // Explicitly allocate replicas' children before bundle child
+ if (replica->child) {
+ pe_node_t *node = NULL;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
+ if (node->details != replica->node->details) {
+ node->weight = -INFINITY;
+ } else if (!pcmk__threshold_reached(replica->child, node,
+ NULL)) {
+ node->weight = INFINITY;
+ }
+ }
+
+ pe__set_resource_flags(replica->child->parent, pe_rsc_allocating);
+ pe_rsc_trace(rsc, "Allocating bundle %s replica child %s",
+ rsc->id, replica->child->id);
+ replica->child->cmds->assign(replica->child, replica->node);
+ pe__clear_resource_flags(replica->child->parent,
+ pe_rsc_allocating);
+ }
+ }
+
+ if (bundle_data->child) {
+ pe_node_t *node = NULL;
+ GHashTableIter iter;
+ g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
+ if (is_bundle_node(bundle_data, node)) {
+ node->weight = 0;
+ } else {
+ node->weight = -INFINITY;
+ }
+ }
+ pe_rsc_trace(rsc, "Allocating bundle %s child %s",
+ rsc->id, bundle_data->child->id);
+ bundle_data->child->cmds->assign(bundle_data->child, prefer);
+ }
+
+ pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
+ return NULL;
+}
+
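+/*!
+ * \internal
+ * \brief Create all actions needed for a given bundle resource
+ *
+ * \param[in,out] rsc Bundle resource to create actions for
+ */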
+void
+pcmk__bundle_create_actions(pe_resource_t *rsc)
+{
+ pe_action_t *action = NULL;
+ GList *containers = NULL;
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ CRM_CHECK(rsc != NULL, return);
+
+ containers = pe__bundle_containers(rsc);
+ get_bundle_variant_data(bundle_data, rsc);
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ CRM_ASSERT(replica);
+ if (replica->ip) {
+ replica->ip->cmds->create_actions(replica->ip);
+ }
+ if (replica->container) {
+ replica->container->cmds->create_actions(replica->container);
+ }
+ if (replica->remote) {
+ replica->remote->cmds->create_actions(replica->remote);
+ }
+ }
+
+ pcmk__create_instance_actions(rsc, containers);
+
+ if (bundle_data->child) {
+ bundle_data->child->cmds->create_actions(bundle_data->child);
+
+ if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
+ /* promote */
+ pe__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true);
+ action = pe__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true);
+ action->priority = INFINITY;
+
+ /* demote */
+ pe__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true);
+ action = pe__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true);
+ action->priority = INFINITY;
+ }
+ }
+
+ g_list_free(containers);
+}
+
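+/*!
+ * \internal
+ * \brief Create implicit constraints needed for a bundle resource
+ *
+ * \param[in,out] rsc Bundle resource to create implicit constraints for
+ */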
+void
+pcmk__bundle_internal_constraints(pe_resource_t *rsc)
+{
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ CRM_CHECK(rsc != NULL, return);
+
+ get_bundle_variant_data(bundle_data, rsc);
+
+ if (bundle_data->child) {
+ pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child,
+ RSC_START, pe_order_implies_first_printed);
+ pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child,
+ RSC_STOP, pe_order_implies_first_printed);
+
+ if (bundle_data->child->children) {
+ pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc,
+ RSC_STARTED,
+ pe_order_implies_then_printed);
+ pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc,
+ RSC_STOPPED,
+ pe_order_implies_then_printed);
+ } else {
+ pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc,
+ RSC_STARTED,
+ pe_order_implies_then_printed);
+ pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc,
+ RSC_STOPPED,
+ pe_order_implies_then_printed);
+ }
+ }
+
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ CRM_ASSERT(replica);
+ CRM_ASSERT(replica->container);
+
+ replica->container->cmds->internal_constraints(replica->container);
+
+ pcmk__order_starts(rsc, replica->container,
+ pe_order_runnable_left|pe_order_implies_first_printed);
+
+ if (replica->child) {
+ pcmk__order_stops(rsc, replica->child,
+ pe_order_implies_first_printed);
+ }
+ pcmk__order_stops(rsc, replica->container,
+ pe_order_implies_first_printed);
+ pcmk__order_resource_actions(replica->container, RSC_START, rsc,
+ RSC_STARTED,
+ pe_order_implies_then_printed);
+ pcmk__order_resource_actions(replica->container, RSC_STOP, rsc,
+ RSC_STOPPED,
+ pe_order_implies_then_printed);
+
+ if (replica->ip) {
+ replica->ip->cmds->internal_constraints(replica->ip);
+
+ // Start IP then container
+ pcmk__order_starts(replica->ip, replica->container,
+ pe_order_runnable_left|pe_order_preserve);
+ pcmk__order_stops(replica->container, replica->ip,
+ pe_order_implies_first|pe_order_preserve);
+
+ pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip,
+ replica->container, NULL, NULL, true,
+ rsc->cluster);
+ }
+
+ if (replica->remote) {
+ /* This handles ordering and colocating remote relative to container
+ * (via "resource-with-container"). Since IP is also ordered and
+ * colocated relative to the container, we don't need to do anything
+ * explicit here with IP.
+ */
+ replica->remote->cmds->internal_constraints(replica->remote);
+ }
+
+ if (replica->child) {
+ CRM_ASSERT(replica->remote);
+
+ // "Start remote then child" is implicit in scheduler's remote logic
+ }
+
+ }
+
+ if (bundle_data->child) {
+ bundle_data->child->cmds->internal_constraints(bundle_data->child);
+ if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
+ pcmk__promotable_restart_ordering(rsc);
+
+ /* child demoted before global demoted */
+ pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc,
+ RSC_DEMOTED,
+ pe_order_implies_then_printed);
+
+ /* global demote before child demote */
+ pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child,
+ RSC_DEMOTE,
+ pe_order_implies_first_printed);
+
+ /* child promoted before global promoted */
+ pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc,
+ RSC_PROMOTED,
+ pe_order_implies_then_printed);
+
+ /* global promote before child promote */
+ pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child,
+ RSC_PROMOTE,
+ pe_order_implies_first_printed);
+ }
+ }
+}
+
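+/*!
+ * \internal
+ * \brief Find a bundle replica container that can pair with a dependent
+ * resource on a given node
+ *
+ * \return Replica container matching \p candidate (and \p filter role) if
+ * any, otherwise NULL
+ */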
+static pe_resource_t *
+compatible_replica_for_node(const pe_resource_t *rsc_lh,
+ const pe_node_t *candidate,
+ const pe_resource_t *rsc, enum rsc_role_e filter,
+ gboolean current)
+{
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ CRM_CHECK(candidate != NULL, return NULL);
+ get_bundle_variant_data(bundle_data, rsc);
+
+ crm_trace("Looking for compatible child from %s for %s on %s",
+ rsc_lh->id, rsc->id, pe__node_name(candidate));
+
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ if (pcmk__instance_matches(replica->container, candidate, filter,
+ current)) {
+ crm_trace("Pairing %s with %s on %s",
+ rsc_lh->id, replica->container->id,
+ pe__node_name(candidate));
+ return replica->container;
+ }
+ }
+
+ crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id);
+ return NULL;
+}
+
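+/*!
+ * \internal
+ * \brief Find a bundle replica container compatible with a dependent resource
+ *
+ * The dependent's own location (if any) is tried first, then each of its
+ * allowed nodes in sorted order.
+ *
+ * \return Compatible replica container if any, otherwise NULL
+ */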
+static pe_resource_t *
+compatible_replica(const pe_resource_t *rsc_lh, const pe_resource_t *rsc,
+ enum rsc_role_e filter, gboolean current,
+ pe_working_set_t *data_set)
+{
+ GList *scratch = NULL;
+ pe_resource_t *pair = NULL;
+ pe_node_t *active_node_lh = NULL;
+
+ active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current);
+ if (active_node_lh) {
+ return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter,
+ current);
+ }
+
+ scratch = g_hash_table_get_values(rsc_lh->allowed_nodes);
+ scratch = pcmk__sort_nodes(scratch, NULL);
+
+ for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) {
+ pe_node_t *node = (pe_node_t *) gIter->data;
+
+ pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current);
+ if (pair) {
+ goto done;
+ }
+ }
+
+ pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none"));
+ done:
+ g_list_free(scratch);
+ return pair;
+}
+
+int
+copies_per_node(pe_resource_t *rsc)
+{
+ /* Strictly speaking, there should be a 'copies_per_node' addition
+ * to the resource function table, with each case here becoming a
+ * function. However, that would be serious overkill just to return
+ * an int. Arguably, both function tables could be replaced by
+ * resources.{c,h} full of rsc_{some_operation} functions containing
+ * a switch like the one below, which calls out to functions named
+ * {variant}_{some_operation} as needed.
+ */
+ switch(rsc->variant) {
+ case pe_unknown:
+ return 0;
+ case pe_native:
+ case pe_group:
+ return 1;
+ case pe_clone:
+ {
+ const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX);
+
+ if (max_clones_node == NULL) {
+ return 1;
+
+ } else {
+ int max_i;
+
+ pcmk__scan_min_int(max_clones_node, &max_i, 0);
+ return max_i;
+ }
+ }
+ case pe_container:
+ {
+ pe__bundle_variant_data_t *data = NULL;
+ get_bundle_variant_data(data, rsc);
+ return data->nreplicas_per_host;
+ }
+ }
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Apply a colocation's score to node weights or resource priority
+ *
+ * Given a colocation constraint, apply its score to the dependent's
+ * allowed node weights (if we are still placing resources) or priority (if
+ * we are choosing promotable clone instance roles).
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint to apply
+ * \param[in] for_dependent true if called on behalf of dependent
+ */
+void
+pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent)
+{
+ GList *allocated_primaries = NULL;
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ /* This should never be called for the bundle itself as a dependent.
+ * Instead, we add its colocation constraints to its replicas and call
+ * apply_coloc_score() for the replicas as dependents.
+ */
+ CRM_ASSERT(!for_dependent);
+
+ CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
+ return);
+ CRM_ASSERT(dependent->variant == pe_native);
+
+ if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ pe_rsc_trace(primary, "%s is still provisional", primary->id);
+ return;
+
+ } else if (colocation->dependent->variant > pe_group) {
+ pe_resource_t *primary_replica = compatible_replica(dependent, primary,
+ RSC_ROLE_UNKNOWN,
+ FALSE,
+ dependent->cluster);
+
+ if (primary_replica) {
+ pe_rsc_debug(primary, "Pairing %s with %s",
+ dependent->id, primary_replica->id);
+ dependent->cmds->apply_coloc_score(dependent, primary_replica,
+ colocation, true);
+
+ } else if (colocation->score >= INFINITY) {
+ crm_notice("Cannot pair %s with instance of %s",
+ dependent->id, primary->id);
+ pcmk__assign_resource(dependent, NULL, true);
+
+ } else {
+ pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
+ dependent->id, primary->id);
+ }
+
+ return;
+ }
+
+ get_bundle_variant_data(bundle_data, primary);
+ pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
+ colocation->id, dependent->id, primary->id, colocation->score);
+
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ if (colocation->score < INFINITY) {
+ replica->container->cmds->apply_coloc_score(dependent,
+ replica->container,
+ colocation, false);
+
+ } else {
+ pe_node_t *chosen = replica->container->fns->location(replica->container,
+ NULL, FALSE);
+
+ if ((chosen == NULL)
+ || is_set_recursive(replica->container, pe_rsc_block, TRUE)) {
+ continue;
+ }
+ if ((colocation->primary_role >= RSC_ROLE_PROMOTED)
+ && (replica->child == NULL)) {
+ continue;
+ }
+ if ((colocation->primary_role >= RSC_ROLE_PROMOTED)
+ && (replica->child->next_role < RSC_ROLE_PROMOTED)) {
+ continue;
+ }
+
+ pe_rsc_trace(primary, "Allowing %s: %s %d",
+ colocation->id, pe__node_name(chosen), chosen->weight);
+ allocated_primaries = g_list_prepend(allocated_primaries, chosen);
+ }
+ }
+
+ if (colocation->score >= INFINITY) {
+ node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE);
+ }
+ g_list_free(allocated_primaries);
+}
+
+// Bundle implementation of resource_alloc_functions_t:with_this_colocations()
+void
+pcmk__with_bundle_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+{
+ CRM_CHECK((rsc != NULL) && (rsc->variant == pe_container)
+ && (orig_rsc != NULL) && (list != NULL),
+ return);
+
+ if (rsc == orig_rsc) { // Colocations are wanted for bundle itself
+ pcmk__add_with_this_list(list, rsc->rsc_cons_lhs);
+
+ // Only the bundle replicas' containers get the bundle's constraints
+ } else if (pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) {
+ pcmk__add_collective_constraints(list, orig_rsc, rsc, true);
+ }
+}
+
+// Bundle implementation of resource_alloc_functions_t:this_with_colocations()
+void
+pcmk__bundle_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+{
+ CRM_CHECK((rsc != NULL) && (rsc->variant == pe_container)
+ && (orig_rsc != NULL) && (list != NULL),
+ return);
+
+ if (rsc == orig_rsc) { // Colocations are wanted for bundle itself
+ pcmk__add_this_with_list(list, rsc->rsc_cons);
+
+ // Only the bundle replicas' containers get the bundle's constraints
+ } else if (pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) {
+ pcmk__add_collective_constraints(list, orig_rsc, rsc, false);
+ }
+}
+
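+// Bundle implementation of resource_alloc_functions_t:action_flags()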
+enum pe_action_flags
+pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node)
+{
+ GList *containers = NULL;
+ enum pe_action_flags flags = 0;
+ pe__bundle_variant_data_t *data = NULL;
+
+ get_bundle_variant_data(data, action->rsc);
+    if (data->child) {
+        enum action_tasks task = get_complex_task(data->child, action->task);
+
+        switch (task) {
+ case no_action:
+ case action_notify:
+ case action_notified:
+ case action_promote:
+ case action_promoted:
+ case action_demote:
+ case action_demoted:
+ return pcmk__collective_action_flags(action,
+ data->child->children,
+ node);
+ default:
+ break;
+ }
+ }
+
+ containers = pe__bundle_containers(action->rsc);
+ flags = pcmk__collective_action_flags(action, containers, node);
+ g_list_free(containers);
+ return flags;
+}
+
+/*!
+ * \internal
+ * \brief Get containerized resource corresponding to a given bundle container
+ *
+ * \param[in] instance Collective instance that might be a bundle container
+ *
+ * \return Bundled resource instance inside \p instance if it is a bundle
+ * container instance, otherwise NULL
+ */
+const pe_resource_t *
+pcmk__get_rsc_in_container(const pe_resource_t *instance)
+{
+ const pe__bundle_variant_data_t *data = NULL;
+ const pe_resource_t *top = pe__const_top_resource(instance, true);
+
+ if ((top == NULL) || (top->variant != pe_container)) {
+ return NULL;
+ }
+ get_bundle_variant_data(data, top);
+
+ for (const GList *iter = data->replicas; iter != NULL; iter = iter->next) {
+ const pe__bundle_replica_t *replica = iter->data;
+
+ if (instance == replica->container) {
+ return replica->child;
+ }
+ }
+ return NULL;
+}
+
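+// Bundle implementation of resource_alloc_functions_t:apply_location()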
+void
+pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
+{
+ pe__bundle_variant_data_t *bundle_data = NULL;
+ get_bundle_variant_data(bundle_data, rsc);
+
+ pcmk__apply_location(rsc, constraint);
+
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ if (replica->container) {
+ replica->container->cmds->apply_location(replica->container,
+ constraint);
+ }
+ if (replica->ip) {
+ replica->ip->cmds->apply_location(replica->ip, constraint);
+ }
+ }
+
+ if (bundle_data->child
+ && ((constraint->role_filter == RSC_ROLE_UNPROMOTED)
+ || (constraint->role_filter == RSC_ROLE_PROMOTED))) {
+ bundle_data->child->cmds->apply_location(bundle_data->child,
+ constraint);
+ bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location,
+ constraint);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add a resource's actions to the transition graph
+ *
+ * \param[in,out] rsc Resource whose actions should be added
+ */
+void
+pcmk__bundle_expand(pe_resource_t *rsc)
+{
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ CRM_CHECK(rsc != NULL, return);
+
+ get_bundle_variant_data(bundle_data, rsc);
+
+ if (bundle_data->child) {
+ bundle_data->child->cmds->add_actions_to_graph(bundle_data->child);
+ }
+
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ CRM_ASSERT(replica);
+ if (replica->remote && replica->container
+ && pe__bundle_needs_remote_name(replica->remote)) {
+
+ /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that
+ * run pacemaker-remoted inside, without needing a separate IP for
+ * the container. This is done by configuring the inner remote's
+ * connection host as the magic string "#uname", then
+ * replacing it with the underlying host when needed.
+ */
+ xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']",
+ replica->remote->xml, LOG_ERR);
+ const char *calculated_addr = NULL;
+
+ // Replace the value in replica->remote->xml (if appropriate)
+ calculated_addr = pe__add_bundle_remote_name(replica->remote,
+ rsc->cluster,
+ nvpair, "value");
+ if (calculated_addr) {
+ /* Since this is for the bundle as a resource, and not any
+ * particular action, replace the value in the default
+ * parameters (not evaluated for node). create_graph_action()
+ * will grab it from there to replace it in node-evaluated
+ * parameters.
+ */
+ GHashTable *params = pe_rsc_params(replica->remote,
+ NULL, rsc->cluster);
+
+ g_hash_table_replace(params,
+ strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
+ strdup(calculated_addr));
+ } else {
+ /* The only way to get here is if the remote connection is
+ * neither currently running nor scheduled to run. That means we
+ * won't be doing any operations that require addr (only start
+ * requires it; we additionally use it to compare digests when
+ * unpacking status, promote, and migrate_from history, but
+ * that's already happened by this point).
+ */
+ crm_info("Unable to determine address for bundle %s remote connection",
+ rsc->id);
+ }
+ }
+ if (replica->ip) {
+ replica->ip->cmds->add_actions_to_graph(replica->ip);
+ }
+ if (replica->container) {
+ replica->container->cmds->add_actions_to_graph(replica->container);
+ }
+ if (replica->remote) {
+ replica->remote->cmds->add_actions_to_graph(replica->remote);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Schedule any probes needed for a resource on a node
+ *
+ * \param[in,out] rsc Resource to create probe for
+ * \param[in,out] node Node to create probe on
+ *
+ * \return true if any probe was created, otherwise false
+ */
+bool
+pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node)
+{
+ bool any_created = false;
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ CRM_CHECK(rsc != NULL, return false);
+
+ get_bundle_variant_data(bundle_data, rsc);
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ CRM_ASSERT(replica);
+ if ((replica->ip != NULL)
+ && replica->ip->cmds->create_probe(replica->ip, node)) {
+ any_created = true;
+ }
+ if ((replica->child != NULL) && (node->details == replica->node->details)
+ && replica->child->cmds->create_probe(replica->child, node)) {
+ any_created = true;
+ }
+ if ((replica->container != NULL)
+ && replica->container->cmds->create_probe(replica->container,
+ node)) {
+ any_created = true;
+
+            /* If we're limited to one replica per host (probably due to
+             * the lack of an IP range), then we don't want any of our
+             * peer containers starting until we've established that no
+             * other copies are already running.
+             *
+             * Partly this is to ensure that nreplicas_per_host is
+             * observed, but also to ensure that the containers don't
+             * fail to start because the necessary port mappings (which
+             * won't include an IP for uniqueness) are already taken.
+             */
+
+ for (GList *tIter = bundle_data->replicas;
+ tIter && (bundle_data->nreplicas_per_host == 1);
+ tIter = tIter->next) {
+ pe__bundle_replica_t *other = tIter->data;
+
+ if ((other != replica) && (other != NULL)
+ && (other->container != NULL)) {
+
+ pcmk__new_ordering(replica->container,
+ pcmk__op_key(replica->container->id, RSC_STATUS, 0),
+ NULL, other->container,
+ pcmk__op_key(other->container->id, RSC_START, 0),
+ NULL,
+ pe_order_optional|pe_order_same_node,
+ rsc->cluster);
+ }
+ }
+ }
+ if ((replica->container != NULL) && (replica->remote != NULL)
+ && replica->remote->cmds->create_probe(replica->remote, node)) {
+
+ /* Do not probe the remote resource until we know where the
+ * container is running. This is required for REMOTE_CONTAINER_HACK
+ * to correctly probe remote resources.
+ */
+ char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS,
+ 0);
+ pe_action_t *probe = find_first_action(replica->remote->actions,
+ probe_uuid, NULL, node);
+
+ free(probe_uuid);
+ if (probe != NULL) {
+ any_created = true;
+ crm_trace("Ordering %s probe on %s",
+ replica->remote->id, pe__node_name(node));
+ pcmk__new_ordering(replica->container,
+ pcmk__op_key(replica->container->id, RSC_START, 0),
+ NULL, replica->remote, NULL, probe,
+ pe_order_probe, rsc->cluster);
+ }
+ }
+ }
+ return any_created;
+}
+
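+// Bundle implementation of resource_alloc_functions_t:output_actions()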
+void
+pcmk__output_bundle_actions(pe_resource_t *rsc)
+{
+ pe__bundle_variant_data_t *bundle_data = NULL;
+
+ CRM_CHECK(rsc != NULL, return);
+
+ get_bundle_variant_data(bundle_data, rsc);
+ for (GList *gIter = bundle_data->replicas; gIter != NULL;
+ gIter = gIter->next) {
+ pe__bundle_replica_t *replica = gIter->data;
+
+ CRM_ASSERT(replica);
+ if (replica->ip != NULL) {
+ replica->ip->cmds->output_actions(replica->ip);
+ }
+ if (replica->container != NULL) {
+ replica->container->cmds->output_actions(replica->container);
+ }
+ if (replica->remote != NULL) {
+ replica->remote->cmds->output_actions(replica->remote);
+ }
+ if (replica->child != NULL) {
+ replica->child->cmds->output_actions(replica->child);
+ }
+ }
+}
+
+// Bundle implementation of resource_alloc_functions_t:add_utilization()
+void
+pcmk__bundle_add_utilization(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList *all_rscs,
+ GHashTable *utilization)
+{
+ pe__bundle_variant_data_t *bundle_data = NULL;
+ pe__bundle_replica_t *replica = NULL;
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ return;
+ }
+
+ get_bundle_variant_data(bundle_data, rsc);
+ if (bundle_data->replicas == NULL) {
+ return;
+ }
+
+ /* All bundle replicas are identical, so using the utilization of the first
+ * is sufficient for any. Only the implicit container resource can have
+ * utilization values.
+ */
+ replica = (pe__bundle_replica_t *) bundle_data->replicas->data;
+ if (replica->container != NULL) {
+ replica->container->cmds->add_utilization(replica->container, orig_rsc,
+ all_rscs, utilization);
+ }
+}
+
+// Bundle implementation of resource_alloc_functions_t:shutdown_lock()
+void
+pcmk__bundle_shutdown_lock(pe_resource_t *rsc)
+{
+ return; // Bundles currently don't support shutdown locks
+}
diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c
new file mode 100644
index 0000000..934f512
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_clone.c
@@ -0,0 +1,643 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Assign a clone resource's instances to nodes
+ *
+ * \param[in,out] rsc Clone resource to assign
+ * \param[in] prefer Node to prefer, if all else is equal
+ *
+ * \return NULL (clones are not assigned to a single node)
+ */
+pe_node_t *
+pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer)
+{
+ CRM_ASSERT(pe_rsc_is_clone(rsc));
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ return NULL; // Assignment has already been done
+ }
+
+ // Detect assignment loops
+ if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
+ pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
+ return NULL;
+ }
+ pe__set_resource_flags(rsc, pe_rsc_allocating);
+
+ // If this clone is promotable, consider nodes' promotion scores
+ if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
+ pcmk__add_promotion_scores(rsc);
+ }
+
+ /* If this clone is colocated with any other resources, assign those first.
+ * Since the this_with_colocations() method boils down to a copy of rsc_cons
+ * for clones, we can use that here directly for efficiency.
+ */
+ for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) {
+ pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data;
+
+ pe_rsc_trace(rsc, "%s: Assigning colocation %s primary %s first",
+ rsc->id, constraint->id, constraint->primary->id);
+ constraint->primary->cmds->assign(constraint->primary, prefer);
+ }
+
+ /* If any resources are colocated with this one, consider their preferences.
+ * Because the with_this_colocations() method boils down to a copy of
+ * rsc_cons_lhs for clones, we can use that here directly for efficiency.
+ */
+ g_list_foreach(rsc->rsc_cons_lhs, pcmk__add_dependent_scores, rsc);
+
+ pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
+ rsc, __func__, rsc->allowed_nodes, rsc->cluster);
+
+ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance);
+ pcmk__assign_instances(rsc, rsc->children, pe__clone_max(rsc),
+ pe__clone_node_max(rsc));
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
+ pcmk__set_instance_roles(rsc);
+ }
+
+ pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating);
+ pe_rsc_trace(rsc, "Assigned clone %s", rsc->id);
+ return NULL;
+}
+
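+/*!
+ * \internal
+ * \brief Get a resource's sole non-optional action of a given type
+ *
+ * \param[in,out] rsc   Resource to check
+ * \param[in]     task  Action name to look for
+ *
+ * \return The only non-optional \p task action in \p rsc's action list,
+ *         or NULL if there are none or more than one
+ */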
+static pe_action_t *
+find_rsc_action(pe_resource_t *rsc, const char *task)
+{
+ pe_action_t *match = NULL;
+ GList *actions = pe__resource_actions(rsc, NULL, task, FALSE);
+
+ for (GList *item = actions; item != NULL; item = item->next) {
+ pe_action_t *op = (pe_action_t *) item->data;
+
+ if (!pcmk_is_set(op->flags, pe_action_optional)) {
+ if (match != NULL) {
+ // More than one match, don't return any
+ match = NULL;
+ break;
+ }
+ match = op;
+ }
+ }
+ g_list_free(actions);
+ return match;
+}
+
+/*!
+ * \internal
+ * \brief Order starts and stops of an ordered clone's instances
+ *
+ * \param[in,out] rsc Clone resource
+ */
+static void
+order_instance_starts_stops(pe_resource_t *rsc)
+{
+ pe_action_t *last_stop = NULL;
+ pe_action_t *last_start = NULL;
+
+ // Instances must be ordered by ascending instance number, so sort them
+ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
+
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child = (pe_resource_t *) iter->data;
+ pe_action_t *action = NULL;
+
+ // Order this instance's stop after previous instance's stop
+ // @TODO: Should instances be stopped in reverse order instead?
+ action = find_rsc_action(child, RSC_STOP);
+ if (action != NULL) {
+ if (last_stop != NULL) {
+ order_actions(action, last_stop, pe_order_optional);
+ }
+ last_stop = action;
+ }
+
+ // Order this instance's start after previous instance's start
+ action = find_rsc_action(child, RSC_START);
+ if (action != NULL) {
+ if (last_start != NULL) {
+ order_actions(last_start, action, pe_order_optional);
+ }
+ last_start = action;
+ }
+ }
+}
+
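+/*!
+ * \internal
+ * \brief Create all actions needed for a given clone resource
+ *
+ * \param[in,out] rsc  Clone resource to create actions for
+ */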
+void
+clone_create_actions(pe_resource_t *rsc)
+{
+ pe_rsc_debug(rsc, "Creating actions for clone %s", rsc->id);
+ pcmk__create_instance_actions(rsc, rsc->children);
+ if (pe__clone_is_ordered(rsc)) {
+ order_instance_starts_stops(rsc);
+ }
+ if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
+ pcmk__create_promotable_actions(rsc);
+ }
+}
+
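+/*!
+ * \internal
+ * \brief Create any implicit orderings needed for a clone and its instances
+ *
+ * \param[in,out] rsc  Clone resource to create implicit orderings for
+ */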
+void
+clone_internal_constraints(pe_resource_t *rsc)
+{
+ pe_resource_t *last_rsc = NULL;
+ GList *gIter;
+ bool ordered = pe__clone_is_ordered(rsc);
+
+ pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id);
+ pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
+ pe_order_optional);
+ pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
+ pe_order_runnable_left);
+ pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
+ pe_order_runnable_left);
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
+ pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
+ pe_order_optional);
+ pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
+ pe_order_runnable_left);
+ }
+
+ if (ordered) {
+        /* We have to maintain a consistent sorted child list when building order constraints */
+ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
+ }
+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+ child_rsc->cmds->internal_constraints(child_rsc);
+
+ pcmk__order_starts(rsc, child_rsc,
+ pe_order_runnable_left|pe_order_implies_first_printed);
+ pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED,
+ pe_order_implies_then_printed);
+ if (ordered && (last_rsc != NULL)) {
+ pcmk__order_starts(last_rsc, child_rsc, pe_order_optional);
+ }
+
+ pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed);
+ pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED,
+ pe_order_implies_then_printed);
+ if (ordered && (last_rsc != NULL)) {
+ pcmk__order_stops(child_rsc, last_rsc, pe_order_optional);
+ }
+
+ last_rsc = child_rsc;
+ }
+ if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
+ pcmk__order_promotable_instances(rsc);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Apply a colocation's score to node weights or resource priority
+ *
+ * Given a colocation constraint, apply its score to the dependent's
+ * allowed node weights (if we are still placing resources) or priority (if
+ * we are choosing promotable clone instance roles).
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint to apply
+ * \param[in] for_dependent true if called on behalf of dependent
+ */
+void
+pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent)
+{
+ GList *gIter = NULL;
+ gboolean do_interleave = FALSE;
+ const char *interleave_s = NULL;
+
+    /* This should never be called for the clone itself as a dependent.
+     * Instead, we add its colocation constraints to its instances and call
+     * apply_coloc_score() for the instances as dependents.
+     */
+ CRM_ASSERT(!for_dependent);
+
+ CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
+ return);
+ CRM_CHECK(dependent->variant == pe_native, return);
+
+ pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
+ colocation->id, dependent->id, primary->id, colocation->score);
+
+ if (pcmk_is_set(primary->flags, pe_rsc_promotable)) {
+ if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ // We haven't placed the primary yet, so we can't apply colocation
+ pe_rsc_trace(primary, "%s is still provisional", primary->id);
+ return;
+
+ } else if (colocation->primary_role == RSC_ROLE_UNKNOWN) {
+            // This isn't a role-specific colocation, so handle normally
+ pe_rsc_trace(primary, "Handling %s as a clone colocation",
+ colocation->id);
+
+ } else if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
+ // We're placing the dependent
+ pcmk__update_dependent_with_promotable(primary, dependent,
+ colocation);
+ return;
+
+ } else if (colocation->dependent_role == RSC_ROLE_PROMOTED) {
+ // We're choosing roles for the dependent
+ pcmk__update_promotable_dependent_priority(primary, dependent,
+ colocation);
+ return;
+ }
+ }
+
+ // Only the dependent needs to be marked for interleave
+ interleave_s = g_hash_table_lookup(colocation->dependent->meta,
+ XML_RSC_ATTR_INTERLEAVE);
+ if (crm_is_true(interleave_s)
+ && (colocation->dependent->variant > pe_group)) {
+ /* @TODO Do we actually care about multiple primary copies sharing a
+ * dependent copy anymore?
+ */
+ if (copies_per_node(colocation->dependent) != copies_per_node(colocation->primary)) {
+ pcmk__config_err("Cannot interleave %s and %s because they do not "
+ "support the same number of instances per node",
+ colocation->dependent->id,
+ colocation->primary->id);
+
+ } else {
+ do_interleave = TRUE;
+ }
+ }
+
+ if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ pe_rsc_trace(primary, "%s is still provisional", primary->id);
+ return;
+
+ } else if (do_interleave) {
+ pe_resource_t *primary_instance = NULL;
+
+ primary_instance = pcmk__find_compatible_instance(dependent, primary,
+ RSC_ROLE_UNKNOWN,
+ false);
+ if (primary_instance != NULL) {
+ pe_rsc_debug(primary, "Pairing %s with %s",
+ dependent->id, primary_instance->id);
+ dependent->cmds->apply_coloc_score(dependent, primary_instance,
+ colocation, true);
+
+ } else if (colocation->score >= INFINITY) {
+ crm_notice("Cannot pair %s with instance of %s",
+ dependent->id, primary->id);
+ pcmk__assign_resource(dependent, NULL, true);
+
+ } else {
+ pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
+ dependent->id, primary->id);
+ }
+
+ return;
+
+ } else if (colocation->score >= INFINITY) {
+ GList *affected_nodes = NULL;
+
+ gIter = primary->children;
+ for (; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+ pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
+
+ if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
+ pe_rsc_trace(primary, "Allowing %s: %s %d",
+ colocation->id, pe__node_name(chosen),
+ chosen->weight);
+ affected_nodes = g_list_prepend(affected_nodes, chosen);
+ }
+ }
+
+ node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE);
+ g_list_free(affected_nodes);
+ return;
+ }
+
+ gIter = primary->children;
+ for (; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+ child_rsc->cmds->apply_coloc_score(dependent, child_rsc, colocation,
+ false);
+ }
+}
+
+// Clone implementation of resource_alloc_functions_t:with_this_colocations()
+void
+pcmk__with_clone_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+{
+ CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
+
+ if (rsc == orig_rsc) { // Colocations are wanted for clone itself
+ pcmk__add_with_this_list(list, rsc->rsc_cons_lhs);
+ } else {
+ pcmk__add_collective_constraints(list, orig_rsc, rsc, true);
+ }
+}
+
+// Clone implementation of resource_alloc_functions_t:this_with_colocations()
+void
+pcmk__clone_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+{
+ CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
+
+ if (rsc == orig_rsc) { // Colocations are wanted for clone itself
+ pcmk__add_this_with_list(list, rsc->rsc_cons);
+ } else {
+ pcmk__add_collective_constraints(list, orig_rsc, rsc, false);
+ }
+}
+
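+// Clone implementation of resource_alloc_functions_t:action_flags()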
+enum pe_action_flags
+clone_action_flags(pe_action_t *action, const pe_node_t *node)
+{
+ return pcmk__collective_action_flags(action, action->rsc->children, node);
+}
+
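+// Clone implementation of resource_alloc_functions_t:apply_location()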
+void
+clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
+{
+ GList *gIter = rsc->children;
+
+ pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id);
+
+ pcmk__apply_location(rsc, constraint);
+
+ for (; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+ child_rsc->cmds->apply_location(child_rsc, constraint);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add a resource's actions to the transition graph
+ *
+ * \param[in,out] rsc Resource whose actions should be added
+ */
+void
+clone_expand(pe_resource_t *rsc)
+{
+ GList *gIter = NULL;
+
+ g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL);
+
+ pe__create_clone_notifications(rsc);
+
+    /* Now that the notifications have been created, we can expand the children */
+
+ gIter = rsc->children;
+ for (; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+ child_rsc->cmds->add_actions_to_graph(child_rsc);
+ }
+
+ pcmk__add_rsc_actions_to_graph(rsc);
+
+    /* The notifications are in the graph now, so we can destroy the notify_data */
+ pe__free_clone_notification_data(rsc);
+}
+
+// Check whether a resource or any of its children is known on node
+static bool
+rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ if (rsc->children) {
+ for (GList *child_iter = rsc->children; child_iter != NULL;
+ child_iter = child_iter->next) {
+
+ pe_resource_t *child = (pe_resource_t *) child_iter->data;
+
+ if (rsc_known_on(child, node)) {
+ return TRUE;
+ }
+ }
+
+ } else if (rsc->known_on) {
+ GHashTableIter iter;
+ pe_node_t *known_node = NULL;
+
+ g_hash_table_iter_init(&iter, rsc->known_on);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
+ if (node->details == known_node->details) {
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
+// Look for an instance of clone that is known on node
+static pe_resource_t *
+find_instance_on(const pe_resource_t *clone, const pe_node_t *node)
+{
+ for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child = (pe_resource_t *) gIter->data;
+
+ if (rsc_known_on(child, node)) {
+ return child;
+ }
+ }
+ return NULL;
+}
+
+// For anonymous clones, only a single instance needs to be probed
+static bool
+probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node,
+ pe_working_set_t *data_set)
+{
+ // First, check if we probed an instance on this node last time
+ pe_resource_t *child = find_instance_on(rsc, node);
+
+ // Otherwise, check if we plan to start an instance on this node
+ if (child == NULL) {
+ for (GList *child_iter = rsc->children; child_iter && !child;
+ child_iter = child_iter->next) {
+
+ pe_node_t *local_node = NULL;
+ pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data;
+
+ if (child_rsc) { /* make clang analyzer happy */
+ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
+ if (local_node && (local_node->details == node->details)) {
+ child = child_rsc;
+ }
+ }
+ }
+ }
+
+ // Otherwise, use the first clone instance
+ if (child == NULL) {
+ child = rsc->children->data;
+ }
+ CRM_ASSERT(child);
+ return child->cmds->create_probe(child, node);
+}
+
+/*!
+ * \internal
+ * \brief Schedule any probes needed for a resource on a node
+ *
+ * \param[in,out] rsc Resource to create probe for
+ * \param[in,out] node Node to create probe on
+ *
+ * \return true if any probe was created, otherwise false
+ */
+bool
+clone_create_probe(pe_resource_t *rsc, pe_node_t *node)
+{
+ CRM_ASSERT(rsc);
+
+ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
+ if (rsc->children == NULL) {
+ pe_warn("Clone %s has no children", rsc->id);
+ return false;
+ }
+
+ if (rsc->exclusive_discover) {
+ pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
+ if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) {
+            /* Exclusive discovery is enabled, and this node is not marked
+             * as a node this resource should be discovered on.
+             *
+             * Remove the node from allowed_nodes so that the notification
+             * contains only nodes that we might ever run on.
+             */
+ g_hash_table_remove(rsc->allowed_nodes, node->details->id);
+
+ /* Bit of a shortcut - might as well take it */
+ return false;
+ }
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
+ return pcmk__probe_resource_list(rsc->children, node);
+ } else {
+ return probe_anonymous_clone(rsc, node, rsc->cluster);
+ }
+}
+
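+/*!
+ * \internal
+ * \brief Add a clone's meta-attributes (such as clone-max) to XML
+ *
+ * \param[in]     rsc  Clone resource whose meta-attributes should be added
+ * \param[in,out] xml  XML to add meta-attributes to
+ */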
+void
+clone_append_meta(const pe_resource_t *rsc, xmlNode *xml)
+{
+ char *name = NULL;
+
+ name = crm_meta_name(XML_RSC_ATTR_UNIQUE);
+ crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique));
+ free(name);
+
+ name = crm_meta_name(XML_RSC_ATTR_NOTIFY);
+ crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify));
+ free(name);
+
+ name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
+ crm_xml_add_int(xml, name, pe__clone_max(rsc));
+ free(name);
+
+ name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
+ crm_xml_add_int(xml, name, pe__clone_node_max(rsc));
+ free(name);
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
+ int promoted_max = pe__clone_promoted_max(rsc);
+ int promoted_node_max = pe__clone_promoted_node_max(rsc);
+
+ name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX);
+ crm_xml_add_int(xml, name, promoted_max);
+ free(name);
+
+ name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX);
+ crm_xml_add_int(xml, name, promoted_node_max);
+ free(name);
+
+ /* @COMPAT Maintain backward compatibility with resource agents that
+ * expect the old names (deprecated since 2.0.0).
+ */
+ name = crm_meta_name(PCMK_XA_PROMOTED_MAX_LEGACY);
+ crm_xml_add_int(xml, name, promoted_max);
+ free(name);
+
+ name = crm_meta_name(PCMK_XA_PROMOTED_NODE_MAX_LEGACY);
+ crm_xml_add_int(xml, name, promoted_node_max);
+ free(name);
+ }
+}
+
+// Clone implementation of resource_alloc_functions_t:add_utilization()
+void
+pcmk__clone_add_utilization(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList *all_rscs,
+ GHashTable *utilization)
+{
+ bool existing = false;
+ pe_resource_t *child = NULL;
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ return;
+ }
+
+ // Look for any child already existing in the list
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ child = (pe_resource_t *) iter->data;
+ if (g_list_find(all_rscs, child)) {
+ existing = true; // Keep checking remaining children
+ } else {
+ // If this is a clone of a group, look for group's members
+ for (GList *member_iter = child->children; member_iter != NULL;
+ member_iter = member_iter->next) {
+
+ pe_resource_t *member = (pe_resource_t *) member_iter->data;
+
+ if (g_list_find(all_rscs, member) != NULL) {
+ // Add *child's* utilization, not group member's
+ child->cmds->add_utilization(child, orig_rsc, all_rscs,
+ utilization);
+ existing = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!existing && (rsc->children != NULL)) {
+ // If nothing was found, still add first child's utilization
+ child = (pe_resource_t *) rsc->children->data;
+
+ child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization);
+ }
+}
+
+// Clone implementation of resource_alloc_functions_t:shutdown_lock()
+void
+pcmk__clone_shutdown_lock(pe_resource_t *rsc)
+{
+ return; // Clones currently don't support shutdown locks
+}
diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c
new file mode 100644
index 0000000..eeef4f1
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_colocation.c
@@ -0,0 +1,1595 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include "crm/common/util.h"
+#include "crm/common/xml_internal.h"
+#include "crm/msg_xml.h"
+#include "libpacemaker_private.h"
+
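+// Find a constraint set's resource by ID, logging an error and returning if missing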
+#define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \
+ __rsc = pcmk__find_constraint_resource(data_set->resources, __name); \
+ if (__rsc == NULL) { \
+ pcmk__config_err("%s: No resource found for %s", __set, __name); \
+ return; \
+ } \
+ } while(0)
+
+// Used to temporarily mark a node as unusable
+#define INFINITY_HACK (INFINITY * -100)
+
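+/*!
+ * \internal
+ * \brief Compare two colocation constraints by dependent resource
+ *
+ * Sorts by descending dependent priority, then descending dependent variant
+ * (so clones sort before groups and primitives, and promotable clones before
+ * non-promotable ones), and finally by dependent ID.
+ *
+ * \param[in] a  First colocation constraint to compare
+ * \param[in] b  Second colocation constraint to compare
+ *
+ * \return Negative if \p a should sort first, positive if \p b should
+ */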
+static gint
+cmp_dependent_priority(gconstpointer a, gconstpointer b)
+{
+ const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a;
+ const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b;
+
+ if (a == NULL) {
+ return 1;
+ }
+ if (b == NULL) {
+ return -1;
+ }
+
+ CRM_ASSERT(rsc_constraint1->dependent != NULL);
+ CRM_ASSERT(rsc_constraint1->primary != NULL);
+
+ if (rsc_constraint1->dependent->priority > rsc_constraint2->dependent->priority) {
+ return -1;
+ }
+
+ if (rsc_constraint1->dependent->priority < rsc_constraint2->dependent->priority) {
+ return 1;
+ }
+
+ /* Process clones before primitives and groups */
+ if (rsc_constraint1->dependent->variant > rsc_constraint2->dependent->variant) {
+ return -1;
+ }
+ if (rsc_constraint1->dependent->variant < rsc_constraint2->dependent->variant) {
+ return 1;
+ }
+
+ /* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable
+ * clones (probably unnecessary, but avoids having to update regression
+ * tests)
+ */
+ if (rsc_constraint1->dependent->variant == pe_clone) {
+ if (pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable)
+ && !pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) {
+ return -1;
+ } else if (!pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable)
+ && pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) {
+ return 1;
+ }
+ }
+
+ return strcmp(rsc_constraint1->dependent->id,
+ rsc_constraint2->dependent->id);
+}
+
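+/*!
+ * \internal
+ * \brief Compare two colocation constraints by primary resource
+ *
+ * Applies the same ordering as cmp_dependent_priority(), but based on each
+ * colocation's primary resource rather than its dependent.
+ *
+ * \param[in] a  First colocation constraint to compare
+ * \param[in] b  Second colocation constraint to compare
+ *
+ * \return Negative if \p a should sort first, positive if \p b should
+ */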
+static gint
+cmp_primary_priority(gconstpointer a, gconstpointer b)
+{
+ const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a;
+ const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b;
+
+ if (a == NULL) {
+ return 1;
+ }
+ if (b == NULL) {
+ return -1;
+ }
+
+ CRM_ASSERT(rsc_constraint1->dependent != NULL);
+ CRM_ASSERT(rsc_constraint1->primary != NULL);
+
+ if (rsc_constraint1->primary->priority > rsc_constraint2->primary->priority) {
+ return -1;
+ }
+
+ if (rsc_constraint1->primary->priority < rsc_constraint2->primary->priority) {
+ return 1;
+ }
+
+ /* Process clones before primitives and groups */
+ if (rsc_constraint1->primary->variant > rsc_constraint2->primary->variant) {
+ return -1;
+ } else if (rsc_constraint1->primary->variant < rsc_constraint2->primary->variant) {
+ return 1;
+ }
+
+ /* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable
+ * clones (probably unnecessary, but avoids having to update regression
+ * tests)
+ */
+ if (rsc_constraint1->primary->variant == pe_clone) {
+ if (pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable)
+ && !pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) {
+ return -1;
+ } else if (!pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable)
+ && pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) {
+ return 1;
+ }
+ }
+
+ return strcmp(rsc_constraint1->primary->id, rsc_constraint2->primary->id);
+}
+
+/*!
+ * \internal
+ * \brief Add a "this with" colocation constraint to a sorted list
+ *
+ * \param[in,out] list List of constraints to add \p colocation to
+ * \param[in] colocation Colocation constraint to add to \p list
+ *
+ * \note The list will be sorted using cmp_primary_priority().
+ */
+void
+pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation)
+{
+ CRM_ASSERT((list != NULL) && (colocation != NULL));
+
+ crm_trace("Adding colocation %s (%s with %s%s%s @%d) "
+ "to 'this with' list",
+ colocation->id, colocation->dependent->id,
+ colocation->primary->id,
+ (colocation->node_attribute == NULL)? "" : " using ",
+ pcmk__s(colocation->node_attribute, ""),
+ colocation->score);
+ *list = g_list_insert_sorted(*list, (gpointer) colocation,
+ cmp_primary_priority);
+}
+
+/*!
+ * \internal
+ * \brief Add a list of "this with" colocation constraints to a list
+ *
+ * \param[in,out] list List of constraints to add \p addition to
+ * \param[in] addition List of colocation constraints to add to \p list
+ *
+ * \note The lists must be pre-sorted by cmp_primary_priority().
+ */
+void
+pcmk__add_this_with_list(GList **list, GList *addition)
+{
+ CRM_CHECK((list != NULL), return);
+
+ if (*list == NULL) { // Trivial case for efficiency
+ crm_trace("Copying %u 'this with' colocations to new list",
+ g_list_length(addition));
+ *list = g_list_copy(addition);
+ } else {
+ while (addition != NULL) {
+ pcmk__add_this_with(list, addition->data);
+ addition = addition->next;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add a "with this" colocation constraint to a sorted list
+ *
+ * \param[in,out] list List of constraints to add \p colocation to
+ * \param[in] colocation Colocation constraint to add to \p list
+ *
+ * \note The list will be sorted using cmp_dependent_priority().
+ */
+void
+pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation)
+{
+ CRM_ASSERT((list != NULL) && (colocation != NULL));
+
+ crm_trace("Adding colocation %s (%s with %s%s%s @%d) "
+ "to 'with this' list",
+ colocation->id, colocation->dependent->id,
+ colocation->primary->id,
+ (colocation->node_attribute == NULL)? "" : " using ",
+ pcmk__s(colocation->node_attribute, ""),
+ colocation->score);
+ *list = g_list_insert_sorted(*list, (gpointer) colocation,
+ cmp_dependent_priority);
+}
+
+/*!
+ * \internal
+ * \brief Add a list of "with this" colocation constraints to a list
+ *
+ * \param[in,out] list List of constraints to add \p addition to
+ * \param[in] addition List of colocation constraints to add to \p list
+ *
+ * \note The lists must be pre-sorted by cmp_dependent_priority().
+ */
+void
+pcmk__add_with_this_list(GList **list, GList *addition)
+{
+ CRM_CHECK((list != NULL), return);
+
+ if (*list == NULL) { // Trivial case for efficiency
+ crm_trace("Copying %u 'with this' colocations to new list",
+ g_list_length(addition));
+ *list = g_list_copy(addition);
+ } else {
+ while (addition != NULL) {
+ pcmk__add_with_this(list, addition->data);
+ addition = addition->next;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add orderings necessary for an anti-colocation constraint
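+ *
+ * For example, if \p first_role is the promoted role and \p then_role is
+ * the started role, this orders \p first_rsc's demote before \p then_rsc's
+ * start, so the two resources do not hold their anti-colocated roles at
+ * the same time.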
+ *
+ * \param[in,out] first_rsc One resource in an anti-colocation
+ * \param[in] first_role Anti-colocation role of \p first_rsc
+ * \param[in] then_rsc Other resource in the anti-colocation
+ * \param[in] then_role Anti-colocation role of \p then_rsc
+ */
+static void
+anti_colocation_order(pe_resource_t *first_rsc, int first_role,
+ pe_resource_t *then_rsc, int then_role)
+{
+ const char *first_tasks[] = { NULL, NULL };
+ const char *then_tasks[] = { NULL, NULL };
+
+ /* Actions to make first_rsc lose first_role */
+ if (first_role == RSC_ROLE_PROMOTED) {
+ first_tasks[0] = CRMD_ACTION_DEMOTE;
+
+ } else {
+ first_tasks[0] = CRMD_ACTION_STOP;
+
+ if (first_role == RSC_ROLE_UNPROMOTED) {
+ first_tasks[1] = CRMD_ACTION_PROMOTE;
+ }
+ }
+
+ /* Actions to make then_rsc gain then_role */
+ if (then_role == RSC_ROLE_PROMOTED) {
+ then_tasks[0] = CRMD_ACTION_PROMOTE;
+
+ } else {
+ then_tasks[0] = CRMD_ACTION_START;
+
+ if (then_role == RSC_ROLE_UNPROMOTED) {
+ then_tasks[1] = CRMD_ACTION_DEMOTE;
+ }
+ }
+
+ for (int first_lpc = 0;
+ (first_lpc <= 1) && (first_tasks[first_lpc] != NULL); first_lpc++) {
+
+ for (int then_lpc = 0;
+ (then_lpc <= 1) && (then_tasks[then_lpc] != NULL); then_lpc++) {
+
+ pcmk__order_resource_actions(first_rsc, first_tasks[first_lpc],
+ then_rsc, then_tasks[then_lpc],
+ pe_order_anti_colocation);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add a new colocation constraint to a cluster working set
+ *
+ * \param[in] id XML ID for this constraint
+ * \param[in] node_attr Colocate by this attribute (NULL for #uname)
+ * \param[in] score Constraint score
+ * \param[in,out] dependent Resource to be colocated
+ * \param[in,out] primary Resource to colocate \p dependent with
+ * \param[in] dependent_role Current role of \p dependent
+ * \param[in] primary_role Current role of \p primary
+ * \param[in] influence Whether colocation constraint has influence
+ * \param[in,out] data_set Cluster working set to add constraint to
+ */
+void
+pcmk__new_colocation(const char *id, const char *node_attr, int score,
+ pe_resource_t *dependent, pe_resource_t *primary,
+ const char *dependent_role, const char *primary_role,
+ bool influence, pe_working_set_t *data_set)
+{
+ pcmk__colocation_t *new_con = NULL;
+
+ if (score == 0) {
+ crm_trace("Ignoring colocation '%s' because score is 0", id);
+ return;
+ }
+ if ((dependent == NULL) || (primary == NULL)) {
+ pcmk__config_err("Ignoring colocation '%s' because resource "
+ "does not exist", id);
+ return;
+ }
+
+ new_con = calloc(1, sizeof(pcmk__colocation_t));
+ if (new_con == NULL) {
+ return;
+ }
+
+ if (pcmk__str_eq(dependent_role, RSC_ROLE_STARTED_S,
+ pcmk__str_null_matches|pcmk__str_casei)) {
+ dependent_role = RSC_ROLE_UNKNOWN_S;
+ }
+
+ if (pcmk__str_eq(primary_role, RSC_ROLE_STARTED_S,
+ pcmk__str_null_matches|pcmk__str_casei)) {
+ primary_role = RSC_ROLE_UNKNOWN_S;
+ }
+
+ new_con->id = id;
+ new_con->dependent = dependent;
+ new_con->primary = primary;
+ new_con->score = score;
+ new_con->dependent_role = text2role(dependent_role);
+ new_con->primary_role = text2role(primary_role);
+ new_con->node_attribute = node_attr;
+ new_con->influence = influence;
+
+ if (node_attr == NULL) {
+ node_attr = CRM_ATTR_UNAME;
+ }
+
+ pe_rsc_trace(dependent, "%s ==> %s (%s %d)",
+ dependent->id, primary->id, node_attr, score);
+
+ pcmk__add_this_with(&(dependent->rsc_cons), new_con);
+ pcmk__add_with_this(&(primary->rsc_cons_lhs), new_con);
+
+ data_set->colocation_constraints = g_list_append(data_set->colocation_constraints,
+ new_con);
+
+ if (score <= -INFINITY) {
+ anti_colocation_order(dependent, new_con->dependent_role, primary,
+ new_con->primary_role);
+ anti_colocation_order(primary, new_con->primary_role, dependent,
+ new_con->dependent_role);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return the boolean influence corresponding to configuration
+ *
+ * \param[in] coloc_id Colocation XML ID (for error logging)
+ * \param[in] rsc Resource involved in constraint (for default)
+ * \param[in] influence_s String value of influence option
+ *
+ * \return true if string evaluates true, false if string evaluates false,
+ * or value of resource's critical option if string is NULL or invalid
+ */
+static bool
+unpack_influence(const char *coloc_id, const pe_resource_t *rsc,
+ const char *influence_s)
+{
+ if (influence_s != NULL) {
+ int influence_i = 0;
+
+ if (crm_str_to_boolean(influence_s, &influence_i) < 0) {
+ pcmk__config_err("Constraint '%s' has invalid value for "
+ XML_COLOC_ATTR_INFLUENCE " (using default)",
+ coloc_id);
+ } else {
+ return (influence_i != 0);
+ }
+ }
+ return pcmk_is_set(rsc->flags, pe_rsc_critical);
+}
+
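+/*!
+ * \internal
+ * \brief Unpack a colocation constraint's resource set
+ *
+ * \param[in,out] set          XML for resource set to unpack
+ * \param[in]     score        Constraint score to use if set has none
+ * \param[in]     coloc_id     ID of enclosing constraint (for logging)
+ * \param[in]     influence_s  String value of constraint's influence option
+ * \param[in,out] data_set     Cluster working set to add colocations to
+ */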
+static void
+unpack_colocation_set(xmlNode *set, int score, const char *coloc_id,
+ const char *influence_s, pe_working_set_t *data_set)
+{
+ xmlNode *xml_rsc = NULL;
+ pe_resource_t *with = NULL;
+ pe_resource_t *resource = NULL;
+ const char *set_id = ID(set);
+ const char *role = crm_element_value(set, "role");
+ const char *ordering = crm_element_value(set, "ordering");
+ int local_score = score;
+ bool sequential = false;
+
+ const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE);
+
+ if (score_s) {
+ local_score = char2score(score_s);
+ }
+ if (local_score == 0) {
+ crm_trace("Ignoring colocation '%s' for set '%s' because score is 0",
+ coloc_id, set_id);
+ return;
+ }
+
+ if (ordering == NULL) {
+ ordering = "group";
+ }
+
+ if (pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok && !sequential) {
+ return;
+
+ } else if ((local_score > 0)
+ && pcmk__str_eq(ordering, "group", pcmk__str_casei)) {
+ for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc));
+ if (with != NULL) {
+ pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id);
+ pcmk__new_colocation(set_id, NULL, local_score, resource,
+ with, role, role,
+ unpack_influence(coloc_id, resource,
+ influence_s), data_set);
+ }
+ with = resource;
+ }
+
+ } else if (local_score > 0) {
+ pe_resource_t *last = NULL;
+
+ for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc));
+ if (last != NULL) {
+ pe_rsc_trace(resource, "Colocating %s with %s",
+ last->id, resource->id);
+ pcmk__new_colocation(set_id, NULL, local_score, last,
+ resource, role, role,
+ unpack_influence(coloc_id, last,
+ influence_s), data_set);
+ }
+
+ last = resource;
+ }
+
+ } else {
+        /* Anti-colocating with every prior resource is the only way to
+         * ensure the intuitive result (i.e., that no member of the set
+         * can run with any other member of the set).
+         */
+
+ for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ xmlNode *xml_rsc_with = NULL;
+ bool influence = true;
+
+ EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc));
+ influence = unpack_influence(coloc_id, resource, influence_s);
+
+ for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc_with != NULL;
+ xml_rsc_with = crm_next_same_xml(xml_rsc_with)) {
+
+ if (pcmk__str_eq(resource->id, ID(xml_rsc_with),
+ pcmk__str_casei)) {
+ break;
+ }
+ EXPAND_CONSTRAINT_IDREF(set_id, with, ID(xml_rsc_with));
+ pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id,
+ with->id);
+ pcmk__new_colocation(set_id, NULL, local_score,
+ resource, with, role, role,
+ influence, data_set);
+ }
+ }
+ }
+}
+
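+/*!
+ * \internal
+ * \brief Colocate the members of one resource set with another set's members
+ *
+ * \param[in]     id           ID of enclosing constraint (for logging)
+ * \param[in,out] set1         XML for set whose members are the dependents
+ * \param[in,out] set2         XML for set whose members are the primaries
+ * \param[in]     score        Constraint score
+ * \param[in]     influence_s  String value of constraint's influence option
+ * \param[in,out] data_set     Cluster working set to add colocations to
+ */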
+static void
+colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score,
+ const char *influence_s, pe_working_set_t *data_set)
+{
+ xmlNode *xml_rsc = NULL;
+ pe_resource_t *rsc_1 = NULL;
+ pe_resource_t *rsc_2 = NULL;
+
+ const char *role_1 = crm_element_value(set1, "role");
+ const char *role_2 = crm_element_value(set2, "role");
+
+ int rc = pcmk_rc_ok;
+ bool sequential = false;
+
+ if (score == 0) {
+ crm_trace("Ignoring colocation '%s' between sets because score is 0",
+ id);
+ return;
+ }
+
+ rc = pcmk__xe_get_bool_attr(set1, "sequential", &sequential);
+ if (rc != pcmk_rc_ok || sequential) {
+ // Get the first one
+ xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ if (xml_rsc != NULL) {
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+ }
+ }
+
+ rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential);
+ if (rc != pcmk_rc_ok || sequential) {
+ // Get the last one
+ const char *rid = NULL;
+
+ for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ rid = ID(xml_rsc);
+ }
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid);
+ }
+
+ if ((rsc_1 != NULL) && (rsc_2 != NULL)) {
+ pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2,
+ unpack_influence(id, rsc_1, influence_s),
+ data_set);
+
+ } else if (rsc_1 != NULL) {
+ bool influence = unpack_influence(id, rsc_1, influence_s);
+
+ for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc));
+ pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1,
+ role_2, influence, data_set);
+ }
+
+ } else if (rsc_2 != NULL) {
+ for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+ pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1,
+ role_2,
+ unpack_influence(id, rsc_1, influence_s),
+ data_set);
+ }
+
+ } else {
+ for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ xmlNode *xml_rsc_2 = NULL;
+ bool influence = true;
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+ influence = unpack_influence(id, rsc_1, influence_s);
+
+ for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ xml_rsc_2 != NULL;
+ xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2));
+ pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2,
+ role_1, role_2, influence,
+ data_set);
+ }
+ }
+ }
+}
+
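+/*!
+ * \internal
+ * \brief Unpack a colocation constraint that has no resource sets
+ *
+ * \param[in,out] xml_obj      Colocation constraint XML to unpack
+ * \param[in]     id           Constraint XML ID
+ * \param[in]     influence_s  String value of constraint's influence option
+ * \param[in,out] data_set     Cluster working set to add constraint to
+ */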
+static void
+unpack_simple_colocation(xmlNode *xml_obj, const char *id,
+ const char *influence_s, pe_working_set_t *data_set)
+{
+ int score_i = 0;
+
+ const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
+ const char *dependent_id = crm_element_value(xml_obj,
+ XML_COLOC_ATTR_SOURCE);
+ const char *primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET);
+ const char *dependent_role = crm_element_value(xml_obj,
+ XML_COLOC_ATTR_SOURCE_ROLE);
+ const char *primary_role = crm_element_value(xml_obj,
+ XML_COLOC_ATTR_TARGET_ROLE);
+ const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR);
+
+ // @COMPAT: Deprecated since 2.1.5
+ const char *dependent_instance = crm_element_value(xml_obj,
+ XML_COLOC_ATTR_SOURCE_INSTANCE);
+ // @COMPAT: Deprecated since 2.1.5
+ const char *primary_instance = crm_element_value(xml_obj,
+ XML_COLOC_ATTR_TARGET_INSTANCE);
+
+ pe_resource_t *dependent = pcmk__find_constraint_resource(data_set->resources,
+ dependent_id);
+ pe_resource_t *primary = pcmk__find_constraint_resource(data_set->resources,
+ primary_id);
+
+ if (dependent_instance != NULL) {
+ pe_warn_once(pe_wo_coloc_inst,
+ "Support for " XML_COLOC_ATTR_SOURCE_INSTANCE " is "
+ "deprecated and will be removed in a future release.");
+ }
+
+ if (primary_instance != NULL) {
+ pe_warn_once(pe_wo_coloc_inst,
+ "Support for " XML_COLOC_ATTR_TARGET_INSTANCE " is "
+ "deprecated and will be removed in a future release.");
+ }
+
+ if (dependent == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "does not exist", id, dependent_id);
+ return;
+
+ } else if (primary == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "does not exist", id, primary_id);
+ return;
+
+ } else if ((dependent_instance != NULL) && !pe_rsc_is_clone(dependent)) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "is not a clone but instance '%s' was requested",
+ id, dependent_id, dependent_instance);
+ return;
+
+ } else if ((primary_instance != NULL) && !pe_rsc_is_clone(primary)) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "is not a clone but instance '%s' was requested",
+ id, primary_id, primary_instance);
+ return;
+ }
+
+ if (dependent_instance != NULL) {
+ dependent = find_clone_instance(dependent, dependent_instance);
+ if (dependent == NULL) {
+ pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
+ "does not have an instance '%s'",
+ id, dependent_id, dependent_instance);
+ return;
+ }
+ }
+
+ if (primary_instance != NULL) {
+ primary = find_clone_instance(primary, primary_instance);
+ if (primary == NULL) {
+            pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
+                              "does not have an instance '%s'",
+                              id, primary_id, primary_instance);
+ return;
+ }
+ }
+
+ if (pcmk__xe_attr_is_true(xml_obj, XML_CONS_ATTR_SYMMETRICAL)) {
+        pcmk__config_warn("The '" XML_CONS_ATTR_SYMMETRICAL "' attribute "
+                          "of colocation constraints has been removed");
+ }
+
+ if (score) {
+ score_i = char2score(score);
+ }
+
+ pcmk__new_colocation(id, attr, score_i, dependent, primary,
+ dependent_role, primary_role,
+ unpack_influence(id, dependent, influence_s), data_set);
+}
+
+// \return Standard Pacemaker return code
+static int
+unpack_colocation_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
+ pe_working_set_t *data_set)
+{
+ const char *id = NULL;
+ const char *dependent_id = NULL;
+ const char *primary_id = NULL;
+ const char *dependent_role = NULL;
+ const char *primary_role = NULL;
+
+ pe_resource_t *dependent = NULL;
+ pe_resource_t *primary = NULL;
+
+ pe_tag_t *dependent_tag = NULL;
+ pe_tag_t *primary_tag = NULL;
+
+ xmlNode *dependent_set = NULL;
+ xmlNode *primary_set = NULL;
+ bool any_sets = false;
+
+ *expanded_xml = NULL;
+
+ CRM_CHECK(xml_obj != NULL, return EINVAL);
+
+ id = ID(xml_obj);
+ if (id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
+ return pcmk_rc_unpack_error;
+ }
+
+ // Check whether there are any resource sets with template or tag references
+ *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set);
+ if (*expanded_xml != NULL) {
+ crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation");
+ return pcmk_rc_ok;
+ }
+
+ dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE);
+ primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET);
+ if ((dependent_id == NULL) || (primary_id == NULL)) {
+ return pcmk_rc_ok;
+ }
+
+ if (!pcmk__valid_resource_or_tag(data_set, dependent_id, &dependent,
+ &dependent_tag)) {
+ pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+ "valid resource or tag", id, dependent_id);
+ return pcmk_rc_unpack_error;
+ }
+
+ if (!pcmk__valid_resource_or_tag(data_set, primary_id, &primary,
+ &primary_tag)) {
+ pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+ "valid resource or tag", id, primary_id);
+ return pcmk_rc_unpack_error;
+ }
+
+ if ((dependent != NULL) && (primary != NULL)) {
+ /* Neither side references any template/tag. */
+ return pcmk_rc_ok;
+ }
+
+ if ((dependent_tag != NULL) && (primary_tag != NULL)) {
+ // A colocation constraint between two templates/tags makes no sense
+ pcmk__config_err("Ignoring constraint '%s' because two templates or "
+ "tags cannot be colocated", id);
+ return pcmk_rc_unpack_error;
+ }
+
+ dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE);
+ primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE);
+
+ *expanded_xml = copy_xml(xml_obj);
+
+ // Convert template/tag reference in "rsc" into resource_set under constraint
+ if (!pcmk__tag_to_set(*expanded_xml, &dependent_set, XML_COLOC_ATTR_SOURCE,
+ true, data_set)) {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ return pcmk_rc_unpack_error;
+ }
+
+ if (dependent_set != NULL) {
+ if (dependent_role != NULL) {
+ // Move "rsc-role" into converted resource_set as "role"
+ crm_xml_add(dependent_set, "role", dependent_role);
+ xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE);
+ }
+ any_sets = true;
+ }
+
+ // Convert template/tag reference in "with-rsc" into resource_set under constraint
+ if (!pcmk__tag_to_set(*expanded_xml, &primary_set, XML_COLOC_ATTR_TARGET,
+ true, data_set)) {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ return pcmk_rc_unpack_error;
+ }
+
+ if (primary_set != NULL) {
+ if (primary_role != NULL) {
+ // Move "with-rsc-role" into converted resource_set as "role"
+ crm_xml_add(primary_set, "role", primary_role);
+ xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_TARGET_ROLE);
+ }
+ any_sets = true;
+ }
+
+ if (any_sets) {
+ crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation");
+ } else {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ }
+
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Parse a colocation constraint from XML into a cluster working set
+ *
+ * \param[in,out] xml_obj Colocation constraint XML to unpack
+ * \param[in,out] data_set Cluster working set to add constraint to
+ */
+void
+pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set)
+{
+ int score_i = 0;
+ xmlNode *set = NULL;
+ xmlNode *last = NULL;
+
+ xmlNode *orig_xml = NULL;
+ xmlNode *expanded_xml = NULL;
+
+ const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
+ const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
+ const char *influence_s = crm_element_value(xml_obj,
+ XML_COLOC_ATTR_INFLUENCE);
+
+ if (score) {
+ score_i = char2score(score);
+ }
+
+ if (unpack_colocation_tags(xml_obj, &expanded_xml,
+ data_set) != pcmk_rc_ok) {
+ return;
+ }
+ if (expanded_xml) {
+ orig_xml = xml_obj;
+ xml_obj = expanded_xml;
+ }
+
+ for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL;
+ set = crm_next_same_xml(set)) {
+
+ set = expand_idref(set, data_set->input);
+ if (set == NULL) { // Configuration error, message already logged
+ if (expanded_xml != NULL) {
+ free_xml(expanded_xml);
+ }
+ return;
+ }
+
+ unpack_colocation_set(set, score_i, id, influence_s, data_set);
+
+ if (last != NULL) {
+ colocate_rsc_sets(id, last, set, score_i, influence_s, data_set);
+ }
+ last = set;
+ }
+
+ if (expanded_xml) {
+ free_xml(expanded_xml);
+ xml_obj = orig_xml;
+ }
+
+ if (last == NULL) {
+ unpack_simple_colocation(xml_obj, id, influence_s, data_set);
+ }
+}
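+
+/* As an illustration (hypothetical resource IDs), the two constraint forms
+ * handled above look like this:
+ *
+ *   <rsc_colocation id="web-with-db" rsc="web" with-rsc="db"
+ *                   score="INFINITY"/>
+ *
+ *   <rsc_colocation id="web-db-together" score="INFINITY">
+ *     <resource_set id="web-db-together-0">
+ *       <resource_ref id="web"/>
+ *       <resource_ref id="db"/>
+ *     </resource_set>
+ *   </rsc_colocation>
+ *
+ * The first form is handled by unpack_simple_colocation(); each set in the
+ * second form is handled by unpack_colocation_set(), and consecutive sets
+ * are chained together via colocate_rsc_sets().
+ */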
+
+/*!
+ * \internal
+ * \brief Make actions of a given type unrunnable for a given resource
+ *
+ * \param[in,out] rsc Resource whose actions should be blocked
+ * \param[in] task Name of action to block
+ * \param[in] reason Colocated resource whose unrunnable action caused the block
+ */
+static void
+mark_action_blocked(pe_resource_t *rsc, const char *task,
+ const pe_resource_t *reason)
+{
+ char *reason_text = crm_strdup_printf("colocation with %s", reason->id);
+
+ for (GList *gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+
+ if (pcmk_is_set(action->flags, pe_action_runnable)
+ && pcmk__str_eq(action->task, task, pcmk__str_casei)) {
+
+ pe__clear_action_flags(action, pe_action_runnable);
+ pe_action_set_reason(action, reason_text, false);
+ pcmk__block_colocation_dependents(action, rsc->cluster);
+ pcmk__update_action_for_orderings(action, rsc->cluster);
+ }
+ }
+
+ // If parent resource can't perform an action, neither can any children
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ mark_action_blocked((pe_resource_t *) (iter->data), task, reason);
+ }
+ free(reason_text);
+}
+
+/*!
+ * \internal
+ * \brief If an action is unrunnable, block any relevant dependent actions
+ *
+ * If a given action is an unrunnable start or promote, block the start or
+ * promote actions of resources colocated with it, as appropriate to the
+ * colocations' configured roles.
+ *
+ * \param[in,out] action Action to check
+ * \param[in] data_set Cluster working set (ignored)
+ */
+void
+pcmk__block_colocation_dependents(pe_action_t *action,
+ pe_working_set_t *data_set)
+{
+ GList *gIter = NULL;
+ GList *colocations = NULL;
+ pe_resource_t *rsc = NULL;
+ bool is_start = false;
+
+ if (pcmk_is_set(action->flags, pe_action_runnable)) {
+ return; // Only unrunnable actions block dependents
+ }
+
+ is_start = pcmk__str_eq(action->task, RSC_START, pcmk__str_none);
+ if (!is_start && !pcmk__str_eq(action->task, RSC_PROMOTE, pcmk__str_none)) {
+ return; // Only unrunnable starts and promotes block dependents
+ }
+
+ CRM_ASSERT(action->rsc != NULL); // Start and promote are resource actions
+
+ /* If this resource is part of a collective resource, dependents are blocked
+ * only if all instances of the collective are unrunnable, so check the
+ * collective resource.
+ */
+ rsc = uber_parent(action->rsc);
+ if (rsc->parent != NULL) {
+ rsc = rsc->parent; // Bundle
+ }
+
+ // Colocation fails only if entire primary can't reach desired role
+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child = (pe_resource_t *) gIter->data;
+ pe_action_t *child_action = find_first_action(child->actions, NULL,
+ action->task, NULL);
+
+ if ((child_action == NULL)
+ || pcmk_is_set(child_action->flags, pe_action_runnable)) {
+ crm_trace("Not blocking %s colocation dependents because "
+ "at least %s has runnable %s",
+ rsc->id, child->id, action->task);
+ return; // At least one child can reach desired role
+ }
+ }
+
+ crm_trace("Blocking %s colocation dependents due to unrunnable %s %s",
+ rsc->id, action->rsc->id, action->task);
+
+ // Check each colocation where this resource is primary
+ colocations = pcmk__with_this_colocations(rsc);
+ for (gIter = colocations; gIter != NULL; gIter = gIter->next) {
+ pcmk__colocation_t *colocation = (pcmk__colocation_t *) gIter->data;
+
+ if (colocation->score < INFINITY) {
+ continue; // Only mandatory colocations block dependent
+ }
+
+ /* If the primary can't start, the dependent can't reach its colocated
+ * role, regardless of what the primary or dependent colocation role is.
+ *
+ * If the primary can't be promoted, the dependent can't reach its
+ * colocated role if the primary's colocation role is promoted.
+ */
+ if (!is_start && (colocation->primary_role != RSC_ROLE_PROMOTED)) {
+ continue;
+ }
+
+ // Block the dependent from reaching its colocated role
+ if (colocation->dependent_role == RSC_ROLE_PROMOTED) {
+ mark_action_blocked(colocation->dependent, RSC_PROMOTE,
+ action->rsc);
+ } else {
+ mark_action_blocked(colocation->dependent, RSC_START, action->rsc);
+ }
+ }
+ g_list_free(colocations);
+}
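+
+/* For example (hypothetical IDs): if "db" has an unrunnable start and "web"
+ * is colocated with "db" at score INFINITY, the loop above blocks web's
+ * start actions; if the colocation instead set rsc-role="Promoted" for web,
+ * web's promote actions would be blocked. Optional (finite-score)
+ * colocations never block dependents.
+ */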
+
+/*!
+ * \internal
+ * \brief Determine how a colocation constraint should affect a resource
+ *
+ * Colocation constraints have different effects at different points in the
+ * scheduler sequence. Initially, they affect a resource's location; once that
+ * is determined, then for promotable clones they can affect a resource
+ * instance's role; after both are determined, the constraints no longer matter.
+ * Given a specific colocation constraint, check what has been done so far to
+ * determine what should be affected at the current point in the scheduler.
+ *
+ * \param[in] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint
+ * \param[in] preview If true, pretend resources have already been allocated
+ *
+ * \return How colocation constraint should be applied at this point
+ */
+enum pcmk__coloc_affects
+pcmk__colocation_affects(const pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation, bool preview)
+{
+ if (!preview && pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ // Primary resource has not been allocated yet, so we can't do anything
+ return pcmk__coloc_affects_nothing;
+ }
+
+ if ((colocation->dependent_role >= RSC_ROLE_UNPROMOTED)
+ && (dependent->parent != NULL)
+ && pcmk_is_set(dependent->parent->flags, pe_rsc_promotable)
+ && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
+
+ /* This is a colocation by role, and the dependent is a promotable clone
+ * that has already been allocated, so the colocation should now affect
+ * the role.
+ */
+ return pcmk__coloc_affects_role;
+ }
+
+ if (!preview && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
+ /* The dependent resource has already been through allocation, so the
+ * constraint no longer has any effect. Log an error if a mandatory
+ * colocation constraint has been violated.
+ */
+
+ const pe_node_t *primary_node = primary->allocated_to;
+
+ if (dependent->allocated_to == NULL) {
+ crm_trace("Skipping colocation '%s': %s will not run anywhere",
+ colocation->id, dependent->id);
+
+ } else if (colocation->score >= INFINITY) {
+ // Dependent resource must colocate with primary resource
+
+ if ((primary_node == NULL) ||
+ (primary_node->details != dependent->allocated_to->details)) {
+ crm_err("%s must be colocated with %s but is not (%s vs. %s)",
+ dependent->id, primary->id,
+ pe__node_name(dependent->allocated_to),
+ pe__node_name(primary_node));
+ }
+
+ } else if (colocation->score <= -CRM_SCORE_INFINITY) {
+ // Dependent resource must anti-colocate with primary resource
+
+ if ((primary_node != NULL) &&
+ (dependent->allocated_to->details == primary_node->details)) {
+ crm_err("%s and %s must be anti-colocated but are allocated "
+ "to the same node (%s)",
+ dependent->id, primary->id, pe__node_name(primary_node));
+ }
+ }
+ return pcmk__coloc_affects_nothing;
+ }
+
+ if ((colocation->score > 0)
+ && (colocation->dependent_role != RSC_ROLE_UNKNOWN)
+ && (colocation->dependent_role != dependent->next_role)) {
+
+ crm_trace("Skipping colocation '%s': dependent limited to %s role "
+ "but %s next role is %s",
+ colocation->id, role2text(colocation->dependent_role),
+ dependent->id, role2text(dependent->next_role));
+ return pcmk__coloc_affects_nothing;
+ }
+
+ if ((colocation->score > 0)
+ && (colocation->primary_role != RSC_ROLE_UNKNOWN)
+ && (colocation->primary_role != primary->next_role)) {
+
+ crm_trace("Skipping colocation '%s': primary limited to %s role "
+ "but %s next role is %s",
+ colocation->id, role2text(colocation->primary_role),
+ primary->id, role2text(primary->next_role));
+ return pcmk__coloc_affects_nothing;
+ }
+
+ if ((colocation->score < 0)
+ && (colocation->dependent_role != RSC_ROLE_UNKNOWN)
+ && (colocation->dependent_role == dependent->next_role)) {
+ crm_trace("Skipping anti-colocation '%s': dependent role %s matches",
+ colocation->id, role2text(colocation->dependent_role));
+ return pcmk__coloc_affects_nothing;
+ }
+
+ if ((colocation->score < 0)
+ && (colocation->primary_role != RSC_ROLE_UNKNOWN)
+ && (colocation->primary_role == primary->next_role)) {
+ crm_trace("Skipping anti-colocation '%s': primary role %s matches",
+ colocation->id, role2text(colocation->primary_role));
+ return pcmk__coloc_affects_nothing;
+ }
+
+ return pcmk__coloc_affects_location;
+}
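+
+/* For example (hypothetical IDs), given a promotable clone "db-clone" and
+ *
+ *   <rsc_colocation id="promoted-on-fast" rsc="db-clone" rsc-role="Promoted"
+ *                   with-rsc="fast-storage" score="INFINITY"/>
+ *
+ * the constraint can affect where a db-clone instance is assigned while the
+ * instance is still provisional, and affects role selection (which instance
+ * is promoted) once the instance has been assigned to a node.
+ */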
+
+/*!
+ * \internal
+ * \brief Apply colocation to dependent for allocation purposes
+ *
+ * Update the allowed node weights of the dependent resource in a colocation,
+ * for the purposes of allocating it to a node
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint
+ */
+void
+pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation)
+{
+ const char *attribute = CRM_ATTR_ID;
+ const char *value = NULL;
+ GHashTable *work = NULL;
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ if (colocation->node_attribute != NULL) {
+ attribute = colocation->node_attribute;
+ }
+
+ if (primary->allocated_to != NULL) {
+ value = pe_node_attribute_raw(primary->allocated_to, attribute);
+
+ } else if (colocation->score < 0) {
+ // Nothing to do (anti-colocation with something that is not running)
+ return;
+ }
+
+ work = pcmk__copy_node_table(dependent->allowed_nodes);
+
+ g_hash_table_iter_init(&iter, work);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ if (primary->allocated_to == NULL) {
+ node->weight = pcmk__add_scores(-colocation->score, node->weight);
+ pe_rsc_trace(dependent,
+ "Applied %s to %s score on %s (now %s after "
+ "subtracting %s because primary %s inactive)",
+ colocation->id, dependent->id, pe__node_name(node),
+ pcmk_readable_score(node->weight),
+ pcmk_readable_score(colocation->score), primary->id);
+
+ } else if (pcmk__str_eq(pe_node_attribute_raw(node, attribute), value,
+ pcmk__str_casei)) {
+ /* Add colocation score only if optional (or minus infinity). A
+ * mandatory colocation is a requirement rather than a preference,
+ * so we don't need to consider it for relative assignment purposes.
+ * The resource will simply be forbidden from running on the node if
+ * the primary isn't active there (via the condition above).
+ */
+ if (colocation->score < CRM_SCORE_INFINITY) {
+ node->weight = pcmk__add_scores(colocation->score,
+ node->weight);
+ pe_rsc_trace(dependent,
+ "Applied %s to %s score on %s (now %s after "
+ "adding %s)",
+ colocation->id, dependent->id, pe__node_name(node),
+ pcmk_readable_score(node->weight),
+ pcmk_readable_score(colocation->score));
+ }
+
+ } else if (colocation->score >= CRM_SCORE_INFINITY) {
+ /* Only mandatory colocations are relevant when the colocation
+ * attribute doesn't match, because an attribute not matching is not
+ * a negative preference -- the colocation is simply relevant only
+ * where it matches.
+ */
+ node->weight = -CRM_SCORE_INFINITY;
+ pe_rsc_trace(dependent,
+ "Banned %s from %s because colocation %s attribute %s "
+ "does not match",
+ dependent->id, pe__node_name(node), colocation->id,
+ attribute);
+ }
+ }
+
+ if ((colocation->score <= -INFINITY) || (colocation->score >= INFINITY)
+ || pcmk__any_node_available(work)) {
+
+ g_hash_table_destroy(dependent->allowed_nodes);
+ dependent->allowed_nodes = work;
+ work = NULL;
+
+ } else {
+ pe_rsc_info(dependent,
+ "%s: Rolling back scores from %s (no available nodes)",
+ dependent->id, primary->id);
+ }
+
+ if (work != NULL) {
+ g_hash_table_destroy(work);
+ }
+}
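+
+/* A worked example of the arithmetic above (hypothetical IDs and scores):
+ * for an optional colocation of "web" with "db" at score 100, nodes whose
+ * colocation attribute value matches db's node get 100 added to web's score
+ * there, and other nodes are unchanged. At score INFINITY, matching nodes
+ * get no increment (the requirement is enforced elsewhere) while
+ * non-matching nodes are banned with -INFINITY. If db is inactive, the
+ * colocation score is instead subtracted from every node's score for web.
+ */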
+
+/*!
+ * \internal
+ * \brief Apply colocation to dependent for role purposes
+ *
+ * Update the priority of the dependent resource in a colocation, for the
+ * purposes of selecting its role
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint
+ */
+void
+pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation)
+{
+ const char *dependent_value = NULL;
+ const char *primary_value = NULL;
+ const char *attribute = CRM_ATTR_ID;
+ int score_multiplier = 1;
+
+ if ((primary->allocated_to == NULL) || (dependent->allocated_to == NULL)) {
+ return;
+ }
+
+ if (colocation->node_attribute != NULL) {
+ attribute = colocation->node_attribute;
+ }
+
+ dependent_value = pe_node_attribute_raw(dependent->allocated_to, attribute);
+ primary_value = pe_node_attribute_raw(primary->allocated_to, attribute);
+
+ if (!pcmk__str_eq(dependent_value, primary_value, pcmk__str_casei)) {
+ if ((colocation->score == INFINITY)
+ && (colocation->dependent_role == RSC_ROLE_PROMOTED)) {
+ dependent->priority = -INFINITY;
+ }
+ return;
+ }
+
+ if ((colocation->primary_role != RSC_ROLE_UNKNOWN)
+ && (colocation->primary_role != primary->next_role)) {
+ return;
+ }
+
+ if (colocation->dependent_role == RSC_ROLE_UNPROMOTED) {
+ score_multiplier = -1;
+ }
+
+ dependent->priority = pcmk__add_scores(score_multiplier * colocation->score,
+ dependent->priority);
+ pe_rsc_trace(dependent,
+ "Applied %s to %s promotion priority (now %s after %s %s)",
+ colocation->id, dependent->id,
+ pcmk_readable_score(dependent->priority),
+ ((score_multiplier == 1)? "adding" : "subtracting"),
+ pcmk_readable_score(colocation->score));
+}
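+
+/* For example (hypothetical IDs): with db-clone colocated
+ * (rsc-role="Promoted") with "fast-storage" at score INFINITY, a db-clone
+ * instance assigned to a different node than fast-storage gets a promotion
+ * priority of -INFINITY (it cannot be promoted there), while an instance on
+ * the same node gets INFINITY added to its priority. With
+ * rsc-role="Unpromoted", the score would be subtracted instead.
+ */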
+
+/*!
+ * \internal
+ * \brief Find score of highest-scored node that matches colocation attribute
+ *
+ * \param[in] rsc Resource whose allowed nodes should be searched
+ * \param[in] attr Colocation attribute name (must not be NULL)
+ * \param[in] value Colocation attribute value to require
+ *
+ * \return Highest score among available allowed nodes whose value for
+ * \p attr matches \p value (or -INFINITY if none)
+ */
+static int
+best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr,
+ const char *value)
+{
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+ int best_score = -INFINITY;
+ const char *best_node = NULL;
+
+ // Find best allowed node with matching attribute
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+
+ if ((node->weight > best_score) && pcmk__node_available(node, false, false)
+ && pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) {
+
+ best_score = node->weight;
+ best_node = node->details->uname;
+ }
+ }
+
+ if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_casei)) {
+ if (best_node == NULL) {
+ crm_info("No allowed node for %s matches node attribute %s=%s",
+ rsc->id, attr, value);
+ } else {
+ crm_info("Allowed node %s for %s had best score (%d) "
+ "of those matching node attribute %s=%s",
+ best_node, rsc->id, best_score, attr, value);
+ }
+ }
+ return best_score;
+}
+
+/*!
+ * \internal
+ * \brief Add resource's colocation matches to current node allocation scores
+ *
+ * For each node in a given table, if any of a given resource's allowed nodes
+ * have a matching value for the colocation attribute, add the highest of those
+ * nodes' scores to the node's score.
+ *
+ * \param[in,out] nodes Hash table of nodes with allocation scores so far
+ * \param[in] rsc Resource whose allowed nodes should be compared
+ * \param[in] attr Colocation attribute that must match (NULL for default)
+ * \param[in] factor Factor by which to multiply scores being added
+ * \param[in] only_positive Whether to add only positive scores
+ */
+static void
+add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc,
+ const char *attr, float factor,
+ bool only_positive)
+{
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ if (attr == NULL) {
+ attr = CRM_ATTR_UNAME;
+ }
+
+ // Iterate through each node
+ g_hash_table_iter_init(&iter, nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ float weight_f = 0;
+ int weight = 0;
+ int score = 0;
+ int new_score = 0;
+
+ score = best_node_score_matching_attr(rsc, attr,
+ pe_node_attribute_raw(node, attr));
+
+ if ((factor < 0) && (score < 0)) {
+ /* Negative preference for a node with a negative score
+ * should not become a positive preference.
+ *
+ * @TODO Consider filtering only if weight is -INFINITY
+ */
+ crm_trace("%s: Filtering %d + %f * %d (double negative disallowed)",
+ pe__node_name(node), node->weight, factor, score);
+ continue;
+ }
+
+ if (node->weight == INFINITY_HACK) {
+ crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)",
+ pe__node_name(node), node->weight, factor, score);
+ continue;
+ }
+
+ weight_f = factor * score;
+
+ // Round the number; see http://c-faq.com/fp/round.html
+ weight = (int) ((weight_f < 0)? (weight_f - 0.5) : (weight_f + 0.5));
+
+ /* Small factors can obliterate the small scores that are often actually
+ * used in configurations. If the score and factor are nonzero, ensure
+ * that the result is nonzero as well.
+ */
+ if ((weight == 0) && (score != 0)) {
+ if (factor > 0.0) {
+ weight = 1;
+ } else if (factor < 0.0) {
+ weight = -1;
+ }
+ }
+
+ new_score = pcmk__add_scores(weight, node->weight);
+
+ if (only_positive && (new_score < 0) && (node->weight > 0)) {
+ crm_trace("%s: Filtering %d + %f * %d = %d "
+ "(negative disallowed, marking node unusable)",
+ pe__node_name(node), node->weight, factor, score,
+ new_score);
+ node->weight = INFINITY_HACK;
+ continue;
+ }
+
+ if (only_positive && (new_score < 0) && (node->weight == 0)) {
+ crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)",
+ pe__node_name(node), node->weight, factor, score,
+ new_score);
+ continue;
+ }
+
+ crm_trace("%s: %d + %f * %d = %d", pe__node_name(node),
+ node->weight, factor, score, new_score);
+ node->weight = new_score;
+ }
+}
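+
+/* Worked examples of the rounding above: a factor of 0.5 with score 7 gives
+ * weight_f 3.5, which rounds away from zero to 4; factor -0.5 with score 7
+ * gives -3.5, which rounds to -4. With factor 0.001 and score 100, the
+ * product 0.1 would round to 0, so the nonzero-preservation step bumps it
+ * to 1 so that small configured scores are not lost entirely.
+ */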
+
+/*!
+ * \internal
+ * \brief Update nodes with scores of colocated resources' nodes
+ *
+ * Given a table of nodes and a resource, update the nodes' scores with the
+ * scores of the best nodes matching the attribute used for each of the
+ * resource's relevant colocations.
+ *
+ * \param[in,out] rsc Resource to check colocations for
+ * \param[in] log_id Resource ID to use in logs (if NULL, use \p rsc ID)
+ * \param[in,out] nodes Nodes to update
+ * \param[in] attr Colocation attribute (NULL to use default)
+ * \param[in] factor Incorporate scores multiplied by this factor
+ * \param[in] flags Bitmask of enum pcmk__coloc_select values
+ *
+ * \note The caller remains responsible for freeing \p *nodes.
+ */
+void
+pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
+ GHashTable **nodes, const char *attr,
+ float factor, uint32_t flags)
+{
+ GHashTable *work = NULL;
+
+ CRM_CHECK((rsc != NULL) && (nodes != NULL), return);
+
+ if (log_id == NULL) {
+ log_id = rsc->id;
+ }
+
+ // Avoid infinite recursion
+ if (pcmk_is_set(rsc->flags, pe_rsc_merging)) {
+ pe_rsc_info(rsc, "%s: Breaking dependency loop at %s",
+ log_id, rsc->id);
+ return;
+ }
+ pe__set_resource_flags(rsc, pe_rsc_merging);
+
+ if (*nodes == NULL) {
+ /* Only cmp_resources() passes a NULL nodes table, which indicates we
+ * should initialize it with the resource's allowed node scores.
+ */
+ work = pcmk__copy_node_table(rsc->allowed_nodes);
+ } else {
+ pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)",
+ log_id, rsc->id, factor);
+ work = pcmk__copy_node_table(*nodes);
+ add_node_scores_matching_attr(work, rsc, attr, factor,
+ pcmk_is_set(flags,
+ pcmk__coloc_select_nonnegative));
+ }
+
+ if (work == NULL) {
+ pe__clear_resource_flags(rsc, pe_rsc_merging);
+ return;
+ }
+
+ if (pcmk__any_node_available(work)) {
+ GList *colocations = NULL;
+
+ if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) {
+ colocations = pcmk__this_with_colocations(rsc);
+ pe_rsc_trace(rsc,
+ "Checking additional %d optional '%s with' constraints",
+ g_list_length(colocations), rsc->id);
+ } else {
+ colocations = pcmk__with_this_colocations(rsc);
+ pe_rsc_trace(rsc,
+ "Checking additional %d optional 'with %s' constraints",
+ g_list_length(colocations), rsc->id);
+ }
+ flags |= pcmk__coloc_select_active;
+
+ for (GList *iter = colocations; iter != NULL; iter = iter->next) {
+ pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data;
+
+ pe_resource_t *other = NULL;
+ float other_factor = factor * constraint->score / (float) INFINITY;
+
+ if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) {
+ other = constraint->primary;
+ } else if (!pcmk__colocation_has_influence(constraint, NULL)) {
+ continue;
+ } else {
+ other = constraint->dependent;
+ }
+
+ pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint (%s with %s)",
+ constraint->id, constraint->dependent->id,
+ constraint->primary->id);
+ other->cmds->add_colocated_node_scores(other, log_id, &work,
+ constraint->node_attribute,
+ other_factor, flags);
+ pe__show_node_weights(true, NULL, log_id, work, rsc->cluster);
+ }
+ g_list_free(colocations);
+
+ } else if (pcmk_is_set(flags, pcmk__coloc_select_active)) {
+ pe_rsc_info(rsc, "%s: Rolling back optional scores from %s",
+ log_id, rsc->id);
+ g_hash_table_destroy(work);
+ pe__clear_resource_flags(rsc, pe_rsc_merging);
+ return;
+ }
+
+ if (pcmk_is_set(flags, pcmk__coloc_select_nonnegative)) {
+ pe_node_t *node = NULL;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, work);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ if (node->weight == INFINITY_HACK) {
+ node->weight = 1;
+ }
+ }
+ }
+
+ if (*nodes != NULL) {
+ g_hash_table_destroy(*nodes);
+ }
+ *nodes = work;
+
+ pe__clear_resource_flags(rsc, pe_rsc_merging);
+}
+
+/*!
+ * \internal
+ * \brief Apply a "with this" colocation to a resource's allowed node scores
+ *
+ * \param[in,out] data Colocation to apply
+ * \param[in,out] user_data Resource being assigned
+ */
+void
+pcmk__add_dependent_scores(gpointer data, gpointer user_data)
+{
+ pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
+ pe_resource_t *rsc = (pe_resource_t *) user_data;
+
+ pe_resource_t *other = colocation->dependent;
+ const float factor = colocation->score / (float) INFINITY;
+ uint32_t flags = pcmk__coloc_select_active;
+
+ if (!pcmk__colocation_has_influence(colocation, NULL)) {
+ return;
+ }
+ if (rsc->variant == pe_clone) {
+ flags |= pcmk__coloc_select_nonnegative;
+ }
+ pe_rsc_trace(rsc,
+ "%s: Incorporating attenuated %s assignment scores due "
+ "to colocation %s", rsc->id, other->id, colocation->id);
+ other->cmds->add_colocated_node_scores(other, rsc->id, &rsc->allowed_nodes,
+ colocation->node_attribute, factor,
+ flags);
+}
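+
+/* The factor above attenuates the dependent's scores in proportion to the
+ * colocation strength: because INFINITY is 1000000, a colocation at score
+ * 5000 merges the dependent's node scores scaled by 0.005, while a
+ * mandatory (INFINITY) colocation merges them at full weight.
+ */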
+
+/*!
+ * \internal
+ * \brief Get all colocations affecting a resource as the primary
+ *
+ * \param[in] rsc Resource to get colocations for
+ *
+ * \return Newly allocated list of colocations affecting \p rsc as primary
+ *
+ * \note This is a convenience wrapper for the with_this_colocations() method.
+ */
+GList *
+pcmk__with_this_colocations(const pe_resource_t *rsc)
+{
+ GList *list = NULL;
+
+ rsc->cmds->with_this_colocations(rsc, rsc, &list);
+ return list;
+}
+
+/*!
+ * \internal
+ * \brief Get all colocations affecting a resource as the dependent
+ *
+ * \param[in] rsc Resource to get colocations for
+ *
+ * \return Newly allocated list of colocations affecting \p rsc as dependent
+ *
+ * \note This is a convenience wrapper for the this_with_colocations() method.
+ */
+GList *
+pcmk__this_with_colocations(const pe_resource_t *rsc)
+{
+ GList *list = NULL;
+
+ rsc->cmds->this_with_colocations(rsc, rsc, &list);
+ return list;
+}
diff --git a/lib/pacemaker/pcmk_sched_constraints.c b/lib/pacemaker/pcmk_sched_constraints.c
new file mode 100644
index 0000000..bae6827
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_constraints.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <regex.h>
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+#include <crm/common/iso8601.h>
+#include <crm/pengine/status.h>
+#include <crm/pengine/internal.h>
+#include <crm/pengine/rules.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+static bool
+evaluate_lifetime(xmlNode *lifetime, pe_working_set_t *data_set)
+{
+ bool result = FALSE;
+ crm_time_t *next_change = crm_time_new_undefined();
+
+ result = pe_evaluate_rules(lifetime, NULL, data_set->now, next_change);
+ if (crm_time_is_defined(next_change)) {
+ time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change);
+
+ pe__update_recheck_time(recheck, data_set);
+ }
+ crm_time_free(next_change);
+ return result;
+}
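+
+/* A lifetime block is a deprecated child of a constraint whose rules limit
+ * when the constraint is active, e.g. (hypothetical IDs and dates):
+ *
+ *   <rsc_location id="loc-1" rsc="web" node="node1" score="INFINITY">
+ *     <lifetime>
+ *       <rule id="loc-1-lifetime" score="INFINITY">
+ *         <date_expression id="loc-1-until" operation="lt"
+ *                          end="2024-01-01"/>
+ *       </rule>
+ *     </lifetime>
+ *   </rsc_location>
+ *
+ * evaluate_lifetime() returns whether the rules are currently satisfied,
+ * and records when the result could next change so a recheck is scheduled.
+ */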
+
+/*!
+ * \internal
+ * \brief Unpack constraints from XML
+ *
+ * Given a cluster working set, unpack all constraints from its input XML into
+ * data structures.
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__unpack_constraints(pe_working_set_t *data_set)
+{
+ xmlNode *xml_constraints = pcmk_find_cib_element(data_set->input,
+ XML_CIB_TAG_CONSTRAINTS);
+
+ for (xmlNode *xml_obj = pcmk__xe_first_child(xml_constraints);
+ xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
+
+ xmlNode *lifetime = NULL;
+ const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
+ const char *tag = crm_element_name(xml_obj);
+
+ if (id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without "
+ XML_ATTR_ID, tag);
+ continue;
+ }
+
+ crm_trace("Unpacking %s constraint '%s'", tag, id);
+
+ lifetime = first_named_child(xml_obj, "lifetime");
+ if (lifetime != NULL) {
+ pcmk__config_warn("Support for 'lifetime' attribute (in %s) is "
+ "deprecated (the rules it contains should "
+ "instead be direct descendants of the "
+ "constraint object)", id);
+ }
+
+ if ((lifetime != NULL) && !evaluate_lifetime(lifetime, data_set)) {
+ crm_info("Constraint %s %s is not active", tag, id);
+
+ } else if (pcmk__str_eq(XML_CONS_TAG_RSC_ORDER, tag, pcmk__str_casei)) {
+ pcmk__unpack_ordering(xml_obj, data_set);
+
+ } else if (pcmk__str_eq(XML_CONS_TAG_RSC_DEPEND, tag, pcmk__str_casei)) {
+ pcmk__unpack_colocation(xml_obj, data_set);
+
+ } else if (pcmk__str_eq(XML_CONS_TAG_RSC_LOCATION, tag, pcmk__str_casei)) {
+ pcmk__unpack_location(xml_obj, data_set);
+
+ } else if (pcmk__str_eq(XML_CONS_TAG_RSC_TICKET, tag, pcmk__str_casei)) {
+ pcmk__unpack_rsc_ticket(xml_obj, data_set);
+
+ } else {
+ pe_err("Unsupported constraint type: %s", tag);
+ }
+ }
+}
+
+pe_resource_t *
+pcmk__find_constraint_resource(GList *rsc_list, const char *id)
+{
+ GList *rIter = NULL;
+
+ for (rIter = rsc_list; id && rIter; rIter = rIter->next) {
+ pe_resource_t *parent = rIter->data;
+ pe_resource_t *match = parent->fns->find_rsc(parent, id, NULL,
+ pe_find_renamed);
+
+ if (match != NULL) {
+ if (!pcmk__str_eq(match->id, id, pcmk__str_casei)) {
+ /* We found an instance of a clone instead */
+ match = uber_parent(match);
+ crm_debug("Found %s for %s", match->id, id);
+ }
+ return match;
+ }
+ }
+ crm_trace("No match for %s", id);
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an ID references a resource tag
+ *
+ * \param[in] data_set Cluster working set
+ * \param[in] id Tag ID to search for
+ * \param[out] tag Where to store tag, if found
+ *
+ * \return true if ID refers to a tagged resource or resource template,
+ * otherwise false
+ */
+static bool
+find_constraint_tag(const pe_working_set_t *data_set, const char *id,
+ pe_tag_t **tag)
+{
+ *tag = NULL;
+
+ // Check whether id refers to a resource set template
+ if (g_hash_table_lookup_extended(data_set->template_rsc_sets, id,
+ NULL, (gpointer *) tag)) {
+ if (*tag == NULL) {
+ crm_warn("No resource is derived from template '%s'", id);
+ return false;
+ }
+ return true;
+ }
+
+ // If not, check whether id refers to a tag
+ if (g_hash_table_lookup_extended(data_set->tags, id,
+ NULL, (gpointer *) tag)) {
+ if (*tag == NULL) {
+ crm_warn("No resource is tagged with '%s'", id);
+ return false;
+ }
+ return true;
+ }
+
+ crm_warn("No template or tag named '%s'", id);
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an ID refers to a valid resource or tag
+ *
+ * \param[in] data_set Cluster working set
+ * \param[in] id ID to search for
+ * \param[out] rsc Where to store resource, if found (or NULL to skip
+ * searching resources)
+ * \param[out] tag Where to store tag, if found (or NULL to skip searching
+ * tags)
+ *
+ * \return true if id refers to a resource (possibly indirectly via a tag)
+ */
+bool
+pcmk__valid_resource_or_tag(const pe_working_set_t *data_set, const char *id,
+ pe_resource_t **rsc, pe_tag_t **tag)
+{
+ if (rsc != NULL) {
+ *rsc = pcmk__find_constraint_resource(data_set->resources, id);
+ if (*rsc != NULL) {
+ return true;
+ }
+ }
+
+ if ((tag != NULL) && find_constraint_tag(data_set, id, tag)) {
+ return true;
+ }
+
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Replace any resource tags with equivalent resource_ref entries
+ *
+ * If a given constraint has resource sets, check each set for resource_ref
+ * entries that list tags rather than resource IDs, and replace any found with
+ * resource_ref entries for the corresponding resource IDs.
+ *
+ * \param[in,out] xml_obj Constraint XML
+ * \param[in] data_set Cluster working set
+ *
+ * \return Equivalent XML with resource tags replaced (or NULL if none)
+ * \note It is the caller's responsibility to free the result with free_xml().
+ */
+xmlNode *
+pcmk__expand_tags_in_sets(xmlNode *xml_obj, const pe_working_set_t *data_set)
+{
+ xmlNode *new_xml = NULL;
+ bool any_refs = false;
+
+ // Short-circuit if there are no sets
+ if (first_named_child(xml_obj, XML_CONS_TAG_RSC_SET) == NULL) {
+ return NULL;
+ }
+
+ new_xml = copy_xml(xml_obj);
+
+ for (xmlNode *set = first_named_child(new_xml, XML_CONS_TAG_RSC_SET);
+ set != NULL; set = crm_next_same_xml(set)) {
+
+ GList *tag_refs = NULL;
+ GList *gIter = NULL;
+
+ for (xmlNode *xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ pe_resource_t *rsc = NULL;
+ pe_tag_t *tag = NULL;
+
+ if (!pcmk__valid_resource_or_tag(data_set, ID(xml_rsc), &rsc,
+ &tag)) {
+ pcmk__config_err("Ignoring resource sets for constraint '%s' "
+ "because '%s' is not a valid resource or tag",
+ ID(xml_obj), ID(xml_rsc));
+ free_xml(new_xml);
+ return NULL;
+
+ } else if (rsc) {
+ continue;
+
+ } else if (tag) {
+ /* The resource_ref under the resource_set references a template/tag */
+ xmlNode *last_ref = xml_rsc;
+
+ /* A sample:
+
+ Original XML:
+
+ <resource_set id="tag1-colocation-0" sequential="true">
+ <resource_ref id="rsc1"/>
+ <resource_ref id="tag1"/>
+ <resource_ref id="rsc4"/>
+ </resource_set>
+
+ Now we are appending rsc2 and rsc3 which are tagged with tag1 right after it:
+
+ <resource_set id="tag1-colocation-0" sequential="true">
+ <resource_ref id="rsc1"/>
+ <resource_ref id="tag1"/>
+ <resource_ref id="rsc2"/>
+ <resource_ref id="rsc3"/>
+ <resource_ref id="rsc4"/>
+ </resource_set>
+
+ */
+
+ for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
+ const char *obj_ref = (const char *) gIter->data;
+ xmlNode *new_rsc_ref = NULL;
+
+ new_rsc_ref = xmlNewDocRawNode(getDocPtr(set), NULL,
+ (pcmkXmlStr) XML_TAG_RESOURCE_REF, NULL);
+ crm_xml_add(new_rsc_ref, XML_ATTR_ID, obj_ref);
+ xmlAddNextSibling(last_ref, new_rsc_ref);
+
+ last_ref = new_rsc_ref;
+ }
+
+ any_refs = true;
+
+ /* Freeing the resource_ref now would break the XML child
+ * iteration, so just remember it for freeing later.
+ */
+ tag_refs = g_list_append(tag_refs, xml_rsc);
+ }
+ }
+
+ /* Now free '<resource_ref id="tag1"/>', and finally get:
+
+ <resource_set id="tag1-colocation-0" sequential="true">
+ <resource_ref id="rsc1"/>
+ <resource_ref id="rsc2"/>
+ <resource_ref id="rsc3"/>
+ <resource_ref id="rsc4"/>
+ </resource_set>
+
+ */
+ for (gIter = tag_refs; gIter != NULL; gIter = gIter->next) {
+ xmlNode *tag_ref = gIter->data;
+
+ free_xml(tag_ref);
+ }
+ g_list_free(tag_refs);
+ }
+
+ if (!any_refs) {
+ free_xml(new_xml);
+ new_xml = NULL;
+ }
+ return new_xml;
+}
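+
+/* For the tag1 example above to apply, tag1 would be defined elsewhere in
+ * the configuration along these lines:
+ *
+ *   <tags>
+ *     <tag id="tag1">
+ *       <obj_ref id="rsc2"/>
+ *       <obj_ref id="rsc3"/>
+ *     </tag>
+ *   </tags>
+ */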
+
+/*!
+ * \internal
+ * \brief Convert a tag into a resource set of tagged resources
+ *
+ * \param[in,out] xml_obj Constraint XML
+ * \param[out] rsc_set Where to store resource set XML created based on tag
+ * \param[in] attr Name of XML attribute containing resource or tag ID
+ * \param[in] convert_rsc Convert to set even if \p attr references a resource
+ * \param[in] data_set Cluster working set
+ *
+ * \return false on a configuration error, otherwise true (whether or not a
+ * resource set was created)
+ */
+bool
+pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
+ bool convert_rsc, const pe_working_set_t *data_set)
+{
+ const char *cons_id = NULL;
+ const char *id = NULL;
+
+ pe_resource_t *rsc = NULL;
+ pe_tag_t *tag = NULL;
+
+ *rsc_set = NULL;
+
+ CRM_CHECK((xml_obj != NULL) && (attr != NULL), return false);
+
+ cons_id = ID(xml_obj);
+ if (cons_id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
+ return false;
+ }
+
+ id = crm_element_value(xml_obj, attr);
+ if (id == NULL) {
+ return true;
+ }
+
+ if (!pcmk__valid_resource_or_tag(data_set, id, &rsc, &tag)) {
+ pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+ "valid resource or tag", cons_id, id);
+ return false;
+
+ } else if (tag) {
+ GList *gIter = NULL;
+
+ /* A template/tag is referenced by the "attr" attribute (first, then, rsc,
+ * or with-rsc). Add the template/tag's corresponding "resource_set", which
+ * contains the resources derived from or tagged with it, under the
+ * constraint. */
+ *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET);
+ crm_xml_add(*rsc_set, XML_ATTR_ID, id);
+
+ for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
+ const char *obj_ref = (const char *) gIter->data;
+ xmlNode *rsc_ref = NULL;
+
+ rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF);
+ crm_xml_add(rsc_ref, XML_ATTR_ID, obj_ref);
+ }
+
+ /* Set sequential="false" for the resource_set */
+ pcmk__xe_set_bool_attr(*rsc_set, "sequential", false);
+
+ } else if ((rsc != NULL) && convert_rsc) {
+ /* Even if a regular resource is referenced by "attr", convert it into a
+ * resource_set, because the other side of the constraint could be a
+ * template/tag reference. */
+ xmlNode *rsc_ref = NULL;
+
+ *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET);
+ crm_xml_add(*rsc_set, XML_ATTR_ID, id);
+
+ rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF);
+ crm_xml_add(rsc_ref, XML_ATTR_ID, id);
+
+ } else {
+ return true;
+ }
+
+ /* Remove the "attr" attribute referencing the template/tag */
+ if (*rsc_set != NULL) {
+ xml_remove_prop(xml_obj, attr);
+ }
+
+ return true;
+}
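+
+/* Continuing the tag1 example (rsc2 and rsc3 tagged with tag1), this
+ * function called with attr="rsc" would rewrite
+ *
+ *   <rsc_colocation id="c1" rsc="tag1" with-rsc="rsc4" score="INFINITY"/>
+ *
+ * into
+ *
+ *   <rsc_colocation id="c1" with-rsc="rsc4" score="INFINITY">
+ *     <resource_set id="tag1" sequential="false">
+ *       <resource_ref id="rsc2"/>
+ *       <resource_ref id="rsc3"/>
+ *     </resource_set>
+ *   </rsc_colocation>
+ */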
+
+/*!
+ * \internal
+ * \brief Create constraints inherent to resource types
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__create_internal_constraints(pe_working_set_t *data_set)
+{
+ crm_trace("Create internal constraints");
+ for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ rsc->cmds->internal_constraints(rsc);
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_fencing.c b/lib/pacemaker/pcmk_sched_fencing.c
new file mode 100644
index 0000000..c912640
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_fencing.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Check whether a resource is known on a particular node
+ *
+ * \param[in] rsc Resource to check
+ * \param[in] node Node to check
+ *
+ * \return TRUE if resource (or parent if an anonymous clone) is known on \p node
+ */
+static bool
+rsc_is_known_on(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ if (pe_hash_table_lookup(rsc->known_on, node->details->id)) {
+ return TRUE;
+
+ } else if ((rsc->variant == pe_native)
+ && pe_rsc_is_anon_clone(rsc->parent)
+ && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) {
+ /* We check only the parent, not the uber-parent, because we cannot
+ * assume that the resource is known if it is in an anonymously cloned
+ * group (which may be only partially known).
+ */
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Order a resource's start and promote actions relative to fencing
+ *
+ * \param[in,out] rsc Resource to be ordered
+ * \param[in,out] stonith_op Fence action
+ */
+static void
+order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
+{
+ pe_node_t *target;
+ GList *gIter = NULL;
+
+ CRM_CHECK(stonith_op && stonith_op->node, return);
+ target = stonith_op->node;
+
+ for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+
+ switch (action->needs) {
+ case rsc_req_nothing:
+ // Anything other than start or promote requires nothing
+ break;
+
+ case rsc_req_stonith:
+ order_actions(stonith_op, action, pe_order_optional);
+ break;
+
+ case rsc_req_quorum:
+ if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
+ && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)
+ && !rsc_is_known_on(rsc, target)) {
+
+ /* If we don't know the status of the resource on the node
+ * we're about to shoot, we have to assume it may be active
+ * there. Order the resource start after the fencing. This
+ * is analogous to waiting for all the probes for a resource
+ * to complete before starting it.
+ *
+ * The most likely explanation is that the DC died and took
+ * its status with it.
+ */
+ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
+ pe__node_name(target));
+ order_actions(stonith_op, action,
+ pe_order_optional | pe_order_runnable_left);
+ }
+ break;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Order a resource's stop and demote actions relative to fencing
+ *
+ * \param[in,out] rsc Resource to be ordered
+ * \param[in,out] stonith_op Fence action
+ */
+static void
+order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
+{
+ GList *gIter = NULL;
+ GList *action_list = NULL;
+ bool order_implicit = false;
+
+ pe_resource_t *top = uber_parent(rsc);
+ pe_action_t *parent_stop = NULL;
+ pe_node_t *target;
+
+ CRM_CHECK(stonith_op && stonith_op->node, return);
+ target = stonith_op->node;
+
+ /* Get a list of stop actions potentially implied by the fencing */
+ action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE);
+
+ /* If resource requires fencing, implicit actions must occur after fencing.
+ *
+ * Implied stops and demotes of resources running on guest nodes are always
+ * ordered after fencing, even if the resource does not require fencing,
+ * because guest node "fencing" is actually just a resource stop.
+ */
+ if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)
+ || pe__is_guest_node(target)) {
+
+ order_implicit = true;
+ }
+
+ if (action_list && order_implicit) {
+ parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
+ }
+
+ for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+
+ // The stop would never complete, so convert it into a pseudo-action.
+ pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable);
+
+ if (order_implicit) {
+ pe__set_action_flags(action, pe_action_implied_by_stonith);
+
+ /* Order the stonith before the parent stop (if any).
+ *
+ * Also order the stonith before the resource stop, unless the
+ * resource is inside a bundle -- that would cause a graph loop.
+ * We can rely on the parent stop's ordering instead.
+ *
+ * User constraints must not order a resource in a guest node
+ * relative to the guest node container resource. The
+ * pe_order_preserve flag marks constraints as generated by the
+ * cluster and thus immune to that check (and is irrelevant if
+ * target is not a guest).
+ */
+ if (!pe_rsc_is_bundled(rsc)) {
+ order_actions(stonith_op, action, pe_order_preserve);
+ }
+ order_actions(stonith_op, parent_stop, pe_order_preserve);
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
+ rsc->id, (order_implicit? "after" : "because"),
+ pe__node_name(target));
+ } else {
+ crm_info("%s is implicit %s %s is fenced",
+ action->uuid, (order_implicit? "after" : "because"),
+ pe__node_name(target));
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_notify)) {
+ pe__order_notifs_after_fencing(action, rsc, stonith_op);
+ }
+
+#if 0
+ /* It might be a good idea to stop healthy resources on a node about to
+ * be fenced, when possible.
+ *
+ * However, fencing must be done before a failed resource's
+ * (pseudo-)stop action, so that could create a loop. For example, given
+ * a group of A and B running on node N with a failed stop of B:
+ *
+ * fence N -> stop B (pseudo-op) -> stop A -> fence N
+ *
+ * The block below creates the stop A -> fence N ordering and therefore
+ * must (at least for now) be disabled. Instead, run the block above and
+ * treat all resources on N as B would be (i.e., as a pseudo-op after
+ * the fencing).
+ *
+ * @TODO Maybe break the "A requires B" dependency in
+ * pcmk__update_action_for_orderings() and use this block for healthy
+ * resources instead of the above.
+ */
+ crm_info("Moving healthy resource %s off %s before fencing",
+ rsc->id, pe__node_name(node));
+ pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
+ strdup(CRM_OP_FENCE), stonith_op,
+ pe_order_optional, rsc->cluster);
+#endif
+ }
+
+ g_list_free(action_list);
+
+ /* Get a list of demote actions potentially implied by the fencing */
+ action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE);
+
+ for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+
+ if (!(action->node->details->online) || action->node->details->unclean
+ || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ pe_rsc_info(rsc,
+ "Demote of failed resource %s is implicit after %s is fenced",
+ rsc->id, pe__node_name(target));
+ } else {
+ pe_rsc_info(rsc, "%s is implicit after %s is fenced",
+ action->uuid, pe__node_name(target));
+ }
+
+ /* The demote would never complete and is now implied by the
+ * fencing, so convert it into a pseudo-action.
+ */
+ pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable);
+
+ if (pe_rsc_is_bundled(rsc)) {
+ // Do nothing, let recovery be ordered after parent's implied stop
+
+ } else if (order_implicit) {
+ order_actions(stonith_op, action, pe_order_preserve|pe_order_optional);
+ }
+ }
+ }
+
+ g_list_free(action_list);
+}
+
+/*!
+ * \internal
+ * \brief Order resource actions properly relative to fencing
+ *
+ * \param[in,out] rsc Resource whose actions should be ordered
+ * \param[in,out] stonith_op Fencing operation to be ordered against
+ */
+static void
+rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op)
+{
+ if (rsc->children) {
+ GList *gIter = NULL;
+
+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+ rsc_stonith_ordering(child_rsc, stonith_op);
+ }
+
+ } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pe_rsc_trace(rsc,
+ "Skipping fencing constraints for unmanaged resource: %s",
+ rsc->id);
+
+ } else {
+ order_start_vs_fencing(rsc, stonith_op);
+ order_stop_vs_fencing(rsc, stonith_op);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Order all actions appropriately relative to a fencing operation
+ *
+ * Ensure start operations of affected resources are ordered after fencing,
+ * imply stop and demote operations of affected resources by marking them as
+ * pseudo-actions, etc.
+ *
+ * \param[in,out] stonith_op Fencing operation
+ * \param[in,out] data_set Working set of cluster
+ */
+void
+pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
+{
+ CRM_CHECK(stonith_op && data_set, return);
+ for (GList *r = data_set->resources; r != NULL; r = r->next) {
+ rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Order an action after unfencing
+ *
+ * \param[in] rsc Resource that action is for
+ * \param[in,out] node Node that action is on
+ * \param[in,out] action Action to be ordered after unfencing
+ * \param[in] order Ordering flags
+ */
+void
+pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node,
+ pe_action_t *action, enum pe_ordering order)
+{
+ /* When unfencing is in use, we order unfence actions before any probe or
+ * start of resources that require unfencing, and also of fence devices.
+ *
+ * This might seem to violate the principle that fence devices require
+ * only quorum. However, fence agents that unfence often don't have enough
+ * information to even probe or start unless the node is first unfenced.
+ */
+ if ((pcmk_is_set(rsc->flags, pe_rsc_fence_device)
+ && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing))
+ || pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
+
+ /* Start with an optional ordering. Requiring unfencing would result in
+ * the node being unfenced, and all its resources being stopped,
+ * whenever a new resource is added -- which would be highly suboptimal.
+ */
+ pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
+ node->details->data_set);
+
+ order_actions(unfence, action, order);
+
+ if (!pcmk__node_unfenced(node)) {
+ // But unfencing is required if it has never been done
+ char *reason = crm_strdup_printf("required by %s %s",
+ rsc->id, action->task);
+
+ trigger_unfencing(NULL, node, reason, NULL,
+ node->details->data_set);
+ free(reason);
+ }
+ }
+}
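+
+/* For example, a resource configured with requires="unfencing" (or a fence
+ * device, when unfencing is enabled cluster-wide) has each probe or start
+ * on a node ordered here after that node's "on" (unfence) action.
+ */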
+
+/*!
+ * \internal
+ * \brief Create pseudo-op for guest node fence, and order relative to it
+ *
+ * \param[in,out] node Guest node to fence
+ */
+void
+pcmk__fence_guest(pe_node_t *node)
+{
+ pe_resource_t *container = NULL;
+ pe_action_t *stop = NULL;
+ pe_action_t *stonith_op = NULL;
+
+ /* The fence action is just a label; we don't do anything differently for
+ * off vs. reboot. We specify it explicitly, rather than let it default to
+ * cluster's default action, because we are not _initiating_ fencing -- we
+ * are creating a pseudo-event to describe fencing that is already occurring
+ * by other means (container recovery).
+ */
+ const char *fence_action = "off";
+
+ CRM_ASSERT(node != NULL);
+
+ /* Check whether guest's container resource has any explicit stop or
+ * start (the stop may be implied by fencing of the guest's host).
+ */
+ container = node->details->remote_rsc->container;
+ if (container) {
+ stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP,
+ NULL);
+
+ if (find_first_action(container->actions, NULL, CRMD_ACTION_START,
+ NULL)) {
+ fence_action = "reboot";
+ }
+ }
+
+ /* Create a fence pseudo-event, so we have an event to order actions
+ * against, and the controller can always detect it.
+ */
+ stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
+ FALSE, node->details->data_set);
+ pe__set_action_flags(stonith_op, pe_action_pseudo|pe_action_runnable);
+
+ /* We want to imply stops/demotes after the guest is stopped, not wait until
+ * it is restarted, so we always order pseudo-fencing after stop, not start
+ * (even though start might be closer to what is done for a real reboot).
+ */
+ if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
+ pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
+ NULL, FALSE,
+ node->details->data_set);
+
+ crm_info("Implying guest %s is down (action %d) after %s fencing",
+ pe__node_name(node), stonith_op->id,
+ pe__node_name(stop->node));
+ order_actions(parent_stonith_op, stonith_op,
+ pe_order_runnable_left|pe_order_implies_then);
+
+ } else if (stop) {
+ order_actions(stop, stonith_op,
+ pe_order_runnable_left|pe_order_implies_then);
+ crm_info("Implying guest %s is down (action %d) "
+ "after container %s is stopped (action %d)",
+ pe__node_name(node), stonith_op->id,
+ container->id, stop->id);
+ } else {
+ /* If we're fencing the guest node but there's no stop for the guest
+ * resource, we must think the guest is already stopped. However, we may
+ * think so because its resource history was just cleaned. To avoid
+ * unnecessarily considering the guest node down if it's really up,
+ * order the pseudo-fencing after any stop of the connection resource,
+ * which will be ordered after any container (re-)probe.
+ */
+ stop = find_first_action(node->details->remote_rsc->actions, NULL,
+ RSC_STOP, NULL);
+
+ if (stop) {
+ order_actions(stop, stonith_op, pe_order_optional);
+ crm_info("Implying guest %s is down (action %d) "
+ "after connection is stopped (action %d)",
+ pe__node_name(node), stonith_op->id, stop->id);
+ } else {
+ /* Not sure why we're fencing, but everything must already be
+ * cleanly stopped.
+ */
+ crm_info("Implying guest %s is down (action %d) ",
+ pe__node_name(node), stonith_op->id);
+ }
+ }
+
+ // Order/imply other actions relative to pseudo-fence as with real fence
+ pcmk__order_vs_fence(stonith_op, node->details->data_set);
+}
+
+/*!
+ * \internal
+ * \brief Check whether node has already been unfenced
+ *
+ * \param[in] node Node to check
+ *
+ * \return true if node has a nonzero #node-unfenced attribute,
+ * otherwise false
+ */
+bool
+pcmk__node_unfenced(const pe_node_t *node)
+{
+ const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
+
+ return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
+}
+
+/*!
+ * \internal
+ * \brief Order a resource's start and stop relative to unfencing of a node
+ *
+ * \param[in,out] data Node that could be unfenced
+ * \param[in,out] user_data Resource to order
+ */
+void
+pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
+{
+ pe_node_t *node = (pe_node_t *) data;
+ pe_resource_t *rsc = (pe_resource_t *) user_data;
+
+ pe_action_t *unfence = pe_fence_op(node, "on", true, NULL, false,
+ rsc->cluster);
+
+ crm_debug("Ordering any stops of %s before %s, and any starts after",
+ rsc->id, unfence->uuid);
+
+ /*
+ * It would be more efficient to order clone resources once,
+ * rather than order each instance, but ordering the instance
+ * allows us to avoid unnecessary dependencies that might conflict
+ * with user constraints.
+ *
+ * @TODO: This constraint can still produce a transition loop if the
+ * resource has a stop scheduled on the node being unfenced, and
+ * there is a user ordering constraint to start some other resource
+ * (which will be ordered after the unfence) before stopping this
+ * resource. An example is "start some slow-starting cloned service
+ * before stopping an associated virtual IP that may be moving to
+ * it":
+ * stop this -> unfencing -> start that -> stop this
+ */
+ pcmk__new_ordering(rsc, stop_key(rsc), NULL,
+ NULL, strdup(unfence->uuid), unfence,
+ pe_order_optional|pe_order_same_node,
+ rsc->cluster);
+
+ pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
+ rsc, start_key(rsc), NULL,
+ pe_order_implies_then_on_node|pe_order_same_node,
+ rsc->cluster);
+}
diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c
new file mode 100644
index 0000000..cb139f7
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_group.c
@@ -0,0 +1,865 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+
+#include <crm/msg_xml.h>
+
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Assign a group resource to a node
+ *
+ * \param[in,out] rsc Group resource to assign to a node
+ * \param[in] prefer Node to prefer, if all else is equal
+ *
+ * \return Node that \p rsc is assigned to, if assigned entirely to one node
+ */
+pe_node_t *
+pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer)
+{
+ pe_node_t *first_assigned_node = NULL;
+ pe_resource_t *first_member = NULL;
+
+ CRM_ASSERT(rsc != NULL);
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ return rsc->allocated_to; // Assignment already done
+ }
+ if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
+ pe_rsc_debug(rsc, "Assignment dependency loop detected involving %s",
+ rsc->id);
+ return NULL;
+ }
+
+ if (rsc->children == NULL) {
+ // No members to assign
+ pe__clear_resource_flags(rsc, pe_rsc_provisional);
+ return NULL;
+ }
+
+ pe__set_resource_flags(rsc, pe_rsc_allocating);
+ first_member = (pe_resource_t *) rsc->children->data;
+ rsc->role = first_member->role;
+
+ pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
+ rsc, __func__, rsc->allowed_nodes, rsc->cluster);
+
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *member = (pe_resource_t *) iter->data;
+ pe_node_t *node = NULL;
+
+ pe_rsc_trace(rsc, "Assigning group %s member %s",
+ rsc->id, member->id);
+ node = member->cmds->assign(member, prefer);
+ if (first_assigned_node == NULL) {
+ first_assigned_node = node;
+ }
+ }
+
+ pe__set_next_role(rsc, first_member->next_role, "first group member");
+ pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
+
+ if (!pe__group_flag_is_set(rsc, pe__group_colocated)) {
+ return NULL;
+ }
+ return first_assigned_node;
+}
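+
+/* For example, given a colocated group (hypothetical IDs):
+ *
+ *   <group id="web-group">
+ *     <primitive id="web-ip" ... />
+ *     <primitive id="web-server" ... />
+ *   </group>
+ *
+ * the members are assigned in configuration order, and the node chosen for
+ * web-ip (the first member) is returned as the group's node.
+ */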
+
+/*!
+ * \internal
+ * \brief Create a pseudo-operation for a group as an ordering point
+ *
+ * \param[in,out] group Group resource to create action for
+ * \param[in] action Action name
+ *
+ * \return Newly created pseudo-operation
+ */
+static pe_action_t *
+create_group_pseudo_op(pe_resource_t *group, const char *action)
+{
+ pe_action_t *op = custom_action(group, pcmk__op_key(group->id, action, 0),
+ action, NULL, TRUE, TRUE, group->cluster);
+ pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
+ return op;
+}
+
+/*!
+ * \internal
+ * \brief Create all actions needed for a given group resource
+ *
+ * \param[in,out] rsc Group resource to create actions for
+ */
+void
+pcmk__group_create_actions(pe_resource_t *rsc)
+{
+ CRM_ASSERT(rsc != NULL);
+
+ pe_rsc_trace(rsc, "Creating actions for group %s", rsc->id);
+
+ // Create actions for individual group members
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *member = (pe_resource_t *) iter->data;
+
+ member->cmds->create_actions(member);
+ }
+
+ // Create pseudo-actions for group itself to serve as ordering points
+ create_group_pseudo_op(rsc, RSC_START);
+ create_group_pseudo_op(rsc, RSC_STARTED);
+ create_group_pseudo_op(rsc, RSC_STOP);
+ create_group_pseudo_op(rsc, RSC_STOPPED);
+ if (crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTABLE))) {
+ create_group_pseudo_op(rsc, RSC_DEMOTE);
+ create_group_pseudo_op(rsc, RSC_DEMOTED);
+ create_group_pseudo_op(rsc, RSC_PROMOTE);
+ create_group_pseudo_op(rsc, RSC_PROMOTED);
+ }
+}
+
+// User data for member_internal_constraints()
+struct member_data {
+ // These could be derived from member but this avoids some function calls
+ bool ordered;
+ bool colocated;
+ bool promotable;
+
+ pe_resource_t *last_active;
+ pe_resource_t *previous_member;
+};
+
+/*!
+ * \internal
+ * \brief Create implicit constraints needed for a group member
+ *
+ * \param[in,out] data Group member to create implicit constraints for
+ * \param[in,out] user_data Member data (struct member_data *)
+ */
+static void
+member_internal_constraints(gpointer data, gpointer user_data)
+{
+ pe_resource_t *member = (pe_resource_t *) data;
+ struct member_data *member_data = (struct member_data *) user_data;
+
+ // For ordering demote vs demote or stop vs stop
+ uint32_t down_flags = pe_order_implies_first_printed;
+
+ // For ordering demote vs demoted or stop vs stopped
+ uint32_t post_down_flags = pe_order_implies_then_printed;
+
+ // Create the individual member's implicit constraints
+ member->cmds->internal_constraints(member);
+
+ if (member_data->previous_member == NULL) {
+ // This is first member
+ if (member_data->ordered) {
+ pe__set_order_flags(down_flags, pe_order_optional);
+ post_down_flags = pe_order_implies_then;
+ }
+
+ } else if (member_data->colocated) {
+ // Colocate this member with the previous one
+ pcmk__new_colocation("group:internal_colocation", NULL, INFINITY,
+ member, member_data->previous_member, NULL, NULL,
+ pcmk_is_set(member->flags, pe_rsc_critical),
+ member->cluster);
+ }
+
+ if (member_data->promotable) {
+ // Demote group -> demote member -> group is demoted
+ pcmk__order_resource_actions(member->parent, RSC_DEMOTE,
+ member, RSC_DEMOTE, down_flags);
+ pcmk__order_resource_actions(member, RSC_DEMOTE,
+ member->parent, RSC_DEMOTED,
+ post_down_flags);
+
+ // Promote group -> promote member -> group is promoted
+ pcmk__order_resource_actions(member, RSC_PROMOTE,
+ member->parent, RSC_PROMOTED,
+ pe_order_runnable_left
+ |pe_order_implies_then
+ |pe_order_implies_then_printed);
+ pcmk__order_resource_actions(member->parent, RSC_PROMOTE,
+ member, RSC_PROMOTE,
+ pe_order_implies_first_printed);
+ }
+
+ // Stop group -> stop member -> group is stopped
+ pcmk__order_stops(member->parent, member, down_flags);
+ pcmk__order_resource_actions(member, RSC_STOP, member->parent, RSC_STOPPED,
+ post_down_flags);
+
+ // Start group -> start member -> group is started
+ pcmk__order_starts(member->parent, member, pe_order_implies_first_printed);
+ pcmk__order_resource_actions(member, RSC_START, member->parent, RSC_STARTED,
+ pe_order_runnable_left
+ |pe_order_implies_then
+ |pe_order_implies_then_printed);
+
+ if (!member_data->ordered) {
+ pcmk__order_starts(member->parent, member,
+ pe_order_implies_then
+ |pe_order_runnable_left
+ |pe_order_implies_first_printed);
+ if (member_data->promotable) {
+ pcmk__order_resource_actions(member->parent, RSC_PROMOTE, member,
+ RSC_PROMOTE,
+ pe_order_implies_then
+ |pe_order_runnable_left
+ |pe_order_implies_first_printed);
+ }
+
+ } else if (member_data->previous_member == NULL) {
+ pcmk__order_starts(member->parent, member, pe_order_none);
+ if (member_data->promotable) {
+ pcmk__order_resource_actions(member->parent, RSC_PROMOTE, member,
+ RSC_PROMOTE, pe_order_none);
+ }
+
+ } else {
+ // Order this member relative to the previous one
+
+ pcmk__order_starts(member_data->previous_member, member,
+ pe_order_implies_then|pe_order_runnable_left);
+ pcmk__order_stops(member, member_data->previous_member,
+ pe_order_optional|pe_order_restart);
+
+ /* In unusual circumstances (such as adding a new member to the middle
+ * of a group with unmanaged later members), this member may be active
+ * while the previous (new) member is inactive. In this situation, the
+ * usual restart orderings will be irrelevant, so we need to order this
+ * member's stop before the previous member's start.
+ */
+ if ((member->running_on != NULL)
+ && (member_data->previous_member->running_on == NULL)) {
+ pcmk__order_resource_actions(member, RSC_STOP,
+ member_data->previous_member, RSC_START,
+ pe_order_implies_first
+ |pe_order_runnable_left);
+ }
+
+ if (member_data->promotable) {
+ pcmk__order_resource_actions(member_data->previous_member,
+ RSC_PROMOTE, member, RSC_PROMOTE,
+ pe_order_implies_then
+ |pe_order_runnable_left);
+ pcmk__order_resource_actions(member, RSC_DEMOTE,
+ member_data->previous_member,
+ RSC_DEMOTE, pe_order_optional);
+ }
+ }
+
+ // Make sure partially active groups shut down in sequence
+ if (member->running_on != NULL) {
+ if (member_data->ordered && (member_data->previous_member != NULL)
+ && (member_data->previous_member->running_on == NULL)
+ && (member_data->last_active != NULL)
+ && (member_data->last_active->running_on != NULL)) {
+ pcmk__order_stops(member, member_data->last_active, pe_order_optional);
+ }
+ member_data->last_active = member;
+ }
+
+ member_data->previous_member = member;
+}
+
+/*!
+ * \internal
+ * \brief Create implicit constraints needed for a group resource
+ *
+ * \param[in,out] rsc Group resource to create implicit constraints for
+ */
+void
+pcmk__group_internal_constraints(pe_resource_t *rsc)
+{
+ struct member_data member_data = { false, };
+
+ CRM_ASSERT(rsc != NULL);
+
+ /* Order group pseudo-actions relative to each other for restarting:
+ * stop group -> group is stopped -> start group -> group is started
+ */
+ pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
+ pe_order_runnable_left);
+ pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
+ pe_order_optional);
+ pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
+ pe_order_runnable_left);
+
+ member_data.ordered = pe__group_flag_is_set(rsc, pe__group_ordered);
+ member_data.colocated = pe__group_flag_is_set(rsc, pe__group_colocated);
+ member_data.promotable = pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
+ pe_rsc_promotable);
+ g_list_foreach(rsc->children, member_internal_constraints, &member_data);
+}
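+
+/* For an ordered, colocated group with members A and B, the constraints
+ * created above chain the member actions between the group's pseudo-actions,
+ * roughly:
+ *
+ *     start_group -> start(A) -> start(B) -> started_group
+ *     stop_group  -> stop(B)  -> stop(A)  -> stopped_group
+ *
+ * plus a mandatory colocation of B with A. Stops deliberately run in reverse
+ * member order, so a restart unwinds the group before rebuilding it.
+ */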
+
+/*!
+ * \internal
+ * \brief Apply a colocation's score to node weights or resource priority
+ *
+ * Given a colocation constraint for a group with some other resource, apply the
+ * score to the dependent's allowed node weights (if we are still placing
+ * resources) or priority (if we are choosing promotable clone instance roles).
+ *
+ * \param[in,out] dependent Dependent group resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint to apply
+ */
+static void
+colocate_group_with(pe_resource_t *dependent, const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation)
+{
+ pe_resource_t *member = NULL;
+
+ if (dependent->children == NULL) {
+ return;
+ }
+
+    pe_rsc_trace(primary,
+                 "Processing colocation %s (group %s with %s) for dependent",
+                 colocation->id, dependent->id, primary->id);
+
+ if (pe__group_flag_is_set(dependent, pe__group_colocated)) {
+ // Colocate first member (internal colocations will handle the rest)
+ member = (pe_resource_t *) dependent->children->data;
+ member->cmds->apply_coloc_score(member, primary, colocation, true);
+ return;
+ }
+
+ if (colocation->score >= INFINITY) {
+ pcmk__config_err("%s: Cannot perform mandatory colocation between "
+ "non-colocated group and %s",
+ dependent->id, primary->id);
+ return;
+ }
+
+ // Colocate each member individually
+ for (GList *iter = dependent->children; iter != NULL; iter = iter->next) {
+ member = (pe_resource_t *) iter->data;
+ member->cmds->apply_coloc_score(member, primary, colocation, true);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Apply a colocation's score to node weights or resource priority
+ *
+ * Given a colocation constraint for some other resource with a group, apply the
+ * score to the dependent's allowed node weights (if we are still placing
+ * resources) or priority (if we are choosing promotable clone instance roles).
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary group resource in colocation
+ * \param[in] colocation Colocation constraint to apply
+ */
+static void
+colocate_with_group(pe_resource_t *dependent, const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation)
+{
+ pe_resource_t *member = NULL;
+
+ pe_rsc_trace(primary,
+ "Processing colocation %s (%s with group %s) for primary",
+ colocation->id, dependent->id, primary->id);
+
+ if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ return;
+ }
+
+ if (pe__group_flag_is_set(primary, pe__group_colocated)) {
+
+ if (colocation->score >= INFINITY) {
+ /* For mandatory colocations, the entire group must be assignable
+ * (and in the specified role if any), so apply the colocation based
+ * on the last member.
+ */
+ member = pe__last_group_member(primary);
+ } else if (primary->children != NULL) {
+ /* For optional colocations, whether the group is partially or fully
+ * up doesn't matter, so apply the colocation based on the first
+ * member.
+ */
+ member = (pe_resource_t *) primary->children->data;
+ }
+ if (member == NULL) {
+ return; // Nothing to colocate with
+ }
+
+ member->cmds->apply_coloc_score(dependent, member, colocation, false);
+ return;
+ }
+
+ if (colocation->score >= INFINITY) {
+ pcmk__config_err("%s: Cannot perform mandatory colocation with"
+ " non-colocated group %s",
+ dependent->id, primary->id);
+ return;
+ }
+
+ // Colocate dependent with each member individually
+ for (GList *iter = primary->children; iter != NULL; iter = iter->next) {
+ member = (pe_resource_t *) iter->data;
+ member->cmds->apply_coloc_score(dependent, member, colocation, false);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Apply a colocation's score to node weights or resource priority
+ *
+ * Given a colocation constraint, apply its score to the dependent's
+ * allowed node weights (if we are still placing resources) or priority (if
+ * we are choosing promotable clone instance roles).
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint to apply
+ * \param[in] for_dependent true if called on behalf of dependent
+ */
+void
+pcmk__group_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent)
+{
+ CRM_ASSERT((dependent != NULL) && (primary != NULL)
+ && (colocation != NULL));
+
+ if (for_dependent) {
+ colocate_group_with(dependent, primary, colocation);
+
+ } else {
+ // Method should only be called for primitive dependents
+ CRM_ASSERT(dependent->variant == pe_native);
+
+ colocate_with_group(dependent, primary, colocation);
+ }
+}
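+
+/* Summary of the dispatch above:
+ *
+ * - The dependent is a colocated group: apply the score via its first member
+ *   (the internal colocations propagate it to the rest).
+ * - The primary is a colocated group: apply via its last member if the score
+ *   is mandatory (the whole group must be placeable), otherwise via its
+ *   first member.
+ * - Either side is a non-colocated group: a mandatory score is a
+ *   configuration error; an optional score is applied member by member.
+ */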
+
+/*!
+ * \internal
+ * \brief Return action flags for a given group resource action
+ *
+ * \param[in,out] action Group action to get flags for
+ * \param[in] node If not NULL, limit effects to this node
+ *
+ * \return Flags appropriate to \p action on \p node
+ */
+enum pe_action_flags
+pcmk__group_action_flags(pe_action_t *action, const pe_node_t *node)
+{
+ // Default flags for a group action
+ enum pe_action_flags flags = pe_action_optional
+ |pe_action_runnable
+ |pe_action_pseudo;
+
+ CRM_ASSERT(action != NULL);
+
+ // Update flags considering each member's own flags for same action
+ for (GList *iter = action->rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *member = (pe_resource_t *) iter->data;
+
+ // Check whether member has the same action
+ enum action_tasks task = get_complex_task(member, action->task);
+ const char *task_s = task2text(task);
+ pe_action_t *member_action = find_first_action(member->actions, NULL,
+ task_s, node);
+
+ if (member_action != NULL) {
+ enum pe_action_flags member_flags;
+
+ member_flags = member->cmds->action_flags(member_action, node);
+
+ // Group action is mandatory if any member action is
+ if (pcmk_is_set(flags, pe_action_optional)
+ && !pcmk_is_set(member_flags, pe_action_optional)) {
+ pe_rsc_trace(action->rsc, "%s is mandatory because %s is",
+ action->uuid, member_action->uuid);
+ pe__clear_raw_action_flags(flags, "group action",
+ pe_action_optional);
+ pe__clear_action_flags(action, pe_action_optional);
+ }
+
+ // Group action is unrunnable if any member action is
+ if (!pcmk__str_eq(task_s, action->task, pcmk__str_none)
+ && pcmk_is_set(flags, pe_action_runnable)
+ && !pcmk_is_set(member_flags, pe_action_runnable)) {
+
+ pe_rsc_trace(action->rsc, "%s is unrunnable because %s is",
+ action->uuid, member_action->uuid);
+ pe__clear_raw_action_flags(flags, "group action",
+ pe_action_runnable);
+ pe__clear_action_flags(action, pe_action_runnable);
+ }
+
+ /* Group (pseudo-)actions other than stop or demote are unrunnable
+ * unless every member will do it.
+ */
+ } else if ((task != stop_rsc) && (task != action_demote)) {
+ pe_rsc_trace(action->rsc,
+ "%s is not runnable because %s will not %s",
+ action->uuid, member->id, task_s);
+ pe__clear_raw_action_flags(flags, "group action",
+ pe_action_runnable);
+ }
+ }
+
+ return flags;
+}
+
+/*!
+ * \internal
+ * \brief Update two actions according to an ordering between them
+ *
+ * Given information about an ordering of two actions, update the actions' flags
+ * (and runnable_before members if appropriate) as appropriate for the ordering.
+ * Effects may cascade to other orderings involving the actions as well.
+ *
+ * \param[in,out] first 'First' action in an ordering
+ * \param[in,out] then 'Then' action in an ordering
+ * \param[in] node If not NULL, limit scope of ordering to this node
+ * (only used when interleaving instances)
+ * \param[in] flags Action flags for \p first for ordering purposes
+ * \param[in] filter Action flags to limit scope of certain updates (may
+ * include pe_action_optional to affect only mandatory
+ * actions, and pe_action_runnable to affect only
+ * runnable actions)
+ * \param[in] type Group of enum pe_ordering flags to apply
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Group of enum pcmk__updated flags indicating what was updated
+ */
+uint32_t
+pcmk__group_update_ordered_actions(pe_action_t *first, pe_action_t *then,
+ const pe_node_t *node, uint32_t flags,
+ uint32_t filter, uint32_t type,
+ pe_working_set_t *data_set)
+{
+ uint32_t changed = pcmk__updated_none;
+
+ CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL));
+
+    // This group method is valid only when the "then" action is for a group
+ CRM_ASSERT(then->rsc != NULL);
+
+ // Update the actions for the group itself
+ changed |= pcmk__update_ordered_actions(first, then, node, flags, filter,
+ type, data_set);
+
+ // Update the actions for each group member
+ for (GList *iter = then->rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *member = (pe_resource_t *) iter->data;
+
+ pe_action_t *member_action = find_first_action(member->actions, NULL,
+ then->task, node);
+
+ if (member_action != NULL) {
+ changed |= member->cmds->update_ordered_actions(first,
+ member_action, node,
+ flags, filter, type,
+ data_set);
+ }
+ }
+ return changed;
+}
+
+/*!
+ * \internal
+ * \brief Apply a location constraint to a group's allowed node scores
+ *
+ * \param[in,out] rsc Group resource to apply constraint to
+ * \param[in,out] location Location constraint to apply
+ */
+void
+pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location)
+{
+ GList *node_list_orig = NULL;
+ GList *node_list_copy = NULL;
+ bool reset_scores = true;
+
+ CRM_ASSERT((rsc != NULL) && (location != NULL));
+
+ node_list_orig = location->node_list_rh;
+ node_list_copy = pcmk__copy_node_list(node_list_orig, true);
+ reset_scores = pe__group_flag_is_set(rsc, pe__group_colocated);
+
+ // Apply the constraint for the group itself (updates node scores)
+ pcmk__apply_location(rsc, location);
+
+ // Apply the constraint for each member
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *member = (pe_resource_t *) iter->data;
+
+ member->cmds->apply_location(member, location);
+
+ if (reset_scores) {
+ /* The first member of colocated groups needs to use the original
+ * node scores, but subsequent members should work on a copy, since
+ * the first member's scores already incorporate theirs.
+ */
+ reset_scores = false;
+ location->node_list_rh = node_list_copy;
+ }
+ }
+
+ location->node_list_rh = node_list_orig;
+ g_list_free_full(node_list_copy, free);
+}
+
+// Group implementation of resource_alloc_functions_t:colocated_resources()
+GList *
+pcmk__group_colocated_resources(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc,
+ GList *colocated_rscs)
+{
+ const pe_resource_t *member = NULL;
+
+ CRM_ASSERT(rsc != NULL);
+
+ if (orig_rsc == NULL) {
+ orig_rsc = rsc;
+ }
+
+ if (pe__group_flag_is_set(rsc, pe__group_colocated)
+ || pe_rsc_is_clone(rsc->parent)) {
+ /* This group has colocated members and/or is cloned -- either way,
+ * add every child's colocated resources to the list. The first and last
+ * members will include the group's own colocations.
+ */
+ colocated_rscs = g_list_prepend(colocated_rscs, (gpointer) rsc);
+ for (const GList *iter = rsc->children;
+ iter != NULL; iter = iter->next) {
+
+ member = (const pe_resource_t *) iter->data;
+ colocated_rscs = member->cmds->colocated_resources(member, orig_rsc,
+ colocated_rscs);
+ }
+
+ } else if (rsc->children != NULL) {
+ /* This group's members are not colocated, and the group is not cloned,
+ * so just add the group's own colocations to the list.
+ */
+ colocated_rscs = pcmk__colocated_resources(rsc, orig_rsc, colocated_rscs);
+ }
+
+ return colocated_rscs;
+}
+
+// Group implementation of resource_alloc_functions_t:with_this_colocations()
+void
+pcmk__with_group_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+
+{
+ CRM_CHECK((rsc != NULL) && (rsc->variant == pe_group)
+ && (orig_rsc != NULL) && (list != NULL),
+ return);
+
+ // Ignore empty groups
+ if (rsc->children == NULL) {
+ return;
+ }
+
+ /* "With this" colocations are needed only for the group itself and for its
+ * last member. Add the group's colocations plus any relevant
+ * parent colocations if cloned.
+ */
+ if ((rsc == orig_rsc) || (orig_rsc == pe__last_group_member(rsc))) {
+ crm_trace("Adding 'with %s' colocations to list for %s",
+ rsc->id, orig_rsc->id);
+ pcmk__add_with_this_list(list, rsc->rsc_cons_lhs);
+ if (rsc->parent != NULL) { // Cloned group
+ rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc,
+ list);
+ }
+ }
+}
+
+// Group implementation of resource_alloc_functions_t:this_with_colocations()
+void
+pcmk__group_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+{
+ CRM_CHECK((rsc != NULL) && (rsc->variant == pe_group)
+ && (orig_rsc != NULL) && (list != NULL),
+ return);
+
+ // Ignore empty groups
+ if (rsc->children == NULL) {
+ return;
+ }
+
+ /* Colocations for the group itself, or for its first member, consist of the
+ * group's colocations plus any relevant parent colocations if cloned.
+ */
+ if ((rsc == orig_rsc)
+ || (orig_rsc == (const pe_resource_t *) rsc->children->data)) {
+ crm_trace("Adding '%s with' colocations to list for %s",
+ rsc->id, orig_rsc->id);
+ pcmk__add_this_with_list(list, rsc->rsc_cons);
+ if (rsc->parent != NULL) { // Cloned group
+ rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc,
+ list);
+ }
+ return;
+ }
+
+ /* Later group members honor the group's colocations indirectly, due to the
+ * internal group colocations that chain everything from the first member.
+ * However, if an earlier group member is unmanaged, this chaining will not
+ * happen, so the group's mandatory colocations must be explicitly added.
+ */
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ const pe_resource_t *member = (const pe_resource_t *) iter->data;
+
+ if (orig_rsc == member) {
+ break; // We've seen all earlier members, and none are unmanaged
+ }
+
+ if (!pcmk_is_set(member->flags, pe_rsc_managed)) {
+ crm_trace("Adding mandatory '%s with' colocations to list for "
+ "member %s because earlier member %s is unmanaged",
+ rsc->id, orig_rsc->id, member->id);
+ for (const GList *cons_iter = rsc->rsc_cons; cons_iter != NULL;
+ cons_iter = cons_iter->next) {
+ const pcmk__colocation_t *colocation = NULL;
+
+ colocation = (const pcmk__colocation_t *) cons_iter->data;
+ if (colocation->score == INFINITY) {
+ pcmk__add_this_with(list, colocation);
+ }
+ }
+ // @TODO Add mandatory (or all?) clone constraints if cloned
+ break;
+ }
+ }
+}
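+
+/* Example: in group (m1, m2, m3) where m2 is unmanaged, m3 cannot rely on
+ * the colocation chain running through m2, so the loop above copies the
+ * group's mandatory 'group with' colocations directly onto m3's list.
+ */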
+
+/*!
+ * \internal
+ * \brief Update nodes with scores of colocated resources' nodes
+ *
+ * Given a table of nodes and a resource, update the nodes' scores with the
+ * scores of the best nodes matching the attribute used for each of the
+ * resource's relevant colocations.
+ *
+ * \param[in,out] rsc Resource to check colocations for
+ * \param[in] log_id Resource ID to use in logs (if NULL, use \p rsc ID)
+ * \param[in,out] nodes Nodes to update
+ * \param[in] attr Colocation attribute (NULL to use default)
+ * \param[in] factor Incorporate scores multiplied by this factor
+ * \param[in] flags Bitmask of enum pcmk__coloc_select values
+ *
+ * \note The caller remains responsible for freeing \p *nodes.
+ */
+void
+pcmk__group_add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
+ GHashTable **nodes, const char *attr,
+ float factor, uint32_t flags)
+{
+ pe_resource_t *member = NULL;
+
+ CRM_CHECK((rsc != NULL) && (nodes != NULL), return);
+
+ if (log_id == NULL) {
+ log_id = rsc->id;
+ }
+
+ // Avoid infinite recursion
+ if (pcmk_is_set(rsc->flags, pe_rsc_merging)) {
+ pe_rsc_info(rsc, "%s: Breaking dependency loop at %s",
+ log_id, rsc->id);
+ return;
+ }
+    // Ignore empty groups (only possible with schema validation disabled)
+    if (rsc->children == NULL) {
+        return;
+    }
+
+    // Set only after the checks above so an early return can't leak the flag
+    pe__set_resource_flags(rsc, pe_rsc_merging);
+
+ /* Refer the operation to the first or last member as appropriate.
+ *
+ * cmp_resources() is the only caller that passes a NULL nodes table,
+ * and is also the only caller using pcmk__coloc_select_this_with.
+ * For "this with" colocations, the last member will recursively incorporate
+ * all the other members' "this with" colocations via the internal group
+ * colocations (and via the first member, the group's own colocations).
+ *
+ * For "with this" colocations, the first member works similarly.
+ */
+ if (*nodes == NULL) {
+ member = pe__last_group_member(rsc);
+ } else {
+ member = rsc->children->data;
+ }
+ pe_rsc_trace(rsc, "%s: Merging scores from group %s using member %s "
+ "(at %.6f)", log_id, rsc->id, member->id, factor);
+ member->cmds->add_colocated_node_scores(member, log_id, nodes, attr, factor,
+ flags);
+ pe__clear_resource_flags(rsc, pe_rsc_merging);
+}
+
+// Group implementation of resource_alloc_functions_t:add_utilization()
+void
+pcmk__group_add_utilization(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList *all_rscs,
+ GHashTable *utilization)
+{
+ pe_resource_t *member = NULL;
+
+ CRM_ASSERT((rsc != NULL) && (orig_rsc != NULL) && (utilization != NULL));
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ return;
+ }
+
+ pe_rsc_trace(orig_rsc, "%s: Adding group %s as colocated utilization",
+ orig_rsc->id, rsc->id);
+ if (pe__group_flag_is_set(rsc, pe__group_colocated)
+ || pe_rsc_is_clone(rsc->parent)) {
+ // Every group member will be on same node, so sum all members
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ member = (pe_resource_t *) iter->data;
+
+ if (pcmk_is_set(member->flags, pe_rsc_provisional)
+ && (g_list_find(all_rscs, member) == NULL)) {
+ member->cmds->add_utilization(member, orig_rsc, all_rscs,
+ utilization);
+ }
+ }
+
+ } else if (rsc->children != NULL) {
+ // Just add first member's utilization
+ member = (pe_resource_t *) rsc->children->data;
+ if ((member != NULL)
+ && pcmk_is_set(member->flags, pe_rsc_provisional)
+ && (g_list_find(all_rscs, member) == NULL)) {
+
+ member->cmds->add_utilization(member, orig_rsc, all_rscs,
+ utilization);
+ }
+ }
+}
+
+// Group implementation of resource_alloc_functions_t:shutdown_lock()
+void
+pcmk__group_shutdown_lock(pe_resource_t *rsc)
+{
+ CRM_ASSERT(rsc != NULL);
+
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *member = (pe_resource_t *) iter->data;
+
+ member->cmds->shutdown_lock(member);
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c
new file mode 100644
index 0000000..c880196
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_instances.c
@@ -0,0 +1,1659 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+/* This file is intended for code usable with both clone instances and bundle
+ * replica containers.
+ */
+
+#include <crm_internal.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Check whether a clone or bundle has instances for all available nodes
+ *
+ * \param[in] collective Clone or bundle to check
+ *
+ * \return true if \p collective has enough instances for all of its available
+ * allowed nodes, otherwise false
+ */
+static bool
+can_run_everywhere(const pe_resource_t *collective)
+{
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+ int available_nodes = 0;
+ int max_instances = 0;
+
+ switch (collective->variant) {
+ case pe_clone:
+ max_instances = pe__clone_max(collective);
+ break;
+ case pe_container:
+ max_instances = pe__bundle_max(collective);
+ break;
+ default:
+ return false; // Not actually possible
+ }
+
+ g_hash_table_iter_init(&iter, collective->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ if (pcmk__node_available(node, false, false)
+ && (max_instances < ++available_nodes)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node is allowed to run an instance
+ *
+ * \param[in] instance Clone instance or bundle container to check
+ * \param[in] node Node to check
+ * \param[in] max_per_node Maximum number of instances allowed to run on a node
+ *
+ * \return true if \p node is allowed to run \p instance, otherwise false
+ */
+static bool
+can_run_instance(const pe_resource_t *instance, const pe_node_t *node,
+ int max_per_node)
+{
+ pe_node_t *allowed_node = NULL;
+
+ if (pcmk_is_set(instance->flags, pe_rsc_orphan)) {
+ pe_rsc_trace(instance, "%s cannot run on %s: orphaned",
+ instance->id, pe__node_name(node));
+ return false;
+ }
+
+ if (!pcmk__node_available(node, false, false)) {
+ pe_rsc_trace(instance,
+ "%s cannot run on %s: node cannot run resources",
+ instance->id, pe__node_name(node));
+ return false;
+ }
+
+ allowed_node = pcmk__top_allowed_node(instance, node);
+ if (allowed_node == NULL) {
+ crm_warn("%s cannot run on %s: node not allowed",
+ instance->id, pe__node_name(node));
+ return false;
+ }
+
+ if (allowed_node->weight < 0) {
+ pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there",
+ instance->id, pe__node_name(node),
+ pcmk_readable_score(allowed_node->weight));
+ return false;
+ }
+
+ if (allowed_node->count >= max_per_node) {
+ pe_rsc_trace(instance,
+ "%s cannot run on %s: node already has %d instance%s",
+ instance->id, pe__node_name(node), max_per_node,
+ pcmk__plural_s(max_per_node));
+ return false;
+ }
+
+ pe_rsc_trace(instance, "%s can run on %s (%d already running)",
+ instance->id, pe__node_name(node), allowed_node->count);
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Ban a clone instance or bundle replica from unavailable allowed nodes
+ *
+ * \param[in,out] instance Clone instance or bundle replica to ban
+ * \param[in] max_per_node Maximum instances allowed to run on a node
+ */
+static void
+ban_unavailable_allowed_nodes(pe_resource_t *instance, int max_per_node)
+{
+ if (instance->allowed_nodes != NULL) {
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ g_hash_table_iter_init(&iter, instance->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ if (!can_run_instance(instance, node, max_per_node)) {
+ pe_rsc_trace(instance, "Banning %s from unavailable node %s",
+ instance->id, pe__node_name(node));
+ node->weight = -INFINITY;
+ for (GList *child_iter = instance->children;
+ child_iter != NULL; child_iter = child_iter->next) {
+ pe_resource_t *child = (pe_resource_t *) child_iter->data;
+ pe_node_t *child_node = NULL;
+
+ child_node = pe_hash_table_lookup(child->allowed_nodes,
+ node->details->id);
+ if (child_node != NULL) {
+ pe_rsc_trace(instance,
+ "Banning %s child %s "
+ "from unavailable node %s",
+ instance->id, child->id,
+ pe__node_name(node));
+ child_node->weight = -INFINITY;
+ }
+ }
+ }
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create a hash table with a single node in it
+ *
+ * \param[in] node Node to copy into new table
+ *
+ * \return Newly created hash table containing a copy of \p node
+ * \note The caller is responsible for freeing the result with
+ * g_hash_table_destroy().
+ */
+static GHashTable *
+new_node_table(pe_node_t *node)
+{
+ GHashTable *table = pcmk__strkey_table(NULL, free);
+
+ node = pe__copy_node(node);
+ g_hash_table_insert(table, (gpointer) node->details->id, node);
+ return table;
+}
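+
+/* A minimal sketch of the table-building pattern above in plain GLib (the
+ * pcmk__strkey_table() wrapper behaves much like this construction): the key
+ * is borrowed from the node details, while the copied node is freed when the
+ * table is destroyed.
+ *
+ *     GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal,
+ *                                               NULL, free);
+ *
+ *     g_hash_table_insert(table, (gpointer) node_id, node_copy);
+ *     // ... use the single-entry table ...
+ *     g_hash_table_destroy(table); // frees node_copy, not node_id
+ */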
+
+/*!
+ * \internal
+ * \brief Apply a resource's parent's colocation scores to a node table
+ *
+ * \param[in] rsc Resource whose colocations should be applied
+ * \param[in,out] nodes Node table to apply colocations to
+ */
+static void
+apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
+{
+ GList *iter = NULL;
+ pcmk__colocation_t *colocation = NULL;
+ pe_resource_t *other = NULL;
+ float factor = 0.0;
+
+ /* Because the this_with_colocations() and with_this_colocations() methods
+ * boil down to copies of rsc_cons and rsc_cons_lhs for clones and bundles,
+ * we can use those here directly for efficiency.
+ */
+ for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) {
+ colocation = (pcmk__colocation_t *) iter->data;
+ other = colocation->primary;
+        factor = colocation->score / (float) INFINITY;
+ other->cmds->add_colocated_node_scores(other, rsc->id, nodes,
+ colocation->node_attribute,
+ factor,
+ pcmk__coloc_select_default);
+ }
+ for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) {
+ colocation = (pcmk__colocation_t *) iter->data;
+ if (!pcmk__colocation_has_influence(colocation, rsc)) {
+ continue;
+ }
+ other = colocation->dependent;
+        factor = colocation->score / (float) INFINITY;
+ other->cmds->add_colocated_node_scores(other, rsc->id, nodes,
+ colocation->node_attribute,
+ factor,
+ pcmk__coloc_select_nonnegative);
+ }
+}
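+
+/* The division above maps colocation scores onto a factor in -1.0 .. 1.0,
+ * since scores are capped at +/-INFINITY (1000000). For example, a
+ * colocation score of 500000 contributes with factor 0.5, while a mandatory
+ * (INFINITY) colocation contributes with factor 1.0.
+ */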
+
+/*!
+ * \internal
+ * \brief Compare clone or bundle instances based on colocation scores
+ *
+ * Determine the relative order in which two clone or bundle instances should be
+ * assigned to nodes, considering the scores of colocation constraints directly
+ * or indirectly involving them.
+ *
+ * \param[in] instance1 First instance to compare
+ * \param[in] instance2 Second instance to compare
+ *
+ * \return A negative number if \p instance1 should be assigned first,
+ * a positive number if \p instance2 should be assigned first,
+ * or 0 if assignment order doesn't matter
+ */
+static int
+cmp_instance_by_colocation(const pe_resource_t *instance1,
+ const pe_resource_t *instance2)
+{
+ int rc = 0;
+ pe_node_t *node1 = NULL;
+ pe_node_t *node2 = NULL;
+ pe_node_t *current_node1 = pe__current_node(instance1);
+ pe_node_t *current_node2 = pe__current_node(instance2);
+ GHashTable *colocated_scores1 = NULL;
+ GHashTable *colocated_scores2 = NULL;
+
+ CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
+ && (instance2 != NULL) && (instance2->parent != NULL)
+ && (current_node1 != NULL) && (current_node2 != NULL));
+
+ // Create node tables initialized with each node
+ colocated_scores1 = new_node_table(current_node1);
+ colocated_scores2 = new_node_table(current_node2);
+
+ // Apply parental colocations
+ apply_parent_colocations(instance1, &colocated_scores1);
+ apply_parent_colocations(instance2, &colocated_scores2);
+
+ // Find original nodes again, with scores updated for colocations
+ node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
+ node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
+
+ // Compare nodes by updated scores
+ if (node1->weight < node2->weight) {
+ crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
+ instance1->id, node1->weight, pe__node_name(node1),
+ instance2->id, node2->weight, pe__node_name(node2));
+ rc = 1;
+
+ } else if (node1->weight > node2->weight) {
+ crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
+ instance1->id, node1->weight, pe__node_name(node1),
+ instance2->id, node2->weight, pe__node_name(node2));
+ rc = -1;
+ }
+
+ g_hash_table_destroy(colocated_scores1);
+ g_hash_table_destroy(colocated_scores2);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a resource or any of its children are failed
+ *
+ * \param[in] rsc Resource to check
+ *
+ * \return true if \p rsc or any of its children are failed, otherwise false
+ */
+static bool
+did_fail(const pe_resource_t *rsc)
+{
+ if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ return true;
+ }
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ if (did_fail((const pe_resource_t *) iter->data)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node is allowed to run a resource
+ *
+ * \param[in] rsc Resource to check
+ * \param[in,out] node Node to check (will be set NULL if not allowed)
+ *
+ * \return true if *node is either NULL or allowed for \p rsc, otherwise false
+ */
+static bool
+node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
+{
+ if (*node != NULL) {
+ pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
+ (*node)->details->id);
+ if ((allowed == NULL) || (allowed->weight < 0)) {
+ pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
+ rsc->id, pe__node_name(*node));
+ *node = NULL;
+ return false;
+ }
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Compare two clone or bundle instances' instance numbers
+ *
+ * \param[in] a First instance to compare
+ * \param[in] b Second instance to compare
+ *
+ * \return A negative number if \p a's instance number is lower,
+ * a positive number if \p b's instance number is lower,
+ * or 0 if their instance numbers are the same
+ */
+gint
+pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
+{
+ const pe_resource_t *instance1 = (const pe_resource_t *) a;
+ const pe_resource_t *instance2 = (const pe_resource_t *) b;
+ char *div1 = NULL;
+ char *div2 = NULL;
+
+ CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
+
+ // Clone numbers are after a colon, bundle numbers after a dash
+ div1 = strrchr(instance1->id, ':');
+ if (div1 == NULL) {
+ div1 = strrchr(instance1->id, '-');
+ }
+ div2 = strrchr(instance2->id, ':');
+ if (div2 == NULL) {
+ div2 = strrchr(instance2->id, '-');
+ }
+ CRM_ASSERT((div1 != NULL) && (div2 != NULL));
+
+ return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
+}
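+
+/* A standalone sketch of the numbering rule above (standard C only; the
+ * guard clause stands in for the assertion used here): the instance number
+ * is whatever follows the last ':' (clones) or '-' (bundles) in the ID.
+ *
+ *     #include <stdlib.h>
+ *     #include <string.h>
+ *
+ *     static long
+ *     instance_number(const char *id)
+ *     {
+ *         const char *div = strrchr(id, ':');
+ *
+ *         if (div == NULL) {
+ *             div = strrchr(id, '-');
+ *         }
+ *         return (div == NULL)? -1 : strtol(div + 1, NULL, 10);
+ *     }
+ *
+ * instance_number("myclone:10") yields 10, and
+ * instance_number("httpd-bundle-docker-2") yields 2.
+ */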
+
+/*!
+ * \internal
+ * \brief Compare clone or bundle instances according to assignment order
+ *
+ * Compare two clone or bundle instances according to the order they should be
+ * assigned to nodes, preferring (in order):
+ *
+ * - Active instance that is less multiply active
+ * - Instance that is not active on a disallowed node
+ * - Instance with higher configured priority
+ * - Active instance whose current node can run resources
+ * - Active instance whose parent is allowed on current node
+ * - Active instance whose current node has fewer other instances
+ * - Active instance
+ * - Instance that isn't failed
+ * - Instance whose colocations result in higher score on current node
+ * - Instance with lower ID in lexicographic order
+ *
+ * \param[in] a First instance to compare
+ * \param[in] b Second instance to compare
+ *
+ * \return A negative number if \p a should be assigned first,
+ * a positive number if \p b should be assigned first,
+ * or 0 if assignment order doesn't matter
+ */
+gint
+pcmk__cmp_instance(gconstpointer a, gconstpointer b)
+{
+ int rc = 0;
+ pe_node_t *node1 = NULL;
+ pe_node_t *node2 = NULL;
+ unsigned int nnodes1 = 0;
+ unsigned int nnodes2 = 0;
+
+ bool can1 = true;
+ bool can2 = true;
+
+ const pe_resource_t *instance1 = (const pe_resource_t *) a;
+ const pe_resource_t *instance2 = (const pe_resource_t *) b;
+
+ CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
+
+ node1 = instance1->fns->active_node(instance1, &nnodes1, NULL);
+ node2 = instance2->fns->active_node(instance2, &nnodes2, NULL);
+
+ /* If both instances are running and at least one is multiply
+ * active, prefer instance that's running on fewer nodes.
+ */
+ if ((nnodes1 > 0) && (nnodes2 > 0)) {
+ if (nnodes1 < nnodes2) {
+ crm_trace("Assign %s (active on %d) before %s (active on %d): "
+ "less multiply active",
+ instance1->id, nnodes1, instance2->id, nnodes2);
+ return -1;
+
+ } else if (nnodes1 > nnodes2) {
+ crm_trace("Assign %s (active on %d) after %s (active on %d): "
+ "more multiply active",
+ instance1->id, nnodes1, instance2->id, nnodes2);
+ return 1;
+ }
+ }
+
+ /* An instance that is either inactive or active on an allowed node is
+ * preferred over an instance that is active on a no-longer-allowed node.
+ */
+ can1 = node_is_allowed(instance1, &node1);
+ can2 = node_is_allowed(instance2, &node2);
+ if (can1 && !can2) {
+ crm_trace("Assign %s before %s: not active on a disallowed node",
+ instance1->id, instance2->id);
+ return -1;
+
+ } else if (!can1 && can2) {
+ crm_trace("Assign %s after %s: active on a disallowed node",
+ instance1->id, instance2->id);
+ return 1;
+ }
+
+ // Prefer instance with higher configured priority
+ if (instance1->priority > instance2->priority) {
+ crm_trace("Assign %s before %s: priority (%d > %d)",
+ instance1->id, instance2->id,
+ instance1->priority, instance2->priority);
+ return -1;
+
+ } else if (instance1->priority < instance2->priority) {
+ crm_trace("Assign %s after %s: priority (%d < %d)",
+ instance1->id, instance2->id,
+ instance1->priority, instance2->priority);
+ return 1;
+ }
+
+ // Prefer active instance
+ if ((node1 == NULL) && (node2 == NULL)) {
+ crm_trace("No assignment preference for %s vs. %s: inactive",
+ instance1->id, instance2->id);
+ return 0;
+
+ } else if (node1 == NULL) {
+ crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
+ return 1;
+
+ } else if (node2 == NULL) {
+ crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
+ return -1;
+ }
+
+ // Prefer instance whose current node can run resources
+ can1 = pcmk__node_available(node1, false, false);
+ can2 = pcmk__node_available(node2, false, false);
+ if (can1 && !can2) {
+ crm_trace("Assign %s before %s: current node can run resources",
+ instance1->id, instance2->id);
+ return -1;
+
+ } else if (!can1 && can2) {
+ crm_trace("Assign %s after %s: current node can't run resources",
+ instance1->id, instance2->id);
+ return 1;
+ }
+
+ // Prefer instance whose parent is allowed to run on instance's current node
+ node1 = pcmk__top_allowed_node(instance1, node1);
+ node2 = pcmk__top_allowed_node(instance2, node2);
+ if ((node1 == NULL) && (node2 == NULL)) {
+ crm_trace("No assignment preference for %s vs. %s: "
+ "parent not allowed on either instance's current node",
+ instance1->id, instance2->id);
+ return 0;
+
+ } else if (node1 == NULL) {
+ crm_trace("Assign %s after %s: parent not allowed on current node",
+ instance1->id, instance2->id);
+ return 1;
+
+ } else if (node2 == NULL) {
+ crm_trace("Assign %s before %s: parent allowed on current node",
+ instance1->id, instance2->id);
+ return -1;
+ }
+
+ // Prefer instance whose current node is running fewer other instances
+ if (node1->count < node2->count) {
+ crm_trace("Assign %s before %s: fewer active instances on current node",
+ instance1->id, instance2->id);
+ return -1;
+
+ } else if (node1->count > node2->count) {
+ crm_trace("Assign %s after %s: more active instances on current node",
+ instance1->id, instance2->id);
+ return 1;
+ }
+
+ // Prefer instance that isn't failed
+ can1 = did_fail(instance1);
+ can2 = did_fail(instance2);
+ if (!can1 && can2) {
+ crm_trace("Assign %s before %s: not failed",
+ instance1->id, instance2->id);
+ return -1;
+ } else if (can1 && !can2) {
+ crm_trace("Assign %s after %s: failed",
+ instance1->id, instance2->id);
+ return 1;
+ }
+
+ // Prefer instance with higher cumulative colocation score on current node
+ rc = cmp_instance_by_colocation(instance1, instance2);
+ if (rc != 0) {
+ return rc;
+ }
+
+ // Prefer instance with lower instance number
+ rc = pcmk__cmp_instance_number(instance1, instance2);
+ if (rc < 0) {
+ crm_trace("Assign %s before %s: instance number",
+ instance1->id, instance2->id);
+ } else if (rc > 0) {
+ crm_trace("Assign %s after %s: instance number",
+ instance1->id, instance2->id);
+ } else {
+ crm_trace("No assignment preference for %s vs. %s",
+ instance1->id, instance2->id);
+ }
+ return rc;
+}
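+
+/* Typical usage (a sketch; both comparators have the GCompareFunc signature,
+ * so they can be handed straight to GLib):
+ *
+ *     GList *instances = g_list_copy(rsc->children);
+ *
+ *     instances = g_list_sort(instances, pcmk__cmp_instance);
+ *     // ... assign instances in this order ...
+ *     g_list_free(instances);
+ */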
+
+/*!
+ * \internal
+ * \brief Choose a node for an instance
+ *
+ * \param[in,out] instance Clone instance or bundle replica container
+ * \param[in] prefer If not NULL, attempt early assignment to this
+ * node, if still the best choice; otherwise,
+ * perform final assignment
+ * \param[in] max_per_node Assign at most this many instances to one node
+ *
+ * \return true if \p instance could be assigned to a node, otherwise false
+ */
+static bool
+assign_instance(pe_resource_t *instance, const pe_node_t *prefer,
+ int max_per_node)
+{
+ pe_node_t *chosen = NULL;
+ pe_node_t *allowed = NULL;
+
+ CRM_ASSERT(instance != NULL);
+ pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id,
+ ((prefer == NULL)? "no node" : prefer->details->uname));
+
+ if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) {
+ // Instance is already assigned
+ return instance->fns->location(instance, NULL, FALSE) != NULL;
+ }
+
+ if (pcmk_is_set(instance->flags, pe_rsc_allocating)) {
+ pe_rsc_debug(instance,
+ "Assignment loop detected involving %s colocations",
+ instance->id);
+ return false;
+ }
+
+ if (prefer != NULL) { // Possible early assignment to preferred node
+
+ // Get preferred node with instance's scores
+ allowed = g_hash_table_lookup(instance->allowed_nodes,
+ prefer->details->id);
+
+ if ((allowed == NULL) || (allowed->weight < 0)) {
+ pe_rsc_trace(instance,
+ "Not assigning %s to preferred node %s: unavailable",
+ instance->id, pe__node_name(prefer));
+ return false;
+ }
+ }
+
+ ban_unavailable_allowed_nodes(instance, max_per_node);
+
+ if (prefer == NULL) { // Final assignment
+ chosen = instance->cmds->assign(instance, NULL);
+
+ } else { // Possible early assignment to preferred node
+ GHashTable *backup = pcmk__copy_node_table(instance->allowed_nodes);
+
+ chosen = instance->cmds->assign(instance, prefer);
+
+ // Revert nodes if preferred node won't be assigned
+ if ((chosen != NULL) && (chosen->details != prefer->details)) {
+ crm_info("Not assigning %s to preferred node %s: %s is better",
+ instance->id, pe__node_name(prefer),
+ pe__node_name(chosen));
+ g_hash_table_destroy(instance->allowed_nodes);
+ instance->allowed_nodes = backup;
+ pcmk__unassign_resource(instance);
+ chosen = NULL;
+ } else if (backup != NULL) {
+ g_hash_table_destroy(backup);
+ }
+ }
+
+ // The parent tracks how many instances have been assigned to each node
+ if (chosen != NULL) {
+ allowed = pcmk__top_allowed_node(instance, chosen);
+ if (allowed == NULL) {
+ /* The instance is allowed on the node, but its parent isn't. This
+ * shouldn't be possible if the resource is managed, and we won't be
+ * able to limit the number of instances assigned to the node.
+ */
+ CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed));
+
+ } else {
+ allowed->count++;
+ }
+ }
+ return chosen != NULL;
+}
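+
+/* Note on the early-assignment path above: the allowed-node table is backed
+ * up before calling assign() so that, if some node other than the preferred
+ * one wins, any score changes made during the attempt can be rolled back and
+ * the instance returned to the unassigned pool for the final pass.
+ */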
+
+/*!
+ * \internal
+ * \brief Reset the node counts of a resource's allowed nodes to zero
+ *
+ * \param[in,out] rsc Resource to reset
+ *
+ * \return Number of nodes that are available to run resources
+ */
+static unsigned int
+reset_allowed_node_counts(pe_resource_t *rsc)
+{
+ unsigned int available_nodes = 0;
+ pe_node_t *node = NULL;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ node->count = 0;
+ if (pcmk__node_available(node, false, false)) {
+ available_nodes++;
+ }
+ }
+ return available_nodes;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an instance has a preferred node
+ *
+ * \param[in] rsc Clone or bundle being assigned (for logs only)
+ * \param[in] instance Clone instance or bundle replica container
+ * \param[in] optimal_per_node Optimal number of instances per node
+ *
+ * \return Instance's current node if still available, otherwise NULL
+ */
+static const pe_node_t *
+preferred_node(const pe_resource_t *rsc, const pe_resource_t *instance,
+ int optimal_per_node)
+{
+ const pe_node_t *node = NULL;
+ const pe_node_t *parent_node = NULL;
+
+ // Check whether instance is active, healthy, and not yet assigned
+ if ((instance->running_on == NULL)
+ || !pcmk_is_set(instance->flags, pe_rsc_provisional)
+ || pcmk_is_set(instance->flags, pe_rsc_failed)) {
+ return NULL;
+ }
+
+ // Check whether instance's current node can run resources
+ node = pe__current_node(instance);
+ if (!pcmk__node_available(node, true, false)) {
+ pe_rsc_trace(rsc, "Not assigning %s to %s early (unavailable)",
+ instance->id, pe__node_name(node));
+ return NULL;
+ }
+
+ // Check whether node already has optimal number of instances assigned
+ parent_node = pcmk__top_allowed_node(instance, node);
+ if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) {
+ pe_rsc_trace(rsc,
+ "Not assigning %s to %s early "
+ "(optimal instances already assigned)",
+ instance->id, pe__node_name(node));
+ return NULL;
+ }
+
+ return node;
+}
+
+/*!
+ * \internal
+ * \brief Assign collective instances to nodes
+ *
+ * \param[in,out] collective Clone or bundle resource being assigned
+ * \param[in,out] instances List of clone instances or bundle containers
+ * \param[in] max_total Maximum instances to assign in total
+ * \param[in] max_per_node Maximum instances to assign to any one node
+ */
+void
+pcmk__assign_instances(pe_resource_t *collective, GList *instances,
+ int max_total, int max_per_node)
+{
+ // Reuse node count to track number of assigned instances
+ unsigned int available_nodes = reset_allowed_node_counts(collective);
+
+ int optimal_per_node = 0;
+ int assigned = 0;
+ GList *iter = NULL;
+ pe_resource_t *instance = NULL;
+ const pe_node_t *current = NULL;
+
+ if (available_nodes > 0) {
+ optimal_per_node = max_total / available_nodes;
+ }
+ if (optimal_per_node < 1) {
+ optimal_per_node = 1;
+ }
+
+ pe_rsc_debug(collective,
+ "Assigning up to %d %s instance%s to up to %u node%s "
+ "(at most %d per host, %d optimal)",
+ max_total, collective->id, pcmk__plural_s(max_total),
+ available_nodes, pcmk__plural_s(available_nodes),
+ max_per_node, optimal_per_node);
+
+ // Assign as many instances as possible to their current location
+ for (iter = instances; (iter != NULL) && (assigned < max_total);
+ iter = iter->next) {
+ instance = (pe_resource_t *) iter->data;
+
+ current = preferred_node(collective, instance, optimal_per_node);
+ if ((current != NULL)
+ && assign_instance(instance, current, max_per_node)) {
+ pe_rsc_trace(collective, "Assigned %s to current node %s",
+ instance->id, pe__node_name(current));
+ assigned++;
+ }
+ }
+
+ pe_rsc_trace(collective, "Assigned %d of %d instance%s to current node",
+ assigned, max_total, pcmk__plural_s(max_total));
+
+ for (iter = instances; iter != NULL; iter = iter->next) {
+ instance = (pe_resource_t *) iter->data;
+
+ if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) {
+ continue; // Already assigned
+ }
+
+ if (instance->running_on != NULL) {
+ current = pe__current_node(instance);
+ if (pcmk__top_allowed_node(instance, current) == NULL) {
+ const char *unmanaged = "";
+
+ if (!pcmk_is_set(instance->flags, pe_rsc_managed)) {
+ unmanaged = "Unmanaged resource ";
+ }
+ crm_notice("%s%s is running on %s which is no longer allowed",
+ unmanaged, instance->id, pe__node_name(current));
+ }
+ }
+
+ if (assigned >= max_total) {
+ pe_rsc_debug(collective,
+ "Not assigning %s because maximum %d instances "
+ "already assigned",
+ instance->id, max_total);
+ resource_location(instance, NULL, -INFINITY,
+ "collective_limit_reached", collective->cluster);
+
+ } else if (assign_instance(instance, NULL, max_per_node)) {
+ assigned++;
+ }
+ }
+
+ pe_rsc_debug(collective, "Assigned %d of %d possible instance%s of %s",
+ assigned, max_total, pcmk__plural_s(max_total),
+ collective->id);
+}
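+
+/* A self-contained toy model (standard C, hypothetical data) of the two-pass
+ * strategy above: pass 1 keeps an instance on its current node while that
+ * node is under the optimal count; pass 2 places the rest, here on the
+ * least-loaded node as a stand-in for the scores consulted by the real
+ * assign() method.
+ *
+ *     #include <stdio.h>
+ *
+ *     int
+ *     main(void)
+ *     {
+ *         enum { NODES = 3, INSTANCES = 5 };
+ *         int current[INSTANCES] = { 0, 0, 1, -1, -1 }; // -1 = inactive
+ *         int chosen[INSTANCES] = { -1, -1, -1, -1, -1 };
+ *         int count[NODES] = { 0 };
+ *         const int max_per_node = 2;
+ *         int optimal = INSTANCES / NODES;
+ *
+ *         if (optimal < 1) {
+ *             optimal = 1;
+ *         }
+ *         for (int i = 0; i < INSTANCES; i++) { // pass 1: current node
+ *             int node = current[i];
+ *
+ *             if ((node >= 0) && (count[node] < optimal)) {
+ *                 chosen[i] = node;
+ *                 count[node]++;
+ *             }
+ *         }
+ *         for (int i = 0; i < INSTANCES; i++) { // pass 2: fill remaining
+ *             int best = -1;
+ *
+ *             if (chosen[i] >= 0) {
+ *                 continue;
+ *             }
+ *             for (int n = 0; n < NODES; n++) {
+ *                 if ((count[n] < max_per_node)
+ *                     && ((best < 0) || (count[n] < count[best]))) {
+ *                     best = n;
+ *                 }
+ *             }
+ *             if (best >= 0) {
+ *                 chosen[i] = best;
+ *                 count[best]++;
+ *             }
+ *         }
+ *         for (int i = 0; i < INSTANCES; i++) {
+ *             printf("instance %d -> node %d\n", i, chosen[i]);
+ *         }
+ *         return 0;
+ *     }
+ *
+ * With this data, instances 0 and 2 stay on their current nodes (0 and 1),
+ * instance 1 exceeds node 0's optimal share and moves to node 2, and the
+ * inactive instances fill the remaining capacity on nodes 0 and 1.
+ */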
+
+enum instance_state {
+ instance_starting = (1 << 0),
+ instance_stopping = (1 << 1),
+
+ /* This indicates that some instance is restarting. It's not the same as
+ * instance_starting|instance_stopping, which would indicate that some
+ * instance is starting, and some instance (not necessarily the same one) is
+ * stopping.
+ */
+ instance_restarting = (1 << 2),
+
+ instance_active = (1 << 3),
+
+ instance_all = instance_starting|instance_stopping
+ |instance_restarting|instance_active,
+};
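+
+/* Sketch of how these flags combine (plain bit operations; the code below
+ * uses the pcmk_all_flags_set() and pcmk_is_set() helpers for these tests):
+ *
+ *     uint32_t state = 0;
+ *
+ *     state |= instance_starting; // some instance starts
+ *     state |= instance_stopping; // some (possibly different) instance stops
+ *
+ *     if ((state & (instance_starting|instance_stopping))
+ *         == (instance_starting|instance_stopping)) {
+ *         // True here, yet instance_restarting may legitimately stay unset,
+ *         // because restarting is only inferred within a single instance
+ *     }
+ */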
+
+/*!
+ * \internal
+ * \brief Check whether an instance is active, starting, and/or stopping
+ *
+ * \param[in] instance Clone instance or bundle replica container
+ * \param[in,out] state Whether any instance is starting, stopping, etc.
+ */
+static void
+check_instance_state(const pe_resource_t *instance, uint32_t *state)
+{
+ const GList *iter = NULL;
+ uint32_t instance_state = 0; // State of just this instance
+
+ // No need to check further if all conditions have already been detected
+ if (pcmk_all_flags_set(*state, instance_all)) {
+ return;
+ }
+
+ // If instance is a collective (a cloned group), check its children instead
+ if (instance->variant > pe_native) {
+ for (iter = instance->children;
+ (iter != NULL) && !pcmk_all_flags_set(*state, instance_all);
+ iter = iter->next) {
+ check_instance_state((const pe_resource_t *) iter->data, state);
+ }
+ return;
+ }
+
+ // If we get here, instance is a primitive
+
+ if (instance->running_on != NULL) {
+ instance_state |= instance_active;
+ }
+
+ // Check each of the instance's actions for runnable start or stop
+ for (iter = instance->actions;
+ (iter != NULL) && !pcmk_all_flags_set(instance_state,
+ instance_starting
+ |instance_stopping);
+ iter = iter->next) {
+
+ const pe_action_t *action = (const pe_action_t *) iter->data;
+ const bool optional = pcmk_is_set(action->flags, pe_action_optional);
+
+ if (pcmk__str_eq(RSC_START, action->task, pcmk__str_none)) {
+ if (!optional && pcmk_is_set(action->flags, pe_action_runnable)) {
+ pe_rsc_trace(instance, "Instance is starting due to %s",
+ action->uuid);
+ instance_state |= instance_starting;
+ } else {
+ pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
+ action->uuid, instance->id,
+ (optional? "optional" : "unrunnable"));
+ }
+
+ } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_none)) {
+ /* Only stop actions can be pseudo-actions for primitives. That
+ * indicates that the node they are on is being fenced, so the stop
+ * is implied rather than actually executed.
+ */
+ if (!optional
+ && pcmk_any_flags_set(action->flags,
+ pe_action_pseudo|pe_action_runnable)) {
+ pe_rsc_trace(instance, "Instance is stopping due to %s",
+ action->uuid);
+ instance_state |= instance_stopping;
+ } else {
+ pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
+ action->uuid, instance->id,
+ (optional? "optional" : "unrunnable"));
+ }
+ }
+ }
+
+ if (pcmk_all_flags_set(instance_state,
+ instance_starting|instance_stopping)) {
+ instance_state |= instance_restarting;
+ }
+ *state |= instance_state;
+}
+
+/*!
+ * \internal
+ * \brief Create actions for collective resource instances
+ *
+ * \param[in,out] collective Clone or bundle resource to create actions for
+ * \param[in,out] instances List of clone instances or bundle containers
+ */
+void
+pcmk__create_instance_actions(pe_resource_t *collective, GList *instances)
+{
+ uint32_t state = 0;
+
+ pe_action_t *stop = NULL;
+ pe_action_t *stopped = NULL;
+
+ pe_action_t *start = NULL;
+ pe_action_t *started = NULL;
+
+ pe_rsc_trace(collective, "Creating collective instance actions for %s",
+ collective->id);
+
+ // Create actions for each instance appropriate to its variant
+ for (GList *iter = instances; iter != NULL; iter = iter->next) {
+ pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+ instance->cmds->create_actions(instance);
+ check_instance_state(instance, &state);
+ }
+
+ // Create pseudo-actions for rsc start and started
+ start = pe__new_rsc_pseudo_action(collective, RSC_START,
+ !pcmk_is_set(state, instance_starting),
+ true);
+ started = pe__new_rsc_pseudo_action(collective, RSC_STARTED,
+ !pcmk_is_set(state, instance_starting),
+ false);
+ started->priority = INFINITY;
+ if (pcmk_any_flags_set(state, instance_active|instance_starting)) {
+ pe__set_action_flags(started, pe_action_runnable);
+ }
+
+ // Create pseudo-actions for rsc stop and stopped
+ stop = pe__new_rsc_pseudo_action(collective, RSC_STOP,
+ !pcmk_is_set(state, instance_stopping),
+ true);
+ stopped = pe__new_rsc_pseudo_action(collective, RSC_STOPPED,
+ !pcmk_is_set(state, instance_stopping),
+ true);
+ stopped->priority = INFINITY;
+ if (!pcmk_is_set(state, instance_restarting)) {
+ pe__set_action_flags(stop, pe_action_migrate_runnable);
+ }
+
+ if (collective->variant == pe_clone) {
+ pe__create_clone_notif_pseudo_ops(collective, start, started, stop,
+ stopped);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Get a list of clone instances or bundle replica containers
+ *
+ * \param[in] rsc Clone or bundle resource
+ *
+ * \return Clone instances if \p rsc is a clone, or a newly created list of
+ * \p rsc's replica containers if \p rsc is a bundle
+ * \note The caller must call free_instance_list() on the result when the list
+ * is no longer needed.
+ */
+static inline GList *
+get_instance_list(const pe_resource_t *rsc)
+{
+ if (rsc->variant == pe_container) {
+ return pe__bundle_containers(rsc);
+ } else {
+ return rsc->children;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Free any memory created by get_instance_list()
+ *
+ * \param[in] rsc Clone or bundle resource passed to get_instance_list()
+ * \param[in,out] list Return value of get_instance_list() for \p rsc
+ */
+static inline void
+free_instance_list(const pe_resource_t *rsc, GList *list)
+{
+ if (list != rsc->children) {
+ g_list_free(list);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether an instance is compatible with a role and node
+ *
+ * \param[in] instance Clone instance or bundle replica container
+ * \param[in] node Instance must match this node
+ * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role
+ * \param[in] current If true, compare instance's original node and role,
+ * otherwise compare assigned next node and role
+ *
+ * \return true if \p instance is compatible with \p node and \p role,
+ * otherwise false
+ */
+bool
+pcmk__instance_matches(const pe_resource_t *instance, const pe_node_t *node,
+ enum rsc_role_e role, bool current)
+{
+ pe_node_t *instance_node = NULL;
+
+ CRM_CHECK((instance != NULL) && (node != NULL), return false);
+
+ if ((role != RSC_ROLE_UNKNOWN)
+ && (role != instance->fns->state(instance, current))) {
+ pe_rsc_trace(instance,
+ "%s is not a compatible instance (role is not %s)",
+ instance->id, role2text(role));
+ return false;
+ }
+
+ if (!is_set_recursive(instance, pe_rsc_block, true)) {
+ // We only want instances that haven't failed
+ instance_node = instance->fns->location(instance, NULL, current);
+ }
+
+ if (instance_node == NULL) {
+ pe_rsc_trace(instance,
+ "%s is not a compatible instance (not assigned to a node)",
+ instance->id);
+ return false;
+ }
+
+ if (instance_node->details != node->details) {
+ pe_rsc_trace(instance,
+ "%s is not a compatible instance (assigned to %s not %s)",
+ instance->id, pe__node_name(instance_node),
+ pe__node_name(node));
+ return false;
+ }
+
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Find an instance that matches a given resource by node and role
+ *
+ * \param[in] match_rsc Resource that instance must match (for logging only)
+ * \param[in] rsc Clone or bundle resource to check for matching instance
+ * \param[in] node Instance must match this node
+ * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role
+ * \param[in] current If true, compare instance's original node and role,
+ * otherwise compare assigned next node and role
+ *
+ * \return \p rsc instance matching \p node and \p role if any, otherwise NULL
+ */
+static pe_resource_t *
+find_compatible_instance_on_node(const pe_resource_t *match_rsc,
+ const pe_resource_t *rsc,
+ const pe_node_t *node, enum rsc_role_e role,
+ bool current)
+{
+ GList *instances = NULL;
+
+ instances = get_instance_list(rsc);
+ for (GList *iter = instances; iter != NULL; iter = iter->next) {
+ pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+ if (pcmk__instance_matches(instance, node, role, current)) {
+ pe_rsc_trace(match_rsc, "Found %s %s instance %s compatible with %s on %s",
+ role == RSC_ROLE_UNKNOWN? "matching" : role2text(role),
+ rsc->id, instance->id, match_rsc->id,
+ pe__node_name(node));
+ free_instance_list(rsc, instances); // Only frees list, not contents
+ return instance;
+ }
+ }
+ free_instance_list(rsc, instances);
+
+ pe_rsc_trace(match_rsc, "No %s %s instance found compatible with %s on %s",
+ ((role == RSC_ROLE_UNKNOWN)? "matching" : role2text(role)),
+ rsc->id, match_rsc->id, pe__node_name(node));
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Find a clone instance or bundle container compatible with a resource
+ *
+ * \param[in] match_rsc Resource that instance must match
+ * \param[in] rsc Clone or bundle resource to check for matching instance
+ * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role
+ * \param[in] current If true, compare instance's original node and role,
+ * otherwise compare assigned next node and role
+ *
+ * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc
+ * if any, otherwise NULL
+ */
+pe_resource_t *
+pcmk__find_compatible_instance(const pe_resource_t *match_rsc,
+ const pe_resource_t *rsc, enum rsc_role_e role,
+ bool current)
+{
+ pe_resource_t *instance = NULL;
+ GList *nodes = NULL;
+ const pe_node_t *node = match_rsc->fns->location(match_rsc, NULL, current);
+
+ // If match_rsc has a node, check only that node
+ if (node != NULL) {
+ return find_compatible_instance_on_node(match_rsc, rsc, node, role,
+ current);
+ }
+
+ // Otherwise check for an instance matching any of match_rsc's allowed nodes
+ nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes),
+ NULL);
+ for (GList *iter = nodes; (iter != NULL) && (instance == NULL);
+ iter = iter->next) {
+ instance = find_compatible_instance_on_node(match_rsc, rsc,
+ (pe_node_t *) iter->data,
+ role, current);
+ }
+
+ if (instance == NULL) {
+ pe_rsc_debug(rsc, "No %s instance found compatible with %s",
+ rsc->id, match_rsc->id);
+ }
+ g_list_free(nodes);
+ return instance;
+}
+
+/*!
+ * \internal
+ * \brief Unassign an instance if mandatory ordering has no interleave match
+ *
+ * \param[in] first 'First' action in an ordering
+ * \param[in] then 'Then' action in an ordering
+ * \param[in,out] then_instance 'Then' instance that has no interleave match
+ * \param[in] type Group of enum pe_ordering flags to apply
+ * \param[in] current If true, "then" action is stopped or demoted
+ *
+ * \return true if \p then_instance was unassigned, otherwise false
+ */
+static bool
+unassign_if_mandatory(const pe_action_t *first, const pe_action_t *then,
+ pe_resource_t *then_instance, uint32_t type, bool current)
+{
+ // Allow "then" instance to go down even without an interleave match
+ if (current) {
+ pe_rsc_trace(then->rsc,
+ "%s has no instance to order before stopping "
+ "or demoting %s",
+ first->rsc->id, then_instance->id);
+
+ /* If the "first" action must be runnable, but there is no "first"
+ * instance, the "then" instance must not be allowed to come up.
+ */
+ } else if (pcmk_any_flags_set(type, pe_order_runnable_left
+ |pe_order_implies_then)) {
+ pe_rsc_info(then->rsc,
+ "Inhibiting %s from being active "
+ "because there is no %s instance to interleave",
+ then_instance->id, first->rsc->id);
+ return pcmk__assign_resource(then_instance, NULL, true);
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Find first matching action for a clone instance or bundle container
+ *
+ * \param[in] action Action in an interleaved ordering
+ * \param[in] instance Clone instance or bundle container being interleaved
+ * \param[in] action_name Action to look for
+ * \param[in] node If not NULL, require action to be on this node
+ * \param[in] for_first If true, \p instance is the 'first' resource in the
+ * ordering, otherwise it is the 'then' resource
+ *
+ * \return First action for \p instance (or in some cases if \p instance is a
+ * bundle container, its containerized resource) that matches
+ * \p action_name and \p node if any, otherwise NULL
+ */
+static pe_action_t *
+find_instance_action(const pe_action_t *action, const pe_resource_t *instance,
+ const char *action_name, const pe_node_t *node,
+ bool for_first)
+{
+ const pe_resource_t *rsc = NULL;
+ pe_action_t *matching_action = NULL;
+
+ /* If instance is a bundle container, sometimes we should interleave the
+ * action for the container itself, and sometimes for the containerized
+ * resource.
+ *
+ * For example, given "start bundle A then bundle B", B likely requires the
+ * service inside A's container to be active, rather than just the
+ * container, so we should interleave the action for A's containerized
+ * resource. On the other hand, it's possible B's container itself requires
+ * something from A, so we should interleave the action for B's container.
+ *
+ * Essentially, for 'first', we should use the containerized resource for
+ * everything except stop, and for 'then', we should use the container for
+ * everything except promote and demote (which can only be performed on the
+ * containerized resource).
+ */
+ if ((for_first && !pcmk__str_any_of(action->task, CRMD_ACTION_STOP,
+ CRMD_ACTION_STOPPED, NULL))
+
+ || (!for_first && pcmk__str_any_of(action->task, CRMD_ACTION_PROMOTE,
+ CRMD_ACTION_PROMOTED,
+ CRMD_ACTION_DEMOTE,
+ CRMD_ACTION_DEMOTED, NULL))) {
+
+ rsc = pcmk__get_rsc_in_container(instance);
+ }
+ if (rsc == NULL) {
+ rsc = instance; // No containerized resource, use instance itself
+ } else {
+ node = NULL; // Containerized actions are on bundle-created guest
+ }
+
+ matching_action = find_first_action(rsc->actions, NULL, action_name, node);
+ if (matching_action != NULL) {
+ return matching_action;
+ }
+
+ if (pcmk_is_set(instance->flags, pe_rsc_orphan)
+ || pcmk__str_any_of(action_name, RSC_STOP, RSC_DEMOTE, NULL)) {
+ crm_trace("No %s action found for %s%s",
+ action_name,
+ pcmk_is_set(instance->flags, pe_rsc_orphan)? "orphan " : "",
+ instance->id);
+ } else {
+ crm_err("No %s action found for %s to interleave (bug?)",
+ action_name, instance->id);
+ }
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Get the original action name of a bundle or clone action
+ *
+ * Given an action for a bundle or clone, get the original action name,
+ * mapping notify to the action being notified, and if the instances are
+ * primitives, mapping completion actions to the action that was completed
+ * (for example, stopped to stop).
+ *
+ * \param[in] action Clone or bundle action to check
+ *
+ * \return Original action name for \p action
+ */
+static const char *
+orig_action_name(const pe_action_t *action)
+{
+ const pe_resource_t *instance = action->rsc->children->data; // Any instance
+ char *action_type = NULL;
+ const char *action_name = action->task;
+ enum action_tasks orig_task = no_action;
+
+ if (pcmk__strcase_any_of(action->task, CRMD_ACTION_NOTIFY,
+ CRMD_ACTION_NOTIFIED, NULL)) {
+ // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL
+ CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL),
+ return task2text(no_action));
+ action_name = strstr(action_type, "_notify_");
+ CRM_CHECK(action_name != NULL, return task2text(no_action));
+ action_name += strlen("_notify_");
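+        /* For example, for a hypothetical clone "c" with an action->uuid of
+         * "c_confirmed-post_notify_start_0", action_type is
+         * "confirmed-post_notify_start" and action_name is now "start"
+         */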
+ }
+ orig_task = get_complex_task(instance, action_name);
+ free(action_type);
+ return task2text(orig_task);
+}
+
+/*!
+ * \internal
+ * \brief Update two interleaved actions according to an ordering between them
+ *
+ * Given information about an ordering of two interleaved actions, update the
+ * actions' flags (and runnable_before members if appropriate) as appropriate
+ * for the ordering. Effects may cascade to other orderings involving the
+ * actions as well.
+ *
+ * \param[in,out] first 'First' action in an ordering
+ * \param[in,out] then 'Then' action in an ordering
+ * \param[in] node If not NULL, limit scope of ordering to this node
+ * \param[in] filter Action flags to limit scope of certain updates (may
+ * include pe_action_optional to affect only mandatory
+ * actions, and pe_action_runnable to affect only
+ * runnable actions)
+ * \param[in] type Group of enum pe_ordering flags to apply
+ *
+ * \return Group of enum pcmk__updated flags indicating what was updated
+ */
+static uint32_t
+update_interleaved_actions(pe_action_t *first, pe_action_t *then,
+ const pe_node_t *node, uint32_t filter,
+ uint32_t type)
+{
+ GList *instances = NULL;
+ uint32_t changed = pcmk__updated_none;
+ const char *orig_first_task = orig_action_name(first);
+
+    // Stops and demotes must be interleaved with the instance on its current node
+ bool current = pcmk__ends_with(first->uuid, "_" CRMD_ACTION_STOPPED "_0")
+ || pcmk__ends_with(first->uuid,
+ "_" CRMD_ACTION_DEMOTED "_0");
+
+ // Update the specified actions for each "then" instance individually
+ instances = get_instance_list(then->rsc);
+ for (GList *iter = instances; iter != NULL; iter = iter->next) {
+ pe_resource_t *first_instance = NULL;
+ pe_resource_t *then_instance = iter->data;
+
+ pe_action_t *first_action = NULL;
+ pe_action_t *then_action = NULL;
+
+ // Find a "first" instance to interleave with this "then" instance
+ first_instance = pcmk__find_compatible_instance(then_instance,
+ first->rsc,
+ RSC_ROLE_UNKNOWN,
+ current);
+
+ if (first_instance == NULL) { // No instance can be interleaved
+ if (unassign_if_mandatory(first, then, then_instance, type,
+ current)) {
+ pcmk__set_updated_flags(changed, first, pcmk__updated_then);
+ }
+ continue;
+ }
+
+ first_action = find_instance_action(first, first_instance,
+ orig_first_task, node, true);
+ if (first_action == NULL) {
+ continue;
+ }
+
+ then_action = find_instance_action(then, then_instance, then->task,
+ node, false);
+ if (then_action == NULL) {
+ continue;
+ }
+
+ if (order_actions(first_action, then_action, type)) {
+ pcmk__set_updated_flags(changed, first,
+ pcmk__updated_first|pcmk__updated_then);
+ }
+
+ changed |= then_instance->cmds->update_ordered_actions(
+ first_action, then_action, node,
+ first_instance->cmds->action_flags(first_action, node), filter,
+ type, then->rsc->cluster);
+ }
+ free_instance_list(then->rsc, instances);
+ return changed;
+}
+
+/*!
+ * \internal
+ * \brief Check whether two actions in an ordering can be interleaved
+ *
+ * \param[in] first 'First' action in the ordering
+ * \param[in] then 'Then' action in the ordering
+ *
+ * \return true if \p first and \p then can be interleaved, otherwise false
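+ *
+ * \note Interleaving is enabled via the \c interleave meta-attribute of the
+ *       relevant resource: the "first" resource for stops and demotes of the
+ *       "then" resource, otherwise the "then" resource.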
+ */
+static bool
+can_interleave_actions(const pe_action_t *first, const pe_action_t *then)
+{
+ bool interleave = false;
+ pe_resource_t *rsc = NULL;
+
+ if ((first->rsc == NULL) || (then->rsc == NULL)) {
+ crm_trace("Not interleaving %s with %s: not resource actions",
+ first->uuid, then->uuid);
+ return false;
+ }
+
+ if (first->rsc == then->rsc) {
+ crm_trace("Not interleaving %s with %s: same resource",
+ first->uuid, then->uuid);
+ return false;
+ }
+
+ if ((first->rsc->variant < pe_clone) || (then->rsc->variant < pe_clone)) {
+ crm_trace("Not interleaving %s with %s: not clones or bundles",
+ first->uuid, then->uuid);
+ return false;
+ }
+
+ if (pcmk__ends_with(then->uuid, "_stop_0")
+ || pcmk__ends_with(then->uuid, "_demote_0")) {
+ rsc = first->rsc;
+ } else {
+ rsc = then->rsc;
+ }
+
+ interleave = crm_is_true(g_hash_table_lookup(rsc->meta,
+ XML_RSC_ATTR_INTERLEAVE));
+ pe_rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)",
+ first->uuid, then->uuid, (interleave? "" : "not "), rsc->id);
+ return interleave;
+}
+
+/*!
+ * \internal
+ * \brief Update non-interleaved instance actions according to an ordering
+ *
+ * Given information about an ordering of two non-interleaved actions, update
+ * the actions' flags (and runnable_before members if appropriate) as
+ * appropriate for the ordering. Effects may cascade to other orderings
+ * involving the actions as well.
+ *
+ * \param[in,out] instance Clone instance or bundle container
+ * \param[in,out] first "First" action in ordering
+ * \param[in] then "Then" action in ordering (for \p instance's parent)
+ * \param[in] node If not NULL, limit scope of ordering to this node
+ * \param[in] flags Action flags for \p first for ordering purposes
+ * \param[in] filter Action flags to limit scope of certain updates (may
+ * include pe_action_optional to affect only mandatory
+ * actions, and pe_action_runnable to affect only
+ * runnable actions)
+ * \param[in] type Group of enum pe_ordering flags to apply
+ *
+ * \return Group of enum pcmk__updated flags indicating what was updated
+ */
+static uint32_t
+update_noninterleaved_actions(pe_resource_t *instance, pe_action_t *first,
+ const pe_action_t *then, const pe_node_t *node,
+ uint32_t flags, uint32_t filter, uint32_t type)
+{
+ pe_action_t *instance_action = NULL;
+ uint32_t instance_flags = 0;
+ uint32_t changed = pcmk__updated_none;
+
+ // Check whether instance has an equivalent of "then" action
+ instance_action = find_first_action(instance->actions, NULL, then->task,
+ node);
+ if (instance_action == NULL) {
+ return changed;
+ }
+
+ // Check whether action is runnable
+ instance_flags = instance->cmds->action_flags(instance_action, node);
+ if (!pcmk_is_set(instance_flags, pe_action_runnable)) {
+ return changed;
+ }
+
+ // If so, update actions for the instance
+ changed = instance->cmds->update_ordered_actions(first, instance_action,
+ node, flags, filter, type,
+ instance->cluster);
+
+ // Propagate any changes to later actions
+ if (pcmk_is_set(changed, pcmk__updated_then)) {
+ for (GList *after_iter = instance_action->actions_after;
+ after_iter != NULL; after_iter = after_iter->next) {
+ pe_action_wrapper_t *after = after_iter->data;
+
+ pcmk__update_action_for_orderings(after->action, instance->cluster);
+ }
+ }
+
+ return changed;
+}
+
+/*!
+ * \internal
+ * \brief Update two actions according to an ordering between them
+ *
+ * Given information about an ordering of two clone or bundle actions, update
+ * the actions' flags (and runnable_before members if appropriate) as
+ * appropriate for the ordering. Effects may cascade to other orderings
+ * involving the actions as well.
+ *
+ * \param[in,out] first 'First' action in an ordering
+ * \param[in,out] then 'Then' action in an ordering
+ * \param[in] node If not NULL, limit scope of ordering to this node
+ * (only used when interleaving instances)
+ * \param[in] flags Action flags for \p first for ordering purposes
+ * \param[in] filter Action flags to limit scope of certain updates (may
+ * include pe_action_optional to affect only mandatory
+ * actions, and pe_action_runnable to affect only
+ * runnable actions)
+ * \param[in] type Group of enum pe_ordering flags to apply
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Group of enum pcmk__updated flags indicating what was updated
+ */
+uint32_t
+pcmk__instance_update_ordered_actions(pe_action_t *first, pe_action_t *then,
+ const pe_node_t *node, uint32_t flags,
+ uint32_t filter, uint32_t type,
+ pe_working_set_t *data_set)
+{
+ if (then->rsc == NULL) {
+ return pcmk__updated_none;
+
+ } else if (can_interleave_actions(first, then)) {
+ return update_interleaved_actions(first, then, node, filter, type);
+
+ } else {
+ uint32_t changed = pcmk__updated_none;
+ GList *instances = get_instance_list(then->rsc);
+
+ // Update actions for the clone or bundle resource itself
+ changed |= pcmk__update_ordered_actions(first, then, node, flags,
+ filter, type, data_set);
+
+ // Update the 'then' clone instances or bundle containers individually
+ for (GList *iter = instances; iter != NULL; iter = iter->next) {
+ pe_resource_t *instance = iter->data;
+
+ changed |= update_noninterleaved_actions(instance, first, then,
+ node, flags, filter, type);
+ }
+ free_instance_list(then->rsc, instances);
+ return changed;
+ }
+}
+
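+// Shorthand for clearing a flag in a group of action flags, with trace logging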
+#define pe__clear_action_summary_flags(flags, action, flag) do { \
+ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "Action summary", action->rsc->id, \
+ flags, flag, #flag); \
+ } while (0)
+
+/*!
+ * \internal
+ * \brief Return action flags for a given clone or bundle action
+ *
+ * \param[in,out] action Action for a clone or bundle
+ * \param[in] instances Clone instances or bundle containers
+ * \param[in] node If not NULL, limit effects to this node
+ *
+ * \return Flags appropriate to \p action on \p node
+ */
+enum pe_action_flags
+pcmk__collective_action_flags(pe_action_t *action, const GList *instances,
+ const pe_node_t *node)
+{
+ bool any_runnable = false;
+ enum pe_action_flags flags;
+ const char *action_name = orig_action_name(action);
+
+ // Set original assumptions (optional and runnable may be cleared below)
+ flags = pe_action_optional|pe_action_runnable|pe_action_pseudo;
+
+ for (const GList *iter = instances; iter != NULL; iter = iter->next) {
+ const pe_resource_t *instance = iter->data;
+ const pe_node_t *instance_node = NULL;
+ pe_action_t *instance_action = NULL;
+ enum pe_action_flags instance_flags;
+
+ // Node is relevant only to primitive instances
+ if (instance->variant == pe_native) {
+ instance_node = node;
+ }
+
+ instance_action = find_first_action(instance->actions, NULL,
+ action_name, instance_node);
+ if (instance_action == NULL) {
+ pe_rsc_trace(action->rsc, "%s has no %s action on %s",
+ instance->id, action_name, pe__node_name(node));
+ continue;
+ }
+
+ pe_rsc_trace(action->rsc, "%s has %s for %s on %s",
+ instance->id, instance_action->uuid, action_name,
+ pe__node_name(node));
+
+ instance_flags = instance->cmds->action_flags(instance_action, node);
+
+ // If any instance action is mandatory, so is the collective action
+ if (pcmk_is_set(flags, pe_action_optional)
+ && !pcmk_is_set(instance_flags, pe_action_optional)) {
+ pe_rsc_trace(instance, "%s is mandatory because %s is",
+ action->uuid, instance_action->uuid);
+ pe__clear_action_summary_flags(flags, action, pe_action_optional);
+ pe__clear_action_flags(action, pe_action_optional);
+ }
+
+ // If any instance action is runnable, so is the collective action
+ if (pcmk_is_set(instance_flags, pe_action_runnable)) {
+ any_runnable = true;
+ }
+ }
+
+ if (!any_runnable) {
+ pe_rsc_trace(action->rsc,
+ "%s is not runnable because no instance can run %s",
+ action->uuid, action_name);
+ pe__clear_action_summary_flags(flags, action, pe_action_runnable);
+ if (node == NULL) {
+ pe__clear_action_flags(action, pe_action_runnable);
+ }
+ }
+
+ return flags;
+}
+
+/*!
+ * \internal
+ * \brief Add a collective resource's colocations to a list for an instance
+ *
+ * \param[in,out] list Colocation list to add to
+ * \param[in] instance Clone or bundle instance or instance group member
+ * \param[in] collective Clone or bundle resource with colocations to add
+ * \param[in] with_this If true, add collective's "with this" colocations,
+ * otherwise add its "this with" colocations
+ */
+void
+pcmk__add_collective_constraints(GList **list, const pe_resource_t *instance,
+ const pe_resource_t *collective,
+ bool with_this)
+{
+ const GList *colocations = NULL;
+ bool everywhere = false;
+
+ CRM_CHECK((list != NULL) && (instance != NULL), return);
+
+ if (collective == NULL) {
+ return;
+ }
+ switch (collective->variant) {
+ case pe_clone:
+ case pe_container:
+ break;
+ default:
+ return;
+ }
+
+ everywhere = can_run_everywhere(collective);
+
+ if (with_this) {
+ colocations = collective->rsc_cons_lhs;
+ } else {
+ colocations = collective->rsc_cons;
+ }
+
+ for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
+ const pcmk__colocation_t *colocation = iter->data;
+
+ if (with_this
+ && !pcmk__colocation_has_influence(colocation, instance)) {
+ continue;
+ }
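+        /* If the collective can run anywhere, only negative colocations and
+         * mandatory "this with" colocations can affect instance placement,
+         * so add only those
+         */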
+ if (!everywhere || (colocation->score < 0)
+ || (!with_this && (colocation->score == INFINITY))) {
+
+ if (with_this) {
+ pcmk__add_with_this(list, colocation);
+ } else {
+ pcmk__add_this_with(list, colocation);
+ }
+ }
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_location.c b/lib/pacemaker/pcmk_sched_location.c
new file mode 100644
index 0000000..b4ce4ff
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_location.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
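+/*!
+ * \internal
+ * \brief Get the score for a node from a location rule
+ *
+ * \param[in]     rule   ID of rule (for logging only)
+ * \param[in]     score  Score or score-attribute specified in the rule
+ * \param[in]     raw    If true, treat \p score as a literal score rather
+ *                       than the name of a node attribute
+ * \param[in,out] node   Node to get score for
+ * \param[in,out] rsc    Resource that the rule applies to
+ *
+ * \return Score for \p node (0 if no score was specified, or -INFINITY if
+ *         \p score names a node attribute that \p node does not have)
+ */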
+static int
+get_node_score(const char *rule, const char *score, bool raw,
+ pe_node_t *node, pe_resource_t *rsc)
+{
+ int score_f = 0;
+
+ if (score == NULL) {
+ pe_err("Rule %s: no score specified. Assuming 0.", rule);
+
+ } else if (raw) {
+ score_f = char2score(score);
+
+ } else {
+ const char *attr_score = pe_node_attribute_calculated(node, score, rsc);
+
+ if (attr_score == NULL) {
+ crm_debug("Rule %s: %s did not have a value for %s",
+ rule, pe__node_name(node), score);
+ score_f = -INFINITY;
+
+ } else {
+ crm_debug("Rule %s: %s had value %s for %s",
+ rule, pe__node_name(node), attr_score, score);
+ score_f = char2score(attr_score);
+ }
+ }
+ return score_f;
+}
+
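+/*!
+ * \internal
+ * \brief Unpack a rule within a location constraint into a constraint object
+ *
+ * \param[in,out] rsc            Resource that the constraint applies to
+ * \param[in,out] rule_xml       Rule XML within the constraint
+ * \param[in]     discovery      Value of constraint's resource-discovery
+ * \param[out]    next_change    Where to set time of next rule evaluation
+ *                               change, if known
+ * \param[in,out] data_set       Cluster working set
+ * \param[in]     re_match_data  Regular expression submatches (if constraint
+ *                               was matched via rsc-pattern)
+ *
+ * \return New location constraint with matching nodes if any, otherwise NULL
+ */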
+static pe__location_t *
+generate_location_rule(pe_resource_t *rsc, xmlNode *rule_xml,
+ const char *discovery, crm_time_t *next_change,
+ pe_working_set_t *data_set,
+ pe_re_match_data_t *re_match_data)
+{
+ const char *rule_id = NULL;
+ const char *score = NULL;
+ const char *boolean = NULL;
+ const char *role = NULL;
+
+ GList *gIter = NULL;
+ GList *match_L = NULL;
+
+ bool do_and = true;
+ bool accept = true;
+ bool raw_score = true;
+ bool score_allocated = false;
+
+ pe__location_t *location_rule = NULL;
+
+ rule_xml = expand_idref(rule_xml, data_set->input);
+ if (rule_xml == NULL) {
+ return NULL;
+ }
+
+ rule_id = crm_element_value(rule_xml, XML_ATTR_ID);
+ boolean = crm_element_value(rule_xml, XML_RULE_ATTR_BOOLEAN_OP);
+ role = crm_element_value(rule_xml, XML_RULE_ATTR_ROLE);
+
+ crm_trace("Processing rule: %s", rule_id);
+
+ if ((role != NULL) && (text2role(role) == RSC_ROLE_UNKNOWN)) {
+ pe_err("Bad role specified for %s: %s", rule_id, role);
+ return NULL;
+ }
+
+ score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE);
+ if (score == NULL) {
+ score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE_ATTRIBUTE);
+ if (score != NULL) {
+ raw_score = false;
+ }
+ }
+ if (pcmk__str_eq(boolean, "or", pcmk__str_casei)) {
+ do_and = false;
+ }
+
+ location_rule = pcmk__new_location(rule_id, rsc, 0, discovery, NULL,
+ data_set);
+
+ if (location_rule == NULL) {
+ return NULL;
+ }
+
+ if ((re_match_data != NULL) && (re_match_data->nregs > 0)
+ && (re_match_data->pmatch[0].rm_so != -1) && !raw_score) {
+
+ char *result = pe_expand_re_matches(score, re_match_data);
+
+ if (result != NULL) {
+ score = result;
+ score_allocated = true;
+ }
+ }
+
+ if (role != NULL) {
+ crm_trace("Setting role filter: %s", role);
+ location_rule->role_filter = text2role(role);
+ if (location_rule->role_filter == RSC_ROLE_UNPROMOTED) {
+ /* Any promotable clone cannot be promoted without being in the
+ * unpromoted role first. Ergo, any constraint for the unpromoted
+ * role applies to every role.
+ */
+ location_rule->role_filter = RSC_ROLE_UNKNOWN;
+ }
+ }
+ if (do_and) {
+ GList *gIter = NULL;
+
+ match_L = pcmk__copy_node_list(data_set->nodes, true);
+ for (gIter = match_L; gIter != NULL; gIter = gIter->next) {
+ pe_node_t *node = (pe_node_t *) gIter->data;
+
+ node->weight = get_node_score(rule_id, score, raw_score, node, rsc);
+ }
+ }
+
+ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+ int score_f = 0;
+ pe_node_t *node = (pe_node_t *) gIter->data;
+ pe_match_data_t match_data = {
+ .re = re_match_data,
+ .params = pe_rsc_params(rsc, node, data_set),
+ .meta = rsc->meta,
+ };
+
+ accept = pe_test_rule(rule_xml, node->details->attrs, RSC_ROLE_UNKNOWN,
+ data_set->now, next_change, &match_data);
+
+ crm_trace("Rule %s %s on %s", ID(rule_xml), accept? "passed" : "failed",
+ pe__node_name(node));
+
+ score_f = get_node_score(rule_id, score, raw_score, node, rsc);
+
+ if (accept) {
+ pe_node_t *local = pe_find_node_id(match_L, node->details->id);
+
+ if ((local == NULL) && do_and) {
+ continue;
+
+ } else if (local == NULL) {
+ local = pe__copy_node(node);
+ match_L = g_list_append(match_L, local);
+ }
+
+ if (!do_and) {
+ local->weight = pcmk__add_scores(local->weight, score_f);
+ }
+ crm_trace("%s has score %s after %s", pe__node_name(node),
+ pcmk_readable_score(local->weight), rule_id);
+
+ } else if (do_and && !accept) {
+            // Remove the node from the match list
+ pe_node_t *delete = pe_find_node_id(match_L, node->details->id);
+
+ if (delete != NULL) {
+ match_L = g_list_remove(match_L, delete);
+ crm_trace("%s did not match", pe__node_name(node));
+ }
+ free(delete);
+ }
+ }
+
+ if (score_allocated) {
+ free((char *)score);
+ }
+
+ location_rule->node_list_rh = match_L;
+ if (location_rule->node_list_rh == NULL) {
+ crm_trace("No matching nodes for rule %s", rule_id);
+ return NULL;
+ }
+
+ crm_trace("%s: %d nodes matched",
+ rule_id, g_list_length(location_rule->node_list_rh));
+ return location_rule;
+}
+
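+/* For reference, a location constraint in its simple node/score form looks
+ * like this (a hypothetical example, not taken from this code):
+ *
+ *   <rsc_location id="loc-www" rsc="www" node="node1" score="200"/>
+ *
+ * and in its rule-based form:
+ *
+ *   <rsc_location id="loc-www-rule" rsc="www">
+ *     <rule id="loc-www-rule-1" score="-INFINITY">
+ *       <expression id="expr-1" attribute="tier" operation="eq" value="dev"/>
+ *     </rule>
+ *   </rsc_location>
+ */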
+static void
+unpack_rsc_location(xmlNode *xml_obj, pe_resource_t *rsc, const char *role,
+ const char *score, pe_working_set_t *data_set,
+ pe_re_match_data_t *re_match_data)
+{
+ pe__location_t *location = NULL;
+ const char *rsc_id = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE);
+ const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
+ const char *node = crm_element_value(xml_obj, XML_CIB_TAG_NODE);
+ const char *discovery = crm_element_value(xml_obj, XML_LOCATION_ATTR_DISCOVERY);
+
+ if (rsc == NULL) {
+ pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
+ "does not exist", id, rsc_id);
+ return;
+ }
+
+ if (score == NULL) {
+ score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
+ }
+
+ if ((node != NULL) && (score != NULL)) {
+ int score_i = char2score(score);
+ pe_node_t *match = pe_find_node(data_set->nodes, node);
+
+ if (!match) {
+ return;
+ }
+ location = pcmk__new_location(id, rsc, score_i, discovery, match,
+ data_set);
+
+ } else {
+ bool empty = true;
+ crm_time_t *next_change = crm_time_new_undefined();
+
+ /* This loop is logically parallel to pe_evaluate_rules(), except
+ * instead of checking whether any rule is active, we set up location
+ * constraints for each active rule.
+ */
+ for (xmlNode *rule_xml = first_named_child(xml_obj, XML_TAG_RULE);
+ rule_xml != NULL; rule_xml = crm_next_same_xml(rule_xml)) {
+ empty = false;
+ crm_trace("Unpacking %s/%s", id, ID(rule_xml));
+ generate_location_rule(rsc, rule_xml, discovery, next_change,
+ data_set, re_match_data);
+ }
+
+ if (empty) {
+ pcmk__config_err("Ignoring constraint '%s' because it contains "
+ "no rules", id);
+ }
+
+ /* If there is a point in the future when the evaluation of a rule will
+ * change, make sure the scheduler is re-run by that time.
+ */
+ if (crm_time_is_defined(next_change)) {
+ time_t t = (time_t) crm_time_get_seconds_since_epoch(next_change);
+
+ pe__update_recheck_time(t, data_set);
+ }
+ crm_time_free(next_change);
+ return;
+ }
+
+ if (role == NULL) {
+ role = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE);
+ }
+
+    if ((location != NULL) && (role != NULL)) {
+        enum rsc_role_e r = text2role(role);
+
+        switch (r) {
+            case RSC_ROLE_UNKNOWN:
+                pe_err("Invalid constraint %s: Bad role %s", id, role);
+                return;
+
+            case RSC_ROLE_STARTED:
+            case RSC_ROLE_UNPROMOTED:
+                /* Applies to all roles */
+                location->role_filter = RSC_ROLE_UNKNOWN;
+                break;
+
+            default:
+                location->role_filter = r;
+                break;
+        }
+    }
+}
+
+static void
+unpack_simple_location(xmlNode *xml_obj, pe_working_set_t *data_set)
+{
+ const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
+ const char *value = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE);
+
+ if (value) {
+ pe_resource_t *rsc;
+
+ rsc = pcmk__find_constraint_resource(data_set->resources, value);
+ unpack_rsc_location(xml_obj, rsc, NULL, NULL, data_set, NULL);
+ }
+
+ value = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE_PATTERN);
+ if (value) {
+ regex_t *r_patt = calloc(1, sizeof(regex_t));
+ bool invert = false;
+ GList *rIter = NULL;
+
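+        /* A leading '!' inverts the pattern match: for example, a
+         * hypothetical rsc-pattern of "!^ip-" applies the constraint to all
+         * resources whose IDs do not start with "ip-"
+         */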
+ if (value[0] == '!') {
+ value++;
+ invert = true;
+ }
+
+ if (regcomp(r_patt, value, REG_EXTENDED) != 0) {
+ pcmk__config_err("Ignoring constraint '%s' because "
+ XML_LOC_ATTR_SOURCE_PATTERN
+ " has invalid value '%s'", id, value);
+ free(r_patt);
+ return;
+ }
+
+ for (rIter = data_set->resources; rIter; rIter = rIter->next) {
+ pe_resource_t *r = rIter->data;
+ int nregs = 0;
+ regmatch_t *pmatch = NULL;
+ int status;
+
+ if(r_patt->re_nsub > 0) {
+ nregs = r_patt->re_nsub + 1;
+ } else {
+ nregs = 1;
+ }
+ pmatch = calloc(nregs, sizeof(regmatch_t));
+
+ status = regexec(r_patt, r->id, nregs, pmatch, 0);
+
+ if (!invert && (status == 0)) {
+ pe_re_match_data_t re_match_data = {
+ .string = r->id,
+ .nregs = nregs,
+ .pmatch = pmatch
+ };
+
+ crm_debug("'%s' matched '%s' for %s", r->id, value, id);
+ unpack_rsc_location(xml_obj, r, NULL, NULL, data_set,
+ &re_match_data);
+
+ } else if (invert && (status != 0)) {
+ crm_debug("'%s' is an inverted match of '%s' for %s",
+ r->id, value, id);
+ unpack_rsc_location(xml_obj, r, NULL, NULL, data_set, NULL);
+
+ } else {
+ crm_trace("'%s' does not match '%s' for %s", r->id, value, id);
+ }
+
+ free(pmatch);
+ }
+
+ regfree(r_patt);
+ free(r_patt);
+ }
+}
+
+// \return Standard Pacemaker return code
+static int
+unpack_location_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
+ pe_working_set_t *data_set)
+{
+ const char *id = NULL;
+ const char *rsc_id = NULL;
+ const char *state = NULL;
+ pe_resource_t *rsc = NULL;
+ pe_tag_t *tag = NULL;
+ xmlNode *rsc_set = NULL;
+
+ *expanded_xml = NULL;
+
+ CRM_CHECK(xml_obj != NULL, return EINVAL);
+
+ id = ID(xml_obj);
+ if (id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
+ return pcmk_rc_unpack_error;
+ }
+
+ // Check whether there are any resource sets with template or tag references
+ *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set);
+ if (*expanded_xml != NULL) {
+ crm_log_xml_trace(*expanded_xml, "Expanded rsc_location");
+ return pcmk_rc_ok;
+ }
+
+ rsc_id = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE);
+ if (rsc_id == NULL) {
+ return pcmk_rc_ok;
+ }
+
+ if (!pcmk__valid_resource_or_tag(data_set, rsc_id, &rsc, &tag)) {
+ pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+ "valid resource or tag", id, rsc_id);
+ return pcmk_rc_unpack_error;
+
+ } else if (rsc != NULL) {
+ // No template is referenced
+ return pcmk_rc_ok;
+ }
+
+ state = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE);
+
+ *expanded_xml = copy_xml(xml_obj);
+
+ // Convert template/tag reference in "rsc" into resource_set under constraint
+ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set, XML_LOC_ATTR_SOURCE,
+ false, data_set)) {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ return pcmk_rc_unpack_error;
+ }
+
+ if (rsc_set != NULL) {
+ if (state != NULL) {
+ // Move "rsc-role" into converted resource_set as "role" attribute
+ crm_xml_add(rsc_set, "role", state);
+ xml_remove_prop(*expanded_xml, XML_RULE_ATTR_ROLE);
+ }
+ crm_log_xml_trace(*expanded_xml, "Expanded rsc_location");
+
+ } else {
+ // No sets
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ }
+
+ return pcmk_rc_ok;
+}
+
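+/* For reference, a location constraint with a resource set, as unpacked
+ * below, might look like this hypothetical example:
+ *
+ *   <rsc_location id="loc-set" node="node1" score="INFINITY">
+ *     <resource_set id="loc-set-0" role="Started">
+ *       <resource_ref id="rsc1"/>
+ *       <resource_ref id="rsc2"/>
+ *     </resource_set>
+ *   </rsc_location>
+ */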
+// \return Standard Pacemaker return code
+static int
+unpack_location_set(xmlNode *location, xmlNode *set, pe_working_set_t *data_set)
+{
+ xmlNode *xml_rsc = NULL;
+ pe_resource_t *resource = NULL;
+ const char *set_id;
+ const char *role;
+ const char *local_score;
+
+ CRM_CHECK(set != NULL, return EINVAL);
+
+ set_id = ID(set);
+ if (set_id == NULL) {
+ pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_SET " without "
+ XML_ATTR_ID " in constraint '%s'",
+ pcmk__s(ID(location), "(missing ID)"));
+ return pcmk_rc_unpack_error;
+ }
+
+ role = crm_element_value(set, "role");
+ local_score = crm_element_value(set, XML_RULE_ATTR_SCORE);
+
+ for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ resource = pcmk__find_constraint_resource(data_set->resources,
+ ID(xml_rsc));
+ if (resource == NULL) {
+ pcmk__config_err("%s: No resource found for %s",
+ set_id, ID(xml_rsc));
+ return pcmk_rc_unpack_error;
+ }
+
+ unpack_rsc_location(location, resource, role, local_score, data_set,
+ NULL);
+ }
+
+ return pcmk_rc_ok;
+}
+
+void
+pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set)
+{
+ xmlNode *set = NULL;
+ bool any_sets = false;
+
+ xmlNode *orig_xml = NULL;
+ xmlNode *expanded_xml = NULL;
+
+ if (unpack_location_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) {
+ return;
+ }
+
+ if (expanded_xml) {
+ orig_xml = xml_obj;
+ xml_obj = expanded_xml;
+ }
+
+ for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL;
+ set = crm_next_same_xml(set)) {
+
+ any_sets = true;
+ set = expand_idref(set, data_set->input);
+ if ((set == NULL) // Configuration error, message already logged
+ || (unpack_location_set(xml_obj, set, data_set) != pcmk_rc_ok)) {
+
+ if (expanded_xml) {
+ free_xml(expanded_xml);
+ }
+ return;
+ }
+ }
+
+ if (expanded_xml) {
+ free_xml(expanded_xml);
+ xml_obj = orig_xml;
+ }
+
+ if (!any_sets) {
+ unpack_simple_location(xml_obj, data_set);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add a new location constraint to a cluster working set
+ *
+ * \param[in] id XML ID of location constraint
+ * \param[in,out] rsc Resource in location constraint
+ * \param[in] node_weight Constraint score
+ * \param[in] discover_mode Resource discovery option for constraint
+ * \param[in] node Node in constraint (or NULL if rule-based)
+ * \param[in,out] data_set Cluster working set to add constraint to
+ *
+ * \return Newly allocated location constraint
+ * \note The result will be added to \p data_set and should not be freed
+ * separately.
+ */
+pe__location_t *
+pcmk__new_location(const char *id, pe_resource_t *rsc,
+ int node_weight, const char *discover_mode,
+ pe_node_t *node, pe_working_set_t *data_set)
+{
+ pe__location_t *new_con = NULL;
+
+ if (id == NULL) {
+ pe_err("Invalid constraint: no ID specified");
+ return NULL;
+
+ } else if (rsc == NULL) {
+ pe_err("Invalid constraint %s: no resource specified", id);
+ return NULL;
+
+ } else if (node == NULL) {
+ CRM_CHECK(node_weight == 0, return NULL);
+ }
+
+ new_con = calloc(1, sizeof(pe__location_t));
+ if (new_con != NULL) {
+ new_con->id = strdup(id);
+ new_con->rsc_lh = rsc;
+ new_con->node_list_rh = NULL;
+ new_con->role_filter = RSC_ROLE_UNKNOWN;
+
+ if (pcmk__str_eq(discover_mode, "always",
+ pcmk__str_null_matches|pcmk__str_casei)) {
+ new_con->discover_mode = pe_discover_always;
+
+ } else if (pcmk__str_eq(discover_mode, "never", pcmk__str_casei)) {
+ new_con->discover_mode = pe_discover_never;
+
+ } else if (pcmk__str_eq(discover_mode, "exclusive", pcmk__str_casei)) {
+ new_con->discover_mode = pe_discover_exclusive;
+ rsc->exclusive_discover = TRUE;
+
+ } else {
+ pe_err("Invalid " XML_LOCATION_ATTR_DISCOVERY " value %s "
+ "in location constraint", discover_mode);
+ }
+
+ if (node != NULL) {
+ pe_node_t *copy = pe__copy_node(node);
+
+ copy->weight = node_weight;
+ new_con->node_list_rh = g_list_prepend(NULL, copy);
+ }
+
+ data_set->placement_constraints = g_list_prepend(data_set->placement_constraints,
+ new_con);
+ rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con);
+ }
+
+ return new_con;
+}
+
+/*!
+ * \internal
+ * \brief Apply all location constraints
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__apply_locations(pe_working_set_t *data_set)
+{
+ for (GList *iter = data_set->placement_constraints;
+ iter != NULL; iter = iter->next) {
+ pe__location_t *location = iter->data;
+
+ location->rsc_lh->cmds->apply_location(location->rsc_lh, location);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Apply a location constraint to a resource's allowed node scores
+ *
+ * \param[in,out] rsc Resource to apply constraint to
+ * \param[in,out] location Location constraint to apply
+ *
+ * \note This does not consider the resource's children, so the resource's
+ * apply_location() method should be used instead in most cases.
+ */
+void
+pcmk__apply_location(pe_resource_t *rsc, pe__location_t *location)
+{
+ bool need_role = false;
+
+ CRM_CHECK((rsc != NULL) && (location != NULL), return);
+
+ // If a role was specified, ensure constraint is applicable
+ need_role = (location->role_filter > RSC_ROLE_UNKNOWN);
+ if (need_role && (location->role_filter != rsc->next_role)) {
+ pe_rsc_trace(rsc,
+ "Not applying %s to %s because role will be %s not %s",
+ location->id, rsc->id, role2text(rsc->next_role),
+ role2text(location->role_filter));
+ return;
+ }
+
+ if (location->node_list_rh == NULL) {
+ pe_rsc_trace(rsc, "Not applying %s to %s because no nodes match",
+ location->id, rsc->id);
+ return;
+ }
+
+ pe_rsc_trace(rsc, "Applying %s%s%s to %s", location->id,
+ (need_role? " for role " : ""),
+ (need_role? role2text(location->role_filter) : ""), rsc->id);
+
+ for (GList *gIter = location->node_list_rh; gIter != NULL;
+ gIter = gIter->next) {
+
+ pe_node_t *node = (pe_node_t *) gIter->data;
+ pe_node_t *weighted_node = NULL;
+
+ weighted_node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes,
+ node->details->id);
+ if (weighted_node == NULL) {
+ pe_rsc_trace(rsc, "* = %d on %s",
+ node->weight, pe__node_name(node));
+ weighted_node = pe__copy_node(node);
+ g_hash_table_insert(rsc->allowed_nodes,
+ (gpointer) weighted_node->details->id,
+ weighted_node);
+ } else {
+ pe_rsc_trace(rsc, "* + %d on %s",
+ node->weight, pe__node_name(node));
+ weighted_node->weight = pcmk__add_scores(weighted_node->weight,
+ node->weight);
+ }
+
+ if (weighted_node->rsc_discover_mode < location->discover_mode) {
+ if (location->discover_mode == pe_discover_exclusive) {
+ rsc->exclusive_discover = TRUE;
+ }
+            /* Precedence: exclusive > never > always ("always" is the default) */
+ weighted_node->rsc_discover_mode = location->discover_mode;
+ }
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_migration.c b/lib/pacemaker/pcmk_sched_migration.c
new file mode 100644
index 0000000..7e6ba8e
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_migration.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright 2004-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Add migration source and target meta-attributes to an action
+ *
+ * \param[in,out] action Action to add meta-attributes to
+ * \param[in] source Node to add as migration source
+ * \param[in] target Node to add as migration target
+ */
+static void
+add_migration_meta(pe_action_t *action, const pe_node_t *source,
+ const pe_node_t *target)
+{
+ add_hash_param(action->meta, XML_LRM_ATTR_MIGRATE_SOURCE,
+ source->details->uname);
+
+ add_hash_param(action->meta, XML_LRM_ATTR_MIGRATE_TARGET,
+ target->details->uname);
+}
+
+/*!
+ * \internal
+ * \brief Create internal migration actions for a migratable resource
+ *
+ * \param[in,out] rsc Resource to create migration actions for
+ * \param[in] current Node that resource is originally active on
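+ *
+ * \note This assumes the resource has already been assigned, so that
+ *       rsc->allocated_to is the migration target.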
+ */
+void
+pcmk__create_migration_actions(pe_resource_t *rsc, const pe_node_t *current)
+{
+ pe_action_t *migrate_to = NULL;
+ pe_action_t *migrate_from = NULL;
+ pe_action_t *start = NULL;
+ pe_action_t *stop = NULL;
+
+ pe_rsc_trace(rsc, "Creating actions to %smigrate %s from %s to %s",
+ ((rsc->partial_migration_target == NULL)? "" : "partially "),
+ rsc->id, pe__node_name(current),
+ pe__node_name(rsc->allocated_to));
+ start = start_action(rsc, rsc->allocated_to, TRUE);
+ stop = stop_action(rsc, current, TRUE);
+
+ if (rsc->partial_migration_target == NULL) {
+ migrate_to = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0),
+ RSC_MIGRATE, current, TRUE, TRUE,
+ rsc->cluster);
+ }
+ migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
+ RSC_MIGRATED, rsc->allocated_to, TRUE, TRUE,
+ rsc->cluster);
+
+ if ((migrate_from != NULL)
+ && ((migrate_to != NULL) || (rsc->partial_migration_target != NULL))) {
+
+ pe__set_action_flags(start, pe_action_migrate_runnable);
+ pe__set_action_flags(stop, pe_action_migrate_runnable);
+
+        // Making the start a pseudo-action is easier than deleting it from the graph
+ pe__set_action_flags(start, pe_action_pseudo);
+
+ if (rsc->partial_migration_target == NULL) {
+ pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
+
+ if (migrate_to != NULL) {
+ pe__set_action_flags(migrate_to, pe_action_migrate_runnable);
+ migrate_to->needs = start->needs;
+ }
+
+ // Probe -> migrate_to -> migrate_from
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0),
+ NULL, pe_order_optional, rsc->cluster);
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
+ NULL,
+ pe_order_optional|pe_order_implies_first_migratable,
+ rsc->cluster);
+ } else {
+ pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
+ migrate_from->needs = start->needs;
+
+ // Probe -> migrate_from (migrate_to already completed)
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
+ NULL, pe_order_optional, rsc->cluster);
+ }
+
+ // migrate_from before stop or start
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
+ pe_order_optional|pe_order_implies_first_migratable,
+ rsc->cluster);
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
+ pe_order_optional|pe_order_implies_first_migratable|pe_order_pseudo_left,
+ rsc->cluster);
+ }
+
+ if (migrate_to != NULL) {
+ add_migration_meta(migrate_to, current, rsc->allocated_to);
+
+ if (!rsc->is_remote_node) {
+ /* migrate_to takes place on the source node, but can affect the
+ * target node depending on how the agent is written. Because of
+ * this, pending migrate_to actions must be recorded in the CIB,
+ * in case the source node loses membership while the migrate_to
+ * action is still in flight.
+ *
+ * However we know Pacemaker Remote connection resources don't
+ * require this, so we skip this for them. (Although it wouldn't
+ * hurt, and now that record-pending defaults to true, skipping it
+ * matters even less.)
+ */
+ add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true");
+ }
+ }
+
+ if (migrate_from != NULL) {
+ add_migration_meta(migrate_from, current, rsc->allocated_to);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Abort a dangling migration by scheduling a stop (and possibly cleanup)
+ *
+ * \param[in] data Source node of dangling migration
+ * \param[in,out] user_data Resource involved in dangling migration
+ */
+void
+pcmk__abort_dangling_migration(void *data, void *user_data)
+{
+ const pe_node_t *dangling_source = (const pe_node_t *) data;
+ pe_resource_t *rsc = (pe_resource_t *) user_data;
+
+ pe_action_t *stop = NULL;
+ bool cleanup = pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop);
+
+ pe_rsc_trace(rsc,
+ "Scheduling stop%s for %s on %s due to dangling migration",
+ (cleanup? " and cleanup" : ""), rsc->id,
+ pe__node_name(dangling_source));
+ stop = stop_action(rsc, dangling_source, FALSE);
+ pe__set_action_flags(stop, pe_action_dangle);
+ if (cleanup) {
+ pcmk__schedule_cleanup(rsc, dangling_source, false);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a resource can migrate
+ *
+ * \param[in] rsc Resource to check
+ * \param[in] current Resource's current node
+ *
+ * \return true if \p rsc can migrate, otherwise false
+ */
+bool
+pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current)
+{
+ CRM_CHECK(rsc != NULL, return false);
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)) {
+ pe_rsc_trace(rsc, "%s cannot migrate because "
+ "the configuration does not allow it",
+ rsc->id);
+ return false;
+ }
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pe_rsc_trace(rsc, "%s cannot migrate because it is not managed",
+ rsc->id);
+ return false;
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ pe_rsc_trace(rsc, "%s cannot migrate because it is failed",
+ rsc->id);
+ return false;
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
+ pe_rsc_trace(rsc, "%s cannot migrate because it has a start pending",
+ rsc->id);
+ return false;
+ }
+
+ if ((current == NULL) || current->details->unclean) {
+ pe_rsc_trace(rsc, "%s cannot migrate because "
+ "its current node (%s) is unclean",
+ rsc->id, pe__node_name(current));
+ return false;
+ }
+
+ if ((rsc->allocated_to == NULL) || rsc->allocated_to->details->unclean) {
+ pe_rsc_trace(rsc, "%s cannot migrate because "
+ "its next node (%s) is unclean",
+ rsc->id, pe__node_name(rsc->allocated_to));
+ return false;
+ }
+
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Get an action name from an action or operation key
+ *
+ * \param[in] action  If not NULL, get action name from here
+ * \param[in] key     If \p action is NULL, parse action name from this
+ *                    operation key
+ *
+ * \return Newly allocated copy of action name (or NULL if none available)
+ */
+static char *
+task_from_action_or_key(const pe_action_t *action, const char *key)
+{
+ char *res = NULL;
+
+ if (action != NULL) {
+ res = strdup(action->task);
+ CRM_ASSERT(res != NULL);
+ } else if (key != NULL) {
+ parse_op_key(key, NULL, &res, NULL);
+ }
+ return res;
+}
+
+/*!
+ * \internal
+ * \brief Order migration actions equivalent to a given ordering
+ *
+ * Orderings involving start, stop, demote, and promote actions must be honored
+ * during a migration as well, so duplicate any such ordering for the
+ * corresponding migration actions.
+ *
+ * \param[in,out] order Ordering constraint to check
+ */
+void
+pcmk__order_migration_equivalents(pe__ordering_t *order)
+{
+ char *first_task = NULL;
+ char *then_task = NULL;
+ bool then_migratable;
+ bool first_migratable;
+
+ // Only orderings between unrelated resources are relevant
+ if ((order->lh_rsc == NULL) || (order->rh_rsc == NULL)
+ || (order->lh_rsc == order->rh_rsc)
+ || is_parent(order->lh_rsc, order->rh_rsc)
+ || is_parent(order->rh_rsc, order->lh_rsc)) {
+ return;
+ }
+
+ // Only orderings involving at least one migratable resource are relevant
+ first_migratable = pcmk_is_set(order->lh_rsc->flags, pe_rsc_allow_migrate);
+ then_migratable = pcmk_is_set(order->rh_rsc->flags, pe_rsc_allow_migrate);
+ if (!first_migratable && !then_migratable) {
+ return;
+ }
+
+ // Check which actions are involved
+ first_task = task_from_action_or_key(order->lh_action,
+ order->lh_action_task);
+ then_task = task_from_action_or_key(order->rh_action,
+ order->rh_action_task);
+
+ if (pcmk__str_eq(first_task, RSC_START, pcmk__str_none)
+ && pcmk__str_eq(then_task, RSC_START, pcmk__str_none)) {
+
+ uint32_t flags = pe_order_optional;
+
+ if (first_migratable && then_migratable) {
+ /* A start then B start
+ * -> A migrate_from then B migrate_to */
+ pcmk__new_ordering(order->lh_rsc,
+ pcmk__op_key(order->lh_rsc->id, RSC_MIGRATED, 0),
+ NULL, order->rh_rsc,
+ pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0),
+ NULL, flags, order->lh_rsc->cluster);
+ }
+
+ if (then_migratable) {
+ if (first_migratable) {
+ pe__set_order_flags(flags, pe_order_apply_first_non_migratable);
+ }
+
+ /* A start then B start
+ * -> A start then B migrate_to (if start is not part of a
+ * migration)
+ */
+ pcmk__new_ordering(order->lh_rsc,
+ pcmk__op_key(order->lh_rsc->id, RSC_START, 0),
+ NULL, order->rh_rsc,
+ pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0),
+ NULL, flags, order->lh_rsc->cluster);
+ }
+
+ } else if (then_migratable
+ && pcmk__str_eq(first_task, RSC_STOP, pcmk__str_none)
+ && pcmk__str_eq(then_task, RSC_STOP, pcmk__str_none)) {
+
+ uint32_t flags = pe_order_optional;
+
+ if (first_migratable) {
+ pe__set_order_flags(flags, pe_order_apply_first_non_migratable);
+ }
+
+ /* For an ordering "stop A then stop B", if A is moving via restart, and
+ * B is migrating, enforce that B's migrate_to occurs after A's stop.
+ */
+ pcmk__new_ordering(order->lh_rsc,
+ pcmk__op_key(order->lh_rsc->id, RSC_STOP, 0), NULL,
+ order->rh_rsc,
+ pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0),
+ NULL, flags, order->lh_rsc->cluster);
+
+ // Also order B's migrate_from after A's stop during partial migrations
+ if (order->rh_rsc->partial_migration_target) {
+ pcmk__new_ordering(order->lh_rsc,
+ pcmk__op_key(order->lh_rsc->id, RSC_STOP, 0),
+ NULL, order->rh_rsc,
+ pcmk__op_key(order->rh_rsc->id, RSC_MIGRATED, 0),
+ NULL, flags, order->lh_rsc->cluster);
+ }
+
+ } else if (pcmk__str_eq(first_task, RSC_PROMOTE, pcmk__str_none)
+ && pcmk__str_eq(then_task, RSC_START, pcmk__str_none)) {
+
+ uint32_t flags = pe_order_optional;
+
+ if (then_migratable) {
+ /* A promote then B start
+ * -> A promote then B migrate_to */
+ pcmk__new_ordering(order->lh_rsc,
+ pcmk__op_key(order->lh_rsc->id, RSC_PROMOTE, 0),
+ NULL, order->rh_rsc,
+ pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0),
+ NULL, flags, order->lh_rsc->cluster);
+ }
+
+ } else if (pcmk__str_eq(first_task, RSC_DEMOTE, pcmk__str_none)
+ && pcmk__str_eq(then_task, RSC_STOP, pcmk__str_none)) {
+
+ uint32_t flags = pe_order_optional;
+
+ if (then_migratable) {
+ /* A demote then B stop
+ * -> A demote then B migrate_to */
+ pcmk__new_ordering(order->lh_rsc,
+ pcmk__op_key(order->lh_rsc->id, RSC_DEMOTE, 0),
+ NULL, order->rh_rsc,
+ pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0),
+ NULL, flags, order->lh_rsc->cluster);
+
+            // Also order B's migrate_from after A's demote during partial migrations
+ if (order->rh_rsc->partial_migration_target) {
+ pcmk__new_ordering(order->lh_rsc,
+ pcmk__op_key(order->lh_rsc->id, RSC_DEMOTE, 0),
+ NULL, order->rh_rsc,
+ pcmk__op_key(order->rh_rsc->id, RSC_MIGRATED, 0),
+ NULL, flags, order->lh_rsc->cluster);
+ }
+ }
+ }
+
+ free(first_task);
+ free(then_task);
+}
diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c
new file mode 100644
index 0000000..d7d5ba4
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_nodes.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/msg_xml.h>
+#include <crm/lrmd.h> // lrmd_event_data_t
+#include <crm/common/xml_internal.h>
+#include <pacemaker-internal.h>
+#include <pacemaker.h>
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Check whether a node is available to run resources
+ *
+ * \param[in] node Node to check
+ * \param[in] consider_score If true, consider a node with a negative score
+ *                           unavailable
+ * \param[in] consider_guest If true, consider a guest node unavailable if its
+ *                           container resource will not be active
+ *
+ * \return true if node is online and not shutting down, unclean, or in standby
+ *         or maintenance mode (and, if requested, passes the score and guest
+ *         checks), otherwise false
+ */
+bool
+pcmk__node_available(const pe_node_t *node, bool consider_score,
+ bool consider_guest)
+{
+ if ((node == NULL) || (node->details == NULL) || !node->details->online
+ || node->details->shutdown || node->details->unclean
+ || node->details->standby || node->details->maintenance) {
+ return false;
+ }
+
+ if (consider_score && (node->weight < 0)) {
+ return false;
+ }
+
+ // @TODO Go through all callers to see which should set consider_guest
+ if (consider_guest && pe__is_guest_node(node)) {
+ pe_resource_t *guest = node->details->remote_rsc->container;
+
+ if (guest->fns->location(guest, NULL, FALSE) == NULL) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Copy a hash table of node objects
+ *
+ * \param[in] nodes Hash table to copy
+ *
+ * \return New copy of nodes (or NULL if nodes is NULL)
+ */
+GHashTable *
+pcmk__copy_node_table(GHashTable *nodes)
+{
+ GHashTable *new_table = NULL;
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ if (nodes == NULL) {
+ return NULL;
+ }
+ new_table = pcmk__strkey_table(NULL, free);
+ g_hash_table_iter_init(&iter, nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ pe_node_t *new_node = pe__copy_node(node);
+
+ g_hash_table_insert(new_table, (gpointer) new_node->details->id,
+ new_node);
+ }
+ return new_table;
+}
+
+/*!
+ * \internal
+ * \brief Copy a list of node objects
+ *
+ * \param[in] list List to copy
+ * \param[in] reset If true, set copies' scores to 0
+ *
+ * \return New list of shallow copies of nodes in original list
+ */
+GList *
+pcmk__copy_node_list(const GList *list, bool reset)
+{
+ GList *result = NULL;
+
+ for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) {
+ pe_node_t *new_node = NULL;
+ pe_node_t *this_node = (pe_node_t *) gIter->data;
+
+ new_node = pe__copy_node(this_node);
+ if (reset) {
+ new_node->weight = 0;
+ }
+ result = g_list_prepend(result, new_node);
+ }
+ return result;
+}
+
+/*!
+ * \internal
+ * \brief Compare two nodes for allocation desirability
+ *
+ * Given two nodes, check which one is preferred by allocation criteria
+ * such as node weight and utilization.
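+ *
+ * Comparison criteria, in order: node weight; node capacity (with the
+ * "balanced" placement strategy); number of resources already assigned to
+ * the node; whether the resource is already active on the node; and finally
+ * node name. (The "minimal" strategy skips everything between weight and
+ * name.)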
+ *
+ * \param[in] a First node to compare
+ * \param[in] b Second node to compare
+ * \param[in] data Node that resource being assigned is active on, if any
+ *
+ * \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are
+ * equally preferred
+ */
+static gint
+compare_nodes(gconstpointer a, gconstpointer b, gpointer data)
+{
+ const pe_node_t *node1 = (const pe_node_t *) a;
+ const pe_node_t *node2 = (const pe_node_t *) b;
+ const pe_node_t *active = (const pe_node_t *) data;
+
+ int node1_weight = 0;
+ int node2_weight = 0;
+
+ int result = 0;
+
+ if (a == NULL) {
+ return 1;
+ }
+ if (b == NULL) {
+ return -1;
+ }
+
+ // Compare node weights
+
+ node1_weight = pcmk__node_available(node1, false, false)? node1->weight : -INFINITY;
+ node2_weight = pcmk__node_available(node2, false, false)? node2->weight : -INFINITY;
+
+ if (node1_weight > node2_weight) {
+ crm_trace("%s (%d) > %s (%d) : weight",
+ pe__node_name(node1), node1_weight, pe__node_name(node2),
+ node2_weight);
+ return -1;
+ }
+
+ if (node1_weight < node2_weight) {
+ crm_trace("%s (%d) < %s (%d) : weight",
+ pe__node_name(node1), node1_weight, pe__node_name(node2),
+ node2_weight);
+ return 1;
+ }
+
+ crm_trace("%s (%d) == %s (%d) : weight",
+ pe__node_name(node1), node1_weight, pe__node_name(node2),
+ node2_weight);
+
+ // If appropriate, compare node utilization
+
+ if (pcmk__str_eq(node1->details->data_set->placement_strategy, "minimal",
+ pcmk__str_casei)) {
+ goto equal;
+ }
+
+ if (pcmk__str_eq(node1->details->data_set->placement_strategy, "balanced",
+ pcmk__str_casei)) {
+ result = pcmk__compare_node_capacities(node1, node2);
+ if (result < 0) {
+ crm_trace("%s > %s : capacity (%d)",
+ pe__node_name(node1), pe__node_name(node2), result);
+ return -1;
+ } else if (result > 0) {
+ crm_trace("%s < %s : capacity (%d)",
+ pe__node_name(node1), pe__node_name(node2), result);
+ return 1;
+ }
+ }
+
+ // Compare number of allocated resources
+
+ if (node1->details->num_resources < node2->details->num_resources) {
+ crm_trace("%s (%d) > %s (%d) : resources",
+ pe__node_name(node1), node1->details->num_resources,
+ pe__node_name(node2), node2->details->num_resources);
+ return -1;
+
+ } else if (node1->details->num_resources > node2->details->num_resources) {
+ crm_trace("%s (%d) < %s (%d) : resources",
+ pe__node_name(node1), node1->details->num_resources,
+ pe__node_name(node2), node2->details->num_resources);
+ return 1;
+ }
+
+ // Check whether one node is already running desired resource
+
+ if (active != NULL) {
+ if (active->details == node1->details) {
+ crm_trace("%s (%d) > %s (%d) : active",
+ pe__node_name(node1), node1->details->num_resources,
+ pe__node_name(node2), node2->details->num_resources);
+ return -1;
+ } else if (active->details == node2->details) {
+ crm_trace("%s (%d) < %s (%d) : active",
+ pe__node_name(node1), node1->details->num_resources,
+ pe__node_name(node2), node2->details->num_resources);
+ return 1;
+ }
+ }
+
+ // If all else is equal, prefer node with lowest-sorting name
+equal:
+ crm_trace("%s = %s", pe__node_name(node1), pe__node_name(node2));
+ return strcmp(node1->details->uname, node2->details->uname);
+}
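+
+/* Illustrative walk-through (hypothetical values): with
+ * placement-strategy=balanced, two available nodes of equal weight are
+ * compared by capacity, then by number of assigned resources, then by which
+ * node already runs the resource, and finally by name. For example, if node1
+ * and node2 tie on weight, capacity, and resource count but the resource is
+ * active on node1, compare_nodes() returns -1 and node1 sorts first.
+ */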
+
+/*!
+ * \internal
+ * \brief Sort a list of nodes by allocation desirability
+ *
+ * \param[in,out] nodes Node list to sort
+ * \param[in] active_node Node where resource being assigned is active
+ *
+ * \return New head of sorted list
+ */
+GList *
+pcmk__sort_nodes(GList *nodes, pe_node_t *active_node)
+{
+ return g_list_sort_with_data(nodes, compare_nodes, active_node);
+}
+
+/*!
+ * \internal
+ * \brief Check whether any node is available to run resources
+ *
+ * \param[in] nodes Nodes to check
+ *
+ * \return true if any node in \p nodes is available to run resources,
+ * otherwise false
+ */
+bool
+pcmk__any_node_available(GHashTable *nodes)
+{
+ GHashTableIter iter;
+ const pe_node_t *node = NULL;
+
+ if (nodes == NULL) {
+ return false;
+ }
+ g_hash_table_iter_init(&iter, nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ if (pcmk__node_available(node, true, false)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Apply node health values for all nodes in cluster
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__apply_node_health(pe_working_set_t *data_set)
+{
+ int base_health = 0;
+ enum pcmk__health_strategy strategy;
+ const char *strategy_str = pe_pref(data_set->config_hash,
+ PCMK__OPT_NODE_HEALTH_STRATEGY);
+
+ strategy = pcmk__parse_health_strategy(strategy_str);
+ if (strategy == pcmk__health_strategy_none) {
+ return;
+ }
+ crm_info("Applying node health strategy '%s'", strategy_str);
+
+ // The progressive strategy can use a base health score
+ if (strategy == pcmk__health_strategy_progressive) {
+ base_health = pe__health_score(PCMK__OPT_NODE_HEALTH_BASE, data_set);
+ }
+
+ for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *node = (pe_node_t *) iter->data;
+ int health = pe__sum_node_health_scores(node, base_health);
+
+ // An overall health score of 0 has no effect
+ if (health == 0) {
+ continue;
+ }
+ crm_info("Overall system health of %s is %d",
+ pe__node_name(node), health);
+
+ // Use node health as a location score for each resource on the node
+ for (GList *r = data_set->resources; r != NULL; r = r->next) {
+ pe_resource_t *rsc = (pe_resource_t *) r->data;
+
+ bool constrain = true;
+
+ if (health < 0) {
+ /* Negative health scores do not apply to resources with
+ * allow-unhealthy-nodes=true.
+ */
+ constrain = !crm_is_true(g_hash_table_lookup(rsc->meta,
+ PCMK__META_ALLOW_UNHEALTHY_NODES));
+ }
+ if (constrain) {
+ pcmk__new_location(strategy_str, rsc, health, NULL, node,
+ data_set);
+ } else {
+ pe_rsc_trace(rsc, "%s is immune from health ban on %s",
+ rsc->id, pe__node_name(node));
+ }
+ }
+ }
+}
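+
+/* Hypothetical example: with node-health-strategy=progressive,
+ * node-health-base=80, and one health attribute contributing -100, a node's
+ * overall health is -20, so every resource gets a -20 location score on that
+ * node unless it sets allow-unhealthy-nodes=true.
+ */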
+
+/*!
+ * \internal
+ * \brief Check for a node in a resource's parent's allowed nodes
+ *
+ * \param[in] rsc Resource whose parent should be checked
+ * \param[in] node Node to check for
+ *
+ * \return Equivalent of \p node from \p rsc's parent's allowed nodes if any,
+ * otherwise NULL
+ */
+pe_node_t *
+pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ GHashTable *allowed_nodes = NULL;
+
+ if ((rsc == NULL) || (node == NULL)) {
+ return NULL;
+ } else if (rsc->parent == NULL) {
+ allowed_nodes = rsc->allowed_nodes;
+ } else {
+ allowed_nodes = rsc->parent->allowed_nodes;
+ }
+ return pe_hash_table_lookup(allowed_nodes, node->details->id);
+}
diff --git a/lib/pacemaker/pcmk_sched_ordering.c b/lib/pacemaker/pcmk_sched_ordering.c
new file mode 100644
index 0000000..6629999
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_ordering.c
@@ -0,0 +1,1463 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <inttypes.h> // PRIx32
+#include <stdbool.h>
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+enum pe_order_kind {
+ pe_order_kind_optional,
+ pe_order_kind_mandatory,
+ pe_order_kind_serialize,
+};
+
+enum ordering_symmetry {
+ ordering_asymmetric, // the only relation in an asymmetric ordering
+ ordering_symmetric, // the normal relation in a symmetric ordering
+ ordering_symmetric_inverse, // the inverse relation in a symmetric ordering
+};
+
+#define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \
+ __rsc = pcmk__find_constraint_resource(data_set->resources, __name); \
+ if (__rsc == NULL) { \
+ pcmk__config_err("%s: No resource found for %s", __set, __name); \
+ return pcmk_rc_unpack_error; \
+ } \
+ } while (0)
+
+static const char *
+invert_action(const char *action)
+{
+ if (pcmk__str_eq(action, RSC_START, pcmk__str_casei)) {
+ return RSC_STOP;
+
+ } else if (pcmk__str_eq(action, RSC_STOP, pcmk__str_casei)) {
+ return RSC_START;
+
+ } else if (pcmk__str_eq(action, RSC_PROMOTE, pcmk__str_casei)) {
+ return RSC_DEMOTE;
+
+ } else if (pcmk__str_eq(action, RSC_DEMOTE, pcmk__str_casei)) {
+ return RSC_PROMOTE;
+
+ } else if (pcmk__str_eq(action, RSC_PROMOTED, pcmk__str_casei)) {
+ return RSC_DEMOTED;
+
+ } else if (pcmk__str_eq(action, RSC_DEMOTED, pcmk__str_casei)) {
+ return RSC_PROMOTED;
+
+ } else if (pcmk__str_eq(action, RSC_STARTED, pcmk__str_casei)) {
+ return RSC_STOPPED;
+
+ } else if (pcmk__str_eq(action, RSC_STOPPED, pcmk__str_casei)) {
+ return RSC_STARTED;
+ }
+ crm_warn("Unknown action '%s' specified in order constraint", action);
+ return NULL;
+}
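+
+/* For example, invert_action(RSC_START) is RSC_STOP, so the symmetric inverse
+ * of "start A then start B" is "stop B then stop A" (inverse_ordering() below
+ * swaps the resources as well as the actions).
+ */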
+
+static enum pe_order_kind
+get_ordering_type(const xmlNode *xml_obj)
+{
+ enum pe_order_kind kind_e = pe_order_kind_mandatory;
+ const char *kind = crm_element_value(xml_obj, XML_ORDER_ATTR_KIND);
+
+ if (kind == NULL) {
+ const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
+
+ kind_e = pe_order_kind_mandatory;
+
+ if (score) {
+ // @COMPAT deprecated informally since 1.0.7, formally since 2.0.1
+ int score_i = char2score(score);
+
+ if (score_i == 0) {
+ kind_e = pe_order_kind_optional;
+ }
+ pe_warn_once(pe_wo_order_score,
+ "Support for 'score' in rsc_order is deprecated "
+ "and will be removed in a future release "
+ "(use 'kind' instead)");
+ }
+
+ } else if (pcmk__str_eq(kind, "Mandatory", pcmk__str_casei)) {
+ kind_e = pe_order_kind_mandatory;
+
+ } else if (pcmk__str_eq(kind, "Optional", pcmk__str_casei)) {
+ kind_e = pe_order_kind_optional;
+
+ } else if (pcmk__str_eq(kind, "Serialize", pcmk__str_casei)) {
+ kind_e = pe_order_kind_serialize;
+
+ } else {
+ pcmk__config_err("Resetting '" XML_ORDER_ATTR_KIND "' for constraint "
+ "%s to 'Mandatory' because '%s' is not valid",
+ pcmk__s(ID(xml_obj), "missing ID"), kind);
+ }
+ return kind_e;
+}
+
+/*!
+ * \internal
+ * \brief Get ordering symmetry from XML
+ *
+ * \param[in] xml_obj Ordering XML
+ * \param[in] parent_kind Default ordering kind
+ * \param[in] parent_symmetrical_s Parent element's symmetrical setting, if any
+ *
+ * \retval ordering_symmetric Ordering is symmetric
+ * \retval ordering_asymmetric Ordering is asymmetric
+ */
+static enum ordering_symmetry
+get_ordering_symmetry(const xmlNode *xml_obj, enum pe_order_kind parent_kind,
+ const char *parent_symmetrical_s)
+{
+ int rc = pcmk_rc_ok;
+ bool symmetric = false;
+ enum pe_order_kind kind = parent_kind; // Default to parent's kind
+
+ // Check ordering XML for explicit kind
+ if ((crm_element_value(xml_obj, XML_ORDER_ATTR_KIND) != NULL)
+ || (crm_element_value(xml_obj, XML_RULE_ATTR_SCORE) != NULL)) {
+ kind = get_ordering_type(xml_obj);
+ }
+
+ // Check ordering XML (and parent) for explicit symmetrical setting
+ rc = pcmk__xe_get_bool_attr(xml_obj, XML_CONS_ATTR_SYMMETRICAL, &symmetric);
+
+ if (rc != pcmk_rc_ok && parent_symmetrical_s != NULL) {
+ symmetric = crm_is_true(parent_symmetrical_s);
+ rc = pcmk_rc_ok;
+ }
+
+ if (rc == pcmk_rc_ok) {
+ if (symmetric) {
+ if (kind == pe_order_kind_serialize) {
+ pcmk__config_warn("Ignoring " XML_CONS_ATTR_SYMMETRICAL
+ " for '%s' because not valid with "
+ XML_ORDER_ATTR_KIND " of 'Serialize'",
+ ID(xml_obj));
+ } else {
+ return ordering_symmetric;
+ }
+ }
+ return ordering_asymmetric;
+ }
+
+ // Use default symmetry
+ if (kind == pe_order_kind_serialize) {
+ return ordering_asymmetric;
+ }
+ return ordering_symmetric;
+}
+
+/*!
+ * \internal
+ * \brief Get ordering flags appropriate to ordering kind
+ *
+ * \param[in] kind Ordering kind
+ * \param[in] first Action name for 'first' action
+ * \param[in] symmetry This ordering's symmetry role
+ *
+ * \return Minimal ordering flags appropriate to \p kind
+ */
+static uint32_t
+ordering_flags_for_kind(enum pe_order_kind kind, const char *first,
+ enum ordering_symmetry symmetry)
+{
+ uint32_t flags = pe_order_none; // so we trace-log all flags set
+
+ pe__set_order_flags(flags, pe_order_optional);
+
+ switch (kind) {
+ case pe_order_kind_optional:
+ break;
+
+ case pe_order_kind_serialize:
+ pe__set_order_flags(flags, pe_order_serialize_only);
+ break;
+
+ case pe_order_kind_mandatory:
+ switch (symmetry) {
+ case ordering_asymmetric:
+ pe__set_order_flags(flags, pe_order_asymmetrical);
+ break;
+
+ case ordering_symmetric:
+ pe__set_order_flags(flags, pe_order_implies_then);
+ if (pcmk__strcase_any_of(first, RSC_START, RSC_PROMOTE,
+ NULL)) {
+ pe__set_order_flags(flags, pe_order_runnable_left);
+ }
+ break;
+
+ case ordering_symmetric_inverse:
+ pe__set_order_flags(flags, pe_order_implies_first);
+ break;
+ }
+ break;
+ }
+ return flags;
+}
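+
+/* Reading the switch above: a Mandatory ordering in the symmetric direction
+ * whose 'first' action is RSC_START yields
+ * pe_order_optional|pe_order_implies_then|pe_order_runnable_left, while a
+ * Serialize ordering always yields pe_order_optional|pe_order_serialize_only.
+ */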
+
+/*!
+ * \internal
+ * \brief Find resource corresponding to ID specified in ordering
+ *
+ * \param[in] xml Ordering XML
+ * \param[in] resource_attr XML attribute name for resource ID
+ * \param[in] instance_attr XML attribute name for instance number.
+ * This option is deprecated and will be removed in a
+ * future release.
+ * \param[in] data_set Cluster working set
+ *
+ * \return Resource corresponding to \p id, or NULL if none
+ */
+static pe_resource_t *
+get_ordering_resource(const xmlNode *xml, const char *resource_attr,
+ const char *instance_attr,
+ const pe_working_set_t *data_set)
+{
+ // @COMPAT: instance_attr and instance_id variables deprecated since 2.1.5
+ pe_resource_t *rsc = NULL;
+ const char *rsc_id = crm_element_value(xml, resource_attr);
+ const char *instance_id = crm_element_value(xml, instance_attr);
+
+ if (rsc_id == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' without %s",
+ ID(xml), resource_attr);
+ return NULL;
+ }
+
+ rsc = pcmk__find_constraint_resource(data_set->resources, rsc_id);
+ if (rsc == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "does not exist", ID(xml), rsc_id);
+ return NULL;
+ }
+
+ if (instance_id != NULL) {
+ pe_warn_once(pe_wo_order_inst,
+ "Support for " XML_ORDER_ATTR_FIRST_INSTANCE " and "
+ XML_ORDER_ATTR_THEN_INSTANCE " is deprecated and will be "
+ "removed in a future release.");
+
+ if (!pe_rsc_is_clone(rsc)) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "is not a clone but instance '%s' was requested",
+ ID(xml), rsc_id, instance_id);
+ return NULL;
+ }
+ rsc = find_clone_instance(rsc, instance_id);
+ if (rsc == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "does not have an instance '%s'",
+ "'%s'", ID(xml), rsc_id, instance_id);
+ return NULL;
+ }
+ }
+ return rsc;
+}
+
+/*!
+ * \internal
+ * \brief Determine minimum number of 'first' instances required in ordering
+ *
+ * \param[in] rsc 'First' resource in ordering
+ * \param[in] xml Ordering XML
+ *
+ * \return Minimum 'first' instances required (or 0 if not applicable)
+ */
+static int
+get_minimum_first_instances(const pe_resource_t *rsc, const xmlNode *xml)
+{
+ const char *clone_min = NULL;
+ bool require_all = false;
+
+ if (!pe_rsc_is_clone(rsc)) {
+ return 0;
+ }
+
+ clone_min = g_hash_table_lookup(rsc->meta,
+ XML_RSC_ATTR_INCARNATION_MIN);
+ if (clone_min != NULL) {
+ int clone_min_int = 0;
+
+ pcmk__scan_min_int(clone_min, &clone_min_int, 0);
+ return clone_min_int;
+ }
+
+ /* @COMPAT 1.1.13:
+ * require-all=false is a deprecated equivalent of clone-min=1
+ */
+ if (pcmk__xe_get_bool_attr(xml, "require-all", &require_all) != ENODATA) {
+ pe_warn_once(pe_wo_require_all,
+ "Support for require-all in ordering constraints "
+ "is deprecated and will be removed in a future release"
+ " (use clone-min clone meta-attribute instead)");
+ if (!require_all) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
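+
+/* A hypothetical configuration that takes the clone-min path:
+ *
+ * <clone id="C">
+ * <meta_attributes id="C-meta">
+ * <nvpair id="C-min" name="clone-min" value="2"/>
+ * </meta_attributes>
+ * ...
+ * </clone>
+ * <rsc_order id="o1" first="C" then="R"/>
+ *
+ * Here this function returns 2, so unpack_simple_rsc_order() routes the
+ * constraint through clone_min_ordering() rather than creating a direct
+ * ordering between C and R.
+ */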
+
+/*!
+ * \internal
+ * \brief Create orderings for a constraint with clone-min > 0
+ *
+ * \param[in] id Ordering ID
+ * \param[in,out] rsc_first 'First' resource in ordering (a clone)
+ * \param[in] action_first 'First' action in ordering
+ * \param[in] rsc_then 'Then' resource in ordering
+ * \param[in] action_then 'Then' action in ordering
+ * \param[in] flags Ordering flags
+ * \param[in] clone_min Minimum required instances of 'first'
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+clone_min_ordering(const char *id,
+ pe_resource_t *rsc_first, const char *action_first,
+ pe_resource_t *rsc_then, const char *action_then,
+ uint32_t flags, int clone_min, pe_working_set_t *data_set)
+{
+ // Create a pseudo-action for when the minimum instances are active
+ char *task = crm_strdup_printf(CRM_OP_RELAXED_CLONE ":%s", id);
+ pe_action_t *clone_min_met = get_pseudo_op(task, data_set);
+
+ free(task);
+
+ /* Require at least clone_min instance actions to be runnable before
+ * the pseudo-action itself is considered runnable.
+ */
+ clone_min_met->required_runnable_before = clone_min;
+ pe__set_action_flags(clone_min_met, pe_action_requires_any);
+
+ // Order the actions for each clone instance before the pseudo-action
+ for (GList *rIter = rsc_first->children; rIter != NULL;
+ rIter = rIter->next) {
+
+ pe_resource_t *child = rIter->data;
+
+ pcmk__new_ordering(child, pcmk__op_key(child->id, action_first, 0),
+ NULL, NULL, NULL, clone_min_met,
+ pe_order_one_or_more|pe_order_implies_then_printed,
+ data_set);
+ }
+
+ // Order "then" action after the pseudo-action (if runnable)
+ pcmk__new_ordering(NULL, NULL, clone_min_met, rsc_then,
+ pcmk__op_key(rsc_then->id, action_then, 0),
+ NULL, flags|pe_order_runnable_left, data_set);
+}
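+
+/* Continuing the sketch above with clone-min=2 and instances C:0..C:2, this
+ * creates "C:0 start -> pseudo", "C:1 start -> pseudo", and "C:2 start ->
+ * pseudo" with required_runnable_before set to 2, plus "pseudo -> R start",
+ * so R may start once any two instance starts are runnable.
+ */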
+
+/*!
+ * \internal
+ * \brief Update ordering flags for restart-type=restart
+ *
+ * \param[in] rsc 'Then' resource in ordering
+ * \param[in] kind Ordering kind
+ * \param[in] flag Ordering flag to set (when applicable)
+ * \param[in,out] flags Ordering flag set to update
+ *
+ * \compat The restart-type resource meta-attribute is deprecated. Eventually,
+ * it will be removed, and pe_restart_ignore will be the only behavior,
+ * at which time this can just be removed entirely.
+ */
+#define handle_restart_type(rsc, kind, flag, flags) do { \
+ if (((kind) == pe_order_kind_optional) \
+ && ((rsc)->restart_type == pe_restart_restart)) { \
+ pe__set_order_flags((flags), (flag)); \
+ } \
+ } while (0)
+
+/*!
+ * \internal
+ * \brief Create new ordering for inverse of symmetric constraint
+ *
+ * \param[in] id Ordering ID (for logging only)
+ * \param[in] kind Ordering kind
+ * \param[in] rsc_first 'First' resource in ordering (a clone)
+ * \param[in] action_first 'First' action in ordering
+ * \param[in,out] rsc_then 'Then' resource in ordering
+ * \param[in] action_then 'Then' action in ordering
+ */
+static void
+inverse_ordering(const char *id, enum pe_order_kind kind,
+ pe_resource_t *rsc_first, const char *action_first,
+ pe_resource_t *rsc_then, const char *action_then)
+{
+ action_then = invert_action(action_then);
+ action_first = invert_action(action_first);
+ if ((action_then == NULL) || (action_first == NULL)) {
+ pcmk__config_warn("Cannot invert constraint '%s' "
+ "(please specify inverse manually)", id);
+ } else {
+ uint32_t flags = ordering_flags_for_kind(kind, action_first,
+ ordering_symmetric_inverse);
+
+ handle_restart_type(rsc_then, kind, pe_order_implies_first, flags);
+ pcmk__order_resource_actions(rsc_then, action_then, rsc_first,
+ action_first, flags);
+ }
+}
+
+static void
+unpack_simple_rsc_order(xmlNode *xml_obj, pe_working_set_t *data_set)
+{
+ pe_resource_t *rsc_then = NULL;
+ pe_resource_t *rsc_first = NULL;
+ int min_required_before = 0;
+ enum pe_order_kind kind = pe_order_kind_mandatory;
+ uint32_t cons_weight = pe_order_none;
+ enum ordering_symmetry symmetry;
+
+ const char *action_then = NULL;
+ const char *action_first = NULL;
+ const char *id = NULL;
+
+ CRM_CHECK(xml_obj != NULL, return);
+
+ id = crm_element_value(xml_obj, XML_ATTR_ID);
+ if (id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
+ return;
+ }
+
+ rsc_first = get_ordering_resource(xml_obj, XML_ORDER_ATTR_FIRST,
+ XML_ORDER_ATTR_FIRST_INSTANCE,
+ data_set);
+ if (rsc_first == NULL) {
+ return;
+ }
+
+ rsc_then = get_ordering_resource(xml_obj, XML_ORDER_ATTR_THEN,
+ XML_ORDER_ATTR_THEN_INSTANCE,
+ data_set);
+ if (rsc_then == NULL) {
+ return;
+ }
+
+ action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION);
+ if (action_first == NULL) {
+ action_first = RSC_START;
+ }
+
+ action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION);
+ if (action_then == NULL) {
+ action_then = action_first;
+ }
+
+ kind = get_ordering_type(xml_obj);
+
+ symmetry = get_ordering_symmetry(xml_obj, kind, NULL);
+ cons_weight = ordering_flags_for_kind(kind, action_first, symmetry);
+
+ handle_restart_type(rsc_then, kind, pe_order_implies_then, cons_weight);
+
+ /* If there is a minimum number of instances that must be runnable before
+ * the 'then' action is runnable, we use a pseudo-action for convenience:
+ * minimum number of clone instances have runnable actions ->
+ * pseudo-action is runnable -> dependency is runnable.
+ */
+ min_required_before = get_minimum_first_instances(rsc_first, xml_obj);
+ if (min_required_before > 0) {
+ clone_min_ordering(id, rsc_first, action_first, rsc_then, action_then,
+ cons_weight, min_required_before, data_set);
+ } else {
+ pcmk__order_resource_actions(rsc_first, action_first, rsc_then,
+ action_then, cons_weight);
+ }
+
+ if (symmetry == ordering_symmetric) {
+ inverse_ordering(id, kind, rsc_first, action_first,
+ rsc_then, action_then);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create a new ordering between two actions
+ *
+ * \param[in,out] first_rsc Resource for 'first' action (if NULL and
+ * \p first_action is a resource action, that
+ * resource will be used)
+ * \param[in,out] first_action_task Action key for 'first' action (if NULL and
+ * \p first_action is not NULL, its UUID will
+ * be used)
+ * \param[in,out] first_action 'first' action (if NULL, \p first_rsc and
+ * \p first_action_task must be set)
+ *
+ * \param[in] then_rsc Resource for 'then' action (if NULL and
+ * \p then_action is a resource action, that
+ * resource will be used)
+ * \param[in,out] then_action_task Action key for 'then' action (if NULL and
+ * \p then_action is not NULL, its UUID will
+ * be used)
+ * \param[in] then_action 'then' action (if NULL, \p then_rsc and
+ * \p then_action_task must be set)
+ *
+ * \param[in] flags Flag set of enum pe_ordering
+ * \param[in,out] data_set Cluster working set to add ordering to
+ *
+ * \note This function takes ownership of first_action_task and
+ * then_action_task, which do not need to be freed by the caller.
+ */
+void
+pcmk__new_ordering(pe_resource_t *first_rsc, char *first_action_task,
+ pe_action_t *first_action, pe_resource_t *then_rsc,
+ char *then_action_task, pe_action_t *then_action,
+ uint32_t flags, pe_working_set_t *data_set)
+{
+ pe__ordering_t *order = NULL;
+
+ // One of action or resource must be specified for each side
+ CRM_CHECK(((first_action != NULL) || (first_rsc != NULL))
+ && ((then_action != NULL) || (then_rsc != NULL)),
+ free(first_action_task); free(then_action_task); return);
+
+ if ((first_rsc == NULL) && (first_action != NULL)) {
+ first_rsc = first_action->rsc;
+ }
+ if ((then_rsc == NULL) && (then_action != NULL)) {
+ then_rsc = then_action->rsc;
+ }
+
+ order = calloc(1, sizeof(pe__ordering_t));
+ CRM_ASSERT(order != NULL);
+
+ order->id = data_set->order_id++;
+ order->flags = flags;
+ order->lh_rsc = first_rsc;
+ order->rh_rsc = then_rsc;
+ order->lh_action = first_action;
+ order->rh_action = then_action;
+ order->lh_action_task = first_action_task;
+ order->rh_action_task = then_action_task;
+
+ if ((order->lh_action_task == NULL) && (first_action != NULL)) {
+ order->lh_action_task = strdup(first_action->uuid);
+ }
+
+ if ((order->rh_action_task == NULL) && (then_action != NULL)) {
+ order->rh_action_task = strdup(then_action->uuid);
+ }
+
+ if ((order->lh_rsc == NULL) && (first_action != NULL)) {
+ order->lh_rsc = first_action->rsc;
+ }
+
+ if ((order->rh_rsc == NULL) && (then_action != NULL)) {
+ order->rh_rsc = then_action->rsc;
+ }
+
+ pe_rsc_trace(first_rsc, "Created ordering %d for %s then %s",
+ (data_set->order_id - 1),
+ pcmk__s(order->lh_action_task, "an underspecified action"),
+ pcmk__s(order->rh_action_task, "an underspecified action"));
+
+ data_set->ordering_constraints = g_list_prepend(data_set->ordering_constraints,
+ order);
+ pcmk__order_migration_equivalents(order);
+}
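+
+/* Minimal usage sketch (both keys are heap-allocated because this function
+ * takes ownership of them):
+ *
+ * pcmk__new_ordering(rsc1, pcmk__op_key(rsc1->id, RSC_START, 0), NULL,
+ * rsc2, pcmk__op_key(rsc2->id, RSC_START, 0), NULL,
+ * pe_order_optional, data_set);
+ */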
+
+/*!
+ * \internal
+ * \brief Unpack a set in an ordering constraint
+ *
+ * \param[in] set Set XML to unpack
+ * \param[in] parent_kind rsc_order XML "kind" attribute
+ * \param[in] parent_symmetrical_s rsc_order XML "symmetrical" attribute
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+unpack_order_set(const xmlNode *set, enum pe_order_kind parent_kind,
+ const char *parent_symmetrical_s, pe_working_set_t *data_set)
+{
+ GList *set_iter = NULL;
+ GList *resources = NULL;
+
+ pe_resource_t *last = NULL;
+ pe_resource_t *resource = NULL;
+
+ int local_kind = parent_kind;
+ bool sequential = false;
+ uint32_t flags = pe_order_optional;
+ enum ordering_symmetry symmetry;
+
+ char *key = NULL;
+ const char *id = ID(set);
+ const char *action = crm_element_value(set, "action");
+ const char *sequential_s = crm_element_value(set, "sequential");
+ const char *kind_s = crm_element_value(set, XML_ORDER_ATTR_KIND);
+
+ if (action == NULL) {
+ action = RSC_START;
+ }
+
+ if (kind_s) {
+ local_kind = get_ordering_type(set);
+ }
+ if (sequential_s == NULL) {
+ sequential_s = "1";
+ }
+
+ sequential = crm_is_true(sequential_s);
+
+ symmetry = get_ordering_symmetry(set, parent_kind, parent_symmetrical_s);
+ flags = ordering_flags_for_kind(local_kind, action, symmetry);
+
+ for (const xmlNode *xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, resource, ID(xml_rsc));
+ resources = g_list_append(resources, resource);
+ }
+
+ if (pcmk__list_of_1(resources)) {
+ crm_trace("Single set: %s", id);
+ goto done;
+ }
+
+ set_iter = resources;
+ while (set_iter != NULL) {
+ resource = (pe_resource_t *) set_iter->data;
+ set_iter = set_iter->next;
+
+ key = pcmk__op_key(resource->id, action, 0);
+
+ if (local_kind == pe_order_kind_serialize) {
+ /* Serialize before everything that comes after */
+
+ for (GList *gIter = set_iter; gIter != NULL; gIter = gIter->next) {
+ pe_resource_t *then_rsc = (pe_resource_t *) gIter->data;
+ char *then_key = pcmk__op_key(then_rsc->id, action, 0);
+
+ pcmk__new_ordering(resource, strdup(key), NULL, then_rsc,
+ then_key, NULL, flags, data_set);
+ }
+
+ } else if (sequential) {
+ if (last != NULL) {
+ pcmk__order_resource_actions(last, action, resource, action,
+ flags);
+ }
+ last = resource;
+ }
+ free(key);
+ }
+
+ if (symmetry == ordering_asymmetric) {
+ goto done;
+ }
+
+ last = NULL;
+ action = invert_action(action);
+
+ flags = ordering_flags_for_kind(local_kind, action,
+ ordering_symmetric_inverse);
+
+ set_iter = resources;
+ while (set_iter != NULL) {
+ resource = (pe_resource_t *) set_iter->data;
+ set_iter = set_iter->next;
+
+ if (sequential) {
+ if (last != NULL) {
+ pcmk__order_resource_actions(resource, action, last, action,
+ flags);
+ }
+ last = resource;
+ }
+ }
+
+ done:
+ g_list_free(resources);
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Order two resource sets relative to each other
+ *
+ * \param[in] id Ordering ID (for logging)
+ * \param[in] set1 First listed set
+ * \param[in] set2 Second listed set
+ * \param[in] kind Ordering kind
+ * \param[in,out] data_set Cluster working set
+ * \param[in] symmetry Which ordering symmetry applies to this relation
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+order_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2,
+ enum pe_order_kind kind, pe_working_set_t *data_set,
+ enum ordering_symmetry symmetry)
+{
+
+ const xmlNode *xml_rsc = NULL;
+ const xmlNode *xml_rsc_2 = NULL;
+
+ pe_resource_t *rsc_1 = NULL;
+ pe_resource_t *rsc_2 = NULL;
+
+ const char *action_1 = crm_element_value(set1, "action");
+ const char *action_2 = crm_element_value(set2, "action");
+
+ uint32_t flags = pe_order_none;
+
+ bool require_all = true;
+
+ (void) pcmk__xe_get_bool_attr(set1, "require-all", &require_all);
+
+ if (action_1 == NULL) {
+ action_1 = RSC_START;
+ }
+
+ if (action_2 == NULL) {
+ action_2 = RSC_START;
+ }
+
+ if (symmetry == ordering_symmetric_inverse) {
+ action_1 = invert_action(action_1);
+ action_2 = invert_action(action_2);
+ }
+
+ if (pcmk__str_eq(RSC_STOP, action_1, pcmk__str_casei)
+ || pcmk__str_eq(RSC_DEMOTE, action_1, pcmk__str_casei)) {
+ /* Assuming: A -> (B || C) -> D
+ * The one-or-more logic only applies during the start/promote phase.
+ * During shutdown, neither B nor C can shut down until D is down, so
+ * simply turn require_all back on.
+ */
+ require_all = true;
+ }
+
+ flags = ordering_flags_for_kind(kind, action_1, symmetry);
+
+ /* If set1 is unordered (require-all=false), whether it is sequential is
+ * irrelevant with regard to set2.
+ */
+ if (!require_all) {
+ char *task = crm_strdup_printf(CRM_OP_RELAXED_SET ":%s", ID(set1));
+ pe_action_t *unordered_action = get_pseudo_op(task, data_set);
+
+ free(task);
+ pe__set_action_flags(unordered_action, pe_action_requires_any);
+
+ for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+
+ /* Add an ordering constraint between every element in set1 and the
+ * pseudo action. If any action in set1 is runnable the pseudo
+ * action will be runnable.
+ */
+ pcmk__new_ordering(rsc_1, pcmk__op_key(rsc_1->id, action_1, 0),
+ NULL, NULL, NULL, unordered_action,
+ pe_order_one_or_more|pe_order_implies_then_printed,
+ data_set);
+ }
+ for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2));
+
+ /* Add an ordering constraint between the pseudo-action and every
+ * element in set2. If the pseudo-action is runnable, every action
+ * in set2 will be runnable.
+ */
+ pcmk__new_ordering(NULL, NULL, unordered_action,
+ rsc_2, pcmk__op_key(rsc_2->id, action_2, 0),
+ NULL, flags|pe_order_runnable_left, data_set);
+ }
+
+ return pcmk_rc_ok;
+ }
+
+ if (pcmk__xe_attr_is_true(set1, "sequential")) {
+ if (symmetry == ordering_symmetric_inverse) {
+ // Get the first one
+ xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ if (xml_rsc != NULL) {
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+ }
+
+ } else {
+ // Get the last one
+ const char *rid = NULL;
+
+ for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ rid = ID(xml_rsc);
+ }
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, rid);
+ }
+ }
+
+ if (pcmk__xe_attr_is_true(set2, "sequential")) {
+ if (symmetry == ordering_symmetric_inverse) {
+ // Get the last one
+ const char *rid = NULL;
+
+ for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ rid = ID(xml_rsc);
+ }
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid);
+
+ } else {
+ // Get the first one
+ xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ if (xml_rsc != NULL) {
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc));
+ }
+ }
+ }
+
+ if ((rsc_1 != NULL) && (rsc_2 != NULL)) {
+ pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags);
+
+ } else if (rsc_1 != NULL) {
+ for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc));
+ pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2,
+ flags);
+ }
+
+ } else if (rsc_2 != NULL) {
+ for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+ pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2,
+ flags);
+ }
+
+ } else {
+ for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+
+ for (xmlNode *xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF);
+ xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) {
+
+ EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2));
+ pcmk__order_resource_actions(rsc_1, action_1, rsc_2,
+ action_2, flags);
+ }
+ }
+ }
+
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief If an ordering constraint uses resource tags, expand them
+ *
+ * \param[in,out] xml_obj Ordering constraint XML
+ * \param[out] expanded_xml Equivalent XML with tags expanded
+ * \param[in] data_set Cluster working set
+ *
+ * \return Standard Pacemaker return code (specifically, pcmk_rc_ok on success,
+ * and pcmk_rc_unpack_error on invalid configuration)
+ */
+static int
+unpack_order_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
+ const pe_working_set_t *data_set)
+{
+ const char *id_first = NULL;
+ const char *id_then = NULL;
+ const char *action_first = NULL;
+ const char *action_then = NULL;
+
+ pe_resource_t *rsc_first = NULL;
+ pe_resource_t *rsc_then = NULL;
+ pe_tag_t *tag_first = NULL;
+ pe_tag_t *tag_then = NULL;
+
+ xmlNode *rsc_set_first = NULL;
+ xmlNode *rsc_set_then = NULL;
+ bool any_sets = false;
+
+ // Check whether there are any resource sets with template or tag references
+ *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set);
+ if (*expanded_xml != NULL) {
+ crm_log_xml_trace(*expanded_xml, "Expanded rsc_order");
+ return pcmk_rc_ok;
+ }
+
+ id_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST);
+ id_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN);
+ if ((id_first == NULL) || (id_then == NULL)) {
+ return pcmk_rc_ok;
+ }
+
+ if (!pcmk__valid_resource_or_tag(data_set, id_first, &rsc_first,
+ &tag_first)) {
+ pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+ "valid resource or tag", ID(xml_obj), id_first);
+ return pcmk_rc_unpack_error;
+ }
+
+ if (!pcmk__valid_resource_or_tag(data_set, id_then, &rsc_then, &tag_then)) {
+ pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+ "valid resource or tag", ID(xml_obj), id_then);
+ return pcmk_rc_unpack_error;
+ }
+
+ if ((rsc_first != NULL) && (rsc_then != NULL)) {
+ // Neither side references a template or tag
+ return pcmk_rc_ok;
+ }
+
+ action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION);
+ action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION);
+
+ *expanded_xml = copy_xml(xml_obj);
+
+ // Convert template/tag reference in "first" into resource_set under constraint
+ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_first, XML_ORDER_ATTR_FIRST,
+ true, data_set)) {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ return pcmk_rc_unpack_error;
+ }
+
+ if (rsc_set_first != NULL) {
+ if (action_first != NULL) {
+ // Move "first-action" into converted resource_set as "action"
+ crm_xml_add(rsc_set_first, "action", action_first);
+ xml_remove_prop(*expanded_xml, XML_ORDER_ATTR_FIRST_ACTION);
+ }
+ any_sets = true;
+ }
+
+ // Convert template/tag reference in "then" into resource_set under constraint
+ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_then, XML_ORDER_ATTR_THEN,
+ true, data_set)) {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ return pcmk_rc_unpack_error;
+ }
+
+ if (rsc_set_then != NULL) {
+ if (action_then != NULL) {
+ // Move "then-action" into converted resource_set as "action"
+ crm_xml_add(rsc_set_then, "action", action_then);
+ xml_remove_prop(*expanded_xml, XML_ORDER_ATTR_THEN_ACTION);
+ }
+ any_sets = true;
+ }
+
+ if (any_sets) {
+ crm_log_xml_trace(*expanded_xml, "Expanded rsc_order");
+ } else {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ }
+
+ return pcmk_rc_ok;
+}
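+
+/* Hypothetical expansion: if tag "web" refers to rsc1 and rsc2, a constraint
+ *
+ * <rsc_order id="o1" first="web" first-action="start" then="db"/>
+ *
+ * is rewritten so that the "first" side becomes a resource_set containing
+ * <resource_ref id="rsc1"/> and <resource_ref id="rsc2"/>, with
+ * action="start" moved onto the set (the exact set ID and attributes are
+ * determined by pcmk__tag_to_set()).
+ */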
+
+/*!
+ * \internal
+ * \brief Unpack ordering constraint XML
+ *
+ * \param[in,out] xml_obj Ordering constraint XML to unpack
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set)
+{
+ xmlNode *set = NULL;
+ xmlNode *last = NULL;
+
+ xmlNode *orig_xml = NULL;
+ xmlNode *expanded_xml = NULL;
+
+ const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
+ const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL);
+ enum pe_order_kind kind = get_ordering_type(xml_obj);
+
+ enum ordering_symmetry symmetry = get_ordering_symmetry(xml_obj, kind,
+ NULL);
+
+ // Expand any resource tags in the constraint XML
+ if (unpack_order_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) {
+ return;
+ }
+ if (expanded_xml != NULL) {
+ orig_xml = xml_obj;
+ xml_obj = expanded_xml;
+ }
+
+ // If the constraint has resource sets, unpack them
+ for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET);
+ set != NULL; set = crm_next_same_xml(set)) {
+
+ set = expand_idref(set, data_set->input);
+ if ((set == NULL) // Configuration error, message already logged
+ || (unpack_order_set(set, kind, invert, data_set) != pcmk_rc_ok)) {
+
+ if (expanded_xml != NULL) {
+ free_xml(expanded_xml);
+ }
+ return;
+ }
+
+ if (last != NULL) {
+
+ if (order_rsc_sets(id, last, set, kind, data_set,
+ symmetry) != pcmk_rc_ok) {
+ if (expanded_xml != NULL) {
+ free_xml(expanded_xml);
+ }
+ return;
+ }
+
+ if ((symmetry == ordering_symmetric)
+ && (order_rsc_sets(id, set, last, kind, data_set,
+ ordering_symmetric_inverse) != pcmk_rc_ok)) {
+ if (expanded_xml != NULL) {
+ free_xml(expanded_xml);
+ }
+ return;
+ }
+
+ }
+ last = set;
+ }
+
+ if (expanded_xml) {
+ free_xml(expanded_xml);
+ xml_obj = orig_xml;
+ }
+
+ // If the constraint has no resource sets, unpack it as a simple ordering
+ if (last == NULL) {
+ return unpack_simple_rsc_order(xml_obj, data_set);
+ }
+}
+
+static bool
+ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input)
+{
+ /* Prevent user-defined ordering constraints between resources
+ * running in a guest node and the resource that defines that node.
+ */
+ if (!pcmk_is_set(input->type, pe_order_preserve)
+ && (input->action->rsc != NULL)
+ && pcmk__rsc_corresponds_to_guest(action->rsc, input->action->node)) {
+
+ crm_warn("Invalid ordering constraint between %s and %s",
+ input->action->rsc->id, action->rsc->id);
+ return true;
+ }
+
+ /* If there's an order like
+ * "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1"
+ *
+ * then rscA is being migrated from node1 to node2, while rscB is being
+ * migrated from node2 to node1. If there would be a graph loop,
+ * break the order "load_stopped_node2" -> "rscA_migrate_to node1".
+ */
+ if ((input->type == pe_order_load) && action->rsc
+ && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)
+ && pcmk__graph_has_loop(action, action, input)) {
+ return true;
+ }
+
+ return false;
+}
+
+void
+pcmk__disable_invalid_orderings(pe_working_set_t *data_set)
+{
+ for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
+ pe_action_t *action = (pe_action_t *) iter->data;
+ pe_action_wrapper_t *input = NULL;
+
+ for (GList *input_iter = action->actions_before;
+ input_iter != NULL; input_iter = input_iter->next) {
+
+ input = (pe_action_wrapper_t *) input_iter->data;
+ if (ordering_is_invalid(action, input)) {
+ input->type = pe_order_none;
+ }
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Order stops on a node before the node's shutdown
+ *
+ * \param[in,out] node Node being shut down
+ * \param[in] shutdown_op Shutdown action for node
+ */
+void
+pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op)
+{
+ for (GList *iter = node->details->data_set->actions;
+ iter != NULL; iter = iter->next) {
+
+ pe_action_t *action = (pe_action_t *) iter->data;
+
+ // Only stops on the node shutting down are relevant
+ if ((action->rsc == NULL) || (action->node == NULL)
+ || (action->node->details != node->details)
+ || !pcmk__str_eq(action->task, RSC_STOP, pcmk__str_casei)) {
+ continue;
+ }
+
+ // Resources and nodes in maintenance mode won't be touched
+
+ if (pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)) {
+ pe_rsc_trace(action->rsc,
+ "Not ordering %s before shutdown of %s because "
+ "resource in maintenance mode",
+ action->uuid, pe__node_name(node));
+ continue;
+
+ } else if (node->details->maintenance) {
+ pe_rsc_trace(action->rsc,
+ "Not ordering %s before shutdown of %s because "
+ "node in maintenance mode",
+ action->uuid, pe__node_name(node));
+ continue;
+ }
+
+ /* Don't touch a resource that is unmanaged or blocked, to avoid
+ * blocking the shutdown (though if another action depends on this one,
+ * we may still end up blocking)
+ */
+ if (!pcmk_any_flags_set(action->rsc->flags,
+ pe_rsc_managed|pe_rsc_block)) {
+ pe_rsc_trace(action->rsc,
+ "Not ordering %s before shutdown of %s because "
+ "resource is unmanaged or blocked",
+ action->uuid, pe__node_name(node));
+ continue;
+ }
+
+ pe_rsc_trace(action->rsc, "Ordering %s before shutdown of %s",
+ action->uuid, pe__node_name(node));
+ pe__clear_action_flags(action, pe_action_optional);
+ pcmk__new_ordering(action->rsc, NULL, action, NULL,
+ strdup(CRM_OP_SHUTDOWN), shutdown_op,
+ pe_order_optional|pe_order_runnable_left,
+ node->details->data_set);
+ }
+}
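+
+/* The resulting dependency is "resource stop on node -> node shutdown":
+ * clearing pe_action_optional makes the stop mandatory, and
+ * pe_order_runnable_left keeps the shutdown unrunnable while any such stop is
+ * unrunnable.
+ */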
+
+/*!
+ * \internal
+ * \brief Find resource actions matching directly or as a child
+ *
+ * \param[in] rsc Resource to check
+ * \param[in] original_key Action key to search for (possibly referencing
+ * parent of \p rsc)
+ *
+ * \return Newly allocated list of matching actions
+ * \note It is the caller's responsibility to free the result with g_list_free()
+ */
+static GList *
+find_actions_by_task(const pe_resource_t *rsc, const char *original_key)
+{
+ // Search under given task key directly
+ GList *list = find_actions(rsc->actions, original_key, NULL);
+
+ if (list == NULL) {
+ // Search again using this resource's ID
+ char *key = NULL;
+ char *task = NULL;
+ guint interval_ms = 0;
+
+ if (parse_op_key(original_key, NULL, &task, &interval_ms)) {
+ key = pcmk__op_key(rsc->id, task, interval_ms);
+ list = find_actions(rsc->actions, key, NULL);
+ free(key);
+ free(task);
+ } else {
+ crm_err("Invalid operation key (bug?): %s", original_key);
+ }
+ }
+ return list;
+}
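+
+/* Example (hypothetical IDs): original_key may be "grp_stop_0", built from a
+ * parent's ID; if no action matches directly, the key is rebuilt with this
+ * resource's own ID (say "rsc1_stop_0") and the search is repeated.
+ */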
+
+/*!
+ * \internal
+ * \brief Order relevant resource actions after a given action
+ *
+ * \param[in,out] first_action Action to order after (or NULL if none runnable)
+ * \param[in] rsc Resource whose actions should be ordered
+ * \param[in,out] order Ordering constraint being applied
+ */
+static void
+order_resource_actions_after(pe_action_t *first_action,
+ const pe_resource_t *rsc, pe__ordering_t *order)
+{
+ GList *then_actions = NULL;
+ uint32_t flags = pe_order_none;
+
+ CRM_CHECK((rsc != NULL) && (order != NULL), return);
+
+ flags = order->flags;
+ pe_rsc_trace(rsc, "Applying ordering %d for 'then' resource %s",
+ order->id, rsc->id);
+
+ if (order->rh_action != NULL) {
+ then_actions = g_list_prepend(NULL, order->rh_action);
+
+ } else {
+ then_actions = find_actions_by_task(rsc, order->rh_action_task);
+ }
+
+ if (then_actions == NULL) {
+ pe_rsc_trace(rsc, "Ignoring ordering %d: no %s actions found for %s",
+ order->id, order->rh_action_task, rsc->id);
+ return;
+ }
+
+ if ((first_action != NULL) && (first_action->rsc == rsc)
+ && pcmk_is_set(first_action->flags, pe_action_dangle)) {
+
+ pe_rsc_trace(rsc,
+ "Detected dangling migration ordering (%s then %s %s)",
+ first_action->uuid, order->rh_action_task, rsc->id);
+ pe__clear_order_flags(flags, pe_order_implies_then);
+ }
+
+ if ((first_action == NULL) && !pcmk_is_set(flags, pe_order_implies_then)) {
+ pe_rsc_debug(rsc,
+ "Ignoring ordering %d for %s: No first action found",
+ order->id, rsc->id);
+ g_list_free(then_actions);
+ return;
+ }
+
+ for (GList *iter = then_actions; iter != NULL; iter = iter->next) {
+ pe_action_t *then_action_iter = (pe_action_t *) iter->data;
+
+ if (first_action != NULL) {
+ order_actions(first_action, then_action_iter, flags);
+ } else {
+ pe__clear_action_flags(then_action_iter, pe_action_runnable);
+ crm_warn("%s of %s is unrunnable because there is no %s of %s "
+ "to order it after", then_action_iter->task, rsc->id,
+ order->lh_action_task, order->lh_rsc->id);
+ }
+ }
+
+ g_list_free(then_actions);
+}
+
+static void
+rsc_order_first(pe_resource_t *first_rsc, pe__ordering_t *order,
+ pe_working_set_t *data_set)
+{
+ GList *first_actions = NULL;
+ pe_action_t *first_action = order->lh_action;
+ pe_resource_t *then_rsc = order->rh_rsc;
+
+ CRM_ASSERT(first_rsc != NULL);
+ pe_rsc_trace(first_rsc, "Applying ordering constraint %d (first: %s)",
+ order->id, first_rsc->id);
+
+ if (first_action != NULL) {
+ first_actions = g_list_prepend(NULL, first_action);
+
+ } else {
+ first_actions = find_actions_by_task(first_rsc, order->lh_action_task);
+ }
+
+ if ((first_actions == NULL) && (first_rsc == then_rsc)) {
+ pe_rsc_trace(first_rsc,
+ "Ignoring constraint %d: first (%s for %s) not found",
+ order->id, order->lh_action_task, first_rsc->id);
+
+ } else if (first_actions == NULL) {
+ char *key = NULL;
+ char *op_type = NULL;
+ guint interval_ms = 0;
+
+ parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms);
+ key = pcmk__op_key(first_rsc->id, op_type, interval_ms);
+
+ if ((first_rsc->fns->state(first_rsc, TRUE) == RSC_ROLE_STOPPED)
+ && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) {
+ free(key);
+ pe_rsc_trace(first_rsc,
+ "Ignoring constraint %d: first (%s for %s) not found",
+ order->id, order->lh_action_task, first_rsc->id);
+
+ } else if ((first_rsc->fns->state(first_rsc, TRUE) == RSC_ROLE_UNPROMOTED)
+ && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) {
+ free(key);
+ pe_rsc_trace(first_rsc,
+ "Ignoring constraint %d: first (%s for %s) not found",
+ order->id, order->lh_action_task, first_rsc->id);
+
+ } else {
+ pe_rsc_trace(first_rsc,
+ "Creating first (%s for %s) for constraint %d ",
+ order->lh_action_task, first_rsc->id, order->id);
+ first_action = custom_action(first_rsc, key, op_type, NULL, TRUE,
+ TRUE, data_set);
+ first_actions = g_list_prepend(NULL, first_action);
+ }
+
+ free(op_type);
+ }
+
+ if (then_rsc == NULL) {
+ if (order->rh_action == NULL) {
+ pe_rsc_trace(first_rsc, "Ignoring constraint %d: then not found",
+ order->id);
+ return;
+ }
+ then_rsc = order->rh_action->rsc;
+ }
+ for (GList *gIter = first_actions; gIter != NULL; gIter = gIter->next) {
+ first_action = (pe_action_t *) gIter->data;
+
+ if (then_rsc == NULL) {
+ order_actions(first_action, order->rh_action, order->flags);
+
+ } else {
+ order_resource_actions_after(first_action, then_rsc, order);
+ }
+ }
+
+ g_list_free(first_actions);
+}
+
+void
+pcmk__apply_orderings(pe_working_set_t *data_set)
+{
+ crm_trace("Applying ordering constraints");
+
+ /* Ordering constraints need to be processed in the order they were created.
+ * rsc_order_first() and order_resource_actions_after() require the relevant
+ * actions to already exist in some cases, but rsc_order_first() will create
+ * the 'first' action in certain cases. Thus calling rsc_order_first() can
+ * change the behavior of later-created orderings.
+ *
+ * Also, g_list_append() should be avoided for performance reasons, so we
+ * prepend orderings when creating them and reverse the list here.
+ *
+ * @TODO This is brittle and should be carefully redesigned so that the
+ * order of creation doesn't matter, and the reverse becomes unneeded.
+ */
+ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
+
+ for (GList *gIter = data_set->ordering_constraints;
+ gIter != NULL; gIter = gIter->next) {
+
+ pe__ordering_t *order = gIter->data;
+ pe_resource_t *rsc = order->lh_rsc;
+
+ if (rsc != NULL) {
+ rsc_order_first(rsc, order, data_set);
+ continue;
+ }
+
+ rsc = order->rh_rsc;
+ if (rsc != NULL) {
+ order_resource_actions_after(order->lh_action, rsc, order);
+
+ } else {
+ crm_trace("Applying ordering constraint %d (non-resource actions)",
+ order->id);
+ order_actions(order->lh_action, order->rh_action, order->flags);
+ }
+ }
+
+ g_list_foreach(data_set->actions, (GFunc) pcmk__block_colocation_dependents,
+ data_set);
+
+ crm_trace("Ordering probes");
+ pcmk__order_probes(data_set);
+
+ crm_trace("Updating %d actions", g_list_length(data_set->actions));
+ g_list_foreach(data_set->actions,
+ (GFunc) pcmk__update_action_for_orderings, data_set);
+
+ pcmk__disable_invalid_orderings(data_set);
+}
+
+/*!
+ * \internal
+ * \brief Order a given action after each action in a given list
+ *
+ * \param[in,out] after "After" action
+ * \param[in,out] list List of "before" actions
+ */
+void
+pcmk__order_after_each(pe_action_t *after, GList *list)
+{
+ const char *after_desc = (after->task == NULL)? after->uuid : after->task;
+
+ for (GList *iter = list; iter != NULL; iter = iter->next) {
+ pe_action_t *before = (pe_action_t *) iter->data;
+ const char *before_desc = before->task? before->task : before->uuid;
+
+ crm_debug("Ordering %s on %s before %s on %s",
+ before_desc, pe__node_name(before->node),
+ after_desc, pe__node_name(after->node));
+ order_actions(before, after, pe_order_optional);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Order promotions and demotions for restarts of a clone or bundle
+ *
+ * \param[in,out] rsc Clone or bundle to order
+ */
+void
+pcmk__promotable_restart_ordering(pe_resource_t *rsc)
+{
+ // Order start and promote after all instances are stopped
+ pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
+ pe_order_optional);
+ pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_PROMOTE,
+ pe_order_optional);
+
+ // Order stop, start, and promote after all instances are demoted
+ pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
+ pe_order_optional);
+ pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_START,
+ pe_order_optional);
+ pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_PROMOTE,
+ pe_order_optional);
+
+ // Order promote after all instances are started
+ pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
+ pe_order_optional);
+
+ // Order demote after all instances are demoted
+ pcmk__order_resource_actions(rsc, RSC_DEMOTE, rsc, RSC_DEMOTED,
+ pe_order_optional);
+}
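+
+/* Taken together, these constraints force a full restart of a promotable
+ * clone into the sequence demote -> demoted -> stop -> stopped -> start ->
+ * started -> promote, with each pseudo-event gating the next phase.
+ */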
diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c
new file mode 100644
index 0000000..aefbf9a
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_primitive.c
@@ -0,0 +1,1573 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
+static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
+static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
+static void promote_resource(pe_resource_t *rsc, pe_node_t *node,
+ bool optional);
+static void assert_role_error(pe_resource_t *rsc, pe_node_t *node,
+ bool optional);
+
+static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
+ /* This array lists the immediate next role when transitioning from one role
+ * to a target role. For example, when going from Stopped to Promoted, the
+ * next role is Unpromoted, because the resource must be started before it
+ * can be promoted. The current role then becomes Unpromoted, which is fed
+ * into this array again, giving a next role of Promoted.
+ *
+ * Current role Immediate next role Final target role
+ * ------------ ------------------- -----------------
+ */
+ /* Unknown */ { RSC_ROLE_UNKNOWN, /* Unknown */
+ RSC_ROLE_STOPPED, /* Stopped */
+ RSC_ROLE_STOPPED, /* Started */
+ RSC_ROLE_STOPPED, /* Unpromoted */
+ RSC_ROLE_STOPPED, /* Promoted */
+ },
+ /* Stopped */ { RSC_ROLE_STOPPED, /* Unknown */
+ RSC_ROLE_STOPPED, /* Stopped */
+ RSC_ROLE_STARTED, /* Started */
+ RSC_ROLE_UNPROMOTED, /* Unpromoted */
+ RSC_ROLE_UNPROMOTED, /* Promoted */
+ },
+ /* Started */ { RSC_ROLE_STOPPED, /* Unknown */
+ RSC_ROLE_STOPPED, /* Stopped */
+ RSC_ROLE_STARTED, /* Started */
+ RSC_ROLE_UNPROMOTED, /* Unpromoted */
+ RSC_ROLE_PROMOTED, /* Promoted */
+ },
+ /* Unpromoted */ { RSC_ROLE_STOPPED, /* Unknown */
+ RSC_ROLE_STOPPED, /* Stopped */
+ RSC_ROLE_STOPPED, /* Started */
+ RSC_ROLE_UNPROMOTED, /* Unpromoted */
+ RSC_ROLE_PROMOTED, /* Promoted */
+ },
+ /* Promoted */ { RSC_ROLE_STOPPED, /* Unknown */
+ RSC_ROLE_UNPROMOTED, /* Stopped */
+ RSC_ROLE_UNPROMOTED, /* Started */
+ RSC_ROLE_UNPROMOTED, /* Unpromoted */
+ RSC_ROLE_PROMOTED, /* Promoted */
+ },
+};
+
+/*!
+ * \internal
+ * \brief Function to schedule actions needed for a role change
+ *
+ * \param[in,out] rsc Resource whose role is changing
+ * \param[in,out] node Node where resource will be in its next role
+ * \param[in] optional Whether scheduled actions should be optional
+ */
+typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node,
+ bool optional);
+
+static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
+ /* This array lists the function needed to transition directly from one role
+ * to another. NULL indicates that nothing is needed.
+ *
+ * Current role Transition function Next role
+ * ------------ ------------------- ----------
+ */
+ /* Unknown */ { assert_role_error, /* Unknown */
+ stop_resource, /* Stopped */
+ assert_role_error, /* Started */
+ assert_role_error, /* Unpromoted */
+ assert_role_error, /* Promoted */
+ },
+ /* Stopped */ { assert_role_error, /* Unknown */
+ NULL, /* Stopped */
+ start_resource, /* Started */
+ start_resource, /* Unpromoted */
+ assert_role_error, /* Promoted */
+ },
+ /* Started */ { assert_role_error, /* Unknown */
+ stop_resource, /* Stopped */
+ NULL, /* Started */
+ NULL, /* Unpromoted */
+ promote_resource, /* Promoted */
+ },
+ /* Unpromoted */ { assert_role_error, /* Unknown */
+ stop_resource, /* Stopped */
+ stop_resource, /* Started */
+ NULL, /* Unpromoted */
+ promote_resource, /* Promoted */
+ },
+ /* Promoted */ { assert_role_error, /* Unknown */
+ demote_resource, /* Stopped */
+ demote_resource, /* Started */
+ demote_resource, /* Unpromoted */
+ NULL, /* Promoted */
+ },
+};
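+
+/* Worked example combining the two matrices: to take a Promoted resource to
+ * Stopped, rsc_action_matrix[Promoted][Stopped] schedules demote_resource()
+ * while rsc_state_matrix[Promoted][Stopped] yields Unpromoted as the next
+ * role; the following step finds rsc_action_matrix[Unpromoted][Stopped] ==
+ * stop_resource(), so the resource is demoted and then stopped.
+ */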
+
+/*!
+ * \internal
+ * \brief Get a list of a resource's allowed nodes sorted by node weight
+ *
+ * \param[in] rsc Resource to check
+ *
+ * \return List of allowed nodes sorted by node weight
+ */
+static GList *
+sorted_allowed_nodes(const pe_resource_t *rsc)
+{
+ if (rsc->allowed_nodes != NULL) {
+ GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
+
+ if (nodes != NULL) {
+ return pcmk__sort_nodes(nodes, pe__current_node(rsc));
+ }
+ }
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Assign a resource to its best allowed node, if possible
+ *
+ * \param[in,out] rsc Resource to choose a node for
+ * \param[in] prefer If not NULL, prefer this node when all else equal
+ *
+ * \return true if \p rsc could be assigned to a node, otherwise false
+ */
+static bool
+assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer)
+{
+ GList *nodes = NULL;
+ pe_node_t *chosen = NULL;
+ pe_node_t *best = NULL;
+ bool result = false;
+ const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
+
+ if (prefer == NULL) {
+ prefer = most_free_node;
+ }
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ // We've already finished assignment of resources to nodes
+ return rsc->allocated_to != NULL;
+ }
+
+ // Sort allowed nodes by weight
+ nodes = sorted_allowed_nodes(rsc);
+ if (nodes != NULL) {
+ best = (pe_node_t *) nodes->data; // First node has best score
+ }
+
+ if ((prefer != NULL) && (nodes != NULL)) {
+ // Get the allowed node version of prefer
+ chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
+
+ if (chosen == NULL) {
+ pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
+ pe__node_name(prefer), rsc->id);
+
+ /* Favor the preferred node as long as its weight is at least as good as
+ * the best allowed node's.
+ *
+ * An alternative would be to favor the preferred node even if the best
+ * node is better, when the best node's weight is less than INFINITY.
+ */
+ } else if (chosen->weight < best->weight) {
+ pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
+ pe__node_name(chosen), rsc->id);
+ chosen = NULL;
+
+ } else if (!pcmk__node_available(chosen, true, false)) {
+ pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
+ pe__node_name(chosen), rsc->id);
+ chosen = NULL;
+
+ } else {
+ pe_rsc_trace(rsc,
+ "Chose preferred node %s for %s (ignoring %d candidates)",
+ pe__node_name(chosen), rsc->id, g_list_length(nodes));
+ }
+ }
+
+ if ((chosen == NULL) && (best != NULL)) {
+ /* Either there is no preferred node, or the preferred node is not
+ * suitable, but another node is allowed to run the resource.
+ */
+
+ chosen = best;
+
+ if (!pe_rsc_is_unique_clone(rsc->parent)
+ && (chosen->weight > 0) // Zero not acceptable
+ && pcmk__node_available(chosen, false, false)) {
+ /* If the resource is already running on a node, prefer that node if
+ * it is just as good as the chosen node.
+ *
+ * We don't do this for unique clone instances, because
+ * pcmk__assign_instances() has already assigned instances to their
+ * running nodes when appropriate, and if we get here, we don't want
+ * remaining unassigned instances to prefer a node that's already
+ * running another instance.
+ */
+ pe_node_t *running = pe__current_node(rsc);
+
+ if (running == NULL) {
+ // Nothing to do
+
+ } else if (!pcmk__node_available(running, true, false)) {
+ pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
+ rsc->id, pe__node_name(running));
+
+ } else {
+ int nodes_with_best_score = 1;
+
+ for (GList *iter = nodes->next; iter; iter = iter->next) {
+ pe_node_t *allowed = (pe_node_t *) iter->data;
+
+ if (allowed->weight != chosen->weight) {
+ // The nodes are sorted by weight, so no more are equal
+ break;
+ }
+ if (pe__same_node(allowed, running)) {
+ // Scores are equal, so prefer the current node
+ chosen = allowed;
+ }
+ nodes_with_best_score++;
+ }
+
+ if (nodes_with_best_score > 1) {
+ do_crm_log(((chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO),
+ "Chose %s for %s from %d nodes with score %s",
+ pe__node_name(chosen), rsc->id,
+ nodes_with_best_score,
+ pcmk_readable_score(chosen->weight));
+ }
+ }
+ }
+
+ pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
+ pe__node_name(chosen), rsc->id, g_list_length(nodes));
+ }
+
+ result = pcmk__finalize_assignment(rsc, chosen, false);
+ g_list_free(nodes);
+ return result;
+}
+
+/*!
+ * \internal
+ * \brief Apply a "this with" colocation to a node's allowed node scores
+ *
+ * \param[in,out] data Colocation to apply
+ * \param[in,out] user_data Resource being assigned
+ */
+static void
+apply_this_with(gpointer data, gpointer user_data)
+{
+ pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
+ pe_resource_t *rsc = (pe_resource_t *) user_data;
+
+ GHashTable *archive = NULL;
+ pe_resource_t *other = colocation->primary;
+
+ // In certain cases, we will need to revert the node scores
+ if ((colocation->dependent_role >= RSC_ROLE_PROMOTED)
+ || ((colocation->score < 0) && (colocation->score > -INFINITY))) {
+ archive = pcmk__copy_node_table(rsc->allowed_nodes);
+ }
+
+ if (pcmk_is_set(other->flags, pe_rsc_provisional)) {
+ pe_rsc_trace(rsc,
+ "%s: Assigning colocation %s primary %s first"
+ "(score=%d role=%s)",
+ rsc->id, colocation->id, other->id,
+ colocation->score, role2text(colocation->dependent_role));
+ other->cmds->assign(other, NULL);
+ }
+
+ // Apply the colocation score to this resource's allowed node scores
+ rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
+ if ((archive != NULL)
+ && !pcmk__any_node_available(rsc->allowed_nodes)) {
+ pe_rsc_info(rsc,
+ "%s: Reverting scores from colocation with %s "
+ "because no nodes allowed",
+ rsc->id, other->id);
+ g_hash_table_destroy(rsc->allowed_nodes);
+ rsc->allowed_nodes = archive;
+ archive = NULL;
+ }
+ if (archive != NULL) {
+ g_hash_table_destroy(archive);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update a Pacemaker Remote node once its connection has been assigned
+ *
+ * \param[in] connection Connection resource that has been assigned
+ */
+static void
+remote_connection_assigned(const pe_resource_t *connection)
+{
+ pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
+ connection->id);
+
+ CRM_CHECK(remote_node != NULL, return);
+
+ if ((connection->allocated_to != NULL)
+ && (connection->next_role != RSC_ROLE_STOPPED)) {
+
+ crm_trace("Pacemaker Remote node %s will be online",
+ remote_node->details->id);
+ remote_node->details->online = TRUE;
+ if (remote_node->details->unseen) {
+ // Avoid unnecessary fence, since we will attempt connection
+ remote_node->details->unclean = FALSE;
+ }
+
+ } else {
+ crm_trace("Pacemaker Remote node %s will be shut down "
+ "(%sassigned connection's next role is %s)",
+ remote_node->details->id,
+ ((connection->allocated_to == NULL)? "un" : ""),
+ role2text(connection->next_role));
+ remote_node->details->shutdown = TRUE;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Assign a primitive resource to a node
+ *
+ * \param[in,out] rsc Resource to assign to a node
+ * \param[in] prefer Node to prefer, if all else is equal
+ *
+ * \return Node that \p rsc is assigned to, if assigned entirely to one node
+ */
+pe_node_t *
+pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer)
+{
+ GList *this_with_colocations = NULL;
+ GList *with_this_colocations = NULL;
+ GList *iter = NULL;
+ pcmk__colocation_t *colocation = NULL;
+
+ CRM_ASSERT(rsc != NULL);
+
+ // Never assign a child without parent being assigned first
+ if ((rsc->parent != NULL)
+ && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
+ pe_rsc_debug(rsc, "%s: Assigning parent %s first",
+ rsc->id, rsc->parent->id);
+ rsc->parent->cmds->assign(rsc->parent, prefer);
+ }
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ return rsc->allocated_to; // Assignment has already been done
+ }
+
+ // Ensure we detect assignment loops
+ if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
+ pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
+ return NULL;
+ }
+ pe__set_resource_flags(rsc, pe_rsc_allocating);
+
+ pe__show_node_weights(true, rsc, "Pre-assignment", rsc->allowed_nodes,
+ rsc->cluster);
+
+ this_with_colocations = pcmk__this_with_colocations(rsc);
+ with_this_colocations = pcmk__with_this_colocations(rsc);
+
+ // Apply mandatory colocations first, to satisfy as many as possible
+ for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
+ colocation = iter->data;
+ if ((colocation->score <= -CRM_SCORE_INFINITY)
+ || (colocation->score >= CRM_SCORE_INFINITY)) {
+ apply_this_with(iter->data, rsc);
+ }
+ }
+ for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
+ colocation = iter->data;
+ if ((colocation->score <= -CRM_SCORE_INFINITY)
+ || (colocation->score >= CRM_SCORE_INFINITY)) {
+ pcmk__add_dependent_scores(iter->data, rsc);
+ }
+ }
+
+ pe__show_node_weights(true, rsc, "Mandatory-colocations",
+ rsc->allowed_nodes, rsc->cluster);
+
+ // Then apply optional colocations
+ for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
+ colocation = iter->data;
+
+ if ((colocation->score > -CRM_SCORE_INFINITY)
+ && (colocation->score < CRM_SCORE_INFINITY)) {
+ apply_this_with(iter->data, rsc);
+ }
+ }
+ for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
+ colocation = iter->data;
+
+ if ((colocation->score > -CRM_SCORE_INFINITY)
+ && (colocation->score < CRM_SCORE_INFINITY)) {
+ pcmk__add_dependent_scores(iter->data, rsc);
+ }
+ }
+
+ g_list_free(this_with_colocations);
+ g_list_free(with_this_colocations);
+
+ if (rsc->next_role == RSC_ROLE_STOPPED) {
+ pe_rsc_trace(rsc,
+ "Banning %s from all nodes because it will be stopped",
+ rsc->id);
+ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
+ rsc->cluster);
+
+ } else if ((rsc->next_role > rsc->role)
+ && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum)
+ && (rsc->cluster->no_quorum_policy == no_quorum_freeze)) {
+ crm_notice("Resource %s cannot be elevated from %s to %s due to "
+ "no-quorum-policy=freeze",
+ rsc->id, role2text(rsc->role), role2text(rsc->next_role));
+ pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
+ }
+
+ pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
+ rsc, __func__, rsc->allowed_nodes, rsc->cluster);
+
+ // Unmanage resource if fencing is enabled but no device is configured
+ if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)
+ && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) {
+ pe__clear_resource_flags(rsc, pe_rsc_managed);
+ }
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ // Unmanaged resources stay on their current node
+ const char *reason = NULL;
+ pe_node_t *assign_to = NULL;
+
+ pe__set_next_role(rsc, rsc->role, "unmanaged");
+ assign_to = pe__current_node(rsc);
+ if (assign_to == NULL) {
+ reason = "inactive";
+ } else if (rsc->role == RSC_ROLE_PROMOTED) {
+ reason = "promoted";
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ reason = "failed";
+ } else {
+ reason = "active";
+ }
+ pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
+ (assign_to? assign_to->details->uname : "no node"), reason);
+ pcmk__finalize_assignment(rsc, assign_to, true);
+
+ } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) {
+ pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id);
+ pcmk__finalize_assignment(rsc, NULL, true);
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional)
+ && assign_best_node(rsc, prefer)) {
+ // Assignment successful
+
+ } else if (rsc->allocated_to == NULL) {
+ if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
+ pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
+ } else if (rsc->running_on != NULL) {
+ pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
+ }
+
+ } else {
+ pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id,
+ pe__node_name(rsc->allocated_to));
+ }
+
+ pe__clear_resource_flags(rsc, pe_rsc_allocating);
+
+ if (rsc->is_remote_node) {
+ remote_connection_assigned(rsc);
+ }
+
+ return rsc->allocated_to;
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions to bring resource down and back to current role
+ *
+ * \param[in,out] rsc Resource to restart
+ * \param[in,out] current Node that resource should be brought down on
+ * \param[in] need_stop Whether the resource must be stopped
+ * \param[in] need_promote Whether the resource must be promoted
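+ *
+ * \note As an illustrative example only: restarting a resource that is
+ *       currently promoted walks the role matrices down (demote, then stop)
+ *       on the current node, then back up (start and, if needed, promote)
+ *       on the assigned node.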
+ */
+static void
+schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
+ bool need_stop, bool need_promote)
+{
+ enum rsc_role_e role = rsc->role;
+ enum rsc_role_e next_role;
+ rsc_transition_fn fn = NULL;
+
+ pe__set_resource_flags(rsc, pe_rsc_restarting);
+
+ // Bring resource down to a stop on its current node
+ while (role != RSC_ROLE_STOPPED) {
+ next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
+ pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
+ (need_stop? "required" : "optional"), rsc->id,
+ role2text(role), role2text(next_role));
+ fn = rsc_action_matrix[role][next_role];
+ if (fn == NULL) {
+ break;
+ }
+ fn(rsc, current, !need_stop);
+ role = next_role;
+ }
+
+ // Bring resource up to its next role on its next node
+ while ((rsc->role <= rsc->next_role) && (role != rsc->role)
+ && !pcmk_is_set(rsc->flags, pe_rsc_block)) {
+ bool required = need_stop;
+
+ next_role = rsc_state_matrix[role][rsc->role];
+ if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
+ required = true;
+ }
+ pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
+ (required? "required" : "optional"), rsc->id,
+ role2text(role), role2text(next_role));
+ fn = rsc_action_matrix[role][next_role];
+ if (fn == NULL) {
+ break;
+ }
+ fn(rsc, rsc->allocated_to, !required);
+ role = next_role;
+ }
+
+ pe__clear_resource_flags(rsc, pe_rsc_restarting);
+}
+
+/*!
+ * \internal
+ * \brief If a resource's next role is not explicitly specified, set a default
+ *
+ * \param[in,out] rsc Resource to set next role for
+ *
+ * \return "explicit" if next role was explicitly set, otherwise "implicit"
+ */
+static const char *
+set_default_next_role(pe_resource_t *rsc)
+{
+ if (rsc->next_role != RSC_ROLE_UNKNOWN) {
+ return "explicit";
+ }
+
+ if (rsc->allocated_to == NULL) {
+ pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment");
+ } else {
+ pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment");
+ }
+ return "implicit";
+}
+
+/*!
+ * \internal
+ * \brief Create an action to represent an already pending start
+ *
+ * \param[in,out] rsc Resource to create start action for
+ */
+static void
+create_pending_start(pe_resource_t *rsc)
+{
+ pe_action_t *start = NULL;
+
+ pe_rsc_trace(rsc,
+ "Creating action for %s to represent already pending start",
+ rsc->id);
+ start = start_action(rsc, rsc->allocated_to, TRUE);
+ pe__set_action_flags(start, pe_action_print_always);
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions needed to take a resource to its next role
+ *
+ * \param[in,out] rsc Resource to schedule actions for
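+ *
+ * \note For example (illustrative), taking a stopped resource to the promoted
+ *       role schedules a start and then a promote, stepping through the
+ *       intermediate roles given by rsc_state_matrix.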
+ */
+static void
+schedule_role_transition_actions(pe_resource_t *rsc)
+{
+ enum rsc_role_e role = rsc->role;
+
+ while (role != rsc->next_role) {
+ enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
+ rsc_transition_fn fn = NULL;
+
+ pe_rsc_trace(rsc,
+ "Creating action to take %s from %s to %s (ending at %s)",
+ rsc->id, role2text(role), role2text(next_role),
+ role2text(rsc->next_role));
+ fn = rsc_action_matrix[role][next_role];
+ if (fn == NULL) {
+ break;
+ }
+ fn(rsc, rsc->allocated_to, false);
+ role = next_role;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create all actions needed for a given primitive resource
+ *
+ * \param[in,out] rsc Primitive resource to create actions for
+ */
+void
+pcmk__primitive_create_actions(pe_resource_t *rsc)
+{
+ bool need_stop = false;
+ bool need_promote = false;
+ bool is_moving = false;
+ bool allow_migrate = false;
+ bool multiply_active = false;
+
+ pe_node_t *current = NULL;
+ unsigned int num_all_active = 0;
+ unsigned int num_clean_active = 0;
+ const char *next_role_source = NULL;
+
+ CRM_ASSERT(rsc != NULL);
+
+ next_role_source = set_default_next_role(rsc);
+ pe_rsc_trace(rsc,
+ "Creating all actions for %s transition from %s to %s "
+ "(%s) on %s",
+ rsc->id, role2text(rsc->role), role2text(rsc->next_role),
+ next_role_source, pe__node_name(rsc->allocated_to));
+
+ current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active);
+
+ g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
+ rsc);
+
+ if ((current != NULL) && (rsc->allocated_to != NULL)
+ && (current->details != rsc->allocated_to->details)
+ && (rsc->next_role >= RSC_ROLE_STARTED)) {
+
+ pe_rsc_trace(rsc, "Moving %s from %s to %s",
+ rsc->id, pe__node_name(current),
+ pe__node_name(rsc->allocated_to));
+ is_moving = true;
+ allow_migrate = pcmk__rsc_can_migrate(rsc, current);
+
+ // This is needed even if migrating (though I'm not sure why ...)
+ need_stop = true;
+ }
+
+ // Check whether resource is partially migrated and/or multiply active
+ if ((rsc->partial_migration_source != NULL)
+ && (rsc->partial_migration_target != NULL)
+ && allow_migrate && (num_all_active == 2)
+ && pe__same_node(current, rsc->partial_migration_source)
+ && pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
+ /* A partial migration is in progress, and the migration target remains
+ * the same as when the migration began.
+ */
+ pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
+ rsc->id, pe__node_name(rsc->partial_migration_source),
+ pe__node_name(rsc->partial_migration_target));
+
+ } else if ((rsc->partial_migration_source != NULL)
+ || (rsc->partial_migration_target != NULL)) {
+ // A partial migration is in progress but can't be continued
+
+ if (num_all_active > 2) {
+ // The resource is migrating *and* multiply active!
+ crm_notice("Forcing recovery of %s because it is migrating "
+ "from %s to %s and possibly active elsewhere",
+ rsc->id, pe__node_name(rsc->partial_migration_source),
+ pe__node_name(rsc->partial_migration_target));
+ } else {
+ // The migration source or target isn't available
+ crm_notice("Forcing recovery of %s because it can no longer "
+ "migrate from %s to %s",
+ rsc->id, pe__node_name(rsc->partial_migration_source),
+ pe__node_name(rsc->partial_migration_target));
+ }
+ need_stop = true;
+ rsc->partial_migration_source = rsc->partial_migration_target = NULL;
+ allow_migrate = false;
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
+ multiply_active = (num_all_active > 1);
+ } else {
+ /* If a resource has "requires" set to nothing or quorum, don't consider
+ * it active on unclean nodes (similar to how all resources behave when
+ * stonith-enabled is false). We can start such resources elsewhere
+ * before fencing completes, and if we considered the resource active on
+ * the failed node, we would attempt recovery for being active on
+ * multiple nodes.
+ */
+ multiply_active = (num_clean_active > 1);
+ }
+
+ if (multiply_active) {
+ const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+
+ // Resource was (possibly) incorrectly multiply active
+ pe_proc_err("%s resource %s might be active on %u nodes (%s)",
+ pcmk__s(class, "Untyped"), rsc->id, num_all_active,
+ recovery2text(rsc->recovery_type));
+ crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
+ "#Resource_is_Too_Active for more information");
+
+ switch (rsc->recovery_type) {
+ case recovery_stop_start:
+ need_stop = true;
+ break;
+ case recovery_stop_unexpected:
+ need_stop = true; // stop_resource() will skip expected node
+ pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
+ break;
+ default:
+ break;
+ }
+
+ } else {
+ pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
+ create_pending_start(rsc);
+ }
+
+ if (is_moving) {
+ // Remaining tests are only for resources staying where they are
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ if (pcmk_is_set(rsc->flags, pe_rsc_stop)) {
+ need_stop = true;
+ pe_rsc_trace(rsc, "Recovering %s", rsc->id);
+ } else {
+ pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
+ if (rsc->next_role == RSC_ROLE_PROMOTED) {
+ need_promote = true;
+ }
+ }
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
+ pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
+ need_stop = true;
+
+ } else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL)
+ && (rsc->allocated_to != NULL)) {
+ pe_action_t *start = NULL;
+
+ pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
+ rsc->id);
+ start = start_action(rsc, rsc->allocated_to, TRUE);
+ if (!pcmk_is_set(start->flags, pe_action_optional)) {
+ // Recovery of a promoted resource
+ pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
+ need_stop = true;
+ }
+ }
+
+ // Create any actions needed to bring resource down and back up to same role
+ schedule_restart_actions(rsc, current, need_stop, need_promote);
+
+ // Create any actions needed to take resource from this role to the next
+ schedule_role_transition_actions(rsc);
+
+ pcmk__create_recurring_actions(rsc);
+
+ if (allow_migrate) {
+ pcmk__create_migration_actions(rsc, current);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
+ *
+ * \param[in] rsc Resource to check
+ */
+static void
+rsc_avoids_remote_nodes(const pe_resource_t *rsc)
+{
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ if (node->details->remote_rsc != NULL) {
+ node->weight = -INFINITY;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return allowed nodes as (possibly sorted) list
+ *
+ * Convert a resource's hash table of allowed nodes to a list. If printing to
+ * stdout, sort the list, to keep action ID numbers consistent for regression
+ * test output (while avoiding the performance hit on a live cluster).
+ *
+ * \param[in] rsc Resource to check for allowed nodes
+ *
+ * \return List of resource's allowed nodes
+ * \note Callers should take care not to rely on the list being sorted.
+ */
+static GList *
+allowed_nodes_as_list(const pe_resource_t *rsc)
+{
+ GList *allowed_nodes = NULL;
+
+ if (rsc->allowed_nodes) {
+ allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
+ }
+
+ if (!pcmk__is_daemon) {
+ allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
+ }
+
+ return allowed_nodes;
+}
+
+/*!
+ * \internal
+ * \brief Create implicit constraints needed for a primitive resource
+ *
+ * \param[in,out] rsc Primitive resource to create implicit constraints for
+ */
+void
+pcmk__primitive_internal_constraints(pe_resource_t *rsc)
+{
+ GList *allowed_nodes = NULL;
+ bool check_unfencing = false;
+ bool check_utilization = false;
+
+ CRM_ASSERT(rsc != NULL);
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pe_rsc_trace(rsc,
+ "Skipping implicit constraints for unmanaged resource %s",
+ rsc->id);
+ return;
+ }
+
+ // Whether resource requires unfencing
+ check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device)
+ && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)
+ && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);
+
+ // Whether a non-default placement strategy is used
+ check_utilization = (g_hash_table_size(rsc->utilization) > 0)
+ && !pcmk__str_eq(rsc->cluster->placement_strategy,
+ "default", pcmk__str_casei);
+
+ // Order stops before starts (i.e. restart)
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
+ pe_order_optional|pe_order_implies_then|pe_order_restart,
+ rsc->cluster);
+
+ // Promotable ordering: demote before stop, start before promote
+ if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
+ pe_rsc_promotable)
+ || (rsc->role > RSC_ROLE_UNPROMOTED)) {
+
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
+ pe_order_promoted_implies_first, rsc->cluster);
+
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
+ rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL,
+ pe_order_runnable_left, rsc->cluster);
+ }
+
+ // Don't clear resource history if probing on same node
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0),
+ NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0),
+ NULL, pe_order_same_node|pe_order_then_cancels_first,
+ rsc->cluster);
+
+ // Certain checks need allowed nodes
+ if (check_unfencing || check_utilization || (rsc->container != NULL)) {
+ allowed_nodes = allowed_nodes_as_list(rsc);
+ }
+
+ if (check_unfencing) {
+ g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
+ }
+
+ if (check_utilization) {
+ pcmk__create_utilization_constraints(rsc, allowed_nodes);
+ }
+
+ if (rsc->container != NULL) {
+ pe_resource_t *remote_rsc = NULL;
+
+ if (rsc->is_remote_node) {
+ // rsc is the implicit remote connection for a guest or bundle node
+
+ /* Guest resources are not allowed to run on Pacemaker Remote nodes,
+ * to avoid nesting remotes. However, bundles are allowed.
+ */
+ if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
+ rsc_avoids_remote_nodes(rsc->container);
+ }
+
+ /* If someone cleans up a guest or bundle node's container, we will
+ * likely schedule a (re-)probe of the container and recovery of the
+ * connection. Order the connection stop after the container probe,
+ * so that if we detect the container running, we will trigger a new
+ * transition and avoid the unnecessary recovery.
+ */
+ pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc,
+ RSC_STOP, pe_order_optional);
+
+ /* A user can specify that a resource must start on a Pacemaker Remote
+ * node by explicitly configuring it with the container=NODENAME
+ * meta-attribute. This is of questionable merit, since location
+ * constraints can accomplish the same thing. But we support it, so here
+ * we check whether a resource (that is not itself a remote connection)
+ * has container set to a remote node or guest node resource.
+ */
+ } else if (rsc->container->is_remote_node) {
+ remote_rsc = rsc->container;
+ } else {
+ remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
+ rsc->container);
+ }
+
+ if (remote_rsc != NULL) {
+ /* Force the resource on the Pacemaker Remote node instead of
+ * colocating the resource with the container resource.
+ */
+ for (GList *item = allowed_nodes; item; item = item->next) {
+ pe_node_t *node = item->data;
+
+ if (node->details->remote_rsc != remote_rsc) {
+ node->weight = -INFINITY;
+ }
+ }
+
+ } else {
+ /* This resource is either a filler for a container that does NOT
+ * represent a Pacemaker Remote node, or a Pacemaker Remote
+ * connection resource for a guest node or bundle.
+ */
+ int score;
+
+ crm_trace("Order and colocate %s relative to its container %s",
+ rsc->id, rsc->container->id);
+
+ pcmk__new_ordering(rsc->container,
+ pcmk__op_key(rsc->container->id, RSC_START, 0),
+ NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0),
+ NULL,
+ pe_order_implies_then|pe_order_runnable_left,
+ rsc->cluster);
+
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
+ rsc->container,
+ pcmk__op_key(rsc->container->id, RSC_STOP, 0),
+ NULL, pe_order_implies_first, rsc->cluster);
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
+ score = 10000; /* Highly preferred but not essential */
+ } else {
+ score = INFINITY; /* Force them to run on the same host */
+ }
+ pcmk__new_colocation("resource-with-container", NULL, score, rsc,
+ rsc->container, NULL, NULL, true,
+ rsc->cluster);
+ }
+ }
+
+ if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
+ /* Remote connections and fencing devices are not allowed to run on
+ * Pacemaker Remote nodes
+ */
+ rsc_avoids_remote_nodes(rsc);
+ }
+ g_list_free(allowed_nodes);
+}
+
+/*!
+ * \internal
+ * \brief Apply a colocation's score to node weights or resource priority
+ *
+ * Given a colocation constraint, apply its score to the dependent's
+ * allowed node weights (if we are still placing resources) or priority (if
+ * we are choosing promotable clone instance roles).
+ *
+ * \param[in,out] dependent Dependent resource in colocation
+ * \param[in] primary Primary resource in colocation
+ * \param[in] colocation Colocation constraint to apply
+ * \param[in] for_dependent true if called on behalf of dependent
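+ *
+ * \note Roughly speaking (an illustrative simplification): a positive score
+ *       pulls the dependent toward nodes where the primary can run, with
+ *       +INFINITY making the colocation mandatory, while a negative score
+ *       pushes the dependent away, with -INFINITY banning the primary's node
+ *       outright.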
+ */
+void
+pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
+ const pe_resource_t *primary,
+ const pcmk__colocation_t *colocation,
+ bool for_dependent)
+{
+ enum pcmk__coloc_affects filter_results;
+
+ CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
+ return);
+
+ if (for_dependent) {
+ // Always process on behalf of primary resource
+ primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
+ return;
+ }
+
+ filter_results = pcmk__colocation_affects(dependent, primary, colocation,
+ false);
+ pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
+ ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
+ dependent->id, primary->id, colocation->id, colocation->score,
+ filter_results);
+
+ switch (filter_results) {
+ case pcmk__coloc_affects_role:
+ pcmk__apply_coloc_to_priority(dependent, primary, colocation);
+ break;
+ case pcmk__coloc_affects_location:
+ pcmk__apply_coloc_to_weights(dependent, primary, colocation);
+ break;
+ default: // pcmk__coloc_affects_nothing
+ return;
+ }
+}
+
+/* Primitive implementation of
+ * resource_alloc_functions_t:with_this_colocations()
+ */
+void
+pcmk__with_primitive_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+{
+ // Primitives don't have children, so rsc should also be orig_rsc
+ CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native)
+ && (rsc == orig_rsc) && (list != NULL),
+ return);
+
+ // Add primitive's own colocations plus any relevant ones from parent
+ pcmk__add_with_this_list(list, rsc->rsc_cons_lhs);
+ if (rsc->parent != NULL) {
+ rsc->parent->cmds->with_this_colocations(rsc->parent, rsc, list);
+ }
+}
+
+/* Primitive implementation of
+ * resource_alloc_functions_t:this_with_colocations()
+ */
+void
+pcmk__primitive_with_colocations(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList **list)
+{
+ // Primitives don't have children, so rsc should also be orig_rsc
+ CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native)
+ && (rsc == orig_rsc) && (list != NULL),
+ return);
+
+ // Add primitive's own colocations plus any relevant ones from parent
+ pcmk__add_this_with_list(list, rsc->rsc_cons);
+ if (rsc->parent != NULL) {
+ rsc->parent->cmds->this_with_colocations(rsc->parent, rsc, list);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return action flags for a given primitive resource action
+ *
+ * \param[in,out] action Action to get flags for
+ * \param[in] node If not NULL, limit effects to this node (ignored)
+ *
+ * \return Flags appropriate to \p action on \p node
+ */
+enum pe_action_flags
+pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node)
+{
+ CRM_ASSERT(action != NULL);
+ return action->flags;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node is a multiply active resource's expected node
+ *
+ * \param[in] rsc Resource to check
+ * \param[in] node Node to check
+ *
+ * \return true if \p rsc is multiply active with multiple-active set to
+ * stop_unexpected, and \p node is the node where it will remain active
+ * \note This assumes that the resource's next role cannot be changed to stopped
+ * after this is called, which should be reasonable if status has already
+ * been unpacked and resources have been assigned to nodes.
+ */
+static bool
+is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ return pcmk_all_flags_set(rsc->flags,
+ pe_rsc_stop_unexpected|pe_rsc_restarting)
+ && (rsc->next_role > RSC_ROLE_STOPPED)
+ && pe__same_node(rsc->allocated_to, node);
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions needed to stop a resource wherever it is active
+ *
+ * \param[in,out] rsc Resource being stopped
+ * \param[in] node Node where resource is being stopped (ignored)
+ * \param[in] optional Whether actions should be optional
+ */
+static void
+stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+ for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
+ pe_node_t *current = (pe_node_t *) iter->data;
+ pe_action_t *stop = NULL;
+
+ if (is_expected_node(rsc, current)) {
+ /* We are scheduling restart actions for a multiply active resource
+ * with multiple-active=stop_unexpected, and this is where it should
+ * not be stopped.
+ */
+ pe_rsc_trace(rsc,
+ "Skipping stop of multiply active resource %s "
+ "on expected node %s",
+ rsc->id, pe__node_name(current));
+ continue;
+ }
+
+ if (rsc->partial_migration_target != NULL) {
+ // Continue migration if node originally was and remains target
+ if (pe__same_node(current, rsc->partial_migration_target)
+ && pe__same_node(current, rsc->allocated_to)) {
+ pe_rsc_trace(rsc,
+ "Skipping stop of %s on %s "
+ "because partial migration there will continue",
+ rsc->id, pe__node_name(current));
+ continue;
+ } else {
+ pe_rsc_trace(rsc,
+ "Forcing stop of %s on %s "
+ "because migration target changed",
+ rsc->id, pe__node_name(current));
+ optional = false;
+ }
+ }
+
+ pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
+ rsc->id, pe__node_name(current));
+ stop = stop_action(rsc, current, optional);
+
+ if (rsc->allocated_to == NULL) {
+ pe_action_set_reason(stop, "node availability", true);
+ } else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
+ |pe_rsc_stop_unexpected)) {
+ /* We are stopping a multiply active resource on a node that is
+ * not its expected node, and we are still scheduling restart
+ * actions, so the stop is for being multiply active.
+ */
+ pe_action_set_reason(stop, "being multiply active", true);
+ }
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pe__clear_action_flags(stop, pe_action_runnable);
+ }
+
+ if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) {
+ pcmk__schedule_cleanup(rsc, current, optional);
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
+ pe_action_t *unfence = pe_fence_op(current, "on", true, NULL, false,
+ rsc->cluster);
+
+ order_actions(stop, unfence, pe_order_implies_first);
+ if (!pcmk__node_unfenced(current)) {
+ pe_proc_err("Stopping %s until %s can be unfenced",
+ rsc->id, pe__node_name(current));
+ }
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions needed to start a resource on a node
+ *
+ * \param[in,out] rsc Resource being started
+ * \param[in,out] node Node where resource should be started
+ * \param[in] optional Whether actions should be optional
+ */
+static void
+start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+ pe_action_t *start = NULL;
+
+ CRM_ASSERT(node != NULL);
+
+ pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
+ (optional? "optional" : "required"), rsc->id,
+ pe__node_name(node), node->weight);
+ start = start_action(rsc, node, TRUE);
+
+ pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then);
+
+ if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) {
+ pe__clear_action_flags(start, pe_action_optional);
+ }
+
+ if (is_expected_node(rsc, node)) {
+ /* This could be a problem if the start becomes necessary for other
+ * reasons later.
+ */
+ pe_rsc_trace(rsc,
+ "Start of multiply active resouce %s "
+ "on expected node %s will be a pseudo-action",
+ rsc->id, pe__node_name(node));
+ pe__set_action_flags(start, pe_action_pseudo);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions needed to promote a resource on a node
+ *
+ * \param[in,out] rsc Resource being promoted
+ * \param[in] node Node where resource should be promoted
+ * \param[in] optional Whether actions should be optional
+ */
+static void
+promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+ GList *iter = NULL;
+ GList *action_list = NULL;
+ bool runnable = true;
+
+ CRM_ASSERT(node != NULL);
+
+ // Any start must be runnable for promotion to be runnable
+ action_list = pe__resource_actions(rsc, node, RSC_START, true);
+ for (iter = action_list; iter != NULL; iter = iter->next) {
+ pe_action_t *start = (pe_action_t *) iter->data;
+
+ if (!pcmk_is_set(start->flags, pe_action_runnable)) {
+ runnable = false;
+ }
+ }
+ g_list_free(action_list);
+
+ if (runnable) {
+ pe_action_t *promote = promote_action(rsc, node, optional);
+
+ pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
+ (optional? "optional" : "required"), rsc->id,
+ pe__node_name(node));
+
+ if (is_expected_node(rsc, node)) {
+ /* This could be a problem if the promote becomes necessary for
+ * other reasons later.
+ */
+ pe_rsc_trace(rsc,
+ "Promotion of multiply active resouce %s "
+ "on expected node %s will be a pseudo-action",
+ rsc->id, pe__node_name(node));
+ pe__set_action_flags(promote, pe_action_pseudo);
+ }
+ } else {
+ pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
+ rsc->id, pe__node_name(node));
+ action_list = pe__resource_actions(rsc, node, RSC_PROMOTE, true);
+ for (iter = action_list; iter != NULL; iter = iter->next) {
+ pe_action_t *promote = (pe_action_t *) iter->data;
+
+ pe__clear_action_flags(promote, pe_action_runnable);
+ }
+ g_list_free(action_list);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions needed to demote a resource wherever it is active
+ *
+ * \param[in,out] rsc Resource being demoted
+ * \param[in] node Node where resource should be demoted (ignored)
+ * \param[in] optional Whether actions should be optional
+ */
+static void
+demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+ /* Since this will only be called for a primitive (possibly as an instance
+ * of a collective resource), the resource is multiply active if it is
+ * running on more than one node, so we want to demote on all of them as
+ * part of recovery, regardless of which one is the desired node.
+ */
+ for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
+ pe_node_t *current = (pe_node_t *) iter->data;
+
+ if (is_expected_node(rsc, current)) {
+ pe_rsc_trace(rsc,
+ "Skipping demote of multiply active resource %s "
+ "on expected node %s",
+ rsc->id, pe__node_name(current));
+ } else {
+ pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
+ (optional? "optional" : "required"), rsc->id,
+ pe__node_name(current));
+ demote_action(rsc, current, optional);
+ }
+ }
+}
+
+static void
+assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+ CRM_ASSERT(false);
+}
+
+/*!
+ * \internal
+ * \brief Schedule cleanup of a resource
+ *
+ * \param[in,out] rsc Resource to clean up
+ * \param[in] node Node to clean up on
+ * \param[in] optional Whether clean-up should be optional
+ */
+void
+pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional)
+{
+ /* If the cleanup is required, its orderings are optional, because they're
+ * relevant only if both actions are required. Conversely, if the cleanup is
+ * optional, the orderings make the then action required if the first action
+ * becomes required.
+ */
+ uint32_t flag = optional? pe_order_implies_then : pe_order_optional;
+
+ CRM_CHECK((rsc != NULL) && (node != NULL), return);
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+ pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
+ rsc->id, pe__node_name(node));
+ return;
+ }
+
+ if (node->details->unclean || !node->details->online) {
+ pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
+ rsc->id, pe__node_name(node));
+ return;
+ }
+
+ crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node));
+ delete_action(rsc, node, optional);
+
+ // stop -> clean-up -> start
+ pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, flag);
+ pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, flag);
+}
+
+/*!
+ * \internal
+ * \brief Add primitive meta-attributes relevant to graph actions to XML
+ *
+ * \param[in] rsc Primitive resource whose meta-attributes should be added
+ * \param[in,out] xml Transition graph action attributes XML to add to
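+ *
+ * \note For instance (assuming crm_meta_name() simply adds the "CRM_meta_"
+ *       prefix), a clone instance number would be added to the graph action
+ *       XML as a "CRM_meta_clone" attribute.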
+ */
+void
+pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml)
+{
+ char *name = NULL;
+ char *value = NULL;
+ const pe_resource_t *parent = NULL;
+
+ CRM_ASSERT((rsc != NULL) && (xml != NULL));
+
+ /* Clone instance numbers get set internally as meta-attributes, and are
+ * needed in the transition graph (for example, to tell unique clone
+ * instances apart).
+ */
+ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
+ if (value != NULL) {
+ name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
+ crm_xml_add(xml, name, value);
+ free(name);
+ }
+
+ // Not sure if this one is really needed ...
+ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
+ if (value != NULL) {
+ name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
+ crm_xml_add(xml, name, value);
+ free(name);
+ }
+
+ /* The container meta-attribute can be set on the primitive itself or one of
+ * its parents (for example, a group inside a container resource), so check
+ * them all, and keep the highest one found.
+ */
+ for (parent = rsc; parent != NULL; parent = parent->parent) {
+ if (parent->container != NULL) {
+ crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER,
+ parent->container->id);
+ }
+ }
+
+ /* Bundle replica children will get their external-ip set internally as a
+ * meta-attribute. The graph action needs it, but under a different naming
+ * convention than other meta-attributes.
+ */
+ value = g_hash_table_lookup(rsc->meta, "external-ip");
+ if (value != NULL) {
+ crm_xml_add(xml, "pcmk_external_ip", value);
+ }
+}
+
+// Primitive implementation of resource_alloc_functions_t:add_utilization()
+void
+pcmk__primitive_add_utilization(const pe_resource_t *rsc,
+ const pe_resource_t *orig_rsc, GList *all_rscs,
+ GHashTable *utilization)
+{
+ if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ return;
+ }
+
+ pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
+ orig_rsc->id, rsc->id);
+ pcmk__release_node_capacity(utilization, rsc);
+}
+
+/*!
+ * \internal
+ * \brief Get epoch time of node's shutdown attribute (or now if none)
+ *
+ * \param[in,out] node Node to check
+ *
+ * \return Epoch time corresponding to shutdown attribute if set or now if not
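+ *
+ * \note This assumes that the shutdown attribute, when set, holds the epoch
+ *       time at which the node's shutdown was requested.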
+ */
+static time_t
+shutdown_time(pe_node_t *node)
+{
+ const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
+ time_t result = 0;
+
+ if (shutdown != NULL) {
+ long long result_ll;
+
+ if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
+ result = (time_t) result_ll;
+ }
+ }
+ return (result == 0)? get_effective_time(node->details->data_set) : result;
+}
+
+/*!
+ * \internal
+ * \brief Ban a resource from a node if it's not locked to the node
+ *
+ * \param[in] data Node to check
+ * \param[in,out] user_data Resource to check
+ */
+static void
+ban_if_not_locked(gpointer data, gpointer user_data)
+{
+ const pe_node_t *node = (const pe_node_t *) data;
+ pe_resource_t *rsc = (pe_resource_t *) user_data;
+
+ if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
+ resource_location(rsc, node, -CRM_SCORE_INFINITY,
+ XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
+ }
+}
+
+// Primitive implementation of resource_alloc_functions_t:shutdown_lock()
+void
+pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
+{
+ const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+
+ // Fence devices and remote connections can't be locked
+ if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
+ || pe__resource_is_remote_conn(rsc, rsc->cluster)) {
+ return;
+ }
+
+ if (rsc->lock_node != NULL) {
+ // The lock was obtained from resource history
+
+ if (rsc->running_on != NULL) {
+ /* The resource was started elsewhere even though it is now
+ * considered locked. This shouldn't be possible, but as a
+ * failsafe, we don't want to disturb the resource now.
+ */
+ pe_rsc_info(rsc,
+ "Cancelling shutdown lock because %s is already active",
+ rsc->id);
+ pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
+ rsc->lock_node = NULL;
+ rsc->lock_time = 0;
+ }
+
+ // Only a resource active on exactly one node can be locked
+ } else if (pcmk__list_of_1(rsc->running_on)) {
+ pe_node_t *node = rsc->running_on->data;
+
+ if (node->details->shutdown) {
+ if (node->details->unclean) {
+ pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
+ rsc->id, pe__node_name(node));
+ } else {
+ rsc->lock_node = node;
+ rsc->lock_time = shutdown_time(node);
+ }
+ }
+ }
+
+ if (rsc->lock_node == NULL) {
+ // No lock needed
+ return;
+ }
+
+ if (rsc->cluster->shutdown_lock > 0) {
+ time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
+
+ pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
+ rsc->id, pe__node_name(rsc->lock_node),
+ (long long) lock_expiration);
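+        // Recheck just after the lock expires, so it is released promptly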
+ pe__update_recheck_time(++lock_expiration, rsc->cluster);
+ } else {
+ pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
+ rsc->id, pe__node_name(rsc->lock_node));
+ }
+
+ // If resource is locked to one node, ban it from all other nodes
+ g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
+}
diff --git a/lib/pacemaker/pcmk_sched_probes.c b/lib/pacemaker/pcmk_sched_probes.c
new file mode 100644
index 0000000..919e523
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_probes.c
@@ -0,0 +1,896 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Add the expected result to a newly created probe
+ *
+ * \param[in,out] probe Probe action to add expected result to
+ * \param[in] rsc Resource that probe is for
+ * \param[in] node Node that probe will run on
+ */
+static void
+add_expected_result(pe_action_t *probe, const pe_resource_t *rsc,
+ const pe_node_t *node)
+{
+ // Check whether resource is currently active on node
+ pe_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
+
+ // The expected result is what we think the resource's current state is
+ if (running == NULL) {
+ pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
+
+ } else if (rsc->role == RSC_ROLE_PROMOTED) {
+ pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create any needed probes on a node for a list of resources
+ *
+ * \param[in,out] rscs List of resources to create probes for
+ * \param[in,out] node Node to create probes on
+ *
+ * \return true if any probe was created, otherwise false
+ */
+bool
+pcmk__probe_resource_list(GList *rscs, pe_node_t *node)
+{
+ bool any_created = false;
+
+ for (GList *iter = rscs; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ if (rsc->cmds->create_probe(rsc, node)) {
+ any_created = true;
+ }
+ }
+ return any_created;
+}
+
+/*!
+ * \internal
+ * \brief Order one resource's start after another's start-up probe
+ *
+ * \param[in,out] rsc1 Resource that might get start-up probe
+ * \param[in] rsc2 Resource that might be started
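+ *
+ * \note The ordering is created only if rsc1's state on its assigned node is
+ *       not yet known, since that is when a start-up probe will be scheduled.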
+ */
+static void
+probe_then_start(pe_resource_t *rsc1, pe_resource_t *rsc2)
+{
+ if ((rsc1->allocated_to != NULL)
+ && (g_hash_table_lookup(rsc1->known_on,
+ rsc1->allocated_to->details->id) == NULL)) {
+
+ pcmk__new_ordering(rsc1, pcmk__op_key(rsc1->id, RSC_STATUS, 0), NULL,
+ rsc2, pcmk__op_key(rsc2->id, RSC_START, 0), NULL,
+ pe_order_optional, rsc1->cluster);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a guest resource will stop
+ *
+ * \param[in] node Guest node to check
+ *
+ * \return true if guest resource will likely stop, otherwise false
+ */
+static bool
+guest_resource_will_stop(const pe_node_t *node)
+{
+ const pe_resource_t *guest_rsc = node->details->remote_rsc->container;
+
+ /* Ideally, we'd check whether the guest has a required stop, but that
+ * information doesn't exist yet, so approximate it ...
+ */
+ return node->details->remote_requires_reset
+ || node->details->unclean
+ || pcmk_is_set(guest_rsc->flags, pe_rsc_failed)
+ || (guest_rsc->next_role == RSC_ROLE_STOPPED)
+
+ // Guest is moving
+ || ((guest_rsc->role > RSC_ROLE_STOPPED)
+ && (guest_rsc->allocated_to != NULL)
+ && (pe_find_node(guest_rsc->running_on,
+ guest_rsc->allocated_to->details->uname) == NULL));
+}
+
+/*!
+ * \internal
+ * \brief Create a probe action for a resource on a node
+ *
+ * \param[in,out] rsc Resource to create probe for
+ * \param[in,out] node Node to create probe on
+ *
+ * \return Newly created probe action
+ */
+static pe_action_t *
+probe_action(pe_resource_t *rsc, pe_node_t *node)
+{
+ pe_action_t *probe = NULL;
+ char *key = pcmk__op_key(rsc->id, RSC_STATUS, 0);
+
+ crm_debug("Scheduling probe of %s %s on %s",
+ role2text(rsc->role), rsc->id, pe__node_name(node));
+
+ probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE,
+ rsc->cluster);
+ pe__clear_action_flags(probe, pe_action_optional);
+
+ pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional);
+ add_expected_result(probe, rsc, node);
+ return probe;
+}
+
+/*!
+ * \internal
+ * \brief Schedule any probes needed for a resource on a node
+ *
+ * \param[in,out] rsc Resource to create probe for
+ * \param[in,out] node Node to create probe on
+ *
+ * \return true if any probe was created, otherwise false
+ */
+bool
+pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node)
+{
+ uint32_t flags = pe_order_optional;
+ pe_action_t *probe = NULL;
+ pe_node_t *allowed = NULL;
+ pe_resource_t *top = uber_parent(rsc);
+ const char *reason = NULL;
+
+ CRM_CHECK((rsc != NULL) && (node != NULL), return false);
+
+ if (!pcmk_is_set(rsc->cluster->flags, pe_flag_startup_probes)) {
+ reason = "start-up probes are disabled";
+ goto no_probe;
+ }
+
+ if (pe__is_guest_or_remote_node(node)) {
+ const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+
+ if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
+ reason = "Pacemaker Remote nodes cannot run stonith agents";
+ goto no_probe;
+
+ } else if (pe__is_guest_node(node)
+ && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
+ reason = "guest nodes cannot run resources containing guest nodes";
+ goto no_probe;
+
+ } else if (rsc->is_remote_node) {
+ reason = "Pacemaker Remote nodes cannot host remote connections";
+ goto no_probe;
+ }
+ }
+
+ // If this is a collective resource, probes are created for its children
+ if (rsc->children != NULL) {
+ return pcmk__probe_resource_list(rsc->children, node);
+ }
+
+ if ((rsc->container != NULL) && !rsc->is_remote_node) {
+ reason = "resource is inside a container";
+ goto no_probe;
+
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
+ reason = "resource is orphaned";
+ goto no_probe;
+
+ } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
+ reason = "resource state is already known";
+ goto no_probe;
+ }
+
+ allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
+
+ if (rsc->exclusive_discover || top->exclusive_discover) {
+ // Exclusive discovery is enabled ...
+
+ if (allowed == NULL) {
+ // ... but this node is not allowed to run the resource
+ reason = "resource has exclusive discovery but is not allowed "
+ "on node";
+ goto no_probe;
+
+ } else if (allowed->rsc_discover_mode != pe_discover_exclusive) {
+ // ... but no constraint marks this node for discovery of resource
+ reason = "resource has exclusive discovery but is not enabled "
+ "on node";
+ goto no_probe;
+ }
+ }
+
+ if (allowed == NULL) {
+ allowed = node;
+ }
+ if (allowed->rsc_discover_mode == pe_discover_never) {
+ reason = "node has discovery disabled";
+ goto no_probe;
+ }
+
+ if (pe__is_guest_node(node)) {
+ pe_resource_t *guest = node->details->remote_rsc->container;
+
+ if (guest->role == RSC_ROLE_STOPPED) {
+ // The guest is stopped, so we know no resource is active there
+ reason = "node's guest is stopped";
+ probe_then_start(guest, top);
+ goto no_probe;
+
+ } else if (guest_resource_will_stop(node)) {
+ reason = "node's guest will stop";
+
+ // Order resource start after guest stop (in case it's restarting)
+ pcmk__new_ordering(guest, pcmk__op_key(guest->id, RSC_STOP, 0),
+ NULL, top, pcmk__op_key(top->id, RSC_START, 0),
+ NULL, pe_order_optional, rsc->cluster);
+ goto no_probe;
+ }
+ }
+
+ // We've eliminated all cases where a probe is not needed, so now it is
+ probe = probe_action(rsc, node);
+
+ /* Below, we will order the probe relative to start or reload. If this is a
+ * clone instance, the start or reload is for the entire clone rather than
+ * just the instance. Otherwise, the start or reload is for the resource
+ * itself.
+ */
+ if (!pe_rsc_is_clone(top)) {
+ top = rsc;
+ }
+
+ /* Prevent a start if the resource can't be probed, but don't cause the
+ * resource or entire clone to stop if already active.
+ */
+ if (!pcmk_is_set(probe->flags, pe_action_runnable)
+ && (top->running_on == NULL)) {
+ pe__set_order_flags(flags, pe_order_runnable_left);
+ }
+
+ // Start or reload after probing the resource
+ pcmk__new_ordering(rsc, NULL, probe,
+ top, pcmk__op_key(top->id, RSC_START, 0), NULL,
+ flags, rsc->cluster);
+ pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
+ pe_order_optional, rsc->cluster);
+
+ return true;
+
+no_probe:
+ pe_rsc_trace(rsc,
+ "Skipping probe for %s on %s because %s",
+ rsc->id, node->details->id, reason);
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a probe should be ordered before another action
+ *
+ * \param[in] probe Probe action to check
+ * \param[in] then Other action to check
+ *
+ * \return true if \p probe should be ordered before \p then, otherwise false
+ */
+static bool
+probe_needed_before_action(const pe_action_t *probe, const pe_action_t *then)
+{
+ // Probes on a node are performed after unfencing it, not before
+ if (pcmk__str_eq(then->task, CRM_OP_FENCE, pcmk__str_casei)
+ && (probe->node != NULL) && (then->node != NULL)
+ && (probe->node->details == then->node->details)) {
+ const char *op = g_hash_table_lookup(then->meta, "stonith_action");
+
+ if (pcmk__str_eq(op, "on", pcmk__str_casei)) {
+ return false;
+ }
+ }
+
+ // Probes should be done on a node before shutting it down
+ if (pcmk__str_eq(then->task, CRM_OP_SHUTDOWN, pcmk__str_none)
+ && (probe->node != NULL) && (then->node != NULL)
+ && (probe->node->details != then->node->details)) {
+ return false;
+ }
+
+ // Otherwise probes should always be done before any other action
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Add implicit "probe then X" orderings for "stop then X" orderings
+ *
+ * If the state of a resource is not known yet, a probe will be scheduled,
+ * expecting a "not running" result. If the probe fails, a stop will not be
+ * scheduled until the next transition. Thus, if there are ordering constraints
+ * like "stop this resource then do something else that's not for the same
+ * resource", add implicit "probe this resource then do something" equivalents
+ * so the relation is upheld until we know whether a stop is needed.
+ *
+ * \param[in,out] data_set Cluster working set
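+ *
+ * \note For example (illustrative): given "stop rscA then start rscB", this
+ *       also orders "probe rscA then start rscB", so that rscB will not start
+ *       before rscA's state (and thus whether rscA needs a stop) is known.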
+ */
+static void
+add_probe_orderings_for_stops(pe_working_set_t *data_set)
+{
+ for (GList *iter = data_set->ordering_constraints; iter != NULL;
+ iter = iter->next) {
+
+ pe__ordering_t *order = iter->data;
+ uint32_t order_flags = pe_order_optional;
+ GList *probes = NULL;
+ GList *then_actions = NULL;
+
+ // Skip disabled orderings
+ if (order->flags == pe_order_none) {
+ continue;
+ }
+
+ // Skip non-resource orderings, and orderings for the same resource
+ if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
+ continue;
+ }
+
+ // Skip invalid orderings (shouldn't be possible)
+ if (((order->lh_action == NULL) && (order->lh_action_task == NULL)) ||
+ ((order->rh_action == NULL) && (order->rh_action_task == NULL))) {
+ continue;
+ }
+
+ // Skip orderings for first actions other than stop
+ if ((order->lh_action != NULL)
+ && !pcmk__str_eq(order->lh_action->task, RSC_STOP, pcmk__str_none)) {
+ continue;
+ } else if ((order->lh_action == NULL)
+ && !pcmk__ends_with(order->lh_action_task, "_" RSC_STOP "_0")) {
+ continue;
+ }
+
+        /* Do not imply a probe ordering for a resource inside a stopping
+ * container. Otherwise, it might introduce a transition loop, since a
+ * probe could be scheduled after the container starts again.
+ */
+ if ((order->rh_rsc != NULL)
+ && (order->lh_rsc->container == order->rh_rsc)) {
+
+ if ((order->rh_action != NULL)
+ && pcmk__str_eq(order->rh_action->task, RSC_STOP,
+ pcmk__str_none)) {
+ continue;
+ } else if ((order->rh_action == NULL)
+ && pcmk__ends_with(order->rh_action_task,
+ "_" RSC_STOP "_0")) {
+ continue;
+ }
+ }
+
+ // Preserve certain order options for future filtering
+ if (pcmk_is_set(order->flags, pe_order_apply_first_non_migratable)) {
+ pe__set_order_flags(order_flags,
+ pe_order_apply_first_non_migratable);
+ }
+ if (pcmk_is_set(order->flags, pe_order_same_node)) {
+ pe__set_order_flags(order_flags, pe_order_same_node);
+ }
+
+ // Preserve certain order types for future filtering
+ if ((order->flags == pe_order_anti_colocation)
+ || (order->flags == pe_order_load)) {
+ order_flags = order->flags;
+ }
+
+ // List all scheduled probes for the first resource
+ probes = pe__resource_actions(order->lh_rsc, NULL, RSC_STATUS, FALSE);
+ if (probes == NULL) { // There aren't any
+ continue;
+ }
+
+ // List all relevant "then" actions
+ if (order->rh_action != NULL) {
+ then_actions = g_list_prepend(NULL, order->rh_action);
+
+ } else if (order->rh_rsc != NULL) {
+ then_actions = find_actions(order->rh_rsc->actions,
+ order->rh_action_task, NULL);
+ if (then_actions == NULL) { // There aren't any
+ g_list_free(probes);
+ continue;
+ }
+ }
+
+ crm_trace("Implying 'probe then' orderings for '%s then %s' "
+ "(id=%d, type=%.6x)",
+ order->lh_action? order->lh_action->uuid : order->lh_action_task,
+ order->rh_action? order->rh_action->uuid : order->rh_action_task,
+ order->id, order->flags);
+
+ for (GList *probe_iter = probes; probe_iter != NULL;
+ probe_iter = probe_iter->next) {
+
+ pe_action_t *probe = (pe_action_t *) probe_iter->data;
+
+ for (GList *then_iter = then_actions; then_iter != NULL;
+ then_iter = then_iter->next) {
+
+ pe_action_t *then = (pe_action_t *) then_iter->data;
+
+ if (probe_needed_before_action(probe, then)) {
+ order_actions(probe, then, order_flags);
+ }
+ }
+ }
+
+ g_list_free(then_actions);
+ g_list_free(probes);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add necessary orderings between a probe and starts of clone instances
+ *
+ * These orderings are in addition to the ordering with the parent resource
+ * that was added when the probe was created.
+ *
+ * \param[in,out] probe Probe as 'first' action in an ordering
+ * \param[in,out] after 'then' action wrapper in the ordering
+ */
+static void
+add_start_orderings_for_probe(pe_action_t *probe, pe_action_wrapper_t *after)
+{
+ uint32_t flags = pe_order_optional|pe_order_runnable_left;
+
+ /* Although the ordering between the probe of the clone instance and the
+ * start of its parent has been added in pcmk__probe_rsc_on_node(), we
+ * avoided enforcing `pe_order_runnable_left` order type for that as long as
+ * any of the clone instances are running to prevent them from being
+ * unexpectedly stopped.
+ *
+ * On the other hand, we still need to prevent any inactive instances from
+ * starting unless the probe is runnable so that we don't risk starting too
+ * many instances before we know the state on all nodes.
+ */
+ if (after->action->rsc->variant <= pe_group
+ || pcmk_is_set(probe->flags, pe_action_runnable)
+ // The order type is already enforced for its parent.
+ || pcmk_is_set(after->type, pe_order_runnable_left)
+ || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
+ || !pcmk__str_eq(after->action->task, RSC_START, pcmk__str_none)) {
+ return;
+ }
+
+ crm_trace("Adding probe start orderings for '%s@%s (%s) "
+ "then instances of %s@%s'",
+ probe->uuid, pe__node_name(probe->node),
+ pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
+ after->action->uuid, pe__node_name(after->action->node));
+
+ for (GList *then_iter = after->action->actions_after; then_iter != NULL;
+ then_iter = then_iter->next) {
+
+ pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
+
+ if (then->action->rsc->running_on
+ || (pe__const_top_resource(then->action->rsc, false)
+ != after->action->rsc)
+ || !pcmk__str_eq(then->action->task, RSC_START, pcmk__str_none)) {
+ continue;
+ }
+
+ crm_trace("Adding probe start ordering for '%s@%s (%s) "
+ "then %s@%s' (type=%#.6x)",
+ probe->uuid, pe__node_name(probe->node),
+ pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
+ then->action->uuid, pe__node_name(then->action->node),
+ flags);
+
+        /* Prevent the instance from starting if the probe is unrunnable, but
+         * don't cause any other instances to stop if already active.
+ */
+ order_actions(probe, then->action, flags);
+ }
+
+ return;
+}
+
+/*!
+ * \internal
+ * \brief Order probes before restarts and re-promotes
+ *
+ * If a given ordering is a "probe then start" or "probe then promote" ordering,
+ * add an implicit "probe then stop/demote" ordering in case the action is part
+ * of a restart/re-promote, and do the same recursively for all actions ordered
+ * after the "then" action.
+ *
+ * \param[in,out] probe Probe as 'first' action in an ordering
+ * \param[in,out] after 'then' action in the ordering
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+add_restart_orderings_for_probe(pe_action_t *probe, pe_action_t *after,
+ pe_working_set_t *data_set)
+{
+ GList *iter = NULL;
+ bool interleave = false;
+ pe_resource_t *compatible_rsc = NULL;
+
+ // Validate that this is a resource probe followed by some action
+ if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
+ || (probe->rsc->variant != pe_native)
+ || !pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) {
+ return;
+ }
+
+ // Avoid running into any possible loop
+ if (pcmk_is_set(after->flags, pe_action_tracking)) {
+ return;
+ }
+ pe__set_action_flags(after, pe_action_tracking);
+
+ crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
+ probe->uuid, pe__node_name(probe->node),
+ after->uuid, pe__node_name(after->node));
+
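+ /* As a rough illustration: given an ordering "probe A then start B", if B
+ * is restarting (stop B -> start B), an implicit "probe A then stop B"
+ * ordering keeps B from being stopped before the probe result is known.
+ */
+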
+ /* Add restart orderings if "then" is for a different primitive.
+ * Orderings for collective resources will be added later.
+ */
+ if ((after->rsc != NULL) && (after->rsc->variant == pe_native)
+ && (probe->rsc != after->rsc)) {
+
+ GList *then_actions = NULL;
+
+ if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) {
+ then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP,
+ FALSE);
+
+ } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) {
+ then_actions = pe__resource_actions(after->rsc, NULL,
+ RSC_DEMOTE, FALSE);
+ }
+
+ for (iter = then_actions; iter != NULL; iter = iter->next) {
+ pe_action_t *then = (pe_action_t *) iter->data;
+
+ // Skip pseudo-actions (for example, those implied by fencing)
+ if (!pcmk_is_set(then->flags, pe_action_pseudo)) {
+ order_actions(probe, then, pe_order_optional);
+ }
+ }
+ g_list_free(then_actions);
+ }
+
+ /* Detect whether "then" is an interleaved clone action. For these, we want
+ * to add orderings only for the relevant instance.
+ */
+ if ((after->rsc != NULL)
+ && (after->rsc->variant > pe_group)) {
+ const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
+ XML_RSC_ATTR_INTERLEAVE);
+
+ interleave = crm_is_true(interleave_s);
+ if (interleave) {
+ compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
+ after->rsc,
+ RSC_ROLE_UNKNOWN,
+ false);
+ }
+ }
+
+ /* Now recursively do the same for all actions ordered after "then". This
+ * also handles collective resources since the collective action will be
+ * ordered before its individual instances' actions.
+ */
+ for (iter = after->actions_after; iter != NULL; iter = iter->next) {
+ pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) iter->data;
+
+ /* pe_order_implies_then is the reason why a required A.start
+ * implies/enforces B.start to be required too, which is the cause of
+ * B.restart/re-promote.
+ *
+ * It is not clear whether pe_order_implies_then_on_node should be handled
+ * here as well. It is currently used only for the unfencing case, which
+ * tends to introduce transition loops.
+ */
+ if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) {
+ /* The order type between a group/clone and its child such as
+ * B.start-> B_child.start is:
+ * pe_order_implies_first_printed | pe_order_runnable_left
+ *
+ * Proceed through the ordering chain and build dependencies with
+ * its children.
+ */
+ if ((after->rsc == NULL)
+ || (after->rsc->variant < pe_group)
+ || (probe->rsc->parent == after->rsc)
+ || (after_wrapper->action->rsc == NULL)
+ || (after_wrapper->action->rsc->variant > pe_group)
+ || (after->rsc != after_wrapper->action->rsc->parent)) {
+ continue;
+ }
+
+ /* Proceed to the children of a group or a non-interleaved clone.
+ * For an interleaved clone, proceed only to the relevant child.
+ */
+ if ((after->rsc->variant > pe_group) && interleave
+ && ((compatible_rsc == NULL)
+ || (compatible_rsc != after_wrapper->action->rsc))) {
+ continue;
+ }
+ }
+
+ crm_trace("Recursively adding probe restart orderings for "
+ "'%s@%s then %s@%s' (type=%#.6x)",
+ after->uuid, pe__node_name(after->node),
+ after_wrapper->action->uuid,
+ pe__node_name(after_wrapper->action->node),
+ after_wrapper->type);
+
+ add_restart_orderings_for_probe(probe, after_wrapper->action, data_set);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Clear the tracking flag on all scheduled actions
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+clear_actions_tracking_flag(pe_working_set_t *data_set)
+{
+ GList *gIter = NULL;
+
+ for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+
+ pe__clear_action_flags(action, pe_action_tracking);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add start and restart orderings for probes scheduled for a resource
+ *
+ * \param[in,out] rsc Resource whose probes should be ordered
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+add_start_restart_orderings_for_rsc(pe_resource_t *rsc,
+ pe_working_set_t *data_set)
+{
+ GList *probes = NULL;
+
+ // For collective resources, order each instance recursively
+ if (rsc->variant != pe_native) {
+ g_list_foreach(rsc->children,
+ (GFunc) add_start_restart_orderings_for_rsc, data_set);
+ return;
+ }
+
+ // Find all probes for given resource
+ probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
+
+ // Add probe restart orderings for each probe found
+ for (GList *iter = probes; iter != NULL; iter = iter->next) {
+ pe_action_t *probe = (pe_action_t *) iter->data;
+
+ for (GList *then_iter = probe->actions_after; then_iter != NULL;
+ then_iter = then_iter->next) {
+
+ pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
+
+ add_start_orderings_for_probe(probe, then);
+ add_restart_orderings_for_probe(probe, then->action, data_set);
+ clear_actions_tracking_flag(data_set);
+ }
+ }
+
+ g_list_free(probes);
+}
+
+/*!
+ * \internal
+ * \brief Add "A then probe B" orderings for "A then B" orderings
+ *
+ * \param[in,out] data_set Cluster working set
+ *
+ * \note This function is currently disabled (see next comment).
+ */
+static void
+order_then_probes(pe_working_set_t *data_set)
+{
+#if 0
+ /* Given an ordering "A then B", we would prefer to wait for A to be started
+ * before probing B.
+ *
+ * For example, if A is a filesystem which B can't even run without, it
+ * would be helpful if the author of B's agent could assume that A is
+ * running before B.monitor will be called.
+ *
+ * However, we can't _only_ probe after A is running, otherwise we wouldn't
+ * detect the state of B if A could not be started. We can't even do an
+ * opportunistic version of this, because B may be moving:
+ *
+ * A.stop -> A.start -> B.probe -> B.stop -> B.start
+ *
+ * and if we add B.stop -> A.stop here, we get a loop:
+ *
+ * A.stop -> A.start -> B.probe -> B.stop -> A.stop
+ *
+ * We could kill the "B.probe -> B.stop" dependency, but that could mean
+ * stopping B "too" soon, because B.start must wait for the probe, and
+ * we don't want to stop B if we can't start it.
+ *
+ * We could add the ordering only if A is an anonymous clone with
+ * clone-max == node-max (since we'll never be moving it). However, we could
+ * still be stopping one instance at the same time as starting another.
+ *
+ * The complexity of checking for allowed conditions combined with the ever
+ * narrowing use case suggests that this code should remain disabled until
+ * someone gets smarter.
+ */
+ for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ pe_action_t *start = NULL;
+ GList *actions = NULL;
+ GList *probes = NULL;
+
+ actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);
+
+ if (actions) {
+ start = actions->data;
+ g_list_free(actions);
+ }
+
+ if (start == NULL) {
+ crm_err("No start action for %s", rsc->id);
+ continue;
+ }
+
+ probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
+
+ for (actions = start->actions_before; actions != NULL;
+ actions = actions->next) {
+
+ pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;
+
+ pe_action_t *first = before->action;
+ pe_resource_t *first_rsc = first->rsc;
+
+ if (first->required_runnable_before) {
+ for (GList *clone_actions = first->actions_before;
+ clone_actions != NULL;
+ clone_actions = clone_actions->next) {
+
+ before = (pe_action_wrapper_t *) clone_actions->data;
+
+ crm_trace("Testing '%s then %s' for %s",
+ first->uuid, before->action->uuid, start->uuid);
+
+ CRM_ASSERT(before->action->rsc != NULL);
+ first_rsc = before->action->rsc;
+ break;
+ }
+
+ } else if (!pcmk__str_eq(first->task, RSC_START, pcmk__str_none)) {
+ crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
+ }
+
+ if (first_rsc == NULL) {
+ continue;
+
+ } else if (pe__const_top_resource(first_rsc, false)
+ == pe__const_top_resource(start->rsc, false)) {
+ crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
+ continue;
+
+ } else if (!pe_rsc_is_clone(pe__const_top_resource(first_rsc,
+ false))) {
+ crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
+ continue;
+ }
+
+ crm_err("Applying %s before %s %d", first->uuid, start->uuid,
+ pe__const_top_resource(first_rsc, false)->variant);
+
+ for (GList *probe_iter = probes; probe_iter != NULL;
+ probe_iter = probe_iter->next) {
+
+ pe_action_t *probe = (pe_action_t *) probe_iter->data;
+
+ crm_err("Ordering %s before %s", first->uuid, probe->uuid);
+ order_actions(first, probe, pe_order_optional);
+ }
+ }
+ }
+#endif
+}
+
+void
+pcmk__order_probes(pe_working_set_t *data_set)
+{
+ // Add orderings for "probe then X"
+ g_list_foreach(data_set->resources,
+ (GFunc) add_start_restart_orderings_for_rsc, data_set);
+ add_probe_orderings_for_stops(data_set);
+
+ order_then_probes(data_set);
+}
+
+/*!
+ * \internal
+ * \brief Schedule any probes needed
+ *
+ * \param[in,out] data_set Cluster working set
+ *
+ * \note This may also schedule fencing of failed remote nodes.
+ */
+void
+pcmk__schedule_probes(pe_working_set_t *data_set)
+{
+ // Schedule probes on each node in the cluster as needed
+ for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *node = (pe_node_t *) iter->data;
+ const char *probed = NULL;
+
+ if (!node->details->online) { // Don't probe offline nodes
+ if (pcmk__is_failed_remote_node(node)) {
+ pe_fence_node(data_set, node,
+ "the connection is unrecoverable", FALSE);
+ }
+ continue;
+
+ } else if (node->details->unclean) { // ... or nodes that need fencing
+ continue;
+
+ } else if (!node->details->rsc_discovery_enabled) {
+ // The user requested that probes not be done on this node
+ continue;
+ }
+
+ /* This is no longer needed for live clusters, since the probe_complete
+ * node attribute will never be in the CIB. However, this is still useful
+ * for processing old saved CIBs (< 1.1.14), including the
+ * reprobe-target_rc regression test.
+ */
+ probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
+ if (probed != NULL && crm_is_true(probed) == FALSE) {
+ pe_action_t *probe_op = NULL;
+
+ probe_op = custom_action(NULL,
+ crm_strdup_printf("%s-%s", CRM_OP_REPROBE,
+ node->details->uname),
+ CRM_OP_REPROBE, node, FALSE, TRUE,
+ data_set);
+ add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT,
+ XML_BOOLEAN_TRUE);
+ continue;
+ }
+
+ // Probe each resource in the cluster on this node, as needed
+ pcmk__probe_resource_list(data_set->resources, node);
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c
new file mode 100644
index 0000000..d12d017
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_promotable.c
@@ -0,0 +1,1286 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Add implicit promotion ordering for a promotable instance
+ *
+ * \param[in,out] clone Clone resource
+ * \param[in,out] child Instance of \p clone being ordered
+ * \param[in,out] last Previous instance ordered (NULL if \p child is first)
+ */
+static void
+order_instance_promotion(pe_resource_t *clone, pe_resource_t *child,
+ pe_resource_t *last)
+{
+ // "Promote clone" -> promote instance -> "clone promoted"
+ pcmk__order_resource_actions(clone, RSC_PROMOTE, child, RSC_PROMOTE,
+ pe_order_optional);
+ pcmk__order_resource_actions(child, RSC_PROMOTE, clone, RSC_PROMOTED,
+ pe_order_optional);
+
+ // If clone is ordered, order this instance relative to last
+ if ((last != NULL) && pe__clone_is_ordered(clone)) {
+ pcmk__order_resource_actions(last, RSC_PROMOTE, child, RSC_PROMOTE,
+ pe_order_optional);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add implicit demotion ordering for a promotable instance
+ *
+ * \param[in,out] clone Clone resource
+ * \param[in,out] child Instance of \p clone being ordered
+ * \param[in] last Previous instance ordered (NULL if \p child is first)
+ */
+static void
+order_instance_demotion(pe_resource_t *clone, pe_resource_t *child,
+ pe_resource_t *last)
+{
+ // "Demote clone" -> demote instance -> "clone demoted"
+ pcmk__order_resource_actions(clone, RSC_DEMOTE, child, RSC_DEMOTE,
+ pe_order_implies_first_printed);
+ pcmk__order_resource_actions(child, RSC_DEMOTE, clone, RSC_DEMOTED,
+ pe_order_implies_then_printed);
+
+ // If clone is ordered, order this instance relative to last
+ if ((last != NULL) && pe__clone_is_ordered(clone)) {
+ pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE,
+ pe_order_optional);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether an instance will be promoted or demoted
+ *
+ * \param[in] rsc Instance to check
+ * \param[out] demoting If \p rsc will be demoted, this will be set to true
+ * \param[out] promoting If \p rsc will be promoted, this will be set to true
+ */
+static void
+check_for_role_change(const pe_resource_t *rsc, bool *demoting, bool *promoting)
+{
+ const GList *iter = NULL;
+
+ // If this is a cloned group, check group members recursively
+ if (rsc->children != NULL) {
+ for (iter = rsc->children; iter != NULL; iter = iter->next) {
+ check_for_role_change((const pe_resource_t *) iter->data,
+ demoting, promoting);
+ }
+ return;
+ }
+
+ for (iter = rsc->actions; iter != NULL; iter = iter->next) {
+ const pe_action_t *action = (const pe_action_t *) iter->data;
+
+ if (*promoting && *demoting) {
+ return;
+
+ } else if (pcmk_is_set(action->flags, pe_action_optional)) {
+ continue;
+
+ } else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_none)) {
+ *demoting = true;
+
+ } else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_none)) {
+ *promoting = true;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Add promoted-role location constraint scores to an instance's priority
+ *
+ * Adjust a promotable clone instance's promotion priority by the scores of any
+ * location constraints in a list that are both limited to the promoted role and
+ * for the node where the instance will be placed.
+ *
+ * \param[in,out] child Promotable clone instance
+ * \param[in] location_constraints List of location constraints to apply
+ * \param[in] chosen Node where \p child will be placed
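+ *
+ * For example (hypothetical configuration), a location constraint such as
+ * <rsc_location id="loc-promoted" rsc="C" role="Promoted" node="node1"
+ * score="50"/> would add 50 to the promotion priority of the instance
+ * of C placed on node1.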
+ */
+static void
+apply_promoted_locations(pe_resource_t *child,
+ const GList *location_constraints,
+ const pe_node_t *chosen)
+{
+ for (const GList *iter = location_constraints; iter; iter = iter->next) {
+ const pe__location_t *location = iter->data;
+ pe_node_t *weighted_node = NULL;
+
+ if (location->role_filter == RSC_ROLE_PROMOTED) {
+ weighted_node = pe_find_node_id(location->node_list_rh,
+ chosen->details->id);
+ }
+ if (weighted_node != NULL) {
+ int new_priority = pcmk__add_scores(child->priority,
+ weighted_node->weight);
+
+ pe_rsc_trace(child,
+ "Applying location %s to %s promotion priority on %s: "
+ "%s + %s = %s",
+ location->id, child->id, pe__node_name(weighted_node),
+ pcmk_readable_score(child->priority),
+ pcmk_readable_score(weighted_node->weight),
+ pcmk_readable_score(new_priority));
+ child->priority = new_priority;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Get the node that an instance will be promoted on
+ *
+ * \param[in] rsc Promotable clone instance to check
+ *
+ * \return Node that \p rsc will be promoted on, or NULL if none
+ */
+static pe_node_t *
+node_to_be_promoted_on(const pe_resource_t *rsc)
+{
+ pe_node_t *node = NULL;
+ pe_node_t *local_node = NULL;
+ const pe_resource_t *parent = NULL;
+
+ // If this is a cloned group, bail if any group member can't be promoted
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child = (pe_resource_t *) iter->data;
+
+ if (node_to_be_promoted_on(child) == NULL) {
+ pe_rsc_trace(rsc,
+ "%s can't be promoted because member %s can't",
+ rsc->id, child->id);
+ return NULL;
+ }
+ }
+
+ node = rsc->fns->location(rsc, NULL, FALSE);
+ if (node == NULL) {
+ pe_rsc_trace(rsc, "%s can't be promoted because it won't be active",
+ rsc->id);
+ return NULL;
+
+ } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ if (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED) {
+ crm_notice("Unmanaged instance %s will be left promoted on %s",
+ rsc->id, pe__node_name(node));
+ } else {
+ pe_rsc_trace(rsc, "%s can't be promoted because it is unmanaged",
+ rsc->id);
+ return NULL;
+ }
+
+ } else if (rsc->priority < 0) {
+ pe_rsc_trace(rsc,
+ "%s can't be promoted because its promotion priority %d "
+ "is negative",
+ rsc->id, rsc->priority);
+ return NULL;
+
+ } else if (!pcmk__node_available(node, false, true)) {
+ pe_rsc_trace(rsc, "%s can't be promoted because %s can't run resources",
+ rsc->id, pe__node_name(node));
+ return NULL;
+ }
+
+ parent = pe__const_top_resource(rsc, false);
+ local_node = pe_hash_table_lookup(parent->allowed_nodes, node->details->id);
+
+ if (local_node == NULL) {
+ /* It should not be possible for the scheduler to have allocated the
+ * instance to a node where its parent is not allowed, but it's good to
+ * have a fail-safe.
+ */
+ if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ crm_warn("%s can't be promoted because %s is not allowed on %s "
+ "(scheduler bug?)",
+ rsc->id, parent->id, pe__node_name(node));
+ } // else the instance is unmanaged and already promoted
+ return NULL;
+
+ } else if ((local_node->count >= pe__clone_promoted_node_max(parent))
+ && pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pe_rsc_trace(rsc,
+ "%s can't be promoted because %s has "
+ "maximum promoted instances already",
+ rsc->id, pe__node_name(node));
+ return NULL;
+ }
+
+ return local_node;
+}
+
+/*!
+ * \internal
+ * \brief Compare two promotable clone instances by promotion priority
+ *
+ * \param[in] a First instance to compare
+ * \param[in] b Second instance to compare
+ *
+ * \return A negative number if \p a has higher promotion priority,
+ * a positive number if \p b has higher promotion priority,
+ * or 0 if promotion priorities are equal
+ */
+static gint
+cmp_promotable_instance(gconstpointer a, gconstpointer b)
+{
+ const pe_resource_t *rsc1 = (const pe_resource_t *) a;
+ const pe_resource_t *rsc2 = (const pe_resource_t *) b;
+
+ enum rsc_role_e role1 = RSC_ROLE_UNKNOWN;
+ enum rsc_role_e role2 = RSC_ROLE_UNKNOWN;
+
+ CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL));
+
+ // Check sort index set by pcmk__set_instance_roles()
+ if (rsc1->sort_index > rsc2->sort_index) {
+ pe_rsc_trace(rsc1,
+ "%s has higher promotion priority than %s "
+ "(sort index %d > %d)",
+ rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
+ return -1;
+ } else if (rsc1->sort_index < rsc2->sort_index) {
+ pe_rsc_trace(rsc1,
+ "%s has lower promotion priority than %s "
+ "(sort index %d < %d)",
+ rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
+ return 1;
+ }
+
+ // If those are the same, prefer instance whose current role is higher
+ role1 = rsc1->fns->state(rsc1, TRUE);
+ role2 = rsc2->fns->state(rsc2, TRUE);
+ if (role1 > role2) {
+ pe_rsc_trace(rsc1,
+ "%s has higher promotion priority than %s "
+ "(higher current role)",
+ rsc1->id, rsc2->id);
+ return -1;
+ } else if (role1 < role2) {
+ pe_rsc_trace(rsc1,
+ "%s has lower promotion priority than %s "
+ "(lower current role)",
+ rsc1->id, rsc2->id);
+ return 1;
+ }
+
+ // Finally, do normal clone instance sorting
+ return pcmk__cmp_instance(a, b);
+}
+
+/*!
+ * \internal
+ * \brief Add a promotable clone instance's sort index to its node's weight
+ *
+ * Add a promotable clone instance's sort index (which sums its promotion
+ * preferences and scores of relevant location constraints for the promoted
+ * role) to the node weight of the instance's allocated node.
+ *
+ * \param[in] data Promotable clone instance
+ * \param[in,out] user_data Clone parent of \p data
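+ *
+ * For example (hypothetical values), an instance with sort index 10 whose
+ * chosen node currently has weight 5 leaves that node with weight 15.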
+ */
+static void
+add_sort_index_to_node_weight(gpointer data, gpointer user_data)
+{
+ const pe_resource_t *child = (const pe_resource_t *) data;
+ pe_resource_t *clone = (pe_resource_t *) user_data;
+
+ pe_node_t *node = NULL;
+ const pe_node_t *chosen = NULL;
+
+ if (child->sort_index < 0) {
+ pe_rsc_trace(clone, "Not adding sort index of %s: negative", child->id);
+ return;
+ }
+
+ chosen = child->fns->location(child, NULL, FALSE);
+ if (chosen == NULL) {
+ pe_rsc_trace(clone, "Not adding sort index of %s: inactive", child->id);
+ return;
+ }
+
+ node = (pe_node_t *) pe_hash_table_lookup(clone->allowed_nodes,
+ chosen->details->id);
+ CRM_ASSERT(node != NULL);
+
+ node->weight = pcmk__add_scores(child->sort_index, node->weight);
+ pe_rsc_trace(clone,
+ "Added cumulative priority of %s (%s) to score on %s (now %s)",
+ child->id, pcmk_readable_score(child->sort_index),
+ pe__node_name(node), pcmk_readable_score(node->weight));
+}
+
+/*!
+ * \internal
+ * \brief Apply colocation to dependent's node weights if for promoted role
+ *
+ * \param[in,out] data Colocation constraint to apply
+ * \param[in,out] user_data Promotable clone that is constraint's dependent
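+ *
+ * \note The colocation score is scaled to a factor relative to INFINITY
+ * (1000000); e.g. a score of 500000 (hypothetical value) is applied
+ * with factor 0.5.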
+ */
+static void
+apply_coloc_to_dependent(gpointer data, gpointer user_data)
+{
+ pcmk__colocation_t *constraint = (pcmk__colocation_t *) data;
+ pe_resource_t *clone = (pe_resource_t *) user_data;
+ pe_resource_t *primary = constraint->primary;
+ uint32_t flags = pcmk__coloc_select_default;
+ float factor = constraint->score / (float) INFINITY;
+
+ if (constraint->dependent_role != RSC_ROLE_PROMOTED) {
+ return;
+ }
+ if (constraint->score < INFINITY) {
+ flags = pcmk__coloc_select_active;
+ }
+ pe_rsc_trace(clone, "Applying colocation %s (promoted %s with %s) @%s",
+ constraint->id, constraint->dependent->id,
+ constraint->primary->id,
+ pcmk_readable_score(constraint->score));
+ primary->cmds->add_colocated_node_scores(primary, clone->id,
+ &clone->allowed_nodes,
+ constraint->node_attribute, factor,
+ flags);
+}
+
+/*!
+ * \internal
+ * \brief Apply colocation to primary's node weights if for promoted role
+ *
+ * \param[in,out] data Colocation constraint to apply
+ * \param[in,out] user_data Promotable clone that is constraint's primary
+ */
+static void
+apply_coloc_to_primary(gpointer data, gpointer user_data)
+{
+ pcmk__colocation_t *constraint = (pcmk__colocation_t *) data;
+ pe_resource_t *clone = (pe_resource_t *) user_data;
+ pe_resource_t *dependent = constraint->dependent;
+ const float factor = constraint->score / (float) INFINITY;
+ const uint32_t flags = pcmk__coloc_select_active
+ |pcmk__coloc_select_nonnegative;
+
+ if ((constraint->primary_role != RSC_ROLE_PROMOTED)
+ || !pcmk__colocation_has_influence(constraint, NULL)) {
+ return;
+ }
+
+ pe_rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s",
+ constraint->id, constraint->dependent->id,
+ constraint->primary->id,
+ pcmk_readable_score(constraint->score));
+ dependent->cmds->add_colocated_node_scores(dependent, clone->id,
+ &clone->allowed_nodes,
+ constraint->node_attribute,
+ factor, flags);
+}
+
+/*!
+ * \internal
+ * \brief Set clone instance's sort index to its node's weight
+ *
+ * \param[in,out] data Promotable clone instance
+ * \param[in] user_data Parent clone of \p data
+ */
+static void
+set_sort_index_to_node_weight(gpointer data, gpointer user_data)
+{
+ pe_resource_t *child = (pe_resource_t *) data;
+ const pe_resource_t *clone = (const pe_resource_t *) user_data;
+
+ pe_node_t *chosen = child->fns->location(child, NULL, FALSE);
+
+ if (!pcmk_is_set(child->flags, pe_rsc_managed)
+ && (child->next_role == RSC_ROLE_PROMOTED)) {
+ child->sort_index = INFINITY;
+ pe_rsc_trace(clone,
+ "Final sort index for %s is INFINITY (unmanaged promoted)",
+ child->id);
+
+ } else if ((chosen == NULL) || (child->sort_index < 0)) {
+ pe_rsc_trace(clone,
+ "Final sort index for %s is %d (ignoring node weight)",
+ child->id, child->sort_index);
+
+ } else {
+ const pe_node_t *node = NULL;
+
+ node = pe_hash_table_lookup(clone->allowed_nodes, chosen->details->id);
+ CRM_ASSERT(node != NULL);
+
+ child->sort_index = node->weight;
+ pe_rsc_trace(clone,
+ "Merging weights for %s: final sort index for %s is %d",
+ clone->id, child->id, child->sort_index);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Sort a promotable clone's instances by descending promotion priority
+ *
+ * \param[in,out] clone Promotable clone to sort
+ */
+static void
+sort_promotable_instances(pe_resource_t *clone)
+{
+ if (pe__set_clone_flag(clone, pe__clone_promotion_constrained)
+ == pcmk_rc_already) {
+ return;
+ }
+ pe__set_resource_flags(clone, pe_rsc_merging);
+
+ for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child = (pe_resource_t *) iter->data;
+
+ pe_rsc_trace(clone,
+ "Merging weights for %s: initial sort index for %s is %d",
+ clone->id, child->id, child->sort_index);
+ }
+ pe__show_node_weights(true, clone, "Before", clone->allowed_nodes,
+ clone->cluster);
+
+ /* Because the this_with_colocations() and with_this_colocations() methods
+ * boil down to copies of rsc_cons and rsc_cons_lhs for clones, we can use
+ * those here directly for efficiency.
+ */
+ g_list_foreach(clone->children, add_sort_index_to_node_weight, clone);
+ g_list_foreach(clone->rsc_cons, apply_coloc_to_dependent, clone);
+ g_list_foreach(clone->rsc_cons_lhs, apply_coloc_to_primary, clone);
+
+ // Ban resource from all nodes if it needs a ticket but doesn't have it
+ pcmk__require_promotion_tickets(clone);
+
+ pe__show_node_weights(true, clone, "After", clone->allowed_nodes,
+ clone->cluster);
+
+ // Reset sort indexes to final node weights
+ g_list_foreach(clone->children, set_sort_index_to_node_weight, clone);
+
+ // Finally, sort instances in descending order of promotion priority
+ clone->children = g_list_sort(clone->children, cmp_promotable_instance);
+ pe__clear_resource_flags(clone, pe_rsc_merging);
+}
+
+/*!
+ * \internal
+ * \brief Find the active instance (if any) of an anonymous clone on a node
+ *
+ * \param[in] clone Anonymous clone to check
+ * \param[in] id Instance ID (without instance number) to check
+ * \param[in] node Node to check
+ *
+ * \return Instance of \p clone matching \p id that is active on \p node,
+ * or NULL if none
+ */
+static pe_resource_t *
+find_active_anon_instance(const pe_resource_t *clone, const char *id,
+ const pe_node_t *node)
+{
+ for (GList *iter = clone->children; iter; iter = iter->next) {
+ pe_resource_t *child = iter->data;
+ pe_resource_t *active = NULL;
+
+ // Use ->find_rsc() in case this is a cloned group
+ active = clone->fns->find_rsc(child, id, node,
+ pe_find_clone|pe_find_current);
+ if (active != NULL) {
+ return active;
+ }
+ }
+ return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an anonymous clone instance is known on a node
+ *
+ * \param[in] clone Anonymous clone to check
+ * \param[in] id Instance ID (without instance number) to check
+ * \param[in] node Node to check
+ *
+ * \return true if \p id instance of \p clone is known on \p node,
+ * otherwise false
+ */
+static bool
+anonymous_known_on(const pe_resource_t *clone, const char *id,
+ const pe_node_t *node)
+{
+ for (GList *iter = clone->children; iter; iter = iter->next) {
+ pe_resource_t *child = iter->data;
+
+ /* Use ->find_rsc() because this might be a cloned group, and knowing
+ * that other members of the group are known here implies nothing.
+ */
+ child = clone->fns->find_rsc(child, id, NULL, pe_find_clone);
+ CRM_LOG_ASSERT(child != NULL);
+ if (child != NULL) {
+ if (g_hash_table_lookup(child->known_on, node->details->id)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node is allowed to run a resource
+ *
+ * \param[in] rsc Resource to check
+ * \param[in] node Node to check
+ *
+ * \return true if \p node is allowed to run \p rsc, otherwise false
+ */
+static bool
+is_allowed(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
+ node->details->id);
+
+ return (allowed != NULL) && (allowed->weight >= 0);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a clone instance's promotion score should be considered
+ *
+ * \param[in] rsc Promotable clone instance to check
+ * \param[in] node Node where score would be applied
+ *
+ * \return true if \p rsc's promotion score should be considered on \p node,
+ * otherwise false
+ */
+static bool
+promotion_score_applies(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ char *id = clone_strip(rsc->id);
+ const pe_resource_t *parent = pe__const_top_resource(rsc, false);
+ pe_resource_t *active = NULL;
+ const char *reason = "allowed";
+
+ // Some checks apply only to anonymous clone instances
+ if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
+
+ // If instance is active on the node, its score definitely applies
+ active = find_active_anon_instance(parent, id, node);
+ if (active == rsc) {
+ reason = "active";
+ goto check_allowed;
+ }
+
+ /* If *no* instance is active on this node, this instance's score will
+ * count if it has been probed on this node.
+ */
+ if ((active == NULL) && anonymous_known_on(parent, id, node)) {
+ reason = "probed";
+ goto check_allowed;
+ }
+ }
+
+ /* If this clone's status is unknown on *all* nodes (e.g. cluster startup),
+ * take all instances' scores into account, to make sure we use any
+ * permanent promotion scores.
+ */
+ if ((rsc->running_on == NULL) && (g_hash_table_size(rsc->known_on) == 0)) {
+ reason = "none probed";
+ goto check_allowed;
+ }
+
+ /* Otherwise, we've probed and/or started the resource *somewhere*, so
+ * consider promotion scores on nodes where we know the status.
+ */
+ if ((pe_hash_table_lookup(rsc->known_on, node->details->id) != NULL)
+ || (pe_find_node_id(rsc->running_on, node->details->id) != NULL)) {
+ reason = "known";
+ } else {
+ pe_rsc_trace(rsc,
+ "Ignoring %s promotion score (for %s) on %s: not probed",
+ rsc->id, id, pe__node_name(node));
+ free(id);
+ return false;
+ }
+
+check_allowed:
+ if (is_allowed(rsc, node)) {
+ pe_rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s",
+ rsc->id, id, pe__node_name(node), reason);
+ free(id);
+ return true;
+ }
+
+ pe_rsc_trace(rsc, "Ignoring %s promotion score (for %s) on %s: not allowed",
+ rsc->id, id, pe__node_name(node));
+ free(id);
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Get the value of a promotion score node attribute
+ *
+ * \param[in] rsc Promotable clone instance to get promotion score for
+ * \param[in] node Node to get promotion score for
+ * \param[in] name Resource name to use in promotion score attribute name
+ *
+ * \return Value of promotion score node attribute for \p rsc on \p node
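+ *
+ * \note For a resource named "myrsc" (hypothetical name), this looks up the
+ * node attribute that "crm_attribute --promotion" would have set,
+ * historically named "master-myrsc".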
+ */
+static const char *
+promotion_attr_value(const pe_resource_t *rsc, const pe_node_t *node,
+ const char *name)
+{
+ char *attr_name = NULL;
+ const char *attr_value = NULL;
+
+ CRM_CHECK((rsc != NULL) && (node != NULL) && (name != NULL), return NULL);
+
+ attr_name = pcmk_promotion_score_name(name);
+ attr_value = pe_node_attribute_calculated(node, attr_name, rsc);
+ free(attr_name);
+ return attr_value;
+}
+
+/*!
+ * \internal
+ * \brief Get the promotion score for a clone instance on a node
+ *
+ * \param[in] rsc Promotable clone instance to get score for
+ * \param[in] node Node to get score for
+ * \param[out] is_default If non-NULL, will be set true if no score available
+ *
+ * \return Promotion score for \p rsc on \p node (or 0 if none)
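+ *
+ * \note For an instance of a cloned group, member scores are summed; e.g.
+ * members with scores 5 and 10 (hypothetical values) give the instance
+ * a promotion score of 15.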
+ */
+static int
+promotion_score(const pe_resource_t *rsc, const pe_node_t *node,
+ bool *is_default)
+{
+ char *name = NULL;
+ const char *attr_value = NULL;
+
+ if (is_default != NULL) {
+ *is_default = true;
+ }
+
+ CRM_CHECK((rsc != NULL) && (node != NULL), return 0);
+
+ /* If this is an instance of a cloned group, the promotion score is the sum
+ * of all members' promotion scores.
+ */
+ if (rsc->children != NULL) {
+ int score = 0;
+
+ for (const GList *iter = rsc->children;
+ iter != NULL; iter = iter->next) {
+
+ const pe_resource_t *child = (const pe_resource_t *) iter->data;
+ bool child_default = false;
+ int child_score = promotion_score(child, node, &child_default);
+
+ if (!child_default && (is_default != NULL)) {
+ *is_default = false;
+ }
+ score += child_score;
+ }
+ return score;
+ }
+
+ if (!promotion_score_applies(rsc, node)) {
+ return 0;
+ }
+
+ /* For the promotion score attribute name, use the name the resource is
+ * known as in resource history, since that's what crm_attribute --promotion
+ * would have used.
+ */
+ name = (rsc->clone_name == NULL)? rsc->id : rsc->clone_name;
+
+ attr_value = promotion_attr_value(rsc, node, name);
+ if (attr_value != NULL) {
+ pe_rsc_trace(rsc, "Promotion score for %s on %s = %s",
+ name, pe__node_name(node), pcmk__s(attr_value, "(unset)"));
+ } else if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
+ /* If we don't have any resource history yet, we won't have clone_name.
+ * In that case, for anonymous clones, try the resource name without
+ * any instance number.
+ */
+ name = clone_strip(rsc->id);
+ if (strcmp(rsc->id, name) != 0) {
+ attr_value = promotion_attr_value(rsc, node, name);
+ pe_rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s",
+ name, pe__node_name(node), rsc->id,
+ pcmk__s(attr_value, "(unset)"));
+ }
+ free(name);
+ }
+
+ if (attr_value == NULL) {
+ return 0;
+ }
+
+ if (is_default != NULL) {
+ *is_default = false;
+ }
+ return char2score(attr_value);
+}
+
+/*!
+ * \internal
+ * \brief Include promotion scores in instances' node weights and priorities
+ *
+ * \param[in,out] rsc Promotable clone resource to update
+ */
+void
+pcmk__add_promotion_scores(pe_resource_t *rsc)
+{
+ if (pe__set_clone_flag(rsc, pe__clone_promotion_added) == pcmk_rc_already) {
+ return;
+ }
+
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
+
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+ int score, new_score;
+
+ g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ if (!pcmk__node_available(node, false, false)) {
+ /* This node will never be promoted, so don't apply the
+ * promotion score, as that may lead to clone shuffling.
+ */
+ continue;
+ }
+
+ score = promotion_score(child_rsc, node, NULL);
+ if (score > 0) {
+ new_score = pcmk__add_scores(node->weight, score);
+ if (new_score != node->weight) { // Could remain INFINITY
+ node->weight = new_score;
+ pe_rsc_trace(rsc,
+ "Added %s promotion priority (%s) to score "
+ "on %s (now %s)",
+ child_rsc->id, pcmk_readable_score(score),
+ pe__node_name(node),
+ pcmk_readable_score(new_score));
+ }
+ }
+
+ if (score > child_rsc->priority) {
+ pe_rsc_trace(rsc,
+ "Updating %s priority to promotion score (%d->%d)",
+ child_rsc->id, child_rsc->priority, score);
+ child_rsc->priority = score;
+ }
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief If a resource's current role is started, change it to unpromoted
+ *
+ * \param[in,out] data Resource to update
+ * \param[in] user_data Ignored
+ */
+static void
+set_current_role_unpromoted(void *data, void *user_data)
+{
+ pe_resource_t *rsc = (pe_resource_t *) data;
+
+ if (rsc->role == RSC_ROLE_STARTED) {
+ // Promotable clones should use unpromoted role instead of started
+ rsc->role = RSC_ROLE_UNPROMOTED;
+ }
+ g_list_foreach(rsc->children, set_current_role_unpromoted, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Set a resource's next role to unpromoted (or stopped if unassigned)
+ *
+ * \param[in,out] data Resource to update
+ * \param[in] user_data Ignored
+ */
+static void
+set_next_role_unpromoted(void *data, void *user_data)
+{
+ pe_resource_t *rsc = (pe_resource_t *) data;
+ GList *assigned = NULL;
+
+ rsc->fns->location(rsc, &assigned, FALSE);
+ if (assigned == NULL) {
+ pe__set_next_role(rsc, RSC_ROLE_STOPPED, "stopped instance");
+ } else {
+ pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED, "unpromoted instance");
+ g_list_free(assigned);
+ }
+ g_list_foreach(rsc->children, set_next_role_unpromoted, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Set a resource's next role to promoted if not already set
+ *
+ * \param[in,out] data Resource to update
+ * \param[in] user_data Ignored
+ */
+static void
+set_next_role_promoted(void *data, gpointer user_data)
+{
+ pe_resource_t *rsc = (pe_resource_t *) data;
+
+ if (rsc->next_role == RSC_ROLE_UNKNOWN) {
+ pe__set_next_role(rsc, RSC_ROLE_PROMOTED, "promoted instance");
+ }
+ g_list_foreach(rsc->children, set_next_role_promoted, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Show instance's promotion score on node where it will be active
+ *
+ * \param[in,out] instance Promotable clone instance to show
+ */
+static void
+show_promotion_score(pe_resource_t *instance)
+{
+ pe_node_t *chosen = instance->fns->location(instance, NULL, FALSE);
+
+ if (pcmk_is_set(instance->cluster->flags, pe_flag_show_scores)
+ && !pcmk__is_daemon && (instance->cluster->priv != NULL)) {
+
+ pcmk__output_t *out = instance->cluster->priv;
+
+ out->message(out, "promotion-score", instance, chosen,
+ pcmk_readable_score(instance->sort_index));
+ } else {
+ pe_rsc_debug(pe__const_top_resource(instance, false),
+ "%s promotion score on %s: sort=%s priority=%s",
+ instance->id,
+ ((chosen == NULL)? "none" : pe__node_name(chosen)),
+ pcmk_readable_score(instance->sort_index),
+ pcmk_readable_score(instance->priority));
+ }
+}
+
+/*!
+ * \internal
+ * \brief Set a clone instance's promotion priority
+ *
+ * \param[in,out] data Promotable clone instance to update
+ * \param[in] user_data Instance's parent clone
+ */
+static void
+set_instance_priority(gpointer data, gpointer user_data)
+{
+ pe_resource_t *instance = (pe_resource_t *) data;
+ const pe_resource_t *clone = (const pe_resource_t *) user_data;
+ const pe_node_t *chosen = NULL;
+ enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
+ GList *list = NULL;
+
+ pe_rsc_trace(clone, "Assigning priority for %s: %s", instance->id,
+ role2text(instance->next_role));
+
+ if (instance->fns->state(instance, TRUE) == RSC_ROLE_STARTED) {
+ set_current_role_unpromoted(instance, NULL);
+ }
+
+ // Only an instance that will be active can be promoted
+ chosen = instance->fns->location(instance, &list, FALSE);
+ if (pcmk__list_of_multiple(list)) {
+ pcmk__config_err("Cannot promote non-colocated child %s",
+ instance->id);
+ }
+ g_list_free(list);
+ if (chosen == NULL) {
+ return;
+ }
+
+ next_role = instance->fns->state(instance, FALSE);
+ switch (next_role) {
+ case RSC_ROLE_STARTED:
+ case RSC_ROLE_UNKNOWN:
+ // Set instance priority to its promotion score (or -1 if none)
+ {
+ bool is_default = false;
+
+ instance->priority = promotion_score(instance, chosen,
+ &is_default);
+ if (is_default) {
+ /* Default to -1 if no value is set. This allows instances
+ * eligible for promotion to be specified based solely on
+ * rsc_location constraints, but prevents any instance from
+ * being promoted if neither a constraint nor a promotion
+ * score is present.
+ */
+ instance->priority = -1;
+ }
+ }
+ break;
+
+ case RSC_ROLE_UNPROMOTED:
+ case RSC_ROLE_STOPPED:
+ // Instance can't be promoted
+ instance->priority = -INFINITY;
+ break;
+
+ case RSC_ROLE_PROMOTED:
+ // Nothing needed (re-creating actions after scheduling fencing)
+ break;
+
+ default:
+ CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s",
+ next_role, instance->id));
+ }
+
+ // Add relevant location constraint scores for promoted role
+ apply_promoted_locations(instance, instance->rsc_location, chosen);
+ apply_promoted_locations(instance, clone->rsc_location, chosen);
+
+ // Consider instance's role-based colocations with other resources
+ list = pcmk__this_with_colocations(instance);
+ for (GList *iter = list; iter != NULL; iter = iter->next) {
+ pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data;
+
+ instance->cmds->apply_coloc_score(instance, cons->primary, cons, true);
+ }
+ g_list_free(list);
+
+ instance->sort_index = instance->priority;
+ if (next_role == RSC_ROLE_PROMOTED) {
+ instance->sort_index = INFINITY;
+ }
+ pe_rsc_trace(clone, "Assigning %s priority = %d",
+ instance->id, instance->priority);
+}
+
+/*!
+ * \internal
+ * \brief Set a promotable clone instance's role
+ *
+ * \param[in,out] data Promotable clone instance to update
+ * \param[in,out] user_data Pointer to count of instances chosen for promotion
+ */
+static void
+set_instance_role(gpointer data, gpointer user_data)
+{
+ pe_resource_t *instance = (pe_resource_t *) data;
+ int *count = (int *) user_data;
+
+ const pe_resource_t *clone = pe__const_top_resource(instance, false);
+ pe_node_t *chosen = NULL;
+
+ show_promotion_score(instance);
+
+ if (instance->sort_index < 0) {
+ pe_rsc_trace(clone, "Not supposed to promote instance %s",
+ instance->id);
+
+ } else if ((*count < pe__clone_promoted_max(instance))
+ || !pcmk_is_set(clone->flags, pe_rsc_managed)) {
+ chosen = node_to_be_promoted_on(instance);
+ }
+
+ if (chosen == NULL) {
+ set_next_role_unpromoted(instance, NULL);
+ return;
+ }
+
+ if ((instance->role < RSC_ROLE_PROMOTED)
+ && !pcmk_is_set(instance->cluster->flags, pe_flag_have_quorum)
+ && (instance->cluster->no_quorum_policy == no_quorum_freeze)) {
+ crm_notice("Clone instance %s cannot be promoted without quorum",
+ instance->id);
+ set_next_role_unpromoted(instance, NULL);
+ return;
+ }
+
+ chosen->count++;
+ pe_rsc_info(clone, "Choosing %s (%s) on %s for promotion",
+ instance->id, role2text(instance->role),
+ pe__node_name(chosen));
+ set_next_role_promoted(instance, NULL);
+ (*count)++;
+}
+
+/*!
+ * \internal
+ * \brief Set roles for all instances of a promotable clone
+ *
+ * \param[in,out] rsc Promotable clone resource to update
+ */
+void
+pcmk__set_instance_roles(pe_resource_t *rsc)
+{
+ int promoted = 0;
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ // Repurpose count to track the number of promoted instances allocated
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ node->count = 0;
+ }
+
+ // Set instances' promotion priorities and sort by highest priority first
+ g_list_foreach(rsc->children, set_instance_priority, rsc);
+ sort_promotable_instances(rsc);
+
+ // Choose the first N eligible instances to be promoted
+ g_list_foreach(rsc->children, set_instance_role, &promoted);
+ pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
+ rsc->id, promoted, pe__clone_promoted_max(rsc));
+}
+
+/*!
+ * \internal
+ * \brief Create actions for promotable clone instances
+ *
+ * \param[in,out] clone Promotable clone to create actions for
+ * \param[out] any_promoting Will be set true if any instance is promoting
+ * \param[out] any_demoting Will be set true if any instance is demoting
+ */
+static void
+create_promotable_instance_actions(pe_resource_t *clone,
+ bool *any_promoting, bool *any_demoting)
+{
+ for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+ instance->cmds->create_actions(instance);
+ check_for_role_change(instance, any_demoting, any_promoting);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Reset each promotable instance's resource priority
+ *
+ * Reset the priority of each instance of a promotable clone to the clone's
+ * priority (after promotion actions are scheduled, when instance priorities
+ * were repurposed as promotion scores).
+ *
+ * \param[in,out] clone Promotable clone to reset
+ */
+static void
+reset_instance_priorities(pe_resource_t *clone)
+{
+ for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+ instance->priority = clone->priority;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create actions specific to promotable clones
+ *
+ * \param[in,out] clone Promotable clone to create actions for
+ */
+void
+pcmk__create_promotable_actions(pe_resource_t *clone)
+{
+ bool any_promoting = false;
+ bool any_demoting = false;
+
+ // Create actions for each clone instance individually
+ create_promotable_instance_actions(clone, &any_promoting, &any_demoting);
+
+ // Create pseudo-actions for clone as a whole
+ pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting);
+
+ // Undo our temporary repurposing of resource priority for instances
+ reset_instance_priorities(clone);
+}
+
+/*!
+ * \internal
+ * \brief Create internal orderings for a promotable clone's instances
+ *
+ * \param[in,out] clone Promotable clone instance to order
+ */
+void
+pcmk__order_promotable_instances(pe_resource_t *clone)
+{
+ pe_resource_t *previous = NULL; // Needed for ordered clones
+
+ pcmk__promotable_restart_ordering(clone);
+
+ for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+ // Demote before promote
+ pcmk__order_resource_actions(instance, RSC_DEMOTE,
+ instance, RSC_PROMOTE,
+ pe_order_optional);
+
+ order_instance_promotion(clone, instance, previous);
+ order_instance_demotion(clone, instance, previous);
+ previous = instance;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update dependent's allowed nodes for colocation with promotable
+ *
+ * \param[in,out] dependent Dependent resource to update
+ * \param[in] primary_node Node where an instance of the primary will be
+ * \param[in] colocation Colocation constraint to apply
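+ *
+ * For example (hypothetical configuration), with node-attribute="rack" on
+ * the colocation, the score is added to each of the dependent's allowed
+ * nodes whose "rack" attribute value matches that of \p primary_node.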
+ */
+static void
+update_dependent_allowed_nodes(pe_resource_t *dependent,
+ const pe_node_t *primary_node,
+ const pcmk__colocation_t *colocation)
+{
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+ const char *primary_value = NULL;
+ const char *attr = NULL;
+
+ if (colocation->score >= INFINITY) {
+ return; // Colocation is mandatory, so allowed node scores don't matter
+ }
+
+ // Get value of primary's colocation node attribute
+ attr = colocation->node_attribute;
+ if (attr == NULL) {
+ attr = CRM_ATTR_UNAME;
+ }
+ primary_value = pe_node_attribute_raw(primary_node, attr);
+
+ pe_rsc_trace(colocation->primary,
+ "Applying %s (%s with %s on %s by %s @%d) to %s",
+ colocation->id, colocation->dependent->id,
+ colocation->primary->id, pe__node_name(primary_node), attr,
+ colocation->score, dependent->id);
+
+ g_hash_table_iter_init(&iter, dependent->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ const char *dependent_value = pe_node_attribute_raw(node, attr);
+
+ if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) {
+ node->weight = pcmk__add_scores(node->weight, colocation->score);
+ pe_rsc_trace(colocation->primary,
+ "Added %s score (%s) to %s (now %s)",
+ colocation->id, pcmk_readable_score(colocation->score),
+ pe__node_name(node),
+ pcmk_readable_score(node->weight));
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update dependent for a colocation with a promotable clone
+ *
+ * \param[in] primary Primary resource in the colocation
+ * \param[in,out] dependent Dependent resource in the colocation
+ * \param[in] colocation Colocation constraint to apply
+ */
+void
+pcmk__update_dependent_with_promotable(const pe_resource_t *primary,
+ pe_resource_t *dependent,
+ const pcmk__colocation_t *colocation)
+{
+ GList *affected_nodes = NULL;
+
+ /* Build a list of all nodes where an instance of the primary will be, and
+ * (for optional colocations) update the dependent's allowed node scores for
+ * each one.
+ */
+ for (GList *iter = primary->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *instance = (pe_resource_t *) iter->data;
+ pe_node_t *node = instance->fns->location(instance, NULL, FALSE);
+
+ if (node == NULL) {
+ continue;
+ }
+ if (instance->fns->state(instance, FALSE) == colocation->primary_role) {
+ update_dependent_allowed_nodes(dependent, node, colocation);
+ affected_nodes = g_list_prepend(affected_nodes, node);
+ }
+ }
+
+ /* For mandatory colocations, add the primary's node weight to the
+ * dependent's node weight for each affected node, and ban the dependent
+ * from all other nodes.
+ *
+ * However, skip this for promoted-with-promoted colocations, otherwise
+ * inactive dependent instances can't start (in the unpromoted role).
+ */
+ if ((colocation->score >= INFINITY)
+ && ((colocation->dependent_role != RSC_ROLE_PROMOTED)
+ || (colocation->primary_role != RSC_ROLE_PROMOTED))) {
+
+ pe_rsc_trace(colocation->primary,
+ "Applying %s (mandatory %s with %s) to %s",
+ colocation->id, colocation->dependent->id,
+ colocation->primary->id, dependent->id);
+ node_list_exclude(dependent->allowed_nodes, affected_nodes,
+ TRUE);
+ }
+ g_list_free(affected_nodes);
+}
+
+/*!
+ * \internal
+ * \brief Update dependent priority for colocation with promotable
+ *
+ * \param[in] primary Primary resource in the colocation
+ * \param[in,out] dependent Dependent resource in the colocation
+ * \param[in] colocation Colocation constraint to apply
+ */
+void
+pcmk__update_promotable_dependent_priority(const pe_resource_t *primary,
+ pe_resource_t *dependent,
+ const pcmk__colocation_t *colocation)
+{
+ pe_resource_t *primary_instance = NULL;
+
+ // Look for a primary instance where dependent will be
+ primary_instance = pcmk__find_compatible_instance(dependent, primary,
+ colocation->primary_role,
+ false);
+
+ if (primary_instance != NULL) {
+ // Add primary instance's priority to dependent's
+ int new_priority = pcmk__add_scores(dependent->priority,
+ colocation->score);
+
+ pe_rsc_trace(colocation->primary,
+ "Applying %s (%s with %s) to %s priority (%s + %s = %s)",
+ colocation->id, colocation->dependent->id,
+ colocation->primary->id, dependent->id,
+ pcmk_readable_score(dependent->priority),
+ pcmk_readable_score(colocation->score),
+ pcmk_readable_score(new_priority));
+ dependent->priority = new_priority;
+
+ } else if (colocation->score >= INFINITY) {
+ // Mandatory colocation, but primary won't be here
+ pe_rsc_trace(colocation->primary,
+ "Applying %s (%s with %s) to %s: can't be promoted",
+ colocation->id, colocation->dependent->id,
+ colocation->primary->id, dependent->id);
+ dependent->priority = -INFINITY;
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_recurring.c b/lib/pacemaker/pcmk_sched_recurring.c
new file mode 100644
index 0000000..c1b929b
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_recurring.c
@@ -0,0 +1,716 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+// Information parsed from an operation history entry in the CIB
+struct op_history {
+ // XML attributes
+ const char *id; // ID of history entry
+ const char *name; // Action name
+
+ // Parsed information
+ char *key; // Operation key for action
+ enum rsc_role_e role; // Action role (or RSC_ROLE_UNKNOWN for default)
+ guint interval_ms; // Action interval
+};
+
+/*!
+ * \internal
+ * \brief Parse an interval from XML
+ *
+ * \param[in] xml XML containing an interval attribute
+ *
+ * \return Interval parsed from XML (or 0 as default)
+ */
+static guint
+xe_interval(const xmlNode *xml)
+{
+ return crm_parse_interval_spec(crm_element_value(xml,
+ XML_LRM_ATTR_INTERVAL));
+}
+
+/*!
+ * \internal
+ * \brief Check whether an operation exists multiple times in resource history
+ *
+ * \param[in] rsc Resource with history to search
+ * \param[in] name Name of action to search for
+ * \param[in] interval_ms Interval (in milliseconds) of action to search for
+ *
+ * \return true if an operation with \p name and \p interval_ms exists more than
+ * once in the operation history of \p rsc, otherwise false
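+ *
+ * For example (hypothetical configuration), two entries such as
+ * <op id="mon-a" name="monitor" interval="10s"/> and
+ * <op id="mon-b" name="monitor" interval="10s"/> for the same resource
+ * would be flagged as duplicates.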
+ */
+static bool
+is_op_dup(const pe_resource_t *rsc, const char *name, guint interval_ms)
+{
+ const char *id = NULL;
+
+ for (xmlNode *op = first_named_child(rsc->ops_xml, "op");
+ op != NULL; op = crm_next_same_xml(op)) {
+
+ // Check whether action name and interval match
+ if (!pcmk__str_eq(crm_element_value(op, "name"),
+ name, pcmk__str_none)
+ || (xe_interval(op) != interval_ms)) {
+ continue;
+ }
+
+ if (ID(op) == NULL) {
+ continue; // Shouldn't be possible
+ }
+
+ if (id == NULL) {
+ id = ID(op); // First matching op
+ } else {
+ pcmk__config_err("Operation %s is duplicate of %s (do not use "
+ "same name and interval combination more "
+ "than once per resource)", ID(op), id);
+ return true;
+ }
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an action name is one that can be recurring
+ *
+ * \param[in] name Action name to check
+ *
+ * \return true if \p name is an action known to be unsuitable as a recurring
+ * operation, otherwise false
+ *
+ * \note Pacemaker's current philosophy is to allow users to configure recurring
+ * operations except for a short list of actions known not to be suitable
+ * for that (as opposed to allowing only actions known to be suitable,
+ * which includes only monitor). Among other things, this approach allows
+ * users to define their own custom operations and make them recurring,
+ * though that use case is not well tested.
+ */
+static bool
+op_cannot_recur(const char *name)
+{
+ return pcmk__str_any_of(name, RSC_STOP, RSC_START, RSC_DEMOTE, RSC_PROMOTE,
+ CRMD_ACTION_RELOAD_AGENT, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a resource history entry is for a recurring action
+ *
+ * \param[in] rsc Resource that history entry is for
+ * \param[in] xml XML of resource history entry to check
+ * \param[out] op Where to store parsed info if recurring
+ *
+ * \return true if \p xml is for a recurring action, otherwise false
+ */
+static bool
+is_recurring_history(const pe_resource_t *rsc, const xmlNode *xml,
+ struct op_history *op)
+{
+ const char *role = NULL;
+
+ op->interval_ms = xe_interval(xml);
+ if (op->interval_ms == 0) {
+ return false; // Not recurring
+ }
+
+ op->id = ID(xml);
+ if (pcmk__str_empty(op->id)) {
+ pcmk__config_err("Ignoring resource history entry without ID");
+ return false; // Shouldn't be possible (unless CIB was manually edited)
+ }
+
+ op->name = crm_element_value(xml, "name");
+ if (op_cannot_recur(op->name)) {
+ pcmk__config_err("Ignoring %s because %s action cannot be recurring",
+ op->id, pcmk__s(op->name, "unnamed"));
+ return false;
+ }
+
+ // There should only be one recurring operation per action/interval
+ if (is_op_dup(rsc, op->name, op->interval_ms)) {
+ return false;
+ }
+
+ // Ensure role is valid if specified
+ role = crm_element_value(xml, "role");
+ if (role == NULL) {
+ op->role = RSC_ROLE_UNKNOWN;
+ } else {
+ op->role = text2role(role);
+ if (op->role == RSC_ROLE_UNKNOWN) {
+ pcmk__config_err("Ignoring %s because %s is not a valid role",
+ op->id, role);
+ }
+ }
+
+ // Disabled resources don't get monitored
+ op->key = pcmk__op_key(rsc->id, op->name, op->interval_ms);
+ if (find_rsc_op_entry(rsc, op->key) == NULL) {
+ crm_trace("Not creating recurring action %s for disabled resource %s",
+ op->id, rsc->id);
+ free(op->key);
+ return false;
+ }
+
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a recurring action for an active role should be optional
+ *
+ * \param[in] rsc Resource that recurring action is for
+ * \param[in] node Node that \p rsc will be active on (if any)
+ * \param[in] key Operation key for recurring action to check
+ * \param[in,out] start Start action for \p rsc
+ *
+ * \return true if recurring action should be optional, otherwise false
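+ *
+ * \note Here, "optional" means the recurring action is already active on
+ * \p node and does not need to be newly scheduled.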
+ */
+static bool
+active_recurring_should_be_optional(const pe_resource_t *rsc,
+ const pe_node_t *node, const char *key,
+ pe_action_t *start)
+{
+ GList *possible_matches = NULL;
+
+ if (node == NULL) { // Should only be possible if unmanaged and stopped
+ pe_rsc_trace(rsc, "%s will be mandatory because resource is unmanaged",
+ key);
+ return false;
+ }
+
+ if (!pcmk_is_set(rsc->cmds->action_flags(start, NULL),
+ pe_action_optional)) {
+ pe_rsc_trace(rsc, "%s will be mandatory because %s is",
+ key, start->uuid);
+ return false;
+ }
+
+ possible_matches = find_actions_exact(rsc->actions, key, node);
+ if (possible_matches == NULL) {
+ pe_rsc_trace(rsc, "%s will be mandatory because it is not active on %s",
+ key, pe__node_name(node));
+ return false;
+ }
+
+ for (const GList *iter = possible_matches;
+ iter != NULL; iter = iter->next) {
+
+ const pe_action_t *op = (const pe_action_t *) iter->data;
+
+ if (pcmk_is_set(op->flags, pe_action_reschedule)) {
+ pe_rsc_trace(rsc,
+ "%s will be mandatory because "
+ "it needs to be rescheduled", key);
+ g_list_free(possible_matches);
+ return false;
+ }
+ }
+
+ g_list_free(possible_matches);
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Create recurring action from resource history entry for an active role
+ *
+ * \param[in,out] rsc Resource that resource history is for
+ * \param[in,out] start Start action for \p rsc on \p node
+ * \param[in] node Node that resource will be active on (if any)
+ * \param[in] op Resource history entry
+ */
+static void
+recurring_op_for_active(pe_resource_t *rsc, pe_action_t *start,
+ const pe_node_t *node, const struct op_history *op)
+{
+ pe_action_t *mon = NULL;
+ bool is_optional = true;
+
+ // We're only interested in recurring actions for active roles
+ if (op->role == RSC_ROLE_STOPPED) {
+ return;
+ }
+
+ is_optional = active_recurring_should_be_optional(rsc, node, op->key,
+ start);
+
+ if (((op->role != RSC_ROLE_UNKNOWN) && (rsc->next_role != op->role))
+ || ((op->role == RSC_ROLE_UNKNOWN)
+ && (rsc->next_role == RSC_ROLE_PROMOTED))) {
+ // Configured monitor role doesn't match role resource will have
+
+ if (is_optional) { // It's running, so cancel it
+ char *after_key = NULL;
+ pe_action_t *cancel_op = pcmk__new_cancel_action(rsc, op->name,
+ op->interval_ms,
+ node);
+
+ switch (rsc->role) {
+ case RSC_ROLE_UNPROMOTED:
+ case RSC_ROLE_STARTED:
+ if (rsc->next_role == RSC_ROLE_PROMOTED) {
+ after_key = promote_key(rsc);
+
+ } else if (rsc->next_role == RSC_ROLE_STOPPED) {
+ after_key = stop_key(rsc);
+ }
+
+ break;
+ case RSC_ROLE_PROMOTED:
+ after_key = demote_key(rsc);
+ break;
+ default:
+ break;
+ }
+
+ if (after_key) {
+ pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL,
+ pe_order_runnable_left, rsc->cluster);
+ }
+ }
+
+ do_crm_log((is_optional? LOG_INFO : LOG_TRACE),
+ "%s recurring action %s because %s configured for %s role "
+ "(not %s)",
+ (is_optional? "Cancelling" : "Ignoring"), op->key, op->id,
+ role2text((op->role == RSC_ROLE_UNKNOWN)? RSC_ROLE_UNPROMOTED : op->role),
+ role2text(rsc->next_role));
+ return;
+ }
+
+ pe_rsc_trace(rsc,
+ "Creating %s recurring action %s for %s (%s %s on %s)",
+ (is_optional? "optional" : "mandatory"), op->key,
+ op->id, rsc->id, role2text(rsc->next_role),
+ pe__node_name(node));
+
+ mon = custom_action(rsc, strdup(op->key), op->name, node, is_optional, TRUE,
+ rsc->cluster);
+
+ if (!pcmk_is_set(start->flags, pe_action_runnable)) {
+ pe_rsc_trace(rsc, "%s is unrunnable because start is", mon->uuid);
+ pe__clear_action_flags(mon, pe_action_runnable);
+
+ } else if ((node == NULL) || !node->details->online
+ || node->details->unclean) {
+ pe_rsc_trace(rsc, "%s is unrunnable because no node is available",
+ mon->uuid);
+ pe__clear_action_flags(mon, pe_action_runnable);
+
+ } else if (!pcmk_is_set(mon->flags, pe_action_optional)) {
+ pe_rsc_info(rsc, "Start %s-interval %s for %s on %s",
+ pcmk__readable_interval(op->interval_ms), mon->task,
+ rsc->id, pe__node_name(node));
+ }
+
+ if (rsc->next_role == RSC_ROLE_PROMOTED) {
+ pe__add_action_expected_result(mon, CRM_EX_PROMOTED);
+ }
+
+ // Order monitor relative to other actions
+ if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pcmk__new_ordering(rsc, start_key(rsc), NULL,
+ NULL, strdup(mon->uuid), mon,
+ pe_order_implies_then|pe_order_runnable_left,
+ rsc->cluster);
+
+ pcmk__new_ordering(rsc, reload_key(rsc), NULL,
+ NULL, strdup(mon->uuid), mon,
+ pe_order_implies_then|pe_order_runnable_left,
+ rsc->cluster);
+
+ if (rsc->next_role == RSC_ROLE_PROMOTED) {
+ pcmk__new_ordering(rsc, promote_key(rsc), NULL,
+ rsc, NULL, mon,
+ pe_order_optional|pe_order_runnable_left,
+ rsc->cluster);
+
+ } else if (rsc->role == RSC_ROLE_PROMOTED) {
+ pcmk__new_ordering(rsc, demote_key(rsc), NULL,
+ rsc, NULL, mon,
+ pe_order_optional|pe_order_runnable_left,
+ rsc->cluster);
+ }
+ }
+}
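+
+/* Example of the role check above (illustrative): a monitor configured with
+ * role="Promoted" is ignored, and cancelled if already running, when the
+ * resource's next role is Started or Unpromoted; conversely, a monitor with
+ * no role configured covers the non-promoted active roles, so it is
+ * cancelled in the same way when the resource is being promoted.
+ */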
+
+/*!
+ * \internal
+ * \brief Cancel a recurring action if running on a node
+ *
+ * \param[in,out] rsc Resource that action is for
+ * \param[in] node Node to cancel action on
+ * \param[in] key Operation key for action
+ * \param[in] name Action name
+ * \param[in] interval_ms Action interval (in milliseconds)
+ */
+static void
+cancel_if_running(pe_resource_t *rsc, const pe_node_t *node, const char *key,
+ const char *name, guint interval_ms)
+{
+ GList *possible_matches = find_actions_exact(rsc->actions, key, node);
+ pe_action_t *cancel_op = NULL;
+
+ if (possible_matches == NULL) {
+ return; // Recurring action isn't running on this node
+ }
+ g_list_free(possible_matches);
+
+ cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node);
+
+ switch (rsc->next_role) {
+ case RSC_ROLE_STARTED:
+ case RSC_ROLE_UNPROMOTED:
+ /* Order starts after cancel. If the current role is
+ * stopped, this cancels the monitor before the resource
+ * starts; if the current role is started, then this cancels
+ * the monitor on a migration target before starting there.
+ */
+ pcmk__new_ordering(rsc, NULL, cancel_op,
+ rsc, start_key(rsc), NULL,
+ pe_order_runnable_left, rsc->cluster);
+ break;
+ default:
+ break;
+ }
+ pe_rsc_info(rsc,
+ "Cancelling %s-interval %s action for %s on %s because "
+ "configured for " RSC_ROLE_STOPPED_S " role (not %s)",
+ pcmk__readable_interval(interval_ms), name, rsc->id,
+ pe__node_name(node), role2text(rsc->next_role));
+}
+
+/*!
+ * \internal
+ * \brief Order an action after all probes of a resource on a node
+ *
+ * \param[in,out] rsc Resource to check for probes
+ * \param[in] node Node to check for probes of \p rsc
+ * \param[in,out] action Action to order after probes of \p rsc on \p node
+ */
+static void
+order_after_probes(pe_resource_t *rsc, const pe_node_t *node,
+ pe_action_t *action)
+{
+ GList *probes = pe__resource_actions(rsc, node, RSC_STATUS, FALSE);
+
+ for (GList *iter = probes; iter != NULL; iter = iter->next) {
+ order_actions((pe_action_t *) iter->data, action,
+ pe_order_runnable_left);
+ }
+ g_list_free(probes);
+}
+
+/*!
+ * \internal
+ * \brief Order an action after all stops of a resource on a node
+ *
+ * \param[in,out] rsc Resource to check for stops
+ * \param[in] node Node to check for stops of \p rsc
+ * \param[in,out] action Action to order after stops of \p rsc on \p node
+ */
+static void
+order_after_stops(pe_resource_t *rsc, const pe_node_t *node,
+ pe_action_t *action)
+{
+ GList *stop_ops = pe__resource_actions(rsc, node, RSC_STOP, TRUE);
+
+ for (GList *iter = stop_ops; iter != NULL; iter = iter->next) {
+ pe_action_t *stop = (pe_action_t *) iter->data;
+
+ if (!pcmk_is_set(stop->flags, pe_action_optional)
+ && !pcmk_is_set(action->flags, pe_action_optional)
+ && !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pe_rsc_trace(rsc, "%s optional on %s: unmanaged",
+ action->uuid, pe__node_name(node));
+ pe__set_action_flags(action, pe_action_optional);
+ }
+
+ if (!pcmk_is_set(stop->flags, pe_action_runnable)) {
+ crm_debug("%s unrunnable on %s: stop is unrunnable",
+ action->uuid, pe__node_name(node));
+ pe__clear_action_flags(action, pe_action_runnable);
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pcmk__new_ordering(rsc, stop_key(rsc), stop,
+ NULL, NULL, action,
+ pe_order_implies_then|pe_order_runnable_left,
+ rsc->cluster);
+ }
+ }
+ g_list_free(stop_ops);
+}
+
+/*!
+ * \internal
+ * \brief Create recurring action from resource history entry for inactive role
+ *
+ * \param[in,out] rsc Resource that resource history is for
+ * \param[in] node Node that resource will be active on (if any)
+ * \param[in] op Resource history entry
+ */
+static void
+recurring_op_for_inactive(pe_resource_t *rsc, const pe_node_t *node,
+ const struct op_history *op)
+{
+ GList *possible_matches = NULL;
+
+ // We're only interested in recurring actions for the inactive role
+ if (op->role != RSC_ROLE_STOPPED) {
+ return;
+ }
+
+ if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
+ crm_notice("Ignoring %s (recurring monitors for " RSC_ROLE_STOPPED_S
+ " role are not supported for anonymous clones)", op->id);
+ return; // @TODO add support
+ }
+
+ pe_rsc_trace(rsc, "Creating recurring action %s for %s on nodes "
+ "where it should not be running", op->id, rsc->id);
+
+ for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *stop_node = (pe_node_t *) iter->data;
+
+ bool is_optional = true;
+ pe_action_t *stopped_mon = NULL;
+
+ // Cancel action on node where resource will be active
+ if ((node != NULL)
+ && pcmk__str_eq(stop_node->details->uname, node->details->uname,
+ pcmk__str_casei)) {
+ cancel_if_running(rsc, node, op->key, op->name, op->interval_ms);
+ continue;
+ }
+
+ // Recurring action on this node is optional if it's already active here
+ possible_matches = find_actions_exact(rsc->actions, op->key, stop_node);
+ is_optional = (possible_matches != NULL);
+ g_list_free(possible_matches);
+
+ pe_rsc_trace(rsc,
+ "Creating %s recurring action %s for %s (%s "
+ RSC_ROLE_STOPPED_S " on %s)",
+ (is_optional? "optional" : "mandatory"),
+ op->key, op->id, rsc->id, pe__node_name(stop_node));
+
+ stopped_mon = custom_action(rsc, strdup(op->key), op->name, stop_node,
+ is_optional, TRUE, rsc->cluster);
+
+ pe__add_action_expected_result(stopped_mon, CRM_EX_NOT_RUNNING);
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ order_after_probes(rsc, stop_node, stopped_mon);
+ }
+
+ /* The recurring action is for the inactive role, so it shouldn't be
+ * performed until the resource is inactive.
+ */
+ order_after_stops(rsc, stop_node, stopped_mon);
+
+ if (!stop_node->details->online || stop_node->details->unclean) {
+ pe_rsc_debug(rsc, "%s unrunnable on %s: node unavailable",
+ stopped_mon->uuid, pe__node_name(stop_node));
+ pe__clear_action_flags(stopped_mon, pe_action_runnable);
+ }
+
+ if (pcmk_is_set(stopped_mon->flags, pe_action_runnable)
+ && !pcmk_is_set(stopped_mon->flags, pe_action_optional)) {
+ crm_notice("Start recurring %s-interval %s for "
+ RSC_ROLE_STOPPED_S " %s on %s",
+ pcmk__readable_interval(op->interval_ms),
+ stopped_mon->task, rsc->id, pe__node_name(stop_node));
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create recurring actions for a resource
+ *
+ * \param[in,out] rsc Resource to create recurring actions for
+ */
+void
+pcmk__create_recurring_actions(pe_resource_t *rsc)
+{
+ pe_action_t *start = NULL;
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
+ pe_rsc_trace(rsc, "Skipping recurring actions for blocked resource %s",
+ rsc->id);
+ return;
+ }
+
+ if (pcmk_is_set(rsc->flags, pe_rsc_maintenance)) {
+ pe_rsc_trace(rsc, "Skipping recurring actions for %s "
+ "in maintenance mode", rsc->id);
+ return;
+ }
+
+ if (rsc->allocated_to == NULL) {
+ // Recurring actions for active roles not needed
+
+ } else if (rsc->allocated_to->details->maintenance) {
+ pe_rsc_trace(rsc,
+ "Skipping recurring actions for %s on %s "
+ "in maintenance mode",
+ rsc->id, pe__node_name(rsc->allocated_to));
+
+ } else if ((rsc->next_role != RSC_ROLE_STOPPED)
+ || !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ // Recurring actions for active roles needed
+ start = start_action(rsc, rsc->allocated_to, TRUE);
+ }
+
+ pe_rsc_trace(rsc, "Creating any recurring actions needed for %s", rsc->id);
+
+ for (xmlNode *op = first_named_child(rsc->ops_xml, "op");
+ op != NULL; op = crm_next_same_xml(op)) {
+
+ struct op_history op_history = { NULL, };
+
+ if (!is_recurring_history(rsc, op, &op_history)) {
+ continue;
+ }
+
+ if (start != NULL) {
+ recurring_op_for_active(rsc, start, rsc->allocated_to, &op_history);
+ }
+ recurring_op_for_inactive(rsc, rsc->allocated_to, &op_history);
+
+ free(op_history.key);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Create an executor cancel action
+ *
+ * \param[in,out] rsc Resource of action to cancel
+ * \param[in] task Name of action to cancel
+ * \param[in] interval_ms Interval of action to cancel
+ * \param[in] node Node of action to cancel
+ *
+ * \return Created op
+ */
+pe_action_t *
+pcmk__new_cancel_action(pe_resource_t *rsc, const char *task, guint interval_ms,
+ const pe_node_t *node)
+{
+ pe_action_t *cancel_op = NULL;
+ char *key = NULL;
+ char *interval_ms_s = NULL;
+
+ CRM_ASSERT((rsc != NULL) && (task != NULL) && (node != NULL));
+
+ // @TODO dangerous if possible to schedule another action with this key
+ key = pcmk__op_key(rsc->id, task, interval_ms);
+
+ cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE,
+ rsc->cluster);
+
+ pcmk__str_update(&cancel_op->task, RSC_CANCEL);
+ pcmk__str_update(&cancel_op->cancel_task, task);
+
+ interval_ms_s = crm_strdup_printf("%u", interval_ms);
+ add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task);
+ add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s);
+ free(interval_ms_s);
+
+ return cancel_op;
+}
+
+/*!
+ * \internal
+ * \brief Schedule cancellation of a recurring action
+ *
+ * \param[in,out] rsc Resource that action is for
+ * \param[in] call_id Action's call ID from history
+ * \param[in] task Action name
+ * \param[in] interval_ms Action interval
+ * \param[in] node Node that history entry is for
+ * \param[in] reason Short description of why action is being cancelled
+ */
+void
+pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task,
+ guint interval_ms, const pe_node_t *node,
+ const char *reason)
+{
+ pe_action_t *cancel = NULL;
+
+ CRM_CHECK((rsc != NULL) && (task != NULL)
+ && (node != NULL) && (reason != NULL),
+ return);
+
+ crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s",
+ pcmk__readable_interval(interval_ms), task, rsc->id,
+ pe__node_name(node), reason);
+ cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node);
+ add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
+
+ // Cancellations happen after stops
+ pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel,
+ pe_order_optional, rsc->cluster);
+}
+
+/*!
+ * \internal
+ * \brief Reschedule a recurring action
+ *
+ * \param[in,out] rsc Resource that action is for
+ * \param[in] task Name of action being rescheduled
+ * \param[in] interval_ms Action interval (in milliseconds)
+ * \param[in,out] node Node where action should be rescheduled
+ */
+void
+pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task,
+ guint interval_ms, pe_node_t *node)
+{
+ pe_action_t *op = NULL;
+
+ trigger_unfencing(rsc, node, "Device parameters changed (reschedule)",
+ NULL, rsc->cluster);
+ op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms),
+ task, node, TRUE, TRUE, rsc->cluster);
+ pe__set_action_flags(op, pe_action_reschedule);
+}
+
+/*!
+ * \internal
+ * \brief Check whether an action is recurring
+ *
+ * \param[in] action Action to check
+ *
+ * \return true if \p action has a nonzero interval, otherwise false
+ */
+bool
+pcmk__action_is_recurring(const pe_action_t *action)
+{
+ guint interval_ms = 0;
+
+ if (pcmk__guint_from_hash(action->meta,
+ XML_LRM_ATTR_INTERVAL_MS, 0,
+ &interval_ms) != pcmk_rc_ok) {
+ return false;
+ }
+ return (interval_ms > 0);
+}
diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c
new file mode 100644
index 0000000..6adb5d4
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_remote.c
@@ -0,0 +1,729 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <glib.h>
+
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+enum remote_connection_state {
+ remote_state_unknown = 0,
+ remote_state_alive = 1,
+ remote_state_resting = 2,
+ remote_state_failed = 3,
+ remote_state_stopped = 4
+};
+
+static const char *
+state2text(enum remote_connection_state state)
+{
+ switch (state) {
+ case remote_state_unknown:
+ return "unknown";
+ case remote_state_alive:
+ return "alive";
+ case remote_state_resting:
+ return "resting";
+ case remote_state_failed:
+ return "failed";
+ case remote_state_stopped:
+ return "stopped";
+ }
+
+ return "impossible";
+}
+
+/* We always use pe_order_preserve with these convenience functions to exempt
+ * internally generated constraints from the prohibition of user constraints
+ * involving remote connection resources.
+ *
+ * The start ordering additionally uses pe_order_runnable_left so that the
+ * specified action is not runnable if the start is not runnable.
+ */
+
+static inline void
+order_start_then_action(pe_resource_t *first_rsc, pe_action_t *then_action,
+ uint32_t extra, pe_working_set_t *data_set)
+{
+ if ((first_rsc != NULL) && (then_action != NULL) && (data_set != NULL)) {
+ pcmk__new_ordering(first_rsc, start_key(first_rsc), NULL,
+ then_action->rsc, NULL, then_action,
+ pe_order_preserve|pe_order_runnable_left|extra,
+ data_set);
+ }
+}
+
+static inline void
+order_action_then_stop(pe_action_t *first_action, pe_resource_t *then_rsc,
+ uint32_t extra, pe_working_set_t *data_set)
+{
+ if ((first_action != NULL) && (then_rsc != NULL) && (data_set != NULL)) {
+ pcmk__new_ordering(first_action->rsc, NULL, first_action,
+ then_rsc, stop_key(then_rsc), NULL,
+ pe_order_preserve|extra, data_set);
+ }
+}
+
+static enum remote_connection_state
+get_remote_node_state(const pe_node_t *node)
+{
+ const pe_resource_t *remote_rsc = NULL;
+ const pe_node_t *cluster_node = NULL;
+
+ CRM_ASSERT(node != NULL);
+
+ remote_rsc = node->details->remote_rsc;
+ CRM_ASSERT(remote_rsc != NULL);
+
+ cluster_node = pe__current_node(remote_rsc);
+
+ /* If the cluster node the remote connection resource resides on
+ * is unclean or went offline, we can't process any operations
+ * on that remote node until after it starts elsewhere.
+ */
+ if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
+ || (remote_rsc->allocated_to == NULL)) {
+
+ // The connection resource is not going to run anywhere
+
+ if ((cluster_node != NULL) && cluster_node->details->unclean) {
+ /* The remote connection is failed because its resource is on a
+ * failed node and can't be recovered elsewhere, so we must fence.
+ */
+ return remote_state_failed;
+ }
+
+ if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) {
+ /* Connection resource is cleanly stopped */
+ return remote_state_stopped;
+ }
+
+ /* Connection resource is failed */
+
+ if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
+ && remote_rsc->remote_reconnect_ms
+ && node->details->remote_was_fenced
+ && !pe__shutdown_requested(node)) {
+
+ /* We won't know whether the connection is recoverable until the
+ * reconnect interval expires and we reattempt connection.
+ */
+ return remote_state_unknown;
+ }
+
+ /* The remote connection is in a failed state. If there are any
+ * resources known to be active on it (stop) or in an unknown state
+ * (probe), we must assume the worst and fence it.
+ */
+ return remote_state_failed;
+
+ } else if (cluster_node == NULL) {
+ /* Connection is recoverable but not currently running anywhere, so see
+ * if we can recover it first
+ */
+ return remote_state_unknown;
+
+ } else if (cluster_node->details->unclean
+ || !(cluster_node->details->online)) {
+ // Connection is running on a dead node, see if we can recover it first
+ return remote_state_resting;
+
+ } else if (pcmk__list_of_multiple(remote_rsc->running_on)
+ && (remote_rsc->partial_migration_source != NULL)
+ && (remote_rsc->partial_migration_target != NULL)) {
+ /* We're in the middle of migrating a connection resource, so wait until
+ * after the migration completes before performing any actions.
+ */
+ return remote_state_resting;
+
+ }
+ return remote_state_alive;
+}
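+
+/* Rough decision table for get_remote_node_state() (derived from the code
+ * above, for illustration only):
+ *
+ * connection stopping or unassigned, on an unclean node -> failed
+ * connection stopping or unassigned, cleanly stopped -> stopped
+ * connection stopping or unassigned, failed but waiting out its
+ * reconnect interval after fencing -> unknown
+ * connection stopping or unassigned, failed otherwise -> failed
+ * recoverable but not currently running anywhere -> unknown
+ * running on an unclean or offline node -> resting
+ * midway through a partial migration -> resting
+ * anything else -> alive
+ */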
+
+/*!
+ * \internal
+ * \brief Order actions on remote node relative to actions for the connection
+ *
+ * \param[in,out] action An action scheduled on a Pacemaker Remote node
+ */
+static void
+apply_remote_ordering(pe_action_t *action)
+{
+ pe_resource_t *remote_rsc = NULL;
+ enum action_tasks task = text2task(action->task);
+ enum remote_connection_state state = get_remote_node_state(action->node);
+
+ uint32_t order_opts = pe_order_none;
+
+ if (action->rsc == NULL) {
+ return;
+ }
+
+ CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
+
+ remote_rsc = action->node->details->remote_rsc;
+ CRM_ASSERT(remote_rsc != NULL);
+
+ crm_trace("Order %s action %s relative to %s%s (state: %s)",
+ action->task, action->uuid,
+ pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
+ remote_rsc->id, state2text(state));
+
+ if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL)) {
+ /* Migration ops map to "no_action", but we need to apply the same
+ * ordering as for stop or demote (see get_router_node()).
+ */
+ task = stop_rsc;
+ }
+
+ switch (task) {
+ case start_rsc:
+ case action_promote:
+ order_opts = pe_order_none;
+
+ if (state == remote_state_failed) {
+ /* Force recovery, by making this action required */
+ pe__set_order_flags(order_opts, pe_order_implies_then);
+ }
+
+ /* Ensure connection is up before running this action */
+ order_start_then_action(remote_rsc, action, order_opts,
+ remote_rsc->cluster);
+ break;
+
+ case stop_rsc:
+ if (state == remote_state_alive) {
+ order_action_then_stop(action, remote_rsc,
+ pe_order_implies_first,
+ remote_rsc->cluster);
+
+ } else if (state == remote_state_failed) {
+ /* The resource is active on the node, but since we don't have a
+ * valid connection, the only way to stop the resource is by
+ * fencing the node. There is no need to order the stop relative
+ * to the remote connection, since the stop will become implied
+ * by the fencing.
+ */
+ pe_fence_node(remote_rsc->cluster, action->node,
+ "resources are active but connection is unrecoverable",
+ FALSE);
+
+ } else if (remote_rsc->next_role == RSC_ROLE_STOPPED) {
+ /* State must be remote_state_unknown or remote_state_stopped.
+ * Since the connection is not coming back up in this
+ * transition, stop this resource first.
+ */
+ order_action_then_stop(action, remote_rsc,
+ pe_order_implies_first,
+ remote_rsc->cluster);
+
+ } else {
+ /* The connection is going to be started somewhere else, so
+ * stop this resource after that completes.
+ */
+ order_start_then_action(remote_rsc, action, pe_order_none,
+ remote_rsc->cluster);
+ }
+ break;
+
+ case action_demote:
+ /* Only order this demote relative to the connection start if the
+ * connection isn't being torn down. Otherwise, the demote would be
+ * blocked because the connection start would not be allowed.
+ */
+ if ((state == remote_state_resting)
+ || (state == remote_state_unknown)) {
+
+ order_start_then_action(remote_rsc, action, pe_order_none,
+ remote_rsc->cluster);
+ } /* Otherwise we can rely on the stop ordering */
+ break;
+
+ default:
+ /* Wait for the connection resource to be up */
+ if (pcmk__action_is_recurring(action)) {
+ /* In case we ever get the recovery logic wrong, force
+ * recurring monitors to be restarted, even if just
+ * the connection was re-established
+ */
+ order_start_then_action(remote_rsc, action,
+ pe_order_implies_then,
+ remote_rsc->cluster);
+
+ } else {
+ pe_node_t *cluster_node = pe__current_node(remote_rsc);
+
+ if ((task == monitor_rsc) && (state == remote_state_failed)) {
+ /* We would only be here if we do not know the state of the
+ * resource on the remote node. Since we have no way to find
+ * out, it is necessary to fence the node.
+ */
+ pe_fence_node(remote_rsc->cluster, action->node,
+ "resources are in unknown state "
+ "and connection is unrecoverable", FALSE);
+ }
+
+ if ((cluster_node != NULL) && (state == remote_state_stopped)) {
+ /* The connection is currently up, but is going down
+ * permanently. Make sure we check services are actually
+ * stopped _before_ we let the connection get closed.
+ */
+ order_action_then_stop(action, remote_rsc,
+ pe_order_runnable_left,
+ remote_rsc->cluster);
+
+ } else {
+ order_start_then_action(remote_rsc, action, pe_order_none,
+ remote_rsc->cluster);
+ }
+ }
+ break;
+ }
+}
+
+static void
+apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set)
+{
+ /* VMs are also classified as containers for these purposes... in
+ * that they both involve a 'thing' running on a real or remote
+ * cluster node.
+ *
+ * This allows us to be smarter about the type and extent of
+ * recovery actions required in various scenarios
+ */
+ pe_resource_t *remote_rsc = NULL;
+ pe_resource_t *container = NULL;
+ enum action_tasks task = text2task(action->task);
+
+ CRM_ASSERT(action->rsc != NULL);
+ CRM_ASSERT(action->node != NULL);
+ CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
+
+ remote_rsc = action->node->details->remote_rsc;
+ CRM_ASSERT(remote_rsc != NULL);
+
+ container = remote_rsc->container;
+ CRM_ASSERT(container != NULL);
+
+ if (pcmk_is_set(container->flags, pe_rsc_failed)) {
+ pe_fence_node(data_set, action->node, "container failed", FALSE);
+ }
+
+ crm_trace("Order %s action %s relative to %s%s for %s%s",
+ action->task, action->uuid,
+ pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
+ remote_rsc->id,
+ pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "",
+ container->id);
+
+ if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE,
+ CRMD_ACTION_MIGRATED, NULL)) {
+ /* Migration ops map to "no_action", but we need to apply the same
+ * ordering as for stop or demote (see get_router_node()).
+ */
+ task = stop_rsc;
+ }
+
+ switch (task) {
+ case start_rsc:
+ case action_promote:
+ // Force resource recovery if the container is recovered
+ order_start_then_action(container, action, pe_order_implies_then,
+ data_set);
+
+ // Wait for the connection resource to be up, too
+ order_start_then_action(remote_rsc, action, pe_order_none,
+ data_set);
+ break;
+
+ case stop_rsc:
+ case action_demote:
+ if (pcmk_is_set(container->flags, pe_rsc_failed)) {
+ /* When the container representing a guest node fails, any stop
+ * or demote actions for resources running on the guest node
+ * are implied by the container stopping. This is similar to
+ * how fencing operations work for cluster nodes and remote
+ * nodes.
+ */
+ } else {
+ /* Ensure the operation happens before the connection is brought
+ * down.
+ *
+ * If we really wanted to, we could order these after the
+ * connection start, IFF the container's current role was
+ * stopped (otherwise we re-introduce an ordering loop when the
+ * connection is restarting).
+ */
+ order_action_then_stop(action, remote_rsc, pe_order_none,
+ data_set);
+ }
+ break;
+
+ default:
+ /* Wait for the connection resource to be up */
+ if (pcmk__action_is_recurring(action)) {
+ /* In case we ever get the recovery logic wrong, force
+ * recurring monitors to be restarted, even if just
+ * the connection was re-established
+ */
+ if (task != no_action) {
+ order_start_then_action(remote_rsc, action,
+ pe_order_implies_then, data_set);
+ }
+ } else {
+ order_start_then_action(remote_rsc, action, pe_order_none,
+ data_set);
+ }
+ break;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Order all relevant actions relative to remote connection actions
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__order_remote_connection_actions(pe_working_set_t *data_set)
+{
+ if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
+ return;
+ }
+
+ crm_trace("Creating remote connection orderings");
+
+ for (GList *gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+ pe_resource_t *remote = NULL;
+
+ // We are only interested in resource actions
+ if (action->rsc == NULL) {
+ continue;
+ }
+
+ /* Special case: If we are clearing the failcount of an actual
+ * remote connection resource, then make sure this happens before
+ * any start of the resource in this transition.
+ */
+ if (action->rsc->is_remote_node &&
+ pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) {
+
+ pcmk__new_ordering(action->rsc, NULL, action, action->rsc,
+ pcmk__op_key(action->rsc->id, RSC_START, 0),
+ NULL, pe_order_optional, data_set);
+
+ continue;
+ }
+
+ // We are only interested in actions allocated to a node
+ if (action->node == NULL) {
+ continue;
+ }
+
+ if (!pe__is_guest_or_remote_node(action->node)) {
+ continue;
+ }
+
+ /* We are only interested in real actions.
+ *
+ * @TODO This is probably wrong; pseudo-actions might be converted to
+ * real actions and vice versa later in update_actions() at the end of
+ * pcmk__apply_orderings().
+ */
+ if (pcmk_is_set(action->flags, pe_action_pseudo)) {
+ continue;
+ }
+
+ remote = action->node->details->remote_rsc;
+ if (remote == NULL) {
+ // Orphaned
+ continue;
+ }
+
+ /* Another special case: if a resource is moving to a Pacemaker Remote
+ * node, order the stop on the original node after any start of the
+ * remote connection. This ensures that if the connection fails to
+ * start, we leave the resource running on the original node.
+ */
+ if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) {
+ for (GList *item = action->rsc->actions; item != NULL;
+ item = item->next) {
+ pe_action_t *rsc_action = item->data;
+
+ if ((rsc_action->node->details != action->node->details)
+ && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) {
+ pcmk__new_ordering(remote, start_key(remote), NULL,
+ action->rsc, NULL, rsc_action,
+ pe_order_optional, data_set);
+ }
+ }
+ }
+
+ /* The action occurs across a remote connection, so create
+ * ordering constraints that guarantee the action occurs while the node
+ * is active (after start, before stop ... things like that).
+ *
+ * This is somewhat brittle in that we need to make sure the results of
+ * this ordering are compatible with the result of get_router_node().
+ * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
+ * of this logic rather than create_graph_action().
+ */
+ if (remote->container) {
+ crm_trace("Container ordering for %s", action->uuid);
+ apply_container_ordering(action, data_set);
+
+ } else {
+ crm_trace("Remote ordering for %s", action->uuid);
+ apply_remote_ordering(action);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node is a failed remote node
+ *
+ * \param[in] node Node to check
+ *
+ * \return true if \p node is a failed remote node, false otherwise
+ */
+bool
+pcmk__is_failed_remote_node(const pe_node_t *node)
+{
+ return pe__is_remote_node(node) && (node->details->remote_rsc != NULL)
+ && (get_remote_node_state(node) == remote_state_failed);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a given resource corresponds to a given node as guest
+ *
+ * \param[in] rsc Resource to check
+ * \param[in] node Node to check
+ *
+ * \return true if \p node is a guest node and \p rsc is its containing
+ * resource, otherwise false
+ */
+bool
+pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ return (rsc != NULL) && (rsc->fillers != NULL) && (node != NULL)
+ && (node->details->remote_rsc != NULL)
+ && (node->details->remote_rsc->container == rsc);
+}
+
+/*!
+ * \internal
+ * \brief Get proper connection host that a remote action must be routed through
+ *
+ * A remote connection resource might be starting, stopping, or migrating in the
+ * same transition that an action needs to be executed on its Pacemaker Remote
+ * node. Determine the proper node that the remote action should be routed
+ * through.
+ *
+ * \param[in] action (Potentially remote) action to route
+ *
+ * \return Connection host that action should be routed through if remote,
+ * otherwise NULL
+ */
+pe_node_t *
+pcmk__connection_host_for_action(const pe_action_t *action)
+{
+ pe_node_t *began_on = NULL;
+ pe_node_t *ended_on = NULL;
+ bool partial_migration = false;
+ const char *task = action->task;
+
+ if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)
+ || !pe__is_guest_or_remote_node(action->node)) {
+ return NULL;
+ }
+
+ CRM_ASSERT(action->node->details->remote_rsc != NULL);
+
+ began_on = pe__current_node(action->node->details->remote_rsc);
+ ended_on = action->node->details->remote_rsc->allocated_to;
+ if (action->node->details->remote_rsc
+ && (action->node->details->remote_rsc->container == NULL)
+ && action->node->details->remote_rsc->partial_migration_target) {
+ partial_migration = true;
+ }
+
+ if (began_on == NULL) {
+ crm_trace("Routing %s for %s through remote connection's "
+ "next node %s (starting)%s",
+ action->task, (action->rsc? action->rsc->id : "no resource"),
+ (ended_on? ended_on->details->uname : "none"),
+ partial_migration? " (partial migration)" : "");
+ return ended_on;
+ }
+
+ if (ended_on == NULL) {
+ crm_trace("Routing %s for %s through remote connection's "
+ "current node %s (stopping)%s",
+ action->task, (action->rsc? action->rsc->id : "no resource"),
+ (began_on? began_on->details->uname : "none"),
+ partial_migration? " (partial migration)" : "");
+ return began_on;
+ }
+
+ if (began_on->details == ended_on->details) {
+ crm_trace("Routing %s for %s through remote connection's "
+ "current node %s (not moving)%s",
+ action->task, (action->rsc? action->rsc->id : "no resource"),
+ (began_on? began_on->details->uname : "none"),
+ partial_migration? " (partial migration)" : "");
+ return began_on;
+ }
+
+ /* If we get here, the remote connection is moving during this transition.
+ * This means some actions for resources behind the connection will get
+ * routed through the cluster node the connection resource is currently on,
+ * and others are routed through the cluster node the connection will end up
+ * on.
+ */
+
+ if (pcmk__str_eq(task, "notify", pcmk__str_casei)) {
+ task = g_hash_table_lookup(action->meta, "notify_operation");
+ }
+
+ /*
+ * Stop, demote, and migration actions must occur before the connection can
+ * move (these actions are required before the remote resource can stop). In
+ * this case, we know these actions have to be routed through the initial
+ * cluster node the connection resource lived on before the move takes
+ * place.
+ *
+ * The exception is a partial migration of a (non-guest) remote connection
+ * resource; in that case, all actions (even these) will be ordered after
+ * the connection's pseudo-start on the migration target, so the target is
+ * the router node.
+ */
+ if (pcmk__strcase_any_of(task, "cancel", "stop", "demote", "migrate_from",
+ "migrate_to", NULL) && !partial_migration) {
+ crm_trace("Routing %s for %s through remote connection's "
+ "current node %s (moving)%s",
+ action->task, (action->rsc? action->rsc->id : "no resource"),
+ (began_on? began_on->details->uname : "none"),
+ partial_migration? " (partial migration)" : "");
+ return began_on;
+ }
+
+ /* Everything else (start, promote, monitor, probe, refresh,
+ * clear failcount, delete, ...) must occur after the connection starts on
+ * the node it is moving to.
+ */
+ crm_trace("Routing %s for %s through remote connection's "
+ "next node %s (moving)%s",
+ action->task, (action->rsc? action->rsc->id : "no resource"),
+ (ended_on? ended_on->details->uname : "none"),
+ partial_migration? " (partial migration)" : "");
+ return ended_on;
+}
+
+/*!
+ * \internal
+ * \brief Replace remote connection's addr="#uname" with actual address
+ *
+ * REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource
+ * with its "addr" parameter set to "#uname", pull the actual value from the
+ * parameters evaluated without a node (which was put there earlier in
+ * pcmk__create_graph() when the bundle's expand() method was called).
+ *
+ * \param[in,out] rsc Resource to check
+ * \param[in,out] params Resource parameters evaluated per node
+ */
+void
+pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params)
+{
+ const char *remote_addr = g_hash_table_lookup(params,
+ XML_RSC_ATTR_REMOTE_RA_ADDR);
+
+ if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) {
+ GHashTable *base = pe_rsc_params(rsc, NULL, rsc->cluster);
+
+ remote_addr = g_hash_table_lookup(base, XML_RSC_ATTR_REMOTE_RA_ADDR);
+ if (remote_addr != NULL) {
+ g_hash_table_insert(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
+ strdup(remote_addr));
+ }
+ }
+}
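+
+/* For illustration (hypothetical configuration): a connection resource such
+ * as
+ *
+ * <primitive id="remote1" class="ocf" provider="pacemaker" type="remote">
+ * <instance_attributes id="remote1-params">
+ * <nvpair id="remote1-addr" name="addr" value="#uname"/>
+ * </instance_attributes>
+ * </primitive>
+ *
+ * would have its "addr" value of "#uname" replaced here with the value
+ * evaluated without a node, so the transition graph carries a resolvable
+ * address. The "remote1" IDs are made up for the example.
+ */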
+
+/*!
+ * \brief Add special bundle meta-attributes to XML
+ *
+ * If a given action will be executed on a guest node (including a bundle),
+ * add the special bundle meta-attribute "container-attribute-target" and
+ * environment variable "physical_host" as XML attributes (using meta-attribute
+ * naming).
+ *
+ * \param[in,out] args_xml XML to add attributes to
+ * \param[in] action Action to check
+ */
+void
+pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action)
+{
+ const pe_node_t *host = NULL;
+ enum action_tasks task;
+
+ if (!pe__is_guest_node(action->node)) {
+ return;
+ }
+
+ task = text2task(action->task);
+ if ((task == action_notify) || (task == action_notified)) {
+ task = text2task(g_hash_table_lookup(action->meta, "notify_operation"));
+ }
+
+ switch (task) {
+ case stop_rsc:
+ case stopped_rsc:
+ case action_demote:
+ case action_demoted:
+ // "Down" actions take place on guest's current host
+ host = pe__current_node(action->node->details->remote_rsc->container);
+ break;
+
+ case start_rsc:
+ case started_rsc:
+ case monitor_rsc:
+ case action_promote:
+ case action_promoted:
+ // "Up" actions take place on guest's next host
+ host = action->node->details->remote_rsc->container->allocated_to;
+ break;
+
+ default:
+ break;
+ }
+
+ if (host != NULL) {
+ hash2metafield((gpointer) XML_RSC_ATTR_TARGET,
+ (gpointer) g_hash_table_lookup(action->rsc->meta,
+ XML_RSC_ATTR_TARGET),
+ (gpointer) args_xml);
+ hash2metafield((gpointer) PCMK__ENV_PHYSICAL_HOST,
+ (gpointer) host->details->uname,
+ (gpointer) args_xml);
+ }
+}
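+
+/* Illustrative effect of the function above: for a stop action on a guest
+ * node whose container is currently active on cluster node "node2" (a
+ * hypothetical name), the graph action's arguments gain the resource's
+ * container-attribute-target value and a physical_host entry of "node2",
+ * both written in meta-attribute form by hash2metafield().
+ */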
diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c
new file mode 100644
index 0000000..b855499
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_resource.c
@@ -0,0 +1,722 @@
+/*
+ * Copyright 2014-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+// Resource allocation methods that vary by resource variant
+static resource_alloc_functions_t allocation_methods[] = {
+ {
+ pcmk__primitive_assign,
+ pcmk__primitive_create_actions,
+ pcmk__probe_rsc_on_node,
+ pcmk__primitive_internal_constraints,
+ pcmk__primitive_apply_coloc_score,
+ pcmk__colocated_resources,
+ pcmk__with_primitive_colocations,
+ pcmk__primitive_with_colocations,
+ pcmk__add_colocated_node_scores,
+ pcmk__apply_location,
+ pcmk__primitive_action_flags,
+ pcmk__update_ordered_actions,
+ pcmk__output_resource_actions,
+ pcmk__add_rsc_actions_to_graph,
+ pcmk__primitive_add_graph_meta,
+ pcmk__primitive_add_utilization,
+ pcmk__primitive_shutdown_lock,
+ },
+ {
+ pcmk__group_assign,
+ pcmk__group_create_actions,
+ pcmk__probe_rsc_on_node,
+ pcmk__group_internal_constraints,
+ pcmk__group_apply_coloc_score,
+ pcmk__group_colocated_resources,
+ pcmk__with_group_colocations,
+ pcmk__group_with_colocations,
+ pcmk__group_add_colocated_node_scores,
+ pcmk__group_apply_location,
+ pcmk__group_action_flags,
+ pcmk__group_update_ordered_actions,
+ pcmk__output_resource_actions,
+ pcmk__add_rsc_actions_to_graph,
+ pcmk__noop_add_graph_meta,
+ pcmk__group_add_utilization,
+ pcmk__group_shutdown_lock,
+ },
+ {
+ pcmk__clone_assign,
+ clone_create_actions,
+ clone_create_probe,
+ clone_internal_constraints,
+ pcmk__clone_apply_coloc_score,
+ pcmk__colocated_resources,
+ pcmk__with_clone_colocations,
+ pcmk__clone_with_colocations,
+ pcmk__add_colocated_node_scores,
+ clone_rsc_location,
+ clone_action_flags,
+ pcmk__instance_update_ordered_actions,
+ pcmk__output_resource_actions,
+ clone_expand,
+ clone_append_meta,
+ pcmk__clone_add_utilization,
+ pcmk__clone_shutdown_lock,
+ },
+ {
+ pcmk__bundle_allocate,
+ pcmk__bundle_create_actions,
+ pcmk__bundle_create_probe,
+ pcmk__bundle_internal_constraints,
+ pcmk__bundle_apply_coloc_score,
+ pcmk__colocated_resources,
+ pcmk__with_bundle_colocations,
+ pcmk__bundle_with_colocations,
+ pcmk__add_colocated_node_scores,
+ pcmk__bundle_rsc_location,
+ pcmk__bundle_action_flags,
+ pcmk__instance_update_ordered_actions,
+ pcmk__output_bundle_actions,
+ pcmk__bundle_expand,
+ pcmk__noop_add_graph_meta,
+ pcmk__bundle_add_utilization,
+ pcmk__bundle_shutdown_lock,
+ }
+};
+
+/*!
+ * \internal
+ * \brief Check whether a resource's agent standard, provider, or type changed
+ *
+ * \param[in,out] rsc Resource to check
+ * \param[in,out] node Node needing unfencing if agent changed
+ * \param[in] rsc_entry XML with previously known agent information
+ * \param[in] active_on_node Whether \p rsc is active on \p node
+ *
+ * \return true if agent for \p rsc changed, otherwise false
+ */
+bool
+pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
+ const xmlNode *rsc_entry, bool active_on_node)
+{
+ bool changed = false;
+ const char *attr_list[] = {
+ XML_ATTR_TYPE,
+ XML_AGENT_ATTR_CLASS,
+ XML_AGENT_ATTR_PROVIDER
+ };
+
+ for (int i = 0; i < PCMK__NELEM(attr_list); i++) {
+ const char *value = crm_element_value(rsc->xml, attr_list[i]);
+ const char *old_value = crm_element_value(rsc_entry, attr_list[i]);
+
+ if (!pcmk__str_eq(value, old_value, pcmk__str_none)) {
+ changed = true;
+ trigger_unfencing(rsc, node, "Device definition changed", NULL,
+ rsc->cluster);
+ if (active_on_node) {
+ crm_notice("Forcing restart of %s on %s "
+ "because %s changed from '%s' to '%s'",
+ rsc->id, pe__node_name(node), attr_list[i],
+ pcmk__s(old_value, ""), pcmk__s(value, ""));
+ }
+ }
+ }
+ if (changed && active_on_node) {
+ // Make sure the resource is restarted
+ custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
+ rsc->cluster);
+ pe__set_resource_flags(rsc, pe_rsc_start_pending);
+ }
+ return changed;
+}
+
+/*!
+ * \internal
+ * \brief Add resource (and any matching children) to list if it matches ID
+ *
+ * \param[in] result List to add resource to
+ * \param[in] rsc Resource to check
+ * \param[in] id ID to match
+ *
+ * \return (Possibly new) head of list
+ */
+static GList *
+add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id)
+{
+ if ((strcmp(rsc->id, id) == 0)
+ || ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) {
+ result = g_list_prepend(result, rsc);
+ }
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child = (pe_resource_t *) iter->data;
+
+ result = add_rsc_if_matching(result, child, id);
+ }
+ return result;
+}
+
+/*!
+ * \internal
+ * \brief Find all resources matching a given ID by either ID or clone name
+ *
+ * \param[in] id Resource ID to check
+ * \param[in] data_set Cluster working set
+ *
+ * \return List of all resources that match \p id
+ * \note The caller is responsible for freeing the return value with
+ * g_list_free().
+ */
+GList *
+pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set)
+{
+ GList *result = NULL;
+
+ CRM_CHECK((id != NULL) && (data_set != NULL), return NULL);
+ for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
+ result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id);
+ }
+ return result;
+}
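+
+/* Usage sketch (hypothetical caller, following the contract above):
+ *
+ * GList *matches = pcmk__rscs_matching_id("myclone", data_set);
+ *
+ * for (GList *iter = matches; iter != NULL; iter = iter->next) {
+ * pe_resource_t *match = iter->data;
+ *
+ * crm_trace("%s matched by ID or clone name", match->id);
+ * }
+ * g_list_free(matches); // free only the list; resources are not copied
+ */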
+
+/*!
+ * \internal
+ * \brief Set the variant-appropriate allocation methods for a resource
+ *
+ * \param[in,out] rsc Resource to set allocation methods for
+ * \param[in] ignored Here so function can be used with g_list_foreach()
+ */
+static void
+set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored)
+{
+ rsc->cmds = &allocation_methods[rsc->variant];
+ g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL);
+}
+
+/*!
+ * \internal
+ * \brief Set the variant-appropriate allocation methods for all resources
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__set_allocation_methods(pe_working_set_t *data_set)
+{
+ g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc,
+ NULL);
+}
+
+// Shared implementation of resource_alloc_functions_t:colocated_resources()
+GList *
+pcmk__colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc,
+ GList *colocated_rscs)
+{
+ const GList *iter = NULL;
+ GList *colocations = NULL;
+
+ if (orig_rsc == NULL) {
+ orig_rsc = rsc;
+ }
+
+ if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) {
+ return colocated_rscs;
+ }
+
+ pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s",
+ rsc->id, orig_rsc->id);
+ colocated_rscs = g_list_prepend(colocated_rscs, (gpointer) rsc);
+
+ // Follow colocations where this resource is the dependent resource
+ colocations = pcmk__this_with_colocations(rsc);
+ for (iter = colocations; iter != NULL; iter = iter->next) {
+ const pcmk__colocation_t *constraint = iter->data;
+ const pe_resource_t *primary = constraint->primary;
+
+ if (primary == orig_rsc) {
+ continue; // Break colocation loop
+ }
+
+ if ((constraint->score == INFINITY) &&
+ (pcmk__colocation_affects(rsc, primary, constraint,
+ true) == pcmk__coloc_affects_location)) {
+
+ colocated_rscs = primary->cmds->colocated_resources(primary,
+ orig_rsc,
+ colocated_rscs);
+ }
+ }
+ g_list_free(colocations);
+
+ // Follow colocations where this resource is the primary resource
+ colocations = pcmk__with_this_colocations(rsc);
+ for (iter = colocations; iter != NULL; iter = iter->next) {
+ const pcmk__colocation_t *constraint = iter->data;
+ const pe_resource_t *dependent = constraint->dependent;
+
+ if (dependent == orig_rsc) {
+ continue; // Break colocation loop
+ }
+
+ if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) {
+ continue; // We can't be sure whether dependent will be colocated
+ }
+
+ if ((constraint->score == INFINITY) &&
+ (pcmk__colocation_affects(dependent, rsc, constraint,
+ true) == pcmk__coloc_affects_location)) {
+
+ colocated_rscs = dependent->cmds->colocated_resources(dependent,
+ orig_rsc,
+ colocated_rscs);
+ }
+ }
+ g_list_free(colocations);
+
+ return colocated_rscs;
+}
+
+// No-op function for variants that don't need to implement add_graph_meta()
+void
+pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml)
+{
+}
+
+void
+pcmk__output_resource_actions(pe_resource_t *rsc)
+{
+ pcmk__output_t *out = rsc->cluster->priv;
+
+ pe_node_t *next = NULL;
+ pe_node_t *current = NULL;
+
+ if (rsc->children != NULL) {
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child = (pe_resource_t *) iter->data;
+
+ child->cmds->output_actions(child);
+ }
+ return;
+ }
+
+ next = rsc->allocated_to;
+ if (rsc->running_on) {
+ current = pe__current_node(rsc);
+ if (rsc->role == RSC_ROLE_STOPPED) {
+ /* This can occur when resources are being recovered because
+ * the current role can change in pcmk__primitive_create_actions()
+ */
+ rsc->role = RSC_ROLE_STARTED;
+ }
+ }
+
+ if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
+ /* Don't log stopped orphans */
+ return;
+ }
+
+ out->message(out, "rsc-action", rsc, current, next);
+}
+
+/*!
+ * \internal
+ * \brief Assign a specified primitive resource to a node
+ *
+ * Assign a specified primitive resource to a specified node, if the node can
+ * run the resource (or unconditionally, if \p force is true). Mark the resource
+ * as no longer provisional. If the primitive can't be assigned (or \p chosen is
+ * NULL), unassign any previous assignment for it, set its next role to stopped,
+ * and update any existing actions scheduled for it. This is not done
+ * recursively for children, so it should be called only for primitives.
+ *
+ * \param[in,out] rsc Resource to assign
+ * \param[in,out] chosen Node to assign \p rsc to
+ * \param[in] force If true, assign to \p chosen even if unavailable
+ *
+ * \return true if \p rsc could be assigned, otherwise false
+ *
+ * \note Assigning a resource to the NULL node using this function is different
+ * from calling pcmk__unassign_resource(), in that it will also update any
+ * actions created for the resource.
+ */
+bool
+pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, bool force)
+{
+ pcmk__output_t *out = rsc->cluster->priv;
+
+ CRM_ASSERT(rsc->variant == pe_native);
+
+ if (!force && (chosen != NULL)) {
+ if ((chosen->weight < 0)
+ // Allow the graph to assume that guest node connections will come up
+ || (!pcmk__node_available(chosen, true, false)
+ && !pe__is_guest_node(chosen))) {
+
+ crm_debug("All nodes for resource %s are unavailable, unclean or "
+ "shutting down (%s can%s run resources, with weight %d)",
+ rsc->id, pe__node_name(chosen),
+ (pcmk__node_available(chosen, true, false)? "" : "not"),
+ chosen->weight);
+ pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability");
+ chosen = NULL;
+ }
+ }
+
+ pcmk__unassign_resource(rsc);
+ pe__clear_resource_flags(rsc, pe_rsc_provisional);
+
+ if (chosen == NULL) {
+ crm_debug("Could not allocate a node for %s", rsc->id);
+ pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate");
+
+ for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
+ pe_action_t *op = (pe_action_t *) iter->data;
+
+ crm_debug("Updating %s for allocation failure", op->uuid);
+
+ if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) {
+ pe__clear_action_flags(op, pe_action_optional);
+
+ } else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) {
+ pe__clear_action_flags(op, pe_action_runnable);
+ //pe__set_resource_flags(rsc, pe_rsc_block);
+
+ } else {
+ // Cancel recurring actions, unless for stopped state
+ const char *interval_ms_s = NULL;
+ const char *target_rc_s = NULL;
+ char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
+
+ interval_ms_s = g_hash_table_lookup(op->meta,
+ XML_LRM_ATTR_INTERVAL_MS);
+ target_rc_s = g_hash_table_lookup(op->meta,
+ XML_ATTR_TE_TARGET_RC);
+ if ((interval_ms_s != NULL)
+ && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none)
+ && !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) {
+ pe__clear_action_flags(op, pe_action_runnable);
+ }
+ free(rc_stopped);
+ }
+ }
+ return false;
+ }
+
+ crm_debug("Assigning %s to %s", rsc->id, pe__node_name(chosen));
+ rsc->allocated_to = pe__copy_node(chosen);
+
+ chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc,
+ rsc);
+ chosen->details->num_resources++;
+ chosen->count++;
+ pcmk__consume_node_capacity(chosen->details->utilization, rsc);
+
+ if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) {
+ out->message(out, "resource-util", rsc, chosen, __func__);
+ }
+ return true;
+}
+
+/*!
+ * \internal
+ * \brief Assign a specified resource (of any variant) to a node
+ *
+ * Assign a specified resource and its children (if any) to a specified node, if
+ * the node can run the resource (or unconditionally, if \p force is true). Mark
+ * the resources as no longer provisional. If the resources can't be assigned
+ * (or \p node is NULL), unassign any previous assignments, set next role to
+ * stopped, and update any existing actions scheduled for them.
+ *
+ * \param[in,out] rsc Resource to assign
+ * \param[in,out] node Node to assign \p rsc to
+ * \param[in] force If true, assign to \p node even if unavailable
+ *
+ * \return true if \p rsc could be assigned, otherwise false
+ *
+ * \note Assigning a resource to the NULL node using this function is different
+ * from calling pcmk__unassign_resource(), in that it will also update any
+ * actions created for the resource.
+ */
+bool
+pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force)
+{
+ bool changed = false;
+
+ if (rsc->children == NULL) {
+ if (rsc->allocated_to != NULL) {
+ changed = true;
+ }
+ pcmk__finalize_assignment(rsc, node, force);
+
+ } else {
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
+
+ changed |= pcmk__assign_resource(child_rsc, node, force);
+ }
+ }
+ return changed;
+}
+
+/*!
+ * \internal
+ * \brief Remove any assignment of a specified resource to a node
+ *
+ * If a specified resource has been assigned to a node, remove that assignment
+ * and mark the resource as provisional again. This is not done recursively for
+ * children, so it should be called only for primitives.
+ *
+ * \param[in,out] rsc Resource to unassign
+ */
+void
+pcmk__unassign_resource(pe_resource_t *rsc)
+{
+ pe_node_t *old = rsc->allocated_to;
+
+ if (old == NULL) {
+ return;
+ }
+
+ crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old));
+ pe__set_resource_flags(rsc, pe_rsc_provisional);
+ rsc->allocated_to = NULL;
+
+ /* We're going to free the pe_node_t, but its details member is shared and
+ * will remain, so update that appropriately first.
+ */
+ old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc,
+ rsc);
+ old->details->num_resources--;
+ pcmk__release_node_capacity(old->details->utilization, rsc);
+ free(old);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a resource has reached its migration threshold on a node
+ *
+ * \param[in,out] rsc Resource to check
+ * \param[in] node Node to check
+ * \param[out] failed If threshold has been reached, this will be set to
+ * resource that failed (possibly a parent of \p rsc)
+ *
+ * \return true if the migration threshold has been reached, false otherwise
+ */
+bool
+pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node,
+ pe_resource_t **failed)
+{
+ int fail_count, remaining_tries;
+ pe_resource_t *rsc_to_ban = rsc;
+
+ // Migration threshold of 0 means never force away
+ if (rsc->migration_threshold == 0) {
+ return false;
+ }
+
+ // If we're ignoring failures, also ignore the migration threshold
+ if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
+ return false;
+ }
+
+ // If there are no failures, there's no need to force away
+ fail_count = pe_get_failcount(node, rsc, NULL,
+ pe_fc_effective|pe_fc_fillers, NULL);
+ if (fail_count <= 0) {
+ return false;
+ }
+
+ // If failed resource is anonymous clone instance, we'll force clone away
+ if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
+ rsc_to_ban = uber_parent(rsc);
+ }
+
+ // How many more times recovery will be tried on this node
+ remaining_tries = rsc->migration_threshold - fail_count;
+
+ if (remaining_tries <= 0) {
+ crm_warn("%s cannot run on %s due to reaching migration threshold "
+ "(clean up resource to allow again)"
+ CRM_XS " failures=%d migration-threshold=%d",
+ rsc_to_ban->id, pe__node_name(node), fail_count,
+ rsc->migration_threshold);
+ if (failed != NULL) {
+ *failed = rsc_to_ban;
+ }
+ return true;
+ }
+
+ crm_info("%s can fail %d more time%s on "
+ "%s before reaching migration threshold (%d)",
+ rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries),
+ pe__node_name(node), rsc->migration_threshold);
+ return false;
+}
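+
+/* Worked example (illustrative): with migration-threshold=3 and an effective
+ * fail count of 2 on a node, remaining_tries is 1 and the function returns
+ * false, since one more failure is tolerated there. Once the fail count
+ * reaches 3, remaining_tries drops to 0 and the resource (or its anonymous
+ * clone parent, per the uber_parent() adjustment above) is reported as
+ * having reached the threshold until its failures are cleaned up.
+ */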
+
+static void *
+convert_const_pointer(const void *ptr)
+{
+ /* Worst function ever */
+ return (void *)ptr;
+}
+
+/*!
+ * \internal
+ * \brief Get a node's weight
+ *
+ * \param[in] node Unweighted node to check (for node ID)
+ * \param[in] nodes List of weighted nodes to look for \p node in
+ *
+ * \return Node's weight, or -INFINITY if not found
+ */
+static int
+get_node_weight(const pe_node_t *node, GHashTable *nodes)
+{
+ pe_node_t *weighted_node = NULL;
+
+ if ((node != NULL) && (nodes != NULL)) {
+ weighted_node = g_hash_table_lookup(nodes, node->details->id);
+ }
+ return (weighted_node == NULL)? -INFINITY : weighted_node->weight;
+}
+
+/*!
+ * \internal
+ * \brief Compare two resources according to which should be allocated first
+ *
+ * \param[in] a First resource to compare
+ * \param[in] b Second resource to compare
+ * \param[in] data Sorted list of all nodes in cluster
+ *
+ * \return -1 if \p a should be allocated before \p b, 0 if they are equal,
+ * or +1 if \p a should be allocated after \p b
+ */
+static gint
+cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
+{
+ const pe_resource_t *resource1 = a;
+ const pe_resource_t *resource2 = b;
+ const GList *nodes = (const GList *) data;
+
+ int rc = 0;
+ int r1_weight = -INFINITY;
+ int r2_weight = -INFINITY;
+ pe_node_t *r1_node = NULL;
+ pe_node_t *r2_node = NULL;
+ GHashTable *r1_nodes = NULL;
+ GHashTable *r2_nodes = NULL;
+ const char *reason = NULL;
+
+ // Resources with highest priority should be allocated first
+ reason = "priority";
+ r1_weight = resource1->priority;
+ r2_weight = resource2->priority;
+ if (r1_weight > r2_weight) {
+ rc = -1;
+ goto done;
+ }
+ if (r1_weight < r2_weight) {
+ rc = 1;
+ goto done;
+ }
+
+ // We need nodes to make any other useful comparisons
+ reason = "no node list";
+ if (nodes == NULL) {
+ goto done;
+ }
+
+ // Calculate and log node weights
+ resource1->cmds->add_colocated_node_scores(convert_const_pointer(resource1),
+ resource1->id, &r1_nodes, NULL,
+ 1, pcmk__coloc_select_this_with);
+ resource2->cmds->add_colocated_node_scores(convert_const_pointer(resource2),
+ resource2->id, &r2_nodes, NULL,
+ 1, pcmk__coloc_select_this_with);
+ pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
+ resource1->cluster);
+ pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
+ resource2->cluster);
+
+ // The resource with highest score on its current node goes first
+ reason = "current location";
+ if (resource1->running_on != NULL) {
+ r1_node = pe__current_node(resource1);
+ }
+ if (resource2->running_on != NULL) {
+ r2_node = pe__current_node(resource2);
+ }
+ r1_weight = get_node_weight(r1_node, r1_nodes);
+ r2_weight = get_node_weight(r2_node, r2_nodes);
+ if (r1_weight > r2_weight) {
+ rc = -1;
+ goto done;
+ }
+ if (r1_weight < r2_weight) {
+ rc = 1;
+ goto done;
+ }
+
+ // Otherwise a higher weight on any node will do
+ reason = "score";
+ for (const GList *iter = nodes; iter != NULL; iter = iter->next) {
+ const pe_node_t *node = (const pe_node_t *) iter->data;
+
+ r1_weight = get_node_weight(node, r1_nodes);
+ r2_weight = get_node_weight(node, r2_nodes);
+ if (r1_weight > r2_weight) {
+ rc = -1;
+ goto done;
+ }
+ if (r1_weight < r2_weight) {
+ rc = 1;
+ goto done;
+ }
+ }
+
+done:
+ crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
+ resource1->id, r1_weight,
+ ((r1_node == NULL)? "" : " on "),
+ ((r1_node == NULL)? "" : r1_node->details->id),
+ ((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
+ resource2->id, r2_weight,
+ ((r2_node == NULL)? "" : " on "),
+ ((r2_node == NULL)? "" : r2_node->details->id),
+ reason);
+ if (r1_nodes != NULL) {
+ g_hash_table_destroy(r1_nodes);
+ }
+ if (r2_nodes != NULL) {
+ g_hash_table_destroy(r2_nodes);
+ }
+ return rc;
+}
+
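+/* Worked example (editor's note, illustrative values): given
+ *
+ *     rsc A: priority=10, score 5 on its current node
+ *     rsc B: priority=10, score 9 on its current node
+ *     rsc C: priority=20
+ *
+ * sorting with cmp_resources() yields C, B, A: C wins on priority, and B
+ * beats A on current-node score before any per-node tie-breaking is needed.
+ */
+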
+/*!
+ * \internal
+ * \brief Sort resources in the order they should be allocated to nodes
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__sort_resources(pe_working_set_t *data_set)
+{
+ GList *nodes = g_list_copy(data_set->nodes);
+
+ nodes = pcmk__sort_nodes(nodes, NULL);
+ data_set->resources = g_list_sort_with_data(data_set->resources,
+ cmp_resources, nodes);
+ g_list_free(nodes);
+}
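+
+/* For reference (editor's note): allocate_resources() in pcmk_scheduler.c
+ * calls pcmk__sort_resources() only when placement-strategy is not
+ * "default", so the utilization-aware ordering above is otherwise skipped.
+ */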
diff --git a/lib/pacemaker/pcmk_sched_tickets.c b/lib/pacemaker/pcmk_sched_tickets.c
new file mode 100644
index 0000000..30206d7
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_tickets.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+enum loss_ticket_policy {
+ loss_ticket_stop,
+ loss_ticket_demote,
+ loss_ticket_fence,
+ loss_ticket_freeze
+};
+
+typedef struct {
+ const char *id;
+ pe_resource_t *rsc;
+ pe_ticket_t *ticket;
+ enum loss_ticket_policy loss_policy;
+ int role;
+} rsc_ticket_t;
+
+/*!
+ * \brief Check whether a ticket constraint matches a resource by role
+ *
+ * \param[in] rsc Resource to compare with ticket
+ * \param[in] rsc_ticket Ticket constraint
+ *
+ * \return true if constraint has no role or the resource's role matches the
+ * constraint's, otherwise false
+ */
+static bool
+ticket_role_matches(const pe_resource_t *rsc, const rsc_ticket_t *rsc_ticket)
+{
+ if ((rsc_ticket->role == RSC_ROLE_UNKNOWN)
+ || (rsc_ticket->role == rsc->role)) {
+ return true;
+ }
+ pe_rsc_trace(rsc, "Skipping constraint: \"%s\" state filter",
+ role2text(rsc_ticket->role));
+ return false;
+}
+
+/*!
+ * \brief Create location constraints and fencing as needed for a ticket
+ *
+ * \param[in,out] rsc Resource affected by ticket
+ * \param[in] rsc_ticket Ticket
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+constraints_for_ticket(pe_resource_t *rsc, const rsc_ticket_t *rsc_ticket,
+ pe_working_set_t *data_set)
+{
+ GList *gIter = NULL;
+
+ CRM_CHECK((rsc != NULL) && (rsc_ticket != NULL), return);
+
+ if (rsc_ticket->ticket->granted && !rsc_ticket->ticket->standby) {
+ return;
+ }
+
+ if (rsc->children) {
+ pe_rsc_trace(rsc, "Processing ticket dependencies from %s", rsc->id);
+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
+ constraints_for_ticket((pe_resource_t *) gIter->data, rsc_ticket,
+ data_set);
+ }
+ return;
+ }
+
+ pe_rsc_trace(rsc, "%s: Processing ticket dependency on %s (%s, %s)",
+ rsc->id, rsc_ticket->ticket->id, rsc_ticket->id,
+ role2text(rsc_ticket->role));
+
+ if (!rsc_ticket->ticket->granted && (rsc->running_on != NULL)) {
+
+ switch (rsc_ticket->loss_policy) {
+ case loss_ticket_stop:
+ resource_location(rsc, NULL, -INFINITY, "__loss_of_ticket__",
+ data_set);
+ break;
+
+ case loss_ticket_demote:
+ // Promotion score will be set to -INFINITY in promotion_order()
+ if (rsc_ticket->role != RSC_ROLE_PROMOTED) {
+ resource_location(rsc, NULL, -INFINITY,
+ "__loss_of_ticket__", data_set);
+ }
+ break;
+
+ case loss_ticket_fence:
+ if (!ticket_role_matches(rsc, rsc_ticket)) {
+ return;
+ }
+
+ resource_location(rsc, NULL, -INFINITY, "__loss_of_ticket__",
+ data_set);
+
+ for (gIter = rsc->running_on; gIter != NULL;
+ gIter = gIter->next) {
+ pe_fence_node(data_set, (pe_node_t *) gIter->data,
+ "deadman ticket was lost", FALSE);
+ }
+ break;
+
+ case loss_ticket_freeze:
+ if (!ticket_role_matches(rsc, rsc_ticket)) {
+ return;
+ }
+ if (rsc->running_on != NULL) {
+ pe__clear_resource_flags(rsc, pe_rsc_managed);
+ pe__set_resource_flags(rsc, pe_rsc_block);
+ }
+ break;
+ }
+
+ } else if (!rsc_ticket->ticket->granted) {
+
+ if ((rsc_ticket->role != RSC_ROLE_PROMOTED)
+ || (rsc_ticket->loss_policy == loss_ticket_stop)) {
+ resource_location(rsc, NULL, -INFINITY, "__no_ticket__",
+ data_set);
+ }
+
+ } else if (rsc_ticket->ticket->standby) {
+
+ if ((rsc_ticket->role != RSC_ROLE_PROMOTED)
+ || (rsc_ticket->loss_policy == loss_ticket_stop)) {
+ resource_location(rsc, NULL, -INFINITY, "__ticket_standby__",
+ data_set);
+ }
+ }
+}
+
+static void
+rsc_ticket_new(const char *id, pe_resource_t *rsc, pe_ticket_t *ticket,
+ const char *state, const char *loss_policy,
+ pe_working_set_t *data_set)
+{
+ rsc_ticket_t *new_rsc_ticket = NULL;
+
+ if (rsc == NULL) {
+ pcmk__config_err("Ignoring ticket '%s' because resource "
+ "does not exist", id);
+ return;
+ }
+
+ new_rsc_ticket = calloc(1, sizeof(rsc_ticket_t));
+ if (new_rsc_ticket == NULL) {
+ return;
+ }
+
+ if (pcmk__str_eq(state, RSC_ROLE_STARTED_S,
+ pcmk__str_null_matches|pcmk__str_casei)) {
+ state = RSC_ROLE_UNKNOWN_S;
+ }
+
+ new_rsc_ticket->id = id;
+ new_rsc_ticket->ticket = ticket;
+ new_rsc_ticket->rsc = rsc;
+ new_rsc_ticket->role = text2role(state);
+
+ if (pcmk__str_eq(loss_policy, "fence", pcmk__str_casei)) {
+ if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
+ new_rsc_ticket->loss_policy = loss_ticket_fence;
+ } else {
+ pcmk__config_err("Resetting '" XML_TICKET_ATTR_LOSS_POLICY
+ "' for ticket '%s' to 'stop' "
+ "because fencing is not configured", ticket->id);
+ loss_policy = "stop";
+ }
+ }
+
+ if (new_rsc_ticket->loss_policy == loss_ticket_fence) {
+ crm_debug("On loss of ticket '%s': Fence the nodes running %s (%s)",
+ new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id,
+ role2text(new_rsc_ticket->role));
+
+ } else if (pcmk__str_eq(loss_policy, "freeze", pcmk__str_casei)) {
+ crm_debug("On loss of ticket '%s': Freeze %s (%s)",
+ new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id,
+ role2text(new_rsc_ticket->role));
+ new_rsc_ticket->loss_policy = loss_ticket_freeze;
+
+ } else if (pcmk__str_eq(loss_policy, "demote", pcmk__str_casei)) {
+ crm_debug("On loss of ticket '%s': Demote %s (%s)",
+ new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id,
+ role2text(new_rsc_ticket->role));
+ new_rsc_ticket->loss_policy = loss_ticket_demote;
+
+ } else if (pcmk__str_eq(loss_policy, "stop", pcmk__str_casei)) {
+ crm_debug("On loss of ticket '%s': Stop %s (%s)",
+ new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id,
+ role2text(new_rsc_ticket->role));
+ new_rsc_ticket->loss_policy = loss_ticket_stop;
+
+ } else {
+ if (new_rsc_ticket->role == RSC_ROLE_PROMOTED) {
+ crm_debug("On loss of ticket '%s': Default to demote %s (%s)",
+ new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id,
+ role2text(new_rsc_ticket->role));
+ new_rsc_ticket->loss_policy = loss_ticket_demote;
+
+ } else {
+ crm_debug("On loss of ticket '%s': Default to stop %s (%s)",
+ new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id,
+ role2text(new_rsc_ticket->role));
+ new_rsc_ticket->loss_policy = loss_ticket_stop;
+ }
+ }
+
+ pe_rsc_trace(rsc, "%s (%s) ==> %s",
+ rsc->id, role2text(new_rsc_ticket->role), ticket->id);
+
+ rsc->rsc_tickets = g_list_append(rsc->rsc_tickets, new_rsc_ticket);
+
+ data_set->ticket_constraints = g_list_append(data_set->ticket_constraints,
+ new_rsc_ticket);
+
+ if (!(new_rsc_ticket->ticket->granted) || new_rsc_ticket->ticket->standby) {
+ constraints_for_ticket(rsc, new_rsc_ticket, data_set);
+ }
+}
+
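+/* For reference (editor's note, values illustrative): a simple rsc_ticket
+ * constraint that rsc_ticket_new() handles might look like this in the CIB:
+ *
+ *     <rsc_ticket id="rsc1-req-ticketA" rsc="rsc1" rsc-role="Promoted"
+ *                 ticket="ticketA" loss-policy="demote"/>
+ *
+ * Here loss-policy="demote" maps to loss_ticket_demote, and if ticketA is
+ * revoked, constraints_for_ticket() adds the -INFINITY location constraints.
+ */
+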
+// \return Standard Pacemaker return code
+static int
+unpack_rsc_ticket_set(xmlNode *set, pe_ticket_t *ticket,
+ const char *loss_policy, pe_working_set_t *data_set)
+{
+ const char *set_id = NULL;
+ const char *role = NULL;
+
+ CRM_CHECK(set != NULL, return EINVAL);
+ CRM_CHECK(ticket != NULL, return EINVAL);
+
+ set_id = ID(set);
+ if (set_id == NULL) {
+ pcmk__config_err("Ignoring <" XML_CONS_TAG_RSC_SET "> without "
+ XML_ATTR_ID);
+ return pcmk_rc_unpack_error;
+ }
+
+ role = crm_element_value(set, "role");
+
+ for (xmlNode *xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+ xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+ pe_resource_t *resource = NULL;
+
+ resource = pcmk__find_constraint_resource(data_set->resources,
+ ID(xml_rsc));
+ if (resource == NULL) {
+ pcmk__config_err("%s: No resource found for %s",
+ set_id, ID(xml_rsc));
+ return pcmk_rc_unpack_error;
+ }
+ pe_rsc_trace(resource, "Resource '%s' depends on ticket '%s'",
+ resource->id, ticket->id);
+ rsc_ticket_new(set_id, resource, ticket, role, loss_policy, data_set);
+ }
+
+ return pcmk_rc_ok;
+}
+
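+/* For reference (editor's note, values illustrative): the resource-set form
+ * unpacked above looks roughly like
+ *
+ *     <rsc_ticket id="tickets-set" ticket="ticketA" loss-policy="stop">
+ *       <resource_set id="tickets-set-0" role="Started">
+ *         <resource_ref id="rsc1"/>
+ *         <resource_ref id="rsc2"/>
+ *       </resource_set>
+ *     </rsc_ticket>
+ *
+ * where each resource_ref gets its own rsc_ticket_new() entry.
+ */
+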
+static void
+unpack_simple_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set)
+{
+    const char *id = NULL;
+    const char *ticket_str = NULL;
+    const char *loss_policy = NULL;
+
+    pe_ticket_t *ticket = NULL;
+
+    const char *rsc_id = NULL;
+    const char *state = NULL;
+
+    // @COMPAT: Deprecated since 2.1.5
+    const char *instance = NULL;
+
+    pe_resource_t *rsc = NULL;
+
+    // Validate xml_obj before reading any of its attributes
+    CRM_CHECK(xml_obj != NULL, return);
+
+    ticket_str = crm_element_value(xml_obj, XML_TICKET_ATTR_TICKET);
+    loss_policy = crm_element_value(xml_obj, XML_TICKET_ATTR_LOSS_POLICY);
+    rsc_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE);
+    state = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE);
+    instance = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_INSTANCE);
+
+    if (instance != NULL) {
+        pe_warn_once(pe_wo_coloc_inst,
+                     "Support for " XML_COLOC_ATTR_SOURCE_INSTANCE " is "
+                     "deprecated and will be removed in a future release.");
+    }
+
+    id = ID(xml_obj);
+ if (id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
+ return;
+ }
+
+ if (ticket_str == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' without ticket specified",
+ id);
+ return;
+ } else {
+ ticket = g_hash_table_lookup(data_set->tickets, ticket_str);
+ }
+
+ if (ticket == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' because ticket '%s' "
+ "does not exist", id, ticket_str);
+ return;
+ }
+
+ if (rsc_id == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' without resource", id);
+ return;
+ } else {
+ rsc = pcmk__find_constraint_resource(data_set->resources, rsc_id);
+ }
+
+ if (rsc == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "does not exist", id, rsc_id);
+ return;
+
+ } else if ((instance != NULL) && !pe_rsc_is_clone(rsc)) {
+ pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+ "is not a clone but instance '%s' was requested",
+ id, rsc_id, instance);
+ return;
+ }
+
+ if (instance != NULL) {
+ rsc = find_clone_instance(rsc, instance);
+ if (rsc == NULL) {
+ pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
+ "does not have an instance '%s'",
+                              id, rsc_id, instance);
+ return;
+ }
+ }
+
+ rsc_ticket_new(id, rsc, ticket, state, loss_policy, data_set);
+}
+
+// \return Standard Pacemaker return code
+static int
+unpack_rsc_ticket_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
+ pe_working_set_t *data_set)
+{
+ const char *id = NULL;
+ const char *rsc_id = NULL;
+ const char *state = NULL;
+
+ pe_resource_t *rsc = NULL;
+ pe_tag_t *tag = NULL;
+
+ xmlNode *rsc_set = NULL;
+
+ *expanded_xml = NULL;
+
+ CRM_CHECK(xml_obj != NULL, return EINVAL);
+
+ id = ID(xml_obj);
+ if (id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
+ return pcmk_rc_unpack_error;
+ }
+
+ // Check whether there are any resource sets with template or tag references
+ *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set);
+ if (*expanded_xml != NULL) {
+ crm_log_xml_trace(*expanded_xml, "Expanded rsc_ticket");
+ return pcmk_rc_ok;
+ }
+
+ rsc_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE);
+ if (rsc_id == NULL) {
+ return pcmk_rc_ok;
+ }
+
+ if (!pcmk__valid_resource_or_tag(data_set, rsc_id, &rsc, &tag)) {
+ pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+ "valid resource or tag", id, rsc_id);
+ return pcmk_rc_unpack_error;
+
+ } else if (rsc != NULL) {
+ // No template or tag is referenced
+ return pcmk_rc_ok;
+ }
+
+ state = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE);
+
+ *expanded_xml = copy_xml(xml_obj);
+
+ // Convert template/tag reference in "rsc" into resource_set under rsc_ticket
+ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set, XML_COLOC_ATTR_SOURCE,
+ false, data_set)) {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ return pcmk_rc_unpack_error;
+ }
+
+ if (rsc_set != NULL) {
+ if (state != NULL) {
+ // Move "rsc-role" into converted resource_set as a "role" attribute
+ crm_xml_add(rsc_set, "role", state);
+ xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE);
+ }
+
+ } else {
+ free_xml(*expanded_xml);
+ *expanded_xml = NULL;
+ }
+
+ return pcmk_rc_ok;
+}
+
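+/* For reference (editor's note, illustrative): if "web-rscs" is a tag whose
+ * members are rsc1 and rsc2, then
+ *
+ *     <rsc_ticket id="t1" rsc="web-rscs" ticket="ticketA"/>
+ *
+ * is expanded above into an equivalent constraint whose rsc reference is
+ * replaced by a resource_set listing rsc1 and rsc2.
+ */
+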
+void
+pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set)
+{
+ xmlNode *set = NULL;
+ bool any_sets = false;
+
+ const char *id = NULL;
+ const char *ticket_str = crm_element_value(xml_obj, XML_TICKET_ATTR_TICKET);
+ const char *loss_policy = crm_element_value(xml_obj, XML_TICKET_ATTR_LOSS_POLICY);
+
+ pe_ticket_t *ticket = NULL;
+
+ xmlNode *orig_xml = NULL;
+ xmlNode *expanded_xml = NULL;
+
+ CRM_CHECK(xml_obj != NULL, return);
+
+ id = ID(xml_obj);
+ if (id == NULL) {
+ pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+ crm_element_name(xml_obj));
+ return;
+ }
+
+ if (data_set->tickets == NULL) {
+ data_set->tickets = pcmk__strkey_table(free, destroy_ticket);
+ }
+
+ if (ticket_str == NULL) {
+ pcmk__config_err("Ignoring constraint '%s' without ticket", id);
+ return;
+ } else {
+ ticket = g_hash_table_lookup(data_set->tickets, ticket_str);
+ }
+
+ if (ticket == NULL) {
+ ticket = ticket_new(ticket_str, data_set);
+ if (ticket == NULL) {
+ return;
+ }
+ }
+
+ if (unpack_rsc_ticket_tags(xml_obj, &expanded_xml,
+ data_set) != pcmk_rc_ok) {
+ return;
+ }
+ if (expanded_xml != NULL) {
+ orig_xml = xml_obj;
+ xml_obj = expanded_xml;
+ }
+
+ for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL;
+ set = crm_next_same_xml(set)) {
+
+ any_sets = true;
+ set = expand_idref(set, data_set->input);
+ if ((set == NULL) // Configuration error, message already logged
+ || (unpack_rsc_ticket_set(set, ticket, loss_policy,
+ data_set) != pcmk_rc_ok)) {
+ if (expanded_xml != NULL) {
+ free_xml(expanded_xml);
+ }
+ return;
+ }
+ }
+
+ if (expanded_xml) {
+ free_xml(expanded_xml);
+ xml_obj = orig_xml;
+ }
+
+ if (!any_sets) {
+ unpack_simple_rsc_ticket(xml_obj, data_set);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Ban resource from a node if it doesn't have a promotion ticket
+ *
+ * If a resource has tickets for the promoted role, and the ticket is either not
+ * granted or set to standby, then ban the resource from all nodes.
+ *
+ * \param[in,out] rsc Resource to check
+ */
+void
+pcmk__require_promotion_tickets(pe_resource_t *rsc)
+{
+ for (GList *item = rsc->rsc_tickets; item != NULL; item = item->next) {
+ rsc_ticket_t *rsc_ticket = (rsc_ticket_t *) item->data;
+
+ if ((rsc_ticket->role == RSC_ROLE_PROMOTED)
+ && (!rsc_ticket->ticket->granted || rsc_ticket->ticket->standby)) {
+ resource_location(rsc, NULL, -INFINITY,
+ "__stateful_without_ticket__", rsc->cluster);
+ }
+ }
+}
diff --git a/lib/pacemaker/pcmk_sched_utilization.c b/lib/pacemaker/pcmk_sched_utilization.c
new file mode 100644
index 0000000..0a4bec3
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_utilization.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright 2014-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+// Name for a pseudo-op to use in ordering constraints for utilization
+#define LOAD_STOPPED "load_stopped"
+
+/*!
+ * \internal
+ * \brief Get integer utilization from a string
+ *
+ * \param[in] s String representation of a node utilization value
+ *
+ * \return Integer equivalent of \p s
+ * \todo It would make sense to restrict utilization values to nonnegative
+ * integers, but the documentation just says "integers" and we didn't
+ * restrict them initially, so for backward compatibility, allow any
+ * integer.
+ */
+static int
+utilization_value(const char *s)
+{
+ int value = 0;
+
+ if ((s != NULL) && (pcmk__scan_min_int(s, &value, INT_MIN) == EINVAL)) {
+ pe_warn("Using 0 for utilization instead of invalid value '%s'", value);
+ value = 0;
+ }
+ return value;
+}
+
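+/* For reference (editor's note): utilization_value("100") == 100,
+ * utilization_value(NULL) == 0, and an unparsable string such as "many"
+ * logs a warning and yields 0.
+ */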
+
+/*
+ * Functions for comparing node capacities
+ */
+
+struct compare_data {
+ const pe_node_t *node1;
+ const pe_node_t *node2;
+ bool node2_only;
+ int result;
+};
+
+/*!
+ * \internal
+ * \brief Compare a single utilization attribute for two nodes
+ *
+ * Compare one utilization attribute for two nodes, incrementing the result if
+ * the first node has greater capacity, and decrementing it if the second node
+ * has greater capacity.
+ *
+ * \param[in] key Utilization attribute name to compare
+ * \param[in] value Utilization attribute value to compare
+ * \param[in,out] user_data Comparison data (as struct compare_data *)
+ */
+static void
+compare_utilization_value(gpointer key, gpointer value, gpointer user_data)
+{
+ int node1_capacity = 0;
+ int node2_capacity = 0;
+ struct compare_data *data = user_data;
+ const char *node2_value = NULL;
+
+ if (data->node2_only) {
+ if (g_hash_table_lookup(data->node1->details->utilization, key)) {
+ return; // We've already compared this attribute
+ }
+ } else {
+ node1_capacity = utilization_value((const char *) value);
+ }
+
+ node2_value = g_hash_table_lookup(data->node2->details->utilization, key);
+ node2_capacity = utilization_value(node2_value);
+
+ if (node1_capacity > node2_capacity) {
+ data->result--;
+ } else if (node1_capacity < node2_capacity) {
+ data->result++;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Compare utilization capacities of two nodes
+ *
+ * \param[in] node1 First node to compare
+ * \param[in] node2 Second node to compare
+ *
+ * \return Negative integer if node1 has more free capacity,
+ * 0 if the capacities are equal, or a positive integer
+ * if node2 has more free capacity
+ */
+int
+pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2)
+{
+ struct compare_data data = {
+ .node1 = node1,
+ .node2 = node2,
+ .node2_only = false,
+ .result = 0,
+ };
+
+ // Compare utilization values that node1 and maybe node2 have
+ g_hash_table_foreach(node1->details->utilization, compare_utilization_value,
+ &data);
+
+ // Compare utilization values that only node2 has
+ data.node2_only = true;
+ g_hash_table_foreach(node2->details->utilization, compare_utilization_value,
+ &data);
+
+ return data.result;
+}
+
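+/* Illustrative sketch (editor's note, not part of the upstream diff):
+ *
+ *     if (pcmk__compare_node_capacities(node1, node2) < 0) {
+ *         // node1 has more free capacity overall
+ *     }
+ *
+ * Attributes are compared independently, so cpu=4,ram=2 versus cpu=2,ram=4
+ * nets out to 0 (a tie).
+ */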
+
+/*
+ * Functions for updating node capacities
+ */
+
+struct calculate_data {
+ GHashTable *current_utilization;
+ bool plus;
+};
+
+/*!
+ * \internal
+ * \brief Update a single utilization attribute with a new value
+ *
+ * \param[in] key Name of utilization attribute to update
+ * \param[in] value Value to add or subtract
+ * \param[in,out] user_data Calculation data (as struct calculate_data *)
+ */
+static void
+update_utilization_value(gpointer key, gpointer value, gpointer user_data)
+{
+ int result = 0;
+ const char *current = NULL;
+ struct calculate_data *data = user_data;
+
+ current = g_hash_table_lookup(data->current_utilization, key);
+ if (data->plus) {
+ result = utilization_value(current) + utilization_value(value);
+ } else if (current) {
+ result = utilization_value(current) - utilization_value(value);
+ }
+ g_hash_table_replace(data->current_utilization,
+ strdup(key), pcmk__itoa(result));
+}
+
+/*!
+ * \internal
+ * \brief Subtract a resource's utilization from node capacity
+ *
+ * \param[in,out] current_utilization Current node utilization attributes
+ * \param[in] rsc Resource with utilization to subtract
+ */
+void
+pcmk__consume_node_capacity(GHashTable *current_utilization,
+ const pe_resource_t *rsc)
+{
+ struct calculate_data data = {
+ .current_utilization = current_utilization,
+ .plus = false,
+ };
+
+ g_hash_table_foreach(rsc->utilization, update_utilization_value, &data);
+}
+
+/*!
+ * \internal
+ * \brief Add a resource's utilization to node capacity
+ *
+ * \param[in,out] current_utilization Current node utilization attributes
+ * \param[in] rsc Resource with utilization to add
+ */
+void
+pcmk__release_node_capacity(GHashTable *current_utilization,
+ const pe_resource_t *rsc)
+{
+ struct calculate_data data = {
+ .current_utilization = current_utilization,
+ .plus = true,
+ };
+
+ g_hash_table_foreach(rsc->utilization, update_utilization_value, &data);
+}
+
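+/* Worked example (editor's note, values illustrative): if a node advertises
+ * cpu="8" and a resource declares cpu="3", pcmk__consume_node_capacity()
+ * leaves cpu="5" in the node's utilization table, and a later
+ * pcmk__release_node_capacity() restores it to "8". Values go through
+ * utilization_value(), so invalid strings count as 0.
+ */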
+
+/*
+ * Functions for checking for sufficient node capacity
+ */
+
+struct capacity_data {
+ const pe_node_t *node;
+ const char *rsc_id;
+ bool is_enough;
+};
+
+/*!
+ * \internal
+ * \brief Check whether a single utilization attribute has sufficient capacity
+ *
+ * \param[in] key Name of utilization attribute to check
+ * \param[in] value Amount of utilization required
+ * \param[in,out] user_data Capacity data (as struct capacity_data *)
+ */
+static void
+check_capacity(gpointer key, gpointer value, gpointer user_data)
+{
+ int required = 0;
+ int remaining = 0;
+ const char *node_value_s = NULL;
+ struct capacity_data *data = user_data;
+
+ node_value_s = g_hash_table_lookup(data->node->details->utilization, key);
+
+ required = utilization_value(value);
+ remaining = utilization_value(node_value_s);
+
+ if (required > remaining) {
+ crm_debug("Remaining capacity for %s on %s (%d) is insufficient "
+ "for resource %s usage (%d)",
+ (const char *) key, pe__node_name(data->node), remaining,
+ data->rsc_id, required);
+ data->is_enough = false;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node has sufficient capacity for a resource
+ *
+ * \param[in] node Node to check
+ * \param[in] rsc_id ID of resource to check (for debug logs only)
+ * \param[in] utilization Required utilization amounts
+ *
+ * \return true if node has sufficient capacity for resource, otherwise false
+ */
+static bool
+have_enough_capacity(const pe_node_t *node, const char *rsc_id,
+ GHashTable *utilization)
+{
+ struct capacity_data data = {
+ .node = node,
+ .rsc_id = rsc_id,
+ .is_enough = true,
+ };
+
+ g_hash_table_foreach(utilization, check_capacity, &data);
+ return data.is_enough;
+}
+
+/*!
+ * \internal
+ * \brief Sum the utilization requirements of a list of resources
+ *
+ * \param[in] orig_rsc Resource being allocated (for logging purposes)
+ * \param[in] rscs Resources whose utilization should be summed
+ *
+ * \return Newly allocated hash table with sum of all utilization values
+ * \note It is the caller's responsibility to free the return value using
+ * g_hash_table_destroy().
+ */
+static GHashTable *
+sum_resource_utilization(const pe_resource_t *orig_rsc, GList *rscs)
+{
+ GHashTable *utilization = pcmk__strkey_table(free, free);
+
+ for (GList *iter = rscs; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ rsc->cmds->add_utilization(rsc, orig_rsc, rscs, utilization);
+ }
+ return utilization;
+}
+
+/*!
+ * \internal
+ * \brief Ban resource from nodes with insufficient utilization capacity
+ *
+ * \param[in,out] rsc Resource to check
+ *
+ * \return Allowed node for \p rsc with most spare capacity, if there are no
+ * nodes with enough capacity for \p rsc and all its colocated resources
+ */
+const pe_node_t *
+pcmk__ban_insufficient_capacity(pe_resource_t *rsc)
+{
+ bool any_capable = false;
+ char *rscs_id = NULL;
+ pe_node_t *node = NULL;
+ const pe_node_t *most_capable_node = NULL;
+ GList *colocated_rscs = NULL;
+ GHashTable *unallocated_utilization = NULL;
+ GHashTableIter iter;
+
+ CRM_CHECK(rsc != NULL, return NULL);
+
+ // The default placement strategy ignores utilization
+ if (pcmk__str_eq(rsc->cluster->placement_strategy, "default",
+ pcmk__str_casei)) {
+ return NULL;
+ }
+
+ // Check whether any resources are colocated with this one
+ colocated_rscs = rsc->cmds->colocated_resources(rsc, NULL, NULL);
+ if (colocated_rscs == NULL) {
+ return NULL;
+ }
+
+ rscs_id = crm_strdup_printf("%s and its colocated resources", rsc->id);
+
+ // If rsc isn't in the list, add it so we include its utilization
+ if (g_list_find(colocated_rscs, rsc) == NULL) {
+ colocated_rscs = g_list_append(colocated_rscs, rsc);
+ }
+
+ // Sum utilization of colocated resources that haven't been allocated yet
+ unallocated_utilization = sum_resource_utilization(rsc, colocated_rscs);
+
+ // Check whether any node has enough capacity for all the resources
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ if (!pcmk__node_available(node, true, false)) {
+ continue;
+ }
+
+ if (have_enough_capacity(node, rscs_id, unallocated_utilization)) {
+ any_capable = true;
+ }
+
+ // Keep track of node with most free capacity
+ if ((most_capable_node == NULL)
+ || (pcmk__compare_node_capacities(node, most_capable_node) < 0)) {
+ most_capable_node = node;
+ }
+ }
+
+ if (any_capable) {
+ // If so, ban resource from any node with insufficient capacity
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ if (pcmk__node_available(node, true, false)
+ && !have_enough_capacity(node, rscs_id,
+ unallocated_utilization)) {
+ pe_rsc_debug(rsc, "%s does not have enough capacity for %s",
+ pe__node_name(node), rscs_id);
+ resource_location(rsc, node, -INFINITY, "__limit_utilization__",
+ rsc->cluster);
+ }
+ }
+ most_capable_node = NULL;
+
+ } else {
+ // Otherwise, ban from nodes with insufficient capacity for rsc alone
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+ if (pcmk__node_available(node, true, false)
+ && !have_enough_capacity(node, rsc->id, rsc->utilization)) {
+ pe_rsc_debug(rsc, "%s does not have enough capacity for %s",
+ pe__node_name(node), rsc->id);
+ resource_location(rsc, node, -INFINITY, "__limit_utilization__",
+ rsc->cluster);
+ }
+ }
+ }
+
+ g_hash_table_destroy(unallocated_utilization);
+ g_list_free(colocated_rscs);
+ free(rscs_id);
+
+ pe__show_node_weights(true, rsc, "Post-utilization",
+ rsc->allowed_nodes, rsc->cluster);
+ return most_capable_node;
+}
+
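+/* Illustrative usage sketch (editor's note, not part of the upstream diff;
+ * names hypothetical): assignment code might keep the return value as a
+ * fallback when no node fits the whole colocated group:
+ *
+ *     const pe_node_t *most_free = pcmk__ban_insufficient_capacity(rsc);
+ *
+ *     if (most_free != NULL) {
+ *         // No node fits rsc plus its colocated peers; prefer the node
+ *         // with the most spare capacity instead of banning everywhere
+ *     }
+ */
+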
+/*!
+ * \internal
+ * \brief Create a new load_stopped pseudo-op for a node
+ *
+ * \param[in] node Node to create op for
+ * \param[in,out] data_set Cluster working set
+ *
+ * \return Newly created load_stopped op
+ */
+static pe_action_t *
+new_load_stopped_op(const pe_node_t *node, pe_working_set_t *data_set)
+{
+ char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s",
+ node->details->uname);
+ pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set);
+
+ if (load_stopped->node == NULL) {
+ load_stopped->node = pe__copy_node(node);
+ pe__clear_action_flags(load_stopped, pe_action_optional);
+ }
+ free(load_stopped_task);
+ return load_stopped;
+}
+
+/*!
+ * \internal
+ * \brief Create utilization-related internal constraints for a resource
+ *
+ * \param[in,out] rsc Resource to create constraints for
+ * \param[in] allowed_nodes List of allowed next nodes for \p rsc
+ */
+void
+pcmk__create_utilization_constraints(pe_resource_t *rsc,
+ const GList *allowed_nodes)
+{
+ const GList *iter = NULL;
+ const pe_node_t *node = NULL;
+ pe_action_t *load_stopped = NULL;
+
+ pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s",
+ rsc->id, rsc->cluster->placement_strategy);
+
+ // "stop rsc then load_stopped" constraints for current nodes
+ for (iter = rsc->running_on; iter != NULL; iter = iter->next) {
+ node = (const pe_node_t *) iter->data;
+ load_stopped = new_load_stopped_op(node, rsc->cluster);
+ pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, NULL, load_stopped,
+ pe_order_load, rsc->cluster);
+ }
+
+ // "load_stopped then start/migrate_to rsc" constraints for allowed nodes
+ for (iter = allowed_nodes; iter; iter = iter->next) {
+ node = (const pe_node_t *) iter->data;
+ load_stopped = new_load_stopped_op(node, rsc->cluster);
+ pcmk__new_ordering(NULL, NULL, load_stopped, rsc, start_key(rsc), NULL,
+ pe_order_load, rsc->cluster);
+ pcmk__new_ordering(NULL, NULL, load_stopped,
+ rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL,
+ pe_order_load, rsc->cluster);
+ }
+}
+
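+/* For reference (editor's note): for a resource rsc currently on node1 and
+ * allowed on node2, the code above produces orderings equivalent to
+ *
+ *     stop rsc (node1)   -> load_stopped_node1
+ *     load_stopped_node2 -> start rsc (node2)
+ *     load_stopped_node2 -> migrate_to rsc (node2)
+ *
+ * (the pseudo-op name is the LOAD_STOPPED prefix plus the node name).
+ */
+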
+/*!
+ * \internal
+ * \brief Output node capacities if enabled
+ *
+ * \param[in] desc Prefix for output
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set)
+{
+ if (!pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
+ return;
+ }
+ for (const GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ const pe_node_t *node = (const pe_node_t *) iter->data;
+ pcmk__output_t *out = data_set->priv;
+
+ out->message(out, "node-capacity", node, desc);
+ }
+}
diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c
new file mode 100644
index 0000000..b4e670d
--- /dev/null
+++ b/lib/pacemaker/pcmk_scheduler.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <crm/crm.h>
+#include <crm/cib.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <glib.h>
+
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+CRM_TRACE_INIT_DATA(pacemaker);
+
+/*!
+ * \internal
+ * \brief Do deferred action checks after allocation
+ *
+ * When unpacking the resource history, the scheduler checks for resource
+ * configurations that have changed since an action was run. However, at that
+ * time, bundles using the REMOTE_CONTAINER_HACK don't have their final
+ * parameter information, so instead they add a deferred check to a list. This
+ * function processes one entry in that list.
+ *
+ * \param[in,out] rsc Resource that action history is for
+ * \param[in,out] node Node that action history is for
+ * \param[in] rsc_op Action history entry
+ * \param[in] check Type of deferred check to do
+ */
+static void
+check_params(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_op,
+ enum pe_check_parameters check)
+{
+ const char *reason = NULL;
+ op_digest_cache_t *digest_data = NULL;
+
+ switch (check) {
+ case pe_check_active:
+ if (pcmk__check_action_config(rsc, node, rsc_op)
+ && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL)) {
+ reason = "action definition changed";
+ }
+ break;
+
+ case pe_check_last_failure:
+ digest_data = rsc_action_digest_cmp(rsc, rsc_op, node,
+ rsc->cluster);
+ switch (digest_data->rc) {
+ case RSC_DIGEST_UNKNOWN:
+ crm_trace("Resource %s history entry %s on %s has "
+ "no digest to compare",
+ rsc->id, ID(rsc_op), node->details->id);
+ break;
+ case RSC_DIGEST_MATCH:
+ break;
+ default:
+ reason = "resource parameters have changed";
+ break;
+ }
+ break;
+ }
+ if (reason != NULL) {
+ pe__clear_failcount(rsc, node, reason, rsc->cluster);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a resource has failcount clearing scheduled on a node
+ *
+ * \param[in] node Node to check
+ * \param[in] rsc Resource to check
+ *
+ * \return true if \p rsc has failcount clearing scheduled on \p node,
+ * otherwise false
+ */
+static bool
+failcount_clear_action_exists(const pe_node_t *node, const pe_resource_t *rsc)
+{
+ GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE);
+
+ if (list != NULL) {
+ g_list_free(list);
+ return true;
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Ban a resource from a node if it reached its failure threshold there
+ *
+ * \param[in,out] rsc Resource to check failure threshold for
+ * \param[in] node Node to check \p rsc on
+ */
+static void
+check_failure_threshold(pe_resource_t *rsc, const pe_node_t *node)
+{
+ // If this is a collective resource, apply recursively to children instead
+ if (rsc->children != NULL) {
+ g_list_foreach(rsc->children, (GFunc) check_failure_threshold,
+ (gpointer) node);
+ return;
+
+ } else if (failcount_clear_action_exists(node, rsc)) {
+ /* Don't force the resource away from this node due to a failcount
+ * that's going to be cleared.
+ *
+ * @TODO Failcount clearing can be scheduled in
+ * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in
+ * schedule_resource_actions() via check_params(). This runs well before
+ * then, so it cannot detect those, meaning we might check the migration
+ * threshold when we shouldn't. Worst case, we stop or move the
+ * resource, then move it back in the next transition.
+ */
+ return;
+
+ } else {
+ pe_resource_t *failed = NULL;
+
+ if (pcmk__threshold_reached(rsc, node, &failed)) {
+ resource_location(failed, node, -INFINITY, "__fail_limit__",
+ rsc->cluster);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief If resource has exclusive discovery, ban node if not allowed
+ *
+ * Location constraints have a resource-discovery option that allows users to
+ * specify where probes are done for the affected resource. If this is set to
+ * exclusive, probes will only be done on nodes listed in exclusive constraints.
+ * This function bans the resource from the node if the node is not listed.
+ *
+ * \param[in,out] rsc Resource to check
+ * \param[in] node Node to check \p rsc on
+ */
+static void
+apply_exclusive_discovery(pe_resource_t *rsc, const pe_node_t *node)
+{
+ if (rsc->exclusive_discover
+ || pe__const_top_resource(rsc, false)->exclusive_discover) {
+ pe_node_t *match = NULL;
+
+ // If this is a collective resource, apply recursively to children
+ g_list_foreach(rsc->children, (GFunc) apply_exclusive_discovery,
+ (gpointer) node);
+
+ match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
+ if ((match != NULL)
+ && (match->rsc_discover_mode != pe_discover_exclusive)) {
+ match->weight = -INFINITY;
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Apply stickiness to a resource if appropriate
+ *
+ * \param[in,out] rsc Resource to check for stickiness
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+apply_stickiness(pe_resource_t *rsc, pe_working_set_t *data_set)
+{
+ pe_node_t *node = NULL;
+
+ // If this is a collective resource, apply recursively to children instead
+ if (rsc->children != NULL) {
+ g_list_foreach(rsc->children, (GFunc) apply_stickiness, data_set);
+ return;
+ }
+
+ /* A resource is sticky if it is managed, has stickiness configured, and is
+ * active on a single node.
+ */
+ if (!pcmk_is_set(rsc->flags, pe_rsc_managed)
+ || (rsc->stickiness < 1) || !pcmk__list_of_1(rsc->running_on)) {
+ return;
+ }
+
+ node = rsc->running_on->data;
+
+ /* In a symmetric cluster, stickiness can always be used. In an
+ * asymmetric cluster, we have to check whether the resource is still
+ * allowed on the node, so we don't keep the resource somewhere it is no
+ * longer explicitly enabled.
+ */
+ if (!pcmk_is_set(rsc->cluster->flags, pe_flag_symmetric_cluster)
+ && (pe_hash_table_lookup(rsc->allowed_nodes,
+ node->details->id) == NULL)) {
+ pe_rsc_debug(rsc,
+ "Ignoring %s stickiness because the cluster is "
+ "asymmetric and %s is not explicitly allowed",
+ rsc->id, pe__node_name(node));
+ return;
+ }
+
+ pe_rsc_debug(rsc, "Resource %s has %d stickiness on %s",
+ rsc->id, rsc->stickiness, pe__node_name(node));
+ resource_location(rsc, node, rsc->stickiness, "stickiness", data_set);
+}
+
+/*!
+ * \internal
+ * \brief Apply shutdown locks for all resources as appropriate
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+apply_shutdown_locks(pe_working_set_t *data_set)
+{
+ if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
+ return;
+ }
+ for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ rsc->cmds->shutdown_lock(rsc);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Calculate the number of available nodes in the cluster
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+count_available_nodes(pe_working_set_t *data_set)
+{
+ if (pcmk_is_set(data_set->flags, pe_flag_no_compat)) {
+ return;
+ }
+
+ // @COMPAT for API backward compatibility only (cluster does not use value)
+ for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *node = (pe_node_t *) iter->data;
+
+ if ((node != NULL) && (node->weight >= 0) && node->details->online
+ && (node->details->type != node_ping)) {
+ data_set->max_valid_nodes++;
+ }
+ }
+ crm_trace("Online node count: %d", data_set->max_valid_nodes);
+}
+
+/*!
+ * \internal
+ * \brief Apply node-specific scheduling criteria
+ *
+ * After the CIB has been unpacked, process node-specific scheduling criteria
+ * including shutdown locks, location constraints, resource stickiness,
+ * migration thresholds, and exclusive resource discovery.
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+apply_node_criteria(pe_working_set_t *data_set)
+{
+ crm_trace("Applying node-specific scheduling criteria");
+ apply_shutdown_locks(data_set);
+ count_available_nodes(data_set);
+ pcmk__apply_locations(data_set);
+ g_list_foreach(data_set->resources, (GFunc) apply_stickiness, data_set);
+
+ for (GList *node_iter = data_set->nodes; node_iter != NULL;
+ node_iter = node_iter->next) {
+ for (GList *rsc_iter = data_set->resources; rsc_iter != NULL;
+ rsc_iter = rsc_iter->next) {
+ pe_node_t *node = (pe_node_t *) node_iter->data;
+ pe_resource_t *rsc = (pe_resource_t *) rsc_iter->data;
+
+ check_failure_threshold(rsc, node);
+ apply_exclusive_discovery(rsc, node);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Allocate resources to nodes
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+allocate_resources(pe_working_set_t *data_set)
+{
+ GList *iter = NULL;
+
+ crm_trace("Allocating resources to nodes");
+
+ if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) {
+ pcmk__sort_resources(data_set);
+ }
+ pcmk__show_node_capacities("Original", data_set);
+
+ if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
+ /* Allocate remote connection resources first (which will also allocate
+ * any colocation dependencies). If the connection is migrating, always
+ * prefer the partial migration target.
+ */
+ for (iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ if (rsc->is_remote_node) {
+ pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
+ rsc->id);
+ rsc->cmds->assign(rsc, rsc->partial_migration_target);
+ }
+ }
+ }
+
+ /* now do the rest of the resources */
+ for (iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ if (!rsc->is_remote_node) {
+ pe_rsc_trace(rsc, "Allocating %s resource '%s'",
+ crm_element_name(rsc->xml), rsc->id);
+ rsc->cmds->assign(rsc, NULL);
+ }
+ }
+
+ pcmk__show_node_capacities("Remaining", data_set);
+}
+
+/*!
+ * \internal
+ * \brief Schedule fail count clearing on online nodes if resource is orphaned
+ *
+ * \param[in,out] rsc Resource to check
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+clear_failcounts_if_orphaned(pe_resource_t *rsc, pe_working_set_t *data_set)
+{
+ if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
+ return;
+ }
+ crm_trace("Clear fail counts for orphaned resource %s", rsc->id);
+
+ /* There's no need to recurse into rsc->children because those
+ * should just be unallocated clone instances.
+ */
+
+ for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *node = (pe_node_t *) iter->data;
+ pe_action_t *clear_op = NULL;
+
+ if (!node->details->online) {
+ continue;
+ }
+ if (pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL) == 0) {
+ continue;
+ }
+
+ clear_op = pe__clear_failcount(rsc, node, "it is orphaned", data_set);
+
+ /* We can't use order_action_then_stop() here because its
+ * pe_order_preserve breaks things
+ */
+ pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc),
+ NULL, pe_order_optional, data_set);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Schedule any resource actions needed
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+schedule_resource_actions(pe_working_set_t *data_set)
+{
+ // Process deferred action checks
+ pe__foreach_param_check(data_set, check_params);
+ pe__free_param_checks(data_set);
+
+ if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) {
+ crm_trace("Scheduling probes");
+ pcmk__schedule_probes(data_set);
+ }
+
+ if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
+ g_list_foreach(data_set->resources,
+ (GFunc) clear_failcounts_if_orphaned, data_set);
+ }
+
+ crm_trace("Scheduling resource actions");
+ for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
+ pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+ rsc->cmds->create_actions(rsc);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a resource or any of its descendants are managed
+ *
+ * \param[in] rsc Resource to check
+ *
+ * \return true if resource or any descendant is managed, otherwise false
+ */
+static bool
+is_managed(const pe_resource_t *rsc)
+{
+ if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ return true;
+ }
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ if (is_managed((pe_resource_t *) iter->data)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether any resources in the cluster are managed
+ *
+ * \param[in] data_set Cluster working set
+ *
+ * \return true if any resource is managed, otherwise false
+ */
+static bool
+any_managed_resources(const pe_working_set_t *data_set)
+{
+ for (const GList *iter = data_set->resources;
+ iter != NULL; iter = iter->next) {
+ if (is_managed((const pe_resource_t *) iter->data)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node requires fencing
+ *
+ * \param[in] node Node to check
+ * \param[in] have_managed Whether any resource in cluster is managed
+ * \param[in] data_set Cluster working set
+ *
+ * \return true if \p node should be fenced, otherwise false
+ */
+static bool
+needs_fencing(const pe_node_t *node, bool have_managed,
+ const pe_working_set_t *data_set)
+{
+ return have_managed && node->details->unclean
+ && pe_can_fence(data_set, node);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node requires shutdown
+ *
+ * \param[in] node Node to check
+ *
+ * \return true if \p node should be shut down, otherwise false
+ */
+static bool
+needs_shutdown(const pe_node_t *node)
+{
+ if (pe__is_guest_or_remote_node(node)) {
+ /* Do not send shutdown actions for Pacemaker Remote nodes.
+ * @TODO We might come up with a good use for this in the future.
+ */
+ return false;
+ }
+ return node->details->online && node->details->shutdown;
+}
+
+/*!
+ * \internal
+ * \brief Track and order non-DC fencing
+ *
+ * \param[in,out] list List of existing non-DC fencing actions
+ * \param[in,out] action Fencing action to prepend to \p list
+ * \param[in] data_set Cluster working set
+ *
+ * \return (Possibly new) head of \p list
+ */
+static GList *
+add_nondc_fencing(GList *list, pe_action_t *action,
+ const pe_working_set_t *data_set)
+{
+ if (!pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)
+ && (list != NULL)) {
+ /* Concurrent fencing is disabled, so order each non-DC
+ * fencing in a chain. If there is any DC fencing or
+ * shutdown, it will be ordered after the last action in the
+ * chain later.
+ */
+ order_actions((pe_action_t *) list->data, action, pe_order_optional);
+ }
+ return g_list_prepend(list, action);
+}
+
+/*!
+ * \internal
+ * \brief Schedule a node for fencing
+ *
+ * \param[in,out] node Node that requires fencing
+ * \param[in,out] data_set Cluster working set
+ */
+static pe_action_t *
+schedule_fencing(pe_node_t *node, pe_working_set_t *data_set)
+{
+ pe_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean",
+ FALSE, data_set);
+
+ pe_warn("Scheduling node %s for fencing", pe__node_name(node));
+ pcmk__order_vs_fence(fencing, data_set);
+ return fencing;
+}
+
+/*!
+ * \internal
+ * \brief Create and order node fencing and shutdown actions
+ *
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+schedule_fencing_and_shutdowns(pe_working_set_t *data_set)
+{
+ pe_action_t *dc_down = NULL;
+ bool integrity_lost = false;
+ bool have_managed = any_managed_resources(data_set);
+ GList *fencing_ops = NULL;
+ GList *shutdown_ops = NULL;
+
+ crm_trace("Scheduling fencing and shutdowns as needed");
+ if (!have_managed) {
+ crm_notice("No fencing will be done until there are resources to manage");
+ }
+
+ // Check each node for whether it needs fencing or shutdown
+ for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *node = (pe_node_t *) iter->data;
+ pe_action_t *fencing = NULL;
+
+ /* Guest nodes are "fenced" by recovering their container resource,
+ * so handle them separately.
+ */
+ if (pe__is_guest_node(node)) {
+ if (node->details->remote_requires_reset && have_managed
+ && pe_can_fence(data_set, node)) {
+ pcmk__fence_guest(node);
+ }
+ continue;
+ }
+
+ if (needs_fencing(node, have_managed, data_set)) {
+ fencing = schedule_fencing(node, data_set);
+
+ // Track DC and non-DC fence actions separately
+ if (node->details->is_dc) {
+ dc_down = fencing;
+ } else {
+ fencing_ops = add_nondc_fencing(fencing_ops, fencing, data_set);
+ }
+
+ } else if (needs_shutdown(node)) {
+ pe_action_t *down_op = pcmk__new_shutdown_action(node);
+
+ // Track DC and non-DC shutdown actions separately
+ if (node->details->is_dc) {
+ dc_down = down_op;
+ } else {
+ shutdown_ops = g_list_prepend(shutdown_ops, down_op);
+ }
+ }
+
+ if ((fencing == NULL) && node->details->unclean) {
+ integrity_lost = true;
+ pe_warn("Node %s is unclean but cannot be fenced",
+ pe__node_name(node));
+ }
+ }
+
+ if (integrity_lost) {
+ if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
+ pe_warn("Resource functionality and data integrity cannot be "
+ "guaranteed (configure, enable, and test fencing to "
+ "correct this)");
+
+ } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
+ crm_notice("Unclean nodes will not be fenced until quorum is "
+ "attained or no-quorum-policy is set to ignore");
+ }
+ }
+
+ if (dc_down != NULL) {
+ /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
+ * DC elections. However, we don't want to order non-DC shutdowns before
+ * a DC *fencing*, because even though we don't want a node that's
+ * shutting down to become DC, the DC fencing could be ordered before a
+ * clone stop that's also ordered before the shutdowns, thus leading to
+ * a graph loop.
+ */
+ if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
+ pcmk__order_after_each(dc_down, shutdown_ops);
+ }
+
+ // Order any non-DC fencing before any DC fencing or shutdown
+
+ if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) {
+ /* With concurrent fencing, order each non-DC fencing action
+ * separately before any DC fencing or shutdown.
+ */
+ pcmk__order_after_each(dc_down, fencing_ops);
+ } else if (fencing_ops != NULL) {
+ /* Without concurrent fencing, the non-DC fencing actions are
+ * already ordered relative to each other, so we just need to order
+ * the DC fencing after the last action in the chain (which is the
+ * first item in the list).
+ */
+ order_actions((pe_action_t *) fencing_ops->data, dc_down,
+ pe_order_optional);
+ }
+ }
+ g_list_free(fencing_ops);
+ g_list_free(shutdown_ops);
+}
+
+static void
+log_resource_details(pe_working_set_t *data_set)
+{
+ pcmk__output_t *out = data_set->priv;
+ GList *all = NULL;
+
+ /* We need a list of nodes that we are allowed to output information for.
+ * This is necessary because out->message for all the resource-related
+ * messages expects such a list, due to the `crm_mon --node=` feature. Here,
+ * we just make it a list of all the nodes.
+ */
+ all = g_list_prepend(all, (gpointer) "*");
+
+ for (GList *item = data_set->resources; item != NULL; item = item->next) {
+ pe_resource_t *rsc = (pe_resource_t *) item->data;
+
+ // Log all resources except inactive orphans
+ if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)
+ || (rsc->role != RSC_ROLE_STOPPED)) {
+ out->message(out, crm_map_element_name(rsc->xml), 0, rsc, all, all);
+ }
+ }
+
+ g_list_free(all);
+}
+
+static void
+log_all_actions(pe_working_set_t *data_set)
+{
+ /* This only ever outputs to the log, so ignore whatever output object was
+ * previously set and just log instead.
+ */
+ pcmk__output_t *prev_out = data_set->priv;
+ pcmk__output_t *out = NULL;
+
+ if (pcmk__log_output_new(&out) != pcmk_rc_ok) {
+ return;
+ }
+
+ pe__register_messages(out);
+ pcmk__register_lib_messages(out);
+ pcmk__output_set_log_level(out, LOG_NOTICE);
+ data_set->priv = out;
+
+ out->begin_list(out, NULL, NULL, "Actions");
+ pcmk__output_actions(data_set);
+ out->end_list(out);
+ out->finish(out, CRM_EX_OK, true, NULL);
+ pcmk__output_free(out);
+
+ data_set->priv = prev_out;
+}
+
+/*!
+ * \internal
+ * \brief Log all required but unrunnable actions at trace level
+ *
+ * \param[in] data_set Cluster working set
+ */
+static void
+log_unrunnable_actions(const pe_working_set_t *data_set)
+{
+ const uint64_t flags = pe_action_optional|pe_action_runnable|pe_action_pseudo;
+
+ crm_trace("Required but unrunnable actions:");
+ for (const GList *iter = data_set->actions;
+ iter != NULL; iter = iter->next) {
+
+ const pe_action_t *action = (const pe_action_t *) iter->data;
+
+ if (!pcmk_any_flags_set(action->flags, flags)) {
+ pcmk__log_action("\t", action, true);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Unpack the CIB for scheduling
+ *
+ * \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked)
+ * \param[in] flags Working set flags to set in addition to defaults
+ * \param[in,out] data_set Cluster working set
+ */
+static void
+unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set)
+{
+    const char *localhost_save = NULL;
+
+ if (pcmk_is_set(data_set->flags, pe_flag_have_status)) {
+ crm_trace("Reusing previously calculated cluster status");
+ pe__set_working_set_flags(data_set, flags);
+ return;
+ }
+
+ if (data_set->localhost) {
+ localhost_save = data_set->localhost;
+ }
+
+ CRM_ASSERT(cib != NULL);
+ crm_trace("Calculating cluster status");
+
+ /* This will zero the entire struct without freeing anything first, so
+ * callers should never call pcmk__schedule_actions() with a populated data
+ * set unless pe_flag_have_status is set (i.e. cluster_status() was
+ * previously called, whether directly or via pcmk__schedule_actions()).
+ */
+ set_working_set_defaults(data_set);
+
+ if (localhost_save) {
+ data_set->localhost = localhost_save;
+ }
+
+ pe__set_working_set_flags(data_set, flags);
+ data_set->input = cib;
+ cluster_status(data_set); // Sets pe_flag_have_status
+}
+
+/*!
+ * \internal
+ * \brief Run the scheduler for a given CIB
+ *
+ * \param[in,out] cib CIB XML to use as scheduler input
+ * \param[in] flags Working set flags to set in addition to defaults
+ * \param[in,out] data_set Cluster working set
+ */
+void
+pcmk__schedule_actions(xmlNode *cib, unsigned long long flags,
+ pe_working_set_t *data_set)
+{
+ unpack_cib(cib, flags, data_set);
+ pcmk__set_allocation_methods(data_set);
+ pcmk__apply_node_health(data_set);
+ pcmk__unpack_constraints(data_set);
+ if (pcmk_is_set(data_set->flags, pe_flag_check_config)) {
+ return;
+ }
+
+ if (!pcmk_is_set(data_set->flags, pe_flag_quick_location) &&
+ pcmk__is_daemon) {
+ log_resource_details(data_set);
+ }
+
+ apply_node_criteria(data_set);
+
+ if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) {
+ return;
+ }
+
+ pcmk__create_internal_constraints(data_set);
+ pcmk__handle_rsc_config_changes(data_set);
+ allocate_resources(data_set);
+ schedule_resource_actions(data_set);
+
+ /* Remote ordering constraints need to happen prior to calculating fencing
+ * because it is one more place we can mark nodes as needing fencing.
+ */
+ pcmk__order_remote_connection_actions(data_set);
+
+ schedule_fencing_and_shutdowns(data_set);
+ pcmk__apply_orderings(data_set);
+ log_all_actions(data_set);
+ pcmk__create_graph(data_set);
+
+ if (get_crm_log_level() == LOG_TRACE) {
+ log_unrunnable_actions(data_set);
+ }
+}
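+
+/* Illustrative usage sketch (editor's note, not part of the upstream diff):
+ * a minimal caller, assuming the CIB has already been read into cib_xml and
+ * an output object "out" has been created:
+ *
+ *     pe_working_set_t *data_set = pe_new_working_set();
+ *
+ *     data_set->priv = out;    // pcmk__output_t for status messages
+ *     pcmk__schedule_actions(cib_xml, pe_flag_no_compat, data_set);
+ *     // ... inspect data_set->actions or the generated transition graph ...
+ *     pe_free_working_set(data_set);
+ */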
diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c
new file mode 100644
index 0000000..165c7d3
--- /dev/null
+++ b/lib/pacemaker/pcmk_simulate.c
@@ -0,0 +1,999 @@
+/*
+ * Copyright 2021-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/cib/internal.h>
+#include <crm/common/output.h>
+#include <crm/common/results.h>
+#include <crm/pengine/pe_types.h>
+#include <pacemaker-internal.h>
+#include <pacemaker.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "libpacemaker_private.h"
+
+static pcmk__output_t *out = NULL;
+static cib_t *fake_cib = NULL;
+static GList *fake_resource_list = NULL;
+static const GList *fake_op_fail_list = NULL;
+
+static void set_effective_date(pe_working_set_t *data_set, bool print_original,
+ const char *use_date);
+
+/*!
+ * \internal
+ * \brief Create an action name for use in a dot graph
+ *
+ * \param[in] action Action to create name for
+ * \param[in] verbose If true, add action ID to name
+ *
+ * \return Newly allocated string with action name
+ * \note It is the caller's responsibility to free the result.
+ */
+static char *
+create_action_name(const pe_action_t *action, bool verbose)
+{
+ char *action_name = NULL;
+ const char *prefix = "";
+ const char *action_host = NULL;
+ const char *clone_name = NULL;
+ const char *task = action->task;
+
+ if (action->node != NULL) {
+ action_host = action->node->details->uname;
+ } else if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
+ action_host = "<none>";
+ }
+
+ if (pcmk__str_eq(action->task, RSC_CANCEL, pcmk__str_none)) {
+ prefix = "Cancel ";
+ task = action->cancel_task;
+ }
+
+ if (action->rsc != NULL) {
+ clone_name = action->rsc->clone_name;
+ }
+
+ if (clone_name != NULL) {
+ char *key = NULL;
+ guint interval_ms = 0;
+
+ if (pcmk__guint_from_hash(action->meta,
+ XML_LRM_ATTR_INTERVAL_MS, 0,
+ &interval_ms) != pcmk_rc_ok) {
+ interval_ms = 0;
+ }
+
+ if (pcmk__strcase_any_of(action->task, RSC_NOTIFY, RSC_NOTIFIED,
+ NULL)) {
+ const char *n_type = g_hash_table_lookup(action->meta,
+ "notify_key_type");
+ const char *n_task = g_hash_table_lookup(action->meta,
+ "notify_key_operation");
+
+ CRM_ASSERT(n_type != NULL);
+ CRM_ASSERT(n_task != NULL);
+ key = pcmk__notify_key(clone_name, n_type, n_task);
+ } else {
+ key = pcmk__op_key(clone_name, task, interval_ms);
+ }
+
+ if (action_host != NULL) {
+ action_name = crm_strdup_printf("%s%s %s",
+ prefix, key, action_host);
+ } else {
+ action_name = crm_strdup_printf("%s%s", prefix, key);
+ }
+ free(key);
+
+ } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
+ const char *op = g_hash_table_lookup(action->meta, "stonith_action");
+
+ action_name = crm_strdup_printf("%s%s '%s' %s",
+ prefix, action->task, op, action_host);
+
+ } else if (action->rsc && action_host) {
+ action_name = crm_strdup_printf("%s%s %s",
+ prefix, action->uuid, action_host);
+
+ } else if (action_host) {
+ action_name = crm_strdup_printf("%s%s %s",
+ prefix, action->task, action_host);
+
+ } else {
+ action_name = crm_strdup_printf("%s", action->uuid);
+ }
+
+ if (verbose) {
+ char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id);
+
+ free(action_name);
+ action_name = with_id;
+ }
+ return action_name;
+}
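+
+/* Illustrative examples of generated names (hypothetical resource and node
+ * names): "rsc1_monitor_10000 node1", "Cancel rsc1_monitor_10000 node1", and
+ * "stonith 'reboot' node1"; with \p verbose, the action ID is appended, as in
+ * "rsc1_start_0 node1 (4)".
+ */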
+
+/*!
+ * \internal
+ * \brief Display the status of a cluster
+ *
+ * \param[in,out] data_set Cluster working set
+ * \param[in] show_opts How to modify display (as pcmk_show_opt_e flags)
+ * \param[in] section_opts Sections to display (as pcmk_section_e flags)
+ * \param[in] title What to use as list title
+ * \param[in] print_spacer Whether to display a spacer first
+ */
+static void
+print_cluster_status(pe_working_set_t *data_set, uint32_t show_opts,
+ uint32_t section_opts, const char *title, bool print_spacer)
+{
+ pcmk__output_t *out = data_set->priv;
+ GList *all = NULL;
+ crm_exit_t stonith_rc = 0;
+ enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid;
+
+ section_opts |= pcmk_section_nodes | pcmk_section_resources;
+ show_opts |= pcmk_show_inactive_rscs | pcmk_show_failed_detail;
+
+ all = g_list_prepend(all, (gpointer) "*");
+
+ PCMK__OUTPUT_SPACER_IF(out, print_spacer);
+ out->begin_list(out, NULL, NULL, "%s", title);
+ out->message(out, "cluster-status",
+ data_set, state, stonith_rc, NULL,
+ false, section_opts, show_opts, NULL, all, all);
+ out->end_list(out);
+
+ g_list_free(all);
+}
+
+/*!
+ * \internal
+ * \brief Display a summary of all actions scheduled in a transition
+ *
+ * \param[in,out] data_set Cluster working set (fully scheduled)
+ * \param[in] print_spacer Whether to display a spacer first
+ */
+static void
+print_transition_summary(pe_working_set_t *data_set, bool print_spacer)
+{
+ pcmk__output_t *out = data_set->priv;
+
+ PCMK__OUTPUT_SPACER_IF(out, print_spacer);
+ out->begin_list(out, NULL, NULL, "Transition Summary");
+ pcmk__output_actions(data_set);
+ out->end_list(out);
+}
+
+/*!
+ * \internal
+ * \brief Reset a cluster working set's input, output, date, and flags
+ *
+ * \param[in,out] data_set Cluster working set
+ * \param[in] input What to set as cluster input
+ * \param[in] out What to set as cluster output object
+ * \param[in] use_date What to set as cluster's current timestamp
+ * \param[in] flags Cluster flags to add (pe_flag_*)
+ */
+static void
+reset(pe_working_set_t *data_set, xmlNodePtr input, pcmk__output_t *out,
+ const char *use_date, unsigned int flags)
+{
+ data_set->input = input;
+ data_set->priv = out;
+ set_effective_date(data_set, true, use_date);
+ if (pcmk_is_set(flags, pcmk_sim_sanitized)) {
+ pe__set_working_set_flags(data_set, pe_flag_sanitized);
+ }
+ if (pcmk_is_set(flags, pcmk_sim_show_scores)) {
+ pe__set_working_set_flags(data_set, pe_flag_show_scores);
+ }
+ if (pcmk_is_set(flags, pcmk_sim_show_utilization)) {
+ pe__set_working_set_flags(data_set, pe_flag_show_utilization);
+ }
+}
+
+/*!
+ * \brief Write out a file in dot(1) format describing the actions that will
+ * be taken by the scheduler in response to an input CIB file.
+ *
+ * \param[in,out] data_set Working set for the cluster
+ * \param[in] dot_file The filename to write
+ * \param[in] all_actions Write all actions, even those that are optional
+ * or are on unmanaged resources
+ * \param[in] verbose Add extra information, such as action IDs, to the
+ * output
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+write_sim_dotfile(pe_working_set_t *data_set, const char *dot_file,
+ bool all_actions, bool verbose)
+{
+ GList *gIter = NULL;
+ FILE *dot_strm = fopen(dot_file, "w");
+
+ if (dot_strm == NULL) {
+ return errno;
+ }
+
+ fprintf(dot_strm, " digraph \"g\" {\n");
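+    // First pass: write a dot node for each action, styled by its state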
+ for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+ const char *style = "dashed";
+ const char *font = "black";
+ const char *color = "black";
+ char *action_name = create_action_name(action, verbose);
+
+ if (pcmk_is_set(action->flags, pe_action_pseudo)) {
+ font = "orange";
+ }
+
+ if (pcmk_is_set(action->flags, pe_action_dumped)) {
+ style = "bold";
+ color = "green";
+
+ } else if ((action->rsc != NULL)
+ && !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) {
+ color = "red";
+ font = "purple";
+ if (!all_actions) {
+ goto do_not_write;
+ }
+
+ } else if (pcmk_is_set(action->flags, pe_action_optional)) {
+ color = "blue";
+ if (!all_actions) {
+ goto do_not_write;
+ }
+
+ } else {
+ color = "red";
+ CRM_LOG_ASSERT(!pcmk_is_set(action->flags, pe_action_runnable));
+ }
+
+ pe__set_action_flags(action, pe_action_dumped);
+ fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n",
+ action_name, style, color, font);
+ do_not_write:
+ free(action_name);
+ }
+
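+    // Second pass: write the ordering edges between actions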
+ for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
+ pe_action_t *action = (pe_action_t *) gIter->data;
+
+ GList *gIter2 = NULL;
+
+ for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) {
+ pe_action_wrapper_t *before = (pe_action_wrapper_t *) gIter2->data;
+
+ char *before_name = NULL;
+ char *after_name = NULL;
+ const char *style = "dashed";
+ bool optional = true;
+
+ if (before->state == pe_link_dumped) {
+ optional = false;
+ style = "bold";
+ } else if (before->type == pe_order_none) {
+ continue;
+ } else if (pcmk_is_set(before->action->flags, pe_action_dumped)
+ && pcmk_is_set(action->flags, pe_action_dumped)
+ && before->type != pe_order_load) {
+ optional = false;
+ }
+
+ if (all_actions || !optional) {
+ before_name = create_action_name(before->action, verbose);
+ after_name = create_action_name(action, verbose);
+ fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n",
+ before_name, after_name, style);
+ free(before_name);
+ free(after_name);
+ }
+ }
+ }
+
+ fprintf(dot_strm, "}\n");
+ fflush(dot_strm);
+ fclose(dot_strm);
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \brief Profile the configuration updates and scheduler actions in a single
+ * CIB file, printing the profiling timings.
+ *
+ * \note \p data_set->priv must have been set to a valid \p pcmk__output_t
+ * object before this function is called.
+ *
+ * \param[in] xml_file The CIB file to profile
+ * \param[in] repeat Number of times to run
+ * \param[in,out] data_set Working set for the cluster
+ * \param[in] use_date The date to set the cluster's time to (may be NULL)
+ */
+static void
+profile_file(const char *xml_file, long long repeat, pe_working_set_t *data_set,
+ const char *use_date)
+{
+ pcmk__output_t *out = data_set->priv;
+ xmlNode *cib_object = NULL;
+ clock_t start = 0;
+ clock_t end;
+ unsigned long long data_set_flags = pe_flag_no_compat;
+
+ CRM_ASSERT(out != NULL);
+
+ cib_object = filename2xml(xml_file);
+ start = clock();
+
+ if (pcmk_find_cib_element(cib_object, XML_CIB_TAG_STATUS) == NULL) {
+ create_xml_node(cib_object, XML_CIB_TAG_STATUS);
+ }
+
+ if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
+ free_xml(cib_object);
+ return;
+ }
+
+ if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
+ free_xml(cib_object);
+ return;
+ }
+
+ if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
+ data_set_flags |= pe_flag_show_scores;
+ }
+ if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
+ data_set_flags |= pe_flag_show_utilization;
+ }
+
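+    // Run the scheduler "repeat" times, timing the batch as a whole; each
+    // run gets its own copy of the CIB unless there is only a single run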
+ for (int i = 0; i < repeat; ++i) {
+ xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object);
+
+ data_set->input = input;
+ set_effective_date(data_set, false, use_date);
+ pcmk__schedule_actions(input, data_set_flags, data_set);
+ pe_reset_working_set(data_set);
+ }
+
+ end = clock();
+ out->message(out, "profile", xml_file, start, end);
+}
+
+void
+pcmk__profile_dir(const char *dir, long long repeat, pe_working_set_t *data_set,
+ const char *use_date)
+{
+ pcmk__output_t *out = data_set->priv;
+ struct dirent **namelist;
+
+ int file_num = scandir(dir, &namelist, 0, alphasort);
+
+ CRM_ASSERT(out != NULL);
+
+ if (file_num > 0) {
+ struct stat prop;
+ char buffer[FILENAME_MAX];
+
+ out->begin_list(out, NULL, NULL, "Timings");
+
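+        // Profile each regular file with a ".xml" extension, skipping
+        // hidden files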
+ while (file_num--) {
+ if ('.' == namelist[file_num]->d_name[0]) {
+ free(namelist[file_num]);
+ continue;
+
+ } else if (!pcmk__ends_with_ext(namelist[file_num]->d_name,
+ ".xml")) {
+ free(namelist[file_num]);
+ continue;
+ }
+ snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name);
+ if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
+ profile_file(buffer, repeat, data_set, use_date);
+ }
+ free(namelist[file_num]);
+ }
+ free(namelist);
+
+ out->end_list(out);
+ }
+}
+
+/*!
+ * \brief Set the date of the cluster, either to the value given by
+ * \p use_date, or to the "execution-date" value in the CIB.
+ *
+ * \note \p data_set->priv must have been set to a valid \p pcmk__output_t
+ * object before this function is called.
+ *
+ * \param[in,out] data_set Working set for the cluster
+ * \param[in] print_original If \p true, the "execution-date" should
+ * also be printed
+ * \param[in] use_date The date to set the cluster's time to
+ * (may be NULL)
+ */
+static void
+set_effective_date(pe_working_set_t *data_set, bool print_original,
+ const char *use_date)
+{
+ pcmk__output_t *out = data_set->priv;
+ time_t original_date = 0;
+
+ CRM_ASSERT(out != NULL);
+
+ crm_element_value_epoch(data_set->input, "execution-date", &original_date);
+
+ if (use_date) {
+ data_set->now = crm_time_new(use_date);
+ out->info(out, "Setting effective cluster time: %s", use_date);
+ crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now,
+ crm_time_log_date | crm_time_log_timeofday);
+
+ } else if (original_date != 0) {
+ data_set->now = pcmk__copy_timet(original_date);
+
+ if (print_original) {
+ char *when = crm_time_as_string(data_set->now,
+ crm_time_log_date|crm_time_log_timeofday);
+
+ out->info(out, "Using the original execution date of: %s", when);
+ free(when);
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Simulate successfully executing a pseudo-action in a graph
+ *
+ * \param[in,out] graph Graph to update with pseudo-action result
+ * \param[in,out] action Pseudo-action to simulate executing
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+simulate_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ out->message(out, "inject-pseudo-action", node, task);
+
+ pcmk__update_graph(graph, action);
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Simulate executing a resource action in a graph
+ *
+ * \param[in,out] graph Graph to update with resource action result
+ * \param[in,out] action Resource action to simulate executing
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+simulate_resource_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ int rc;
+ lrmd_event_data_t *op = NULL;
+ int target_outcome = PCMK_OCF_OK;
+
+ const char *rtype = NULL;
+ const char *rclass = NULL;
+ const char *resource = NULL;
+ const char *rprovider = NULL;
+ const char *resource_config_name = NULL;
+ const char *operation = crm_element_value(action->xml, "operation");
+ const char *target_rc_s = crm_meta_value(action->params,
+ XML_ATTR_TE_TARGET_RC);
+
+ xmlNode *cib_node = NULL;
+ xmlNode *cib_resource = NULL;
+ xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
+
+ char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
+ char *uuid = NULL;
+ const char *router_node = crm_element_value(action->xml,
+ XML_LRM_ATTR_ROUTER_NODE);
+
+    // Certain actions don't need to be displayed or recorded in history
+ if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
+ crm_debug("No history injection for %s op on %s", operation, node);
+ goto done; // Confirm action and update graph
+ }
+
+ if (action_rsc == NULL) { // Shouldn't be possible
+ crm_log_xml_err(action->xml, "Bad");
+ free(node);
+ return EPROTO;
+ }
+
+ /* A resource might be known by different names in the configuration and in
+ * the action (for example, a clone instance). Grab the configuration name
+ * (which is preferred when writing history), and if necessary, the instance
+ * name.
+ */
+ resource_config_name = crm_element_value(action_rsc, XML_ATTR_ID);
+ if (resource_config_name == NULL) { // Shouldn't be possible
+ crm_log_xml_err(action->xml, "No ID");
+ free(node);
+ return EPROTO;
+ }
+ resource = resource_config_name;
+ if (pe_find_resource(fake_resource_list, resource) == NULL) {
+ const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG);
+
+ if ((longname != NULL)
+ && (pe_find_resource(fake_resource_list, longname) != NULL)) {
+ resource = longname;
+ }
+ }
+
+ // Certain actions need to be displayed but don't need history entries
+ if (pcmk__strcase_any_of(operation, "delete", RSC_METADATA, NULL)) {
+ out->message(out, "inject-rsc-action", resource, operation, node,
+ (guint) 0);
+ goto done; // Confirm action and update graph
+ }
+
+ rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS);
+ rtype = crm_element_value(action_rsc, XML_ATTR_TYPE);
+ rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER);
+
+ pcmk__scan_min_int(target_rc_s, &target_outcome, 0);
+
+ CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL,
+ cib_sync_call|cib_scope_local) == pcmk_ok);
+
+ // Ensure the action node is in the CIB
+ uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID);
+ cib_node = pcmk__inject_node(fake_cib, node,
+ ((router_node == NULL)? uuid: node));
+ free(uuid);
+ CRM_ASSERT(cib_node != NULL);
+
+ // Add a history entry for the action
+ cib_resource = pcmk__inject_resource_history(out, cib_node, resource,
+ resource_config_name,
+ rclass, rtype, rprovider);
+ if (cib_resource == NULL) {
+ crm_err("Could not simulate action %d history for resource %s",
+ action->id, resource);
+ free(node);
+ free_xml(cib_node);
+ return EINVAL;
+ }
+
+ // Simulate and display an executor event for the action result
+ op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE,
+ target_outcome, "User-injected result");
+ out->message(out, "inject-rsc-action", resource, op->op_type, node,
+ op->interval_ms);
+
+ // Check whether action is in a list of desired simulated failures
+ for (const GList *iter = fake_op_fail_list;
+ iter != NULL; iter = iter->next) {
+ const char *spec = (const char *) iter->data;
+ char *key = NULL;
+ const char *match_name = NULL;
+
+ // Allow user to specify anonymous clone with or without instance number
+ key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type,
+ op->interval_ms, node);
+ if (strncasecmp(key, spec, strlen(key)) == 0) {
+ match_name = resource;
+ }
+ free(key);
+
+ // If not found, try the resource's name in the configuration
+ if ((match_name == NULL)
+ && (strcmp(resource, resource_config_name) != 0)) {
+
+ key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource_config_name,
+ op->op_type, op->interval_ms, node);
+ if (strncasecmp(key, spec, strlen(key)) == 0) {
+ match_name = resource_config_name;
+ }
+ free(key);
+ }
+
+ if (match_name == NULL) {
+ continue; // This failed action entry doesn't match
+ }
+
+ // ${match_name}_${task}_${interval_in_ms}@${node}=${rc}
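+        // (for example, "myrsc_monitor_10000@node1=7", hypothetical names)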
+ rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc);
+ if (rc != 1) {
+ out->err(out, "Invalid failed operation '%s' "
+ "(result code must be integer)", spec);
+ continue; // Keep checking other list entries
+ }
+
+ out->info(out, "Pretending action %d failed with rc=%d",
+ action->id, op->rc);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
+ graph->abort_priority = INFINITY;
+ pcmk__inject_failcount(out, cib_node, match_name, op->op_type,
+ op->interval_ms, op->rc);
+ break;
+ }
+
+ pcmk__inject_action_result(cib_resource, op, target_outcome);
+ lrmd_free_event(op);
+ rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node,
+ cib_sync_call|cib_scope_local);
+ CRM_ASSERT(rc == pcmk_ok);
+
+ done:
+ free(node);
+ free_xml(cib_node);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__update_graph(graph, action);
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Simulate successfully executing a cluster action
+ *
+ * \param[in,out] graph Graph to update with action result
+ * \param[in,out] action Cluster action to simulate
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+simulate_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
+ xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ out->message(out, "inject-cluster-action", node, task, rsc);
+ pcmk__update_graph(graph, action);
+ return pcmk_rc_ok;
+}
+
+/*!
+ * \internal
+ * \brief Simulate successfully executing a fencing action
+ *
+ * \param[in,out] graph Graph to update with action result
+ * \param[in,out] action Fencing action to simulate
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+simulate_fencing_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
+{
+ const char *op = crm_meta_value(action->params, "stonith_action");
+ char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
+
+ out->message(out, "inject-fencing-action", target, op);
+
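+    /* Unfencing ("on") leaves the target's node state alone; any other
+     * fencing action takes the target offline and wipes its status section
+     */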
+ if (!pcmk__str_eq(op, "on", pcmk__str_casei)) {
+ int rc = pcmk_ok;
+ GString *xpath = g_string_sized_new(512);
+
+ // Set node state to offline
+ xmlNode *cib_node = pcmk__inject_node_state_change(fake_cib, target,
+ false);
+
+ CRM_ASSERT(cib_node != NULL);
+ crm_xml_add(cib_node, XML_ATTR_ORIGIN, __func__);
+ rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node,
+ cib_sync_call|cib_scope_local);
+ CRM_ASSERT(rc == pcmk_ok);
+
+ // Simulate controller clearing node's resource history and attributes
+ pcmk__g_strcat(xpath,
+ "//" XML_CIB_TAG_STATE
+ "[@" XML_ATTR_UNAME "='", target, "']/" XML_CIB_TAG_LRM,
+ NULL);
+ fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL,
+ cib_xpath|cib_sync_call|cib_scope_local);
+
+ g_string_truncate(xpath, 0);
+ pcmk__g_strcat(xpath,
+ "//" XML_CIB_TAG_STATE
+ "[@" XML_ATTR_UNAME "='", target, "']"
+ "/" XML_TAG_TRANSIENT_NODEATTRS, NULL);
+ fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL,
+ cib_xpath|cib_sync_call|cib_scope_local);
+
+ free_xml(cib_node);
+ g_string_free(xpath, TRUE);
+ }
+
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__update_graph(graph, action);
+ free(target);
+ return pcmk_rc_ok;
+}
+
+enum pcmk__graph_status
+pcmk__simulate_transition(pe_working_set_t *data_set, cib_t *cib,
+ const GList *op_fail_list)
+{
+ pcmk__graph_t *transition = NULL;
+ enum pcmk__graph_status graph_rc;
+
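+    // Simulator callbacks for each graph action type, in the order expected
+    // by pcmk__graph_functions_t: pseudo, resource, cluster, then fencing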
+ pcmk__graph_functions_t simulation_fns = {
+ simulate_pseudo_action,
+ simulate_resource_action,
+ simulate_cluster_action,
+ simulate_fencing_action,
+ };
+
+ out = data_set->priv;
+
+ fake_cib = cib;
+ fake_op_fail_list = op_fail_list;
+
+ if (!out->is_quiet(out)) {
+ out->begin_list(out, NULL, NULL, "Executing Cluster Transition");
+ }
+
+ pcmk__set_graph_functions(&simulation_fns);
+ transition = pcmk__unpack_graph(data_set->graph, crm_system_name);
+ pcmk__log_graph(LOG_DEBUG, transition);
+
+ fake_resource_list = data_set->resources;
+ do {
+ graph_rc = pcmk__execute_graph(transition);
+ } while (graph_rc == pcmk__graph_active);
+ fake_resource_list = NULL;
+
+ if (graph_rc != pcmk__graph_complete) {
+ out->err(out, "Transition failed: %s",
+ pcmk__graph_status2text(graph_rc));
+ pcmk__log_graph(LOG_ERR, transition);
+ out->err(out, "An invalid transition was produced");
+ }
+ pcmk__free_graph(transition);
+
+ if (!out->is_quiet(out)) {
+ // If not quiet, we'll need the resulting CIB for later display
+ xmlNode *cib_object = NULL;
+ int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object,
+ cib_sync_call|cib_scope_local);
+
+ CRM_ASSERT(rc == pcmk_ok);
+ pe_reset_working_set(data_set);
+ data_set->input = cib_object;
+ out->end_list(out);
+ }
+ return graph_rc;
+}
+
+int
+pcmk__simulate(pe_working_set_t *data_set, pcmk__output_t *out,
+ const pcmk_injections_t *injections, unsigned int flags,
+ uint32_t section_opts, const char *use_date,
+ const char *input_file, const char *graph_file,
+ const char *dot_file)
+{
+ int printed = pcmk_rc_no_output;
+ int rc = pcmk_rc_ok;
+ xmlNodePtr input = NULL;
+ cib_t *cib = NULL;
+
+ rc = cib__signon_query(out, &cib, &input);
+ if (rc != pcmk_rc_ok) {
+ goto simulate_done;
+ }
+
+ reset(data_set, input, out, use_date, flags);
+ cluster_status(data_set);
+
+ if ((cib->variant == cib_native)
+ && pcmk_is_set(section_opts, pcmk_section_times)) {
+ if (pcmk__our_nodename == NULL) {
+ // Currently used only in the times section
+ pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0);
+ }
+ data_set->localhost = pcmk__our_nodename;
+ }
+
+ if (!out->is_quiet(out)) {
+ if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) {
+ printed = out->message(out, "maint-mode", data_set->flags);
+ }
+
+ if (data_set->disabled_resources || data_set->blocked_resources) {
+ PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+ printed = out->info(out,
+ "%d of %d resource instances DISABLED and "
+ "%d BLOCKED from further action due to failure",
+ data_set->disabled_resources,
+ data_set->ninstances,
+ data_set->blocked_resources);
+ }
+
+ /* Most formatted output headers use caps for each word, but this one
+ * only has the first word capitalized for compatibility with pcs.
+ */
+ print_cluster_status(data_set,
+ pcmk_is_set(flags, pcmk_sim_show_pending)? pcmk_show_pending : 0,
+ section_opts, "Current cluster status",
+ (printed == pcmk_rc_ok));
+ printed = pcmk_rc_ok;
+ }
+
+ // If the user requested any injections, handle them
+ if ((injections->node_down != NULL)
+ || (injections->node_fail != NULL)
+ || (injections->node_up != NULL)
+ || (injections->op_inject != NULL)
+ || (injections->ticket_activate != NULL)
+ || (injections->ticket_grant != NULL)
+ || (injections->ticket_revoke != NULL)
+ || (injections->ticket_standby != NULL)
+ || (injections->watchdog != NULL)) {
+
+ PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+ pcmk__inject_scheduler_input(data_set, cib, injections);
+ printed = pcmk_rc_ok;
+
+ rc = cib->cmds->query(cib, NULL, &input, cib_sync_call);
+ if (rc != pcmk_rc_ok) {
+ rc = pcmk_legacy2rc(rc);
+ goto simulate_done;
+ }
+
+ cleanup_calculations(data_set);
+ reset(data_set, input, out, use_date, flags);
+ cluster_status(data_set);
+ }
+
+ if (input_file != NULL) {
+ rc = write_xml_file(input, input_file, FALSE);
+ if (rc < 0) {
+ rc = pcmk_legacy2rc(rc);
+ goto simulate_done;
+ }
+ }
+
+ if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) {
+ pcmk__output_t *logger_out = NULL;
+ unsigned long long data_set_flags = pe_flag_no_compat;
+
+ if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
+ data_set_flags |= pe_flag_show_scores;
+ }
+ if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
+ data_set_flags |= pe_flag_show_utilization;
+ }
+
+ if (pcmk_all_flags_set(data_set->flags,
+ pe_flag_show_scores|pe_flag_show_utilization)) {
+ PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+ out->begin_list(out, NULL, NULL,
+ "Allocation Scores and Utilization Information");
+ printed = pcmk_rc_ok;
+
+ } else if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
+ PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+ out->begin_list(out, NULL, NULL, "Allocation Scores");
+ printed = pcmk_rc_ok;
+
+ } else if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
+ PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+ out->begin_list(out, NULL, NULL, "Utilization Information");
+ printed = pcmk_rc_ok;
+
+ } else {
+ rc = pcmk__log_output_new(&logger_out);
+ if (rc != pcmk_rc_ok) {
+ goto simulate_done;
+ }
+ pe__register_messages(logger_out);
+ pcmk__register_lib_messages(logger_out);
+ data_set->priv = logger_out;
+ }
+
+ pcmk__schedule_actions(input, data_set_flags, data_set);
+
+ if (logger_out == NULL) {
+ out->end_list(out);
+ } else {
+ logger_out->finish(logger_out, CRM_EX_OK, true, NULL);
+ pcmk__output_free(logger_out);
+ data_set->priv = out;
+ }
+
+        input = NULL; /* Don't try to free it twice */
+
+ if (graph_file != NULL) {
+ rc = write_xml_file(data_set->graph, graph_file, FALSE);
+ if (rc < 0) {
+ rc = pcmk_rc_graph_error;
+ goto simulate_done;
+ }
+ }
+
+ if (dot_file != NULL) {
+ rc = write_sim_dotfile(data_set, dot_file,
+ pcmk_is_set(flags, pcmk_sim_all_actions),
+ pcmk_is_set(flags, pcmk_sim_verbose));
+ if (rc != pcmk_rc_ok) {
+ rc = pcmk_rc_dot_error;
+ goto simulate_done;
+ }
+ }
+
+ if (!out->is_quiet(out)) {
+ print_transition_summary(data_set, printed == pcmk_rc_ok);
+ }
+ }
+
+ rc = pcmk_rc_ok;
+
+ if (!pcmk_is_set(flags, pcmk_sim_simulate)) {
+ goto simulate_done;
+ }
+
+ PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+ if (pcmk__simulate_transition(data_set, cib,
+ injections->op_fail) != pcmk__graph_complete) {
+ rc = pcmk_rc_invalid_transition;
+ }
+
+ if (out->is_quiet(out)) {
+ goto simulate_done;
+ }
+
+ set_effective_date(data_set, true, use_date);
+
+ if (pcmk_is_set(flags, pcmk_sim_show_scores)) {
+ pe__set_working_set_flags(data_set, pe_flag_show_scores);
+ }
+ if (pcmk_is_set(flags, pcmk_sim_show_utilization)) {
+ pe__set_working_set_flags(data_set, pe_flag_show_utilization);
+ }
+
+ cluster_status(data_set);
+ print_cluster_status(data_set, 0, section_opts, "Revised Cluster Status",
+ true);
+
+simulate_done:
+ cib__clean_up_connection(&cib);
+ return rc;
+}
+
+int
+pcmk_simulate(xmlNodePtr *xml, pe_working_set_t *data_set,
+ const pcmk_injections_t *injections, unsigned int flags,
+ unsigned int section_opts, const char *use_date,
+ const char *input_file, const char *graph_file,
+ const char *dot_file)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pe__register_messages(out);
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__simulate(data_set, out, injections, flags, section_opts,
+ use_date, input_file, graph_file, dot_file);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
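+
+/* A minimal caller sketch (hypothetical): run the scheduler stage of a
+ * simulation with no injections and retrieve the XML result.
+ *
+ *     xmlNodePtr xml = NULL;
+ *     pcmk_injections_t injections = { NULL, };
+ *     pe_working_set_t *data_set = pe_new_working_set();
+ *
+ *     pcmk_simulate(&xml, data_set, &injections, pcmk_sim_process, 0,
+ *                   NULL, NULL, NULL, NULL);
+ *     pe_free_working_set(data_set);
+ */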
diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c
new file mode 100644
index 0000000..0e82633
--- /dev/null
+++ b/lib/pacemaker/pcmk_status.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <crm/cib/internal.h>
+#include <crm/common/output.h>
+#include <crm/common/results.h>
+#include <crm/fencing/internal.h>
+#include <crm/stonith-ng.h>
+#include <pacemaker.h>
+#include <pacemaker-internal.h>
+
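+/*!
+ * \internal
+ * \brief Connect to the fencer
+ *
+ * \return Newly created and connected fencer object, or NULL on failure
+ */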
+static stonith_t *
+fencing_connect(void)
+{
+ stonith_t *st = stonith_api_new();
+ int rc = pcmk_rc_ok;
+
+ if (st == NULL) {
+ return NULL;
+ }
+
+ rc = st->cmds->connect(st, crm_system_name, NULL);
+ if (rc == pcmk_rc_ok) {
+ return st;
+ } else {
+ stonith_api_delete(st);
+ return NULL;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Output the cluster status given a fencer and CIB connection
+ *
+ * \param[in,out] out Output object
+ * \param[in,out] stonith Fencer connection
+ * \param[in,out] cib CIB connection
+ * \param[in] current_cib Current CIB XML
+ * \param[in] pcmkd_state \p pacemakerd state
+ * \param[in] fence_history How much of the fencing history to output
+ * \param[in] show Group of \p pcmk_section_e flags
+ * \param[in] show_opts Group of \p pcmk_show_opt_e flags
+ * \param[in] only_node If a node name or tag, include only the
+ * matching node(s) (if any) in the output.
+ * If \p "*" or \p NULL, include all nodes
+ * in the output.
+ * \param[in] only_rsc If a resource ID or tag, include only the
+ * matching resource(s) (if any) in the
+ * output. If \p "*" or \p NULL, include all
+ * resources in the output.
+ * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID
+ * \param[in] simple_output Whether to use a simple output format.
+ * Note: This is for use by \p crm_mon only
+ * and is planned to be deprecated.
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, cib_t *cib,
+ xmlNode *current_cib,
+ enum pcmk_pacemakerd_state pcmkd_state,
+ enum pcmk__fence_history fence_history,
+ uint32_t show, uint32_t show_opts,
+ const char *only_node, const char *only_rsc,
+ const char *neg_location_prefix, bool simple_output)
+{
+ xmlNode *cib_copy = copy_xml(current_cib);
+ stonith_history_t *stonith_history = NULL;
+ int history_rc = 0;
+ pe_working_set_t *data_set = NULL;
+ GList *unames = NULL;
+ GList *resources = NULL;
+
+ int rc = pcmk_rc_ok;
+
+ if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
+ cib__clean_up_connection(&cib);
+ free_xml(cib_copy);
+ rc = pcmk_rc_schema_validation;
+ out->err(out, "Upgrade failed: %s", pcmk_rc_str(rc));
+ return rc;
+ }
+
+    /* Get the fencing history if any requested section will need it */
+ if (fence_history != pcmk__fence_history_none) {
+ history_rc = pcmk__get_fencing_history(stonith, &stonith_history,
+ fence_history);
+ }
+
+ data_set = pe_new_working_set();
+ CRM_ASSERT(data_set != NULL);
+ pe__set_working_set_flags(data_set, pe_flag_no_compat);
+
+ data_set->input = cib_copy;
+ data_set->priv = out;
+ cluster_status(data_set);
+
+ if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) {
+ if (pcmk__our_nodename == NULL) {
+ // Currently used only in the times section
+ pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0);
+ }
+ data_set->localhost = pcmk__our_nodename;
+ }
+
+ /* Unpack constraints if any section will need them
+ * (tickets may be referenced in constraints but not granted yet,
+ * and bans need negative location constraints) */
+ if (pcmk_is_set(show, pcmk_section_bans) || pcmk_is_set(show, pcmk_section_tickets)) {
+ pcmk__unpack_constraints(data_set);
+ }
+
+ unames = pe__build_node_name_list(data_set, only_node);
+ resources = pe__build_rsc_list(data_set, only_rsc);
+
+    /* Always show the DC section, so its absence is reported when there is
+     * no DC */
+ if (data_set->dc_node == NULL) {
+ show |= pcmk_section_dc;
+ }
+
+ if (simple_output) {
+ rc = pcmk__output_simple_status(out, data_set);
+ } else {
+ out->message(out, "cluster-status",
+ data_set, pcmkd_state, pcmk_rc2exitc(history_rc),
+ stonith_history, fence_history, show, show_opts,
+ neg_location_prefix, unames, resources);
+ }
+
+ g_list_free_full(unames, free);
+ g_list_free_full(resources, free);
+
+ stonith_history_free(stonith_history);
+ stonith_history = NULL;
+ pe_free_working_set(data_set);
+ return rc;
+}
+
+int
+pcmk_status(xmlNodePtr *xml)
+{
+ cib_t *cib = NULL;
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ uint32_t show_opts = pcmk_show_pending | pcmk_show_inactive_rscs | pcmk_show_timing;
+
+ cib = cib_new();
+
+ if (cib == NULL) {
+ return pcmk_rc_cib_corrupt;
+ }
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ cib_delete(cib);
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+ pe__register_messages(out);
+ stonith__register_messages(out);
+
+ rc = pcmk__status(out, cib, pcmk__fence_history_full, pcmk_section_all,
+ show_opts, NULL, NULL, NULL, false, 0);
+ pcmk__xml_output_finish(out, xml);
+
+ cib_delete(cib);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Query and output the cluster status
+ *
+ * The operation is considered a success if we're able to get the \p pacemakerd
+ * state. If possible, we'll also try to connect to the fencer and CIB and
+ * output their respective status information.
+ *
+ * \param[in,out] out Output object
+ * \param[in,out] cib CIB connection
+ * \param[in] fence_history How much of the fencing history to output
+ * \param[in] show Group of \p pcmk_section_e flags
+ * \param[in] show_opts Group of \p pcmk_show_opt_e flags
+ * \param[in] only_node If a node name or tag, include only the
+ * matching node(s) (if any) in the output.
+ * If \p "*" or \p NULL, include all nodes
+ * in the output.
+ * \param[in] only_rsc If a resource ID or tag, include only the
+ * matching resource(s) (if any) in the
+ * output. If \p "*" or \p NULL, include all
+ * resources in the output.
+ * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID
+ * \param[in] simple_output Whether to use a simple output format.
+ * Note: This is for use by \p crm_mon only
+ * and is planned to be deprecated.
+ * \param[in] timeout_ms How long to wait for a reply from the
+ * \p pacemakerd API. If 0,
+ * \p pcmk_ipc_dispatch_sync will be used.
+ * If positive, \p pcmk_ipc_dispatch_main
+ * will be used, and a new mainloop will be
+ * created for this purpose (freed before
+ * return).
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__status(pcmk__output_t *out, cib_t *cib,
+ enum pcmk__fence_history fence_history, uint32_t show,
+ uint32_t show_opts, const char *only_node, const char *only_rsc,
+ const char *neg_location_prefix, bool simple_output,
+ unsigned int timeout_ms)
+{
+ xmlNode *current_cib = NULL;
+ int rc = pcmk_rc_ok;
+ stonith_t *stonith = NULL;
+ enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
+ time_t last_updated = 0;
+
+ if (cib == NULL) {
+ return ENOTCONN;
+ }
+
+ if (cib->variant == cib_native) {
+ rc = pcmk__pacemakerd_status(out, crm_system_name, timeout_ms, false,
+ &pcmkd_state);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ last_updated = time(NULL);
+
+ switch (pcmkd_state) {
+ case pcmk_pacemakerd_state_running:
+ case pcmk_pacemakerd_state_shutting_down:
+ case pcmk_pacemakerd_state_remote:
+ /* Fencer and CIB may still be available while shutting down or
+ * running on a Pacemaker Remote node
+ */
+ break;
+ default:
+ // Fencer and CIB are definitely unavailable
+ out->message(out, "pacemakerd-health",
+ NULL, pcmkd_state, NULL, last_updated);
+ return rc;
+ }
+
+ if (fence_history != pcmk__fence_history_none) {
+ stonith = fencing_connect();
+ }
+ }
+
+ rc = cib__signon_query(out, &cib, &current_cib);
+ if (rc != pcmk_rc_ok) {
+ if (pcmkd_state != pcmk_pacemakerd_state_invalid) {
+ // Invalid at this point means we didn't query the pcmkd state
+ out->message(out, "pacemakerd-health",
+ NULL, pcmkd_state, NULL, last_updated);
+ }
+ goto done;
+ }
+
+ rc = pcmk__output_cluster_status(out, stonith, cib, current_cib,
+ pcmkd_state, fence_history, show,
+ show_opts, only_node, only_rsc,
+ neg_location_prefix, simple_output);
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "Error outputting status info from the fencer or CIB");
+ }
+
+done:
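+    /* Per the function description, the operation counts as a success once
+     * the pacemakerd state has been obtained; later fencer or CIB failures
+     * are reported above but do not change the return value
+     */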
+ stonith_api_delete(stonith);
+ free_xml(current_cib);
+ return pcmk_rc_ok;
+}
+
+/* This is an internal-only function that is planned to be deprecated and removed.
+ * It should only ever be called from crm_mon.
+ */
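+/* Example output (illustrative node counts and names):
+ *
+ *     CLUSTER OK: 3 nodes online, 5 resource instances configured
+ *     CLUSTER WARN: No DC, offline node: node2
+ */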
+int
+pcmk__output_simple_status(pcmk__output_t *out,
+ const pe_working_set_t *data_set)
+{
+ int nodes_online = 0;
+ int nodes_standby = 0;
+ int nodes_maintenance = 0;
+ GString *offline_nodes = NULL;
+ bool no_dc = false;
+ bool offline = false;
+ bool has_warnings = false;
+
+ if (data_set->dc_node == NULL) {
+ has_warnings = true;
+ no_dc = true;
+ }
+
+ for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+ pe_node_t *node = (pe_node_t *) iter->data;
+
+ if (node->details->standby && node->details->online) {
+ nodes_standby++;
+ } else if (node->details->maintenance && node->details->online) {
+ nodes_maintenance++;
+ } else if (node->details->online) {
+ nodes_online++;
+ } else {
+ pcmk__add_word(&offline_nodes, 1024, "offline node:");
+ pcmk__add_word(&offline_nodes, 0, pe__node_name(node));
+ has_warnings = true;
+ offline = true;
+ }
+ }
+
+ if (has_warnings) {
+ out->info(out, "CLUSTER WARN: %s%s%s",
+ no_dc ? "No DC" : "",
+ no_dc && offline ? ", " : "",
+ (offline? (const char *) offline_nodes->str : ""));
+
+ if (offline_nodes != NULL) {
+ g_string_free(offline_nodes, TRUE);
+ }
+
+ } else {
+ char *nodes_standby_s = NULL;
+ char *nodes_maint_s = NULL;
+
+ if (nodes_standby > 0) {
+ nodes_standby_s = crm_strdup_printf(", %d standby node%s", nodes_standby,
+ pcmk__plural_s(nodes_standby));
+ }
+
+ if (nodes_maintenance > 0) {
+ nodes_maint_s = crm_strdup_printf(", %d maintenance node%s",
+ nodes_maintenance,
+ pcmk__plural_s(nodes_maintenance));
+ }
+
+ out->info(out, "CLUSTER OK: %d node%s online%s%s, "
+ "%d resource instance%s configured",
+ nodes_online, pcmk__plural_s(nodes_online),
+ nodes_standby_s != NULL ? nodes_standby_s : "",
+ nodes_maint_s != NULL ? nodes_maint_s : "",
+ data_set->ninstances, pcmk__plural_s(data_set->ninstances));
+
+ free(nodes_standby_s);
+ free(nodes_maint_s);
+ }
+
+ if (has_warnings) {
+ return pcmk_rc_error;
+ } else {
+ return pcmk_rc_ok;
+ }
+ /* coverity[leaked_storage] False positive */
+}