author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:53:20 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:53:20 +0000
commit    e5a812082ae033afb1eed82c0f2df3d0f6bdc93f
tree      a6716c9275b4b413f6c9194798b34b91affb3cc7 /lib/pacemaker
parent    Initial commit.
Adding upstream version 2.1.6.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/pacemaker')
36 files changed, 29345 insertions, 0 deletions
diff --git a/lib/pacemaker/Makefile.am b/lib/pacemaker/Makefile.am
new file mode 100644
index 0000000..ebf3b6d
--- /dev/null
+++ b/lib/pacemaker/Makefile.am
@@ -0,0 +1,69 @@
#
# Copyright 2004-2023 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#

include $(top_srcdir)/mk/common.mk

AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)

noinst_HEADERS = libpacemaker_private.h

## libraries
lib_LTLIBRARIES = libpacemaker.la

## SOURCES

libpacemaker_la_LDFLAGS = -version-info 7:0:6

libpacemaker_la_CFLAGS = $(CFLAGS_HARDENED_LIB)
libpacemaker_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB)

libpacemaker_la_LIBADD = $(top_builddir)/lib/pengine/libpe_status.la \
                         $(top_builddir)/lib/cib/libcib.la \
                         $(top_builddir)/lib/lrmd/liblrmd.la \
                         $(top_builddir)/lib/fencing/libstonithd.la \
                         $(top_builddir)/lib/services/libcrmservice.la \
                         $(top_builddir)/lib/common/libcrmcommon.la

# -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version
# Use += rather than backslashed continuation lines for parsing by bumplibs
libpacemaker_la_SOURCES =
libpacemaker_la_SOURCES += pcmk_acl.c
libpacemaker_la_SOURCES += pcmk_cluster_queries.c
libpacemaker_la_SOURCES += pcmk_fence.c
libpacemaker_la_SOURCES += pcmk_graph_consumer.c
libpacemaker_la_SOURCES += pcmk_graph_logging.c
libpacemaker_la_SOURCES += pcmk_graph_producer.c
libpacemaker_la_SOURCES += pcmk_injections.c
libpacemaker_la_SOURCES += pcmk_output.c
libpacemaker_la_SOURCES += pcmk_resource.c
libpacemaker_la_SOURCES += pcmk_result_code.c
libpacemaker_la_SOURCES += pcmk_rule.c
libpacemaker_la_SOURCES += pcmk_sched_actions.c
libpacemaker_la_SOURCES += pcmk_sched_bundle.c
libpacemaker_la_SOURCES += pcmk_sched_clone.c
libpacemaker_la_SOURCES += pcmk_sched_colocation.c
libpacemaker_la_SOURCES += pcmk_sched_constraints.c
libpacemaker_la_SOURCES += pcmk_sched_fencing.c
libpacemaker_la_SOURCES += pcmk_sched_group.c
libpacemaker_la_SOURCES += pcmk_sched_instances.c
libpacemaker_la_SOURCES += pcmk_sched_location.c
libpacemaker_la_SOURCES += pcmk_sched_migration.c
libpacemaker_la_SOURCES += pcmk_sched_nodes.c
libpacemaker_la_SOURCES += pcmk_sched_ordering.c
libpacemaker_la_SOURCES += pcmk_sched_primitive.c
libpacemaker_la_SOURCES += pcmk_sched_probes.c
libpacemaker_la_SOURCES += pcmk_sched_promotable.c
libpacemaker_la_SOURCES += pcmk_sched_recurring.c
libpacemaker_la_SOURCES += pcmk_sched_remote.c
libpacemaker_la_SOURCES += pcmk_sched_resource.c
libpacemaker_la_SOURCES += pcmk_sched_tickets.c
libpacemaker_la_SOURCES += pcmk_sched_utilization.c
libpacemaker_la_SOURCES += pcmk_scheduler.c
libpacemaker_la_SOURCES += pcmk_simulate.c
libpacemaker_la_SOURCES += pcmk_status.c

diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
new file mode 100644
index 0000000..192d5a7
--- /dev/null
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -0,0 +1,986 @@
/*
 * Copyright 2021-2023 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
#  define PCMK__LIBPACEMAKER_PRIVATE__H

/* This header is for the sole use of libpacemaker, so that functions can be
 * declared with G_GNUC_INTERNAL for efficiency.
 */

#include <crm/pengine/pe_types.h>   // pe_action_t, pe_node_t, pe_working_set_t

// Flags to modify the behavior of add_colocated_node_scores()
enum pcmk__coloc_select {
    // With no other flags, apply all "with this" colocations
    pcmk__coloc_select_default      = 0,

    // Apply "this with" colocations instead of "with this" colocations
    pcmk__coloc_select_this_with    = (1 << 0),

    // Apply only colocations with non-negative scores
    pcmk__coloc_select_nonnegative  = (1 << 1),

    // Apply only colocations with at least one matching node
    pcmk__coloc_select_active       = (1 << 2),
};

// Flags the update_ordered_actions() method can return
enum pcmk__updated {
    pcmk__updated_none  = 0,        // Nothing changed
    pcmk__updated_first = (1 << 0), // First action was updated
    pcmk__updated_then  = (1 << 1), // Then action was updated
};

#define pcmk__set_updated_flags(au_flags, action, flags_to_set) do {        \
        au_flags = pcmk__set_flags_as(__func__, __LINE__,                   \
                                      LOG_TRACE, "Action update",           \
                                      (action)->uuid, au_flags,             \
                                      (flags_to_set), #flags_to_set);       \
    } while (0)

#define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do {    \
        au_flags = pcmk__clear_flags_as(__func__, __LINE__,                 \
                                        LOG_TRACE, "Action update",         \
                                        (action)->uuid, au_flags,           \
                                        (flags_to_clear), #flags_to_clear); \
    } while (0)

// Resource allocation methods
struct resource_alloc_functions_s {
    /*!
     * \internal
     * \brief Assign a resource to a node
     *
     * \param[in,out] rsc     Resource to assign to a node
     * \param[in]     prefer  Node to prefer, if all else is equal
     *
     * \return Node that \p rsc is assigned to, if assigned entirely to one node
     */
    pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer);

    /*!
     * \internal
     * \brief Create all actions needed for a given resource
     *
     * \param[in,out] rsc  Resource to create actions for
     */
    void (*create_actions)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Schedule any probes needed for a resource on a node
     *
     * \param[in,out] rsc   Resource to create probe for
     * \param[in,out] node  Node to create probe on
     *
     * \return true if any probe was created, otherwise false
     */
    bool (*create_probe)(pe_resource_t *rsc, pe_node_t *node);

    /*!
     * \internal
     * \brief Create implicit constraints needed for a resource
     *
     * \param[in,out] rsc  Resource to create implicit constraints for
     */
    void (*internal_constraints)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Apply a colocation's score to node weights or resource priority
     *
     * Given a colocation constraint, apply its score to the dependent's
     * allowed node weights (if we are still placing resources) or priority (if
     * we are choosing promotable clone instance roles).
     *
     * \param[in,out] dependent      Dependent resource in colocation
     * \param[in]     primary        Primary resource in colocation
     * \param[in]     colocation     Colocation constraint to apply
     * \param[in]     for_dependent  true if called on behalf of dependent
     */
    void (*apply_coloc_score) (pe_resource_t *dependent,
                               const pe_resource_t *primary,
                               const pcmk__colocation_t *colocation,
                               bool for_dependent);

    /*!
     * \internal
     * \brief Create list of all resources in colocations with a given resource
     *
     * Given a resource, create a list of all resources involved in mandatory
     * colocations with it, whether directly or indirectly via chained
     * colocations.
     *
     * \param[in]     rsc             Resource to add to colocated list
     * \param[in]     orig_rsc        Resource originally requested
     * \param[in,out] colocated_rscs  Existing list
     *
     * \return List of given resource and all resources involved in colocations
     *
     * \note This function is recursive; top-level callers should pass NULL as
     *       \p colocated_rscs and \p orig_rsc, and the desired resource as
     *       \p rsc. The recursive calls will use other values.
     */
    GList *(*colocated_resources)(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc,
                                  GList *colocated_rscs);

    /*!
     * \internal
     * \brief Add colocations affecting a resource as primary to a list
     *
     * Given a resource being assigned (\p orig_rsc) and a resource somewhere
     * in its chain of ancestors (\p rsc, which may be \p orig_rsc), get
     * colocations that affect the ancestor as primary and should affect the
     * resource, and add them to a given list.
     *
     * \param[in]     rsc       Resource whose colocations should be added
     * \param[in]     orig_rsc  Affected resource (\p rsc or a descendant)
     * \param[in,out] list      List of colocations to add to
     *
     * \note All arguments should be non-NULL.
     * \note The pcmk__with_this_colocations() wrapper should usually be used
     *       instead of using this method directly.
     */
    void (*with_this_colocations)(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc,
                                  GList **list);

    /*!
     * \internal
     * \brief Add colocations affecting a resource as dependent to a list
     *
     * Given a resource being assigned (\p orig_rsc) and a resource somewhere
     * in its chain of ancestors (\p rsc, which may be \p orig_rsc), get
     * colocations that affect the ancestor as dependent and should affect the
     * resource, and add them to a given list.
     *
     * \param[in]     rsc       Resource whose colocations should be added
     * \param[in]     orig_rsc  Affected resource (\p rsc or a descendant)
     * \param[in,out] list      List of colocations to add to
     *
     * \note All arguments should be non-NULL.
     * \note The pcmk__this_with_colocations() wrapper should usually be used
     *       instead of using this method directly.
     */
    void (*this_with_colocations)(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc,
                                  GList **list);

    /*!
     * \internal
     * \brief Update nodes with scores of colocated resources' nodes
     *
     * Given a table of nodes and a resource, update the nodes' scores with the
     * scores of the best nodes matching the attribute used for each of the
     * resource's relevant colocations.
     *
     * \param[in,out] rsc     Resource to check colocations for
     * \param[in]     log_id  Resource ID to use in logs (if NULL, use \p rsc ID)
     * \param[in,out] nodes   Nodes to update
     * \param[in]     attr    Colocation attribute (NULL to use default)
     * \param[in]     factor  Incorporate scores multiplied by this factor
     * \param[in]     flags   Bitmask of enum pcmk__coloc_select values
     *
     * \note The caller remains responsible for freeing \p *nodes.
     */
    void (*add_colocated_node_scores)(pe_resource_t *rsc, const char *log_id,
                                      GHashTable **nodes, const char *attr,
                                      float factor, uint32_t flags);

    /*!
     * \internal
     * \brief Apply a location constraint to a resource's allowed node scores
     *
     * \param[in,out] rsc       Resource to apply constraint to
     * \param[in,out] location  Location constraint to apply
     */
    void (*apply_location)(pe_resource_t *rsc, pe__location_t *location);

    /*!
     * \internal
     * \brief Return action flags for a given resource action
     *
     * \param[in,out] action  Action to get flags for
     * \param[in]     node    If not NULL, limit effects to this node
     *
     * \return Flags appropriate to \p action on \p node
     * \note For primitives, this will be the same as action->flags regardless
     *       of node. For collective resources, the flags can differ due to
     *       multiple instances possibly being involved.
     */
    enum pe_action_flags (*action_flags)(pe_action_t *action,
                                         const pe_node_t *node);

    /*!
     * \internal
     * \brief Update two actions according to an ordering between them
     *
     * Given information about an ordering of two actions, update the actions'
     * flags (and runnable_before members if appropriate) as appropriate for
     * the ordering. Effects may cascade to other orderings involving the
     * actions as well.
     *
     * \param[in,out] first     'First' action in an ordering
     * \param[in,out] then      'Then' action in an ordering
     * \param[in]     node      If not NULL, limit scope of ordering to this
     *                          node (only used when interleaving instances)
     * \param[in]     flags     Action flags for \p first for ordering purposes
     * \param[in]     filter    Action flags to limit scope of certain updates
     *                          (may include pe_action_optional to affect only
     *                          mandatory actions, and pe_action_runnable to
     *                          affect only runnable actions)
     * \param[in]     type      Group of enum pe_ordering flags to apply
     * \param[in,out] data_set  Cluster working set
     *
     * \return Group of enum pcmk__updated flags indicating what was updated
     */
    uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then,
                                       const pe_node_t *node, uint32_t flags,
                                       uint32_t filter, uint32_t type,
                                       pe_working_set_t *data_set);

    void (*output_actions)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Add a resource's actions to the transition graph
     *
     * \param[in,out] rsc  Resource whose actions should be added
     */
    void (*add_actions_to_graph)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Add meta-attributes relevant to transition graph actions to XML
     *
     * If a given resource supports variant-specific meta-attributes that are
     * needed for transition graph actions, add them to a given XML element.
     *
     * \param[in]     rsc  Resource whose meta-attributes should be added
     * \param[in,out] xml  Transition graph action attributes XML to add to
     */
    void (*add_graph_meta)(const pe_resource_t *rsc, xmlNode *xml);

    /*!
     * \internal
     * \brief Add a resource's utilization to a table of utilization values
     *
     * This function is used when summing the utilization of a resource and all
     * resources colocated with it, to determine whether a node has sufficient
     * capacity. Given a resource and a table of utilization values, it will
     * add the resource's utilization to the existing values, if the resource
     * has not yet been allocated to a node.
     *
     * \param[in]     rsc          Resource with utilization to add
     * \param[in]     orig_rsc     Resource being allocated (for logging only)
     * \param[in]     all_rscs     List of all resources that will be summed
     * \param[in,out] utilization  Table of utilization values to add to
     */
    void (*add_utilization)(const pe_resource_t *rsc,
                            const pe_resource_t *orig_rsc, GList *all_rscs,
                            GHashTable *utilization);

    /*!
     * \internal
     * \brief Apply a shutdown lock for a resource, if appropriate
     *
     * \param[in,out] rsc  Resource to check for shutdown lock
     */
    void (*shutdown_lock)(pe_resource_t *rsc);
};

// Actions (pcmk_sched_actions.c)

G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
                                       pe_working_set_t *data_set);

G_GNUC_INTERNAL
uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
                                      const pe_node_t *node, uint32_t flags,
                                      uint32_t filter, uint32_t type,
                                      pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, const pe_action_t *action,
                      bool details);

G_GNUC_INTERNAL
pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name,
                                     guint interval_ms, const pe_node_t *node);

G_GNUC_INTERNAL
pe_action_t *pcmk__new_shutdown_action(pe_node_t *node);

G_GNUC_INTERNAL
bool pcmk__action_locks_rsc_to_node(const pe_action_t *action);

G_GNUC_INTERNAL
void pcmk__deduplicate_action_inputs(pe_action_t *action);

G_GNUC_INTERNAL
void pcmk__output_actions(pe_working_set_t *data_set);

G_GNUC_INTERNAL
bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
                               const xmlNode *xml_op);

G_GNUC_INTERNAL
void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set);


// Recurring actions (pcmk_sched_recurring.c)

G_GNUC_INTERNAL
void pcmk__create_recurring_actions(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id,
                           const char *task, guint interval_ms,
                           const pe_node_t *node, const char *reason);

G_GNUC_INTERNAL
void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task,
                                guint interval_ms, pe_node_t *node);

G_GNUC_INTERNAL
bool pcmk__action_is_recurring(const pe_action_t *action);


// Producing transition graphs (pcmk_graph_producer.c)

G_GNUC_INTERNAL
bool pcmk__graph_has_loop(const pe_action_t *init_action,
                          const pe_action_t *action,
                          pe_action_wrapper_t *input);

G_GNUC_INTERNAL
void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__create_graph(pe_working_set_t *data_set);


// Fencing (pcmk_sched_fencing.c)

G_GNUC_INTERNAL
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node,
                            pe_action_t *action, enum pe_ordering order);

G_GNUC_INTERNAL
void pcmk__fence_guest(pe_node_t *node);

G_GNUC_INTERNAL
bool pcmk__node_unfenced(const pe_node_t *node);

G_GNUC_INTERNAL
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data);


// Injected scheduler inputs (pcmk_sched_injections.c)

void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
                                  const pcmk_injections_t *injections);


// Constraints of any type (pcmk_sched_constraints.c)

G_GNUC_INTERNAL
pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);

G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
                                   const pe_working_set_t *data_set);

G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(const pe_working_set_t *data_set,
                                 const char *id, pe_resource_t **rsc,
                                 pe_tag_t **tag);

G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
                      bool convert_rsc, const pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pe_working_set_t *data_set);


// Location constraints

G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);

G_GNUC_INTERNAL
pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
                                   int node_weight, const char *discover_mode,
                                   pe_node_t *foo_node,
                                   pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__apply_locations(pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__apply_location(pe_resource_t *rsc, pe__location_t *constraint);


// Colocation constraints (pcmk_sched_colocation.c)

enum pcmk__coloc_affects {
    pcmk__coloc_affects_nothing = 0,
    pcmk__coloc_affects_location,
    pcmk__coloc_affects_role,
};

G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(const pe_resource_t *dependent,
                                                  const pe_resource_t *primary,
                                                  const pcmk__colocation_t *colocation,
                                                  bool preview);

G_GNUC_INTERNAL
void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
                                  const pe_resource_t *primary,
                                  const pcmk__colocation_t *colocation);

G_GNUC_INTERNAL
void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
                                   const pe_resource_t *primary,
                                   const pcmk__colocation_t *colocation);

G_GNUC_INTERNAL
void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
                                     GHashTable **nodes, const char *attr,
                                     float factor, uint32_t flags);

G_GNUC_INTERNAL
void pcmk__add_dependent_scores(gpointer data, gpointer user_data);

G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation);

G_GNUC_INTERNAL
void pcmk__add_this_with_list(GList **list, GList *addition);

G_GNUC_INTERNAL
void pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation);

G_GNUC_INTERNAL
void pcmk__add_with_this_list(GList **list, GList *addition);

G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
                          pe_resource_t *dependent, pe_resource_t *primary,
                          const char *dependent_role, const char *primary_role,
                          bool influence, pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__block_colocation_dependents(pe_action_t *action,
                                       pe_working_set_t *data_set);

/*!
 * \internal
 * \brief Check whether colocation's dependent preferences should be considered
 *
 * \param[in] colocation  Colocation constraint
 * \param[in] rsc         Primary instance (normally this will be
 *                        colocation->primary, which NULL will be treated as,
 *                        but for clones or bundles with multiple instances
 *                        this can be a particular instance)
 *
 * \return true if colocation influence should be effective, otherwise false
 */
static inline bool
pcmk__colocation_has_influence(const pcmk__colocation_t *colocation,
                               const pe_resource_t *rsc)
{
    if (rsc == NULL) {
        rsc = colocation->primary;
    }

    /* A bundle replica colocates its remote connection with its container,
     * using a finite score so that the container can run on Pacemaker Remote
     * nodes.
     *
     * Moving a connection is lightweight and does not interrupt the service,
     * while moving a container is heavyweight and does interrupt the service,
     * so don't move a clean, active container based solely on the preferences
     * of its connection.
     *
     * This also avoids problematic scenarios where two containers want to
     * perpetually swap places.
     */
    if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes)
        && !pcmk_is_set(rsc->flags, pe_rsc_failed)
        && pcmk__list_of_1(rsc->running_on)) {
        return false;
    }

    /* The dependent in a colocation influences the primary's location
     * if the influence option is true or the primary is not yet active.
     */
    return colocation->influence || (rsc->running_on == NULL);
}


// Ordering constraints (pcmk_sched_ordering.c)

G_GNUC_INTERNAL
void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task,
                        pe_action_t *first_action, pe_resource_t *then_rsc,
                        char *then_task, pe_action_t *then_action,
                        uint32_t flags, pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pe_node_t *node,
                                       pe_action_t *shutdown_op);

G_GNUC_INTERNAL
void pcmk__apply_orderings(pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__order_after_each(pe_action_t *after, GList *list);


/*!
 * \internal
 * \brief Create a new ordering between two resource actions
 *
 * \param[in,out] first_rsc   Resource for 'first' action
 * \param[in,out] first_task  Action key for 'first' action
 * \param[in]     then_rsc    Resource for 'then' action
 * \param[in,out] then_task   Action key for 'then' action
 * \param[in]     flags       Bitmask of enum pe_ordering flags
 */
#define pcmk__order_resource_actions(first_rsc, first_task,             \
                                     then_rsc, then_task, flags)        \
    pcmk__new_ordering((first_rsc),                                     \
                       pcmk__op_key((first_rsc)->id, (first_task), 0),  \
                       NULL,                                            \
                       (then_rsc),                                      \
                       pcmk__op_key((then_rsc)->id, (then_task), 0),    \
                       NULL, (flags), (first_rsc)->cluster)

#define pcmk__order_starts(rsc1, rsc2, flags)                \
    pcmk__order_resource_actions((rsc1), CRMD_ACTION_START,  \
                                 (rsc2), CRMD_ACTION_START, (flags))

#define pcmk__order_stops(rsc1, rsc2, flags)                 \
    pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP,   \
                                 (rsc2), CRMD_ACTION_STOP, (flags))


// Ticket constraints (pcmk_sched_tickets.c)

G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);


// Promotable clone resources (pcmk_sched_promotable.c)

G_GNUC_INTERNAL
void pcmk__add_promotion_scores(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__require_promotion_tickets(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__set_instance_roles(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__create_promotable_actions(pe_resource_t *clone);

G_GNUC_INTERNAL
void pcmk__promotable_restart_ordering(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__order_promotable_instances(pe_resource_t *clone);

G_GNUC_INTERNAL
void pcmk__update_dependent_with_promotable(const pe_resource_t *primary,
                                            pe_resource_t *dependent,
                                            const pcmk__colocation_t *colocation);

G_GNUC_INTERNAL
void pcmk__update_promotable_dependent_priority(const pe_resource_t *primary,
                                                pe_resource_t *dependent,
                                                const pcmk__colocation_t *colocation);


// Pacemaker Remote nodes (pcmk_sched_remote.c)

G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(const pe_node_t *node);

G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);

G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc,
                                    const pe_node_t *node);

G_GNUC_INTERNAL
pe_node_t *pcmk__connection_host_for_action(const pe_action_t *action);

G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params);

G_GNUC_INTERNAL
void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action);


// Primitives (pcmk_sched_primitive.c)

G_GNUC_INTERNAL
pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer);

G_GNUC_INTERNAL
void pcmk__primitive_create_actions(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__primitive_internal_constraints(pe_resource_t *rsc);

G_GNUC_INTERNAL
enum pe_action_flags pcmk__primitive_action_flags(pe_action_t *action,
                                                  const pe_node_t *node);

G_GNUC_INTERNAL
void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
                                       const pe_resource_t *primary,
                                       const pcmk__colocation_t *colocation,
                                       bool for_dependent);

G_GNUC_INTERNAL
void pcmk__with_primitive_colocations(const pe_resource_t *rsc,
                                      const pe_resource_t *orig_rsc,
                                      GList **list);

G_GNUC_INTERNAL
void pcmk__primitive_with_colocations(const pe_resource_t *rsc,
                                      const pe_resource_t *orig_rsc,
                                      GList **list);

G_GNUC_INTERNAL
void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node,
                            bool optional);

G_GNUC_INTERNAL
void pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml);

G_GNUC_INTERNAL
void pcmk__primitive_add_utilization(const pe_resource_t *rsc,
                                     const pe_resource_t *orig_rsc,
                                     GList *all_rscs, GHashTable *utilization);

G_GNUC_INTERNAL
void pcmk__primitive_shutdown_lock(pe_resource_t *rsc);


// Groups (pcmk_sched_group.c)

G_GNUC_INTERNAL
pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer);

G_GNUC_INTERNAL
void pcmk__group_create_actions(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__group_internal_constraints(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__group_apply_coloc_score(pe_resource_t *dependent,
                                   const pe_resource_t *primary,
                                   const pcmk__colocation_t *colocation,
                                   bool for_dependent);

G_GNUC_INTERNAL
void pcmk__with_group_colocations(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc, GList **list);

G_GNUC_INTERNAL
void pcmk__group_with_colocations(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc, GList **list);

G_GNUC_INTERNAL
void pcmk__group_add_colocated_node_scores(pe_resource_t *rsc,
                                           const char *log_id,
                                           GHashTable **nodes, const char *attr,
                                           float factor, uint32_t flags);

G_GNUC_INTERNAL
void pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location);

G_GNUC_INTERNAL
enum pe_action_flags pcmk__group_action_flags(pe_action_t *action,
                                              const pe_node_t *node);

G_GNUC_INTERNAL
uint32_t pcmk__group_update_ordered_actions(pe_action_t *first,
                                            pe_action_t *then,
                                            const pe_node_t *node,
                                            uint32_t flags, uint32_t filter,
                                            uint32_t type,
                                            pe_working_set_t *data_set);

G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(const pe_resource_t *rsc,
                                       const pe_resource_t *orig_rsc,
                                       GList *colocated_rscs);

G_GNUC_INTERNAL
void pcmk__group_add_utilization(const pe_resource_t *rsc,
                                 const pe_resource_t *orig_rsc, GList *all_rscs,
                                 GHashTable *utilization);

G_GNUC_INTERNAL
void pcmk__group_shutdown_lock(pe_resource_t *rsc);

// Clones (pcmk_sched_clone.c)

G_GNUC_INTERNAL
pe_node_t *pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer);

G_GNUC_INTERNAL
void pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
                                   const pe_resource_t *primary,
                                   const pcmk__colocation_t *colocation,
                                   bool for_dependent);

G_GNUC_INTERNAL
void pcmk__with_clone_colocations(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc, GList **list);

G_GNUC_INTERNAL
void pcmk__clone_with_colocations(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc, GList **list);

// Bundles (pcmk_sched_bundle.c)

G_GNUC_INTERNAL
const pe_resource_t *pcmk__get_rsc_in_container(const pe_resource_t *instance);

G_GNUC_INTERNAL
void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
                                    const pe_resource_t *primary,
                                    const pcmk__colocation_t *colocation,
                                    bool for_dependent);

G_GNUC_INTERNAL
void pcmk__with_bundle_colocations(const pe_resource_t *rsc,
                                   const pe_resource_t *orig_rsc, GList **list);

G_GNUC_INTERNAL
void pcmk__bundle_with_colocations(const pe_resource_t *rsc,
                                   const pe_resource_t *orig_rsc, GList **list);

G_GNUC_INTERNAL
void pcmk__output_bundle_actions(pe_resource_t *rsc);


// Clone instances or bundle replica containers (pcmk_sched_instances.c)

G_GNUC_INTERNAL
void pcmk__assign_instances(pe_resource_t *collective, GList *instances,
                            int max_total, int max_per_node);

G_GNUC_INTERNAL
void pcmk__create_instance_actions(pe_resource_t *rsc, GList *instances);

G_GNUC_INTERNAL
bool pcmk__instance_matches(const pe_resource_t *instance,
                            const pe_node_t *node, enum rsc_role_e role,
                            bool current);

G_GNUC_INTERNAL
pe_resource_t *pcmk__find_compatible_instance(const pe_resource_t *match_rsc,
                                              const pe_resource_t *rsc,
                                              enum rsc_role_e role,
                                              bool current);

G_GNUC_INTERNAL
uint32_t pcmk__instance_update_ordered_actions(pe_action_t *first,
                                               pe_action_t *then,
                                               const pe_node_t *node,
                                               uint32_t flags, uint32_t filter,
                                               uint32_t type,
                                               pe_working_set_t *data_set);

G_GNUC_INTERNAL
enum pe_action_flags pcmk__collective_action_flags(pe_action_t *action,
                                                   const GList *instances,
                                                   const pe_node_t *node);

G_GNUC_INTERNAL
void pcmk__add_collective_constraints(GList **list,
                                      const pe_resource_t *instance,
                                      const pe_resource_t *collective,
                                      bool with_this);


// Injections (pcmk_injections.c)

G_GNUC_INTERNAL
xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid);

G_GNUC_INTERNAL
xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node,
                                        bool up);

G_GNUC_INTERNAL
xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
                                       const char *resource,
                                       const char *lrm_name,
                                       const char *rclass,
                                       const char *rtype,
                                       const char *rprovider);

G_GNUC_INTERNAL
void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node,
                            const char *resource, const char *task,
                            guint interval_ms, int rc);

G_GNUC_INTERNAL
xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,
                                    lrmd_event_data_t *op, int target_rc);


// Nodes (pcmk_sched_nodes.c)

G_GNUC_INTERNAL
bool pcmk__node_available(const pe_node_t *node, bool consider_score,
                          bool consider_guest);

G_GNUC_INTERNAL
bool pcmk__any_node_available(GHashTable *nodes);

G_GNUC_INTERNAL
GHashTable *pcmk__copy_node_table(GHashTable *nodes);

G_GNUC_INTERNAL
GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node);

G_GNUC_INTERNAL
void pcmk__apply_node_health(pe_working_set_t *data_set);

G_GNUC_INTERNAL
pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc,
                                  const pe_node_t *node);


// Functions applying to more than one variant (pcmk_sched_resource.c)

G_GNUC_INTERNAL
void pcmk__set_allocation_methods(pe_working_set_t *data_set);

G_GNUC_INTERNAL
bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
                             const xmlNode *rsc_entry, bool active_on_node);

G_GNUC_INTERNAL
GList *pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set);

G_GNUC_INTERNAL
GList *pcmk__colocated_resources(const pe_resource_t *rsc,
                                 const pe_resource_t *orig_rsc,
                                 GList *colocated_rscs);

G_GNUC_INTERNAL
void pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml);

G_GNUC_INTERNAL
void pcmk__output_resource_actions(pe_resource_t *rsc);

G_GNUC_INTERNAL
bool pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen,
                               bool force);

G_GNUC_INTERNAL
bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);

G_GNUC_INTERNAL
void pcmk__unassign_resource(pe_resource_t *rsc);

G_GNUC_INTERNAL
bool pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node,
                             pe_resource_t **failed);

G_GNUC_INTERNAL
void pcmk__sort_resources(pe_working_set_t *data_set);

G_GNUC_INTERNAL
gint pcmk__cmp_instance(gconstpointer a, gconstpointer b);

G_GNUC_INTERNAL
gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b);


// Functions related to probes (pcmk_sched_probes.c)

G_GNUC_INTERNAL
bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node);

G_GNUC_INTERNAL
void pcmk__order_probes(pe_working_set_t *data_set);

G_GNUC_INTERNAL
bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node);

G_GNUC_INTERNAL
void pcmk__schedule_probes(pe_working_set_t *data_set);


// Functions related to live migration (pcmk_sched_migration.c)

void pcmk__create_migration_actions(pe_resource_t *rsc,
                                    const pe_node_t *current);

void pcmk__abort_dangling_migration(void *data, void *user_data);

bool pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current);

void pcmk__order_migration_equivalents(pe__ordering_t *order);


// Functions related to node utilization (pcmk_sched_utilization.c)

G_GNUC_INTERNAL
int pcmk__compare_node_capacities(const pe_node_t *node1,
                                  const pe_node_t *node2);

G_GNUC_INTERNAL
void pcmk__consume_node_capacity(GHashTable *current_utilization,
                                 const pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__release_node_capacity(GHashTable *current_utilization,
                                 const pe_resource_t *rsc);

G_GNUC_INTERNAL
const pe_node_t *pcmk__ban_insufficient_capacity(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__create_utilization_constraints(pe_resource_t *rsc,
                                          const GList *allowed_nodes);

G_GNUC_INTERNAL
void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set);

#endif // PCMK__LIBPACEMAKER_PRIVATE__H
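Editor's note: the struct at the top of this header is a per-variant method
table, and the scheduler dispatches through it rather than switching on
resource type. A minimal sketch of that dispatch (not part of this commit),
assuming the table is reachable through the resource's cmds member as the
per-variant declarations above suggest; assign_all() is a hypothetical name:

#include <glib.h>
#include <crm/pengine/pe_types.h>   // pe_resource_t, pe_working_set_t
#include "libpacemaker_private.h"

// Hypothetical illustration only -- not part of lib/pacemaker
static void
assign_all(pe_working_set_t *data_set)
{
    for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
        pe_resource_t *rsc = iter->data;

        /* Polymorphic dispatch: resolves to pcmk__primitive_assign(),
         * pcmk__group_assign(), pcmk__clone_assign(), etc., depending on
         * the variant (assumes rsc->cmds points at this method table)
         */
        rsc->cmds->assign(rsc, NULL);
    }
}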
diff --git a/lib/pacemaker/pcmk_acl.c b/lib/pacemaker/pcmk_acl.c
new file mode 100644
index 0000000..c2072dc
--- /dev/null
+++ b/lib/pacemaker/pcmk_acl.c
@@ -0,0 +1,379 @@
/*
 * Copyright 2004-2023 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <stdio.h>
#include <sys/types.h>
#include <pwd.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxslt/transform.h>
#include <libxslt/variables.h>
#include <libxslt/xsltutils.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/internal.h>

#include <pacemaker-internal.h>

#define ACL_NS_PREFIX "http://clusterlabs.org/ns/pacemaker/access/"
#define ACL_NS_Q_PREFIX "pcmk-access-"
#define ACL_NS_Q_WRITABLE (const xmlChar *) ACL_NS_Q_PREFIX "writable"
#define ACL_NS_Q_READABLE (const xmlChar *) ACL_NS_Q_PREFIX "readable"
#define ACL_NS_Q_DENIED (const xmlChar *) ACL_NS_Q_PREFIX "denied"

static const xmlChar *NS_WRITABLE = (const xmlChar *) ACL_NS_PREFIX "writable";
static const xmlChar *NS_READABLE = (const xmlChar *) ACL_NS_PREFIX "readable";
static const xmlChar *NS_DENIED = (const xmlChar *) ACL_NS_PREFIX "denied";

/*!
 * \brief This function takes a node and marks it with the namespace
 *        given in the ns parameter.
 *
 * \param[in,out] i_node
 * \param[in]     ns
 * \param[in,out] ret
 * \param[in,out] ns_recycle_writable
 * \param[in,out] ns_recycle_readable
 * \param[in,out] ns_recycle_denied
 */
static void
pcmk__acl_mark_node_with_namespace(xmlNode *i_node, const xmlChar *ns, int *ret,
                                   xmlNs **ns_recycle_writable,
                                   xmlNs **ns_recycle_readable,
                                   xmlNs **ns_recycle_denied)
{
    if (ns == NS_WRITABLE)
    {
        if (*ns_recycle_writable == NULL)
        {
            *ns_recycle_writable = xmlNewNs(xmlDocGetRootElement(i_node->doc),
                                            NS_WRITABLE, ACL_NS_Q_WRITABLE);
        }
        xmlSetNs(i_node, *ns_recycle_writable);
        *ret = pcmk_rc_ok;
    }
    else if (ns == NS_READABLE)
    {
        if (*ns_recycle_readable == NULL)
        {
            *ns_recycle_readable = xmlNewNs(xmlDocGetRootElement(i_node->doc),
                                            NS_READABLE, ACL_NS_Q_READABLE);
        }
        xmlSetNs(i_node, *ns_recycle_readable);
        *ret = pcmk_rc_ok;
    }
    else if (ns == NS_DENIED)
    {
        if (*ns_recycle_denied == NULL)
        {
            *ns_recycle_denied = xmlNewNs(xmlDocGetRootElement(i_node->doc),
                                          NS_DENIED, ACL_NS_Q_DENIED);
        };
        xmlSetNs(i_node, *ns_recycle_denied);
        *ret = pcmk_rc_ok;
    }
}

/*!
 * \brief This function takes some XML, and annotates it with XML
 *        namespaces to indicate the ACL permissions.
 *
 * \param[in,out] xml_modify
 *
 * \return A standard Pacemaker return code
 *         Namely:
 *         - pcmk_rc_ok upon success,
 *         - pcmk_rc_already if ACLs were not applicable,
 *         - pcmk_rc_schema_validation if the validation schema version
 *           is unsupported (see note), or
 *         - EINVAL or ENOMEM as appropriate;
 *
 * \note This function is recursive
 */
static int
pcmk__acl_annotate_permissions_recursive(xmlNode *xml_modify)
{

    static xmlNs *ns_recycle_writable = NULL,
                 *ns_recycle_readable = NULL,
                 *ns_recycle_denied = NULL;
    static const xmlDoc *prev_doc = NULL;

    xmlNode *i_node = NULL;
    const xmlChar *ns;
    int ret = EINVAL; // nodes have not been processed yet

    if (prev_doc == NULL || prev_doc != xml_modify->doc) {
        prev_doc = xml_modify->doc;
        ns_recycle_writable = ns_recycle_readable = ns_recycle_denied = NULL;
    }

    for (i_node = xml_modify; i_node != NULL; i_node = i_node->next) {
        switch (i_node->type) {
            case XML_ELEMENT_NODE:
                pcmk__set_xml_doc_flag(i_node, pcmk__xf_tracking);

                if (!pcmk__check_acl(i_node, NULL, pcmk__xf_acl_read)) {
                    ns = NS_DENIED;
                } else if (!pcmk__check_acl(i_node, NULL, pcmk__xf_acl_write)) {
                    ns = NS_READABLE;
                } else {
                    ns = NS_WRITABLE;
                }
                pcmk__acl_mark_node_with_namespace(i_node, ns, &ret,
                                                   &ns_recycle_writable,
                                                   &ns_recycle_readable,
                                                   &ns_recycle_denied);
                /* XXX recursion can be turned into plain iteration to save
                   stack */
                if (i_node->properties != NULL) {
                    /* this is not entirely clear, but relies on the very same
                       class-hierarchy emulation that libxml2 has firmly baked
                       in its API/ABI */
                    ret |= pcmk__acl_annotate_permissions_recursive((xmlNodePtr) i_node->properties);
                }
                if (i_node->children != NULL) {
                    ret |= pcmk__acl_annotate_permissions_recursive(i_node->children);
                }
                break;
            case XML_ATTRIBUTE_NODE:
                /* we can utilize that parent has already been assigned the ns */
                if (!pcmk__check_acl(i_node->parent,
                                     (const char *) i_node->name,
                                     pcmk__xf_acl_read)) {
                    ns = NS_DENIED;
                } else if (!pcmk__check_acl(i_node,
                                            (const char *) i_node->name,
                                            pcmk__xf_acl_write)) {
                    ns = NS_READABLE;
                } else {
                    ns = NS_WRITABLE;
                }
                pcmk__acl_mark_node_with_namespace(i_node, ns, &ret,
                                                   &ns_recycle_writable,
                                                   &ns_recycle_readable,
                                                   &ns_recycle_denied);
                break;
            case XML_COMMENT_NODE:
                /* we can utilize that parent has already been assigned the ns */
                if (!pcmk__check_acl(i_node->parent,
                                     (const char *) i_node->name,
                                     pcmk__xf_acl_read))
                {
                    ns = NS_DENIED;
                }
                else if (!pcmk__check_acl(i_node->parent,
                                          (const char *) i_node->name,
                                          pcmk__xf_acl_write))
                {
                    ns = NS_READABLE;
                }
                else
                {
                    ns = NS_WRITABLE;
                }
                pcmk__acl_mark_node_with_namespace(i_node, ns, &ret,
                                                   &ns_recycle_writable,
                                                   &ns_recycle_readable,
                                                   &ns_recycle_denied);
                break;
            default:
                break;
        }
    }

    return ret;
}

int
pcmk__acl_annotate_permissions(const char *cred, const xmlDoc *cib_doc,
                               xmlDoc **acl_evaled_doc)
{
    int ret, version;
    xmlNode *target, *comment;
    const char *validation;

    CRM_CHECK(cred != NULL, return EINVAL);
    CRM_CHECK(cib_doc != NULL, return EINVAL);
    CRM_CHECK(acl_evaled_doc != NULL, return EINVAL);

    /* avoid trivial accidental XML injection */
    if (strpbrk(cred, "<>&") != NULL) {
        return EINVAL;
    }

    if (!pcmk_acl_required(cred)) {
        /* nothing to evaluate */
        return pcmk_rc_already;
    }

    // @COMPAT xmlDocGetRootElement() requires non-const in libxml2 < 2.9.2

    validation = crm_element_value(xmlDocGetRootElement((xmlDoc *) cib_doc),
                                   XML_ATTR_VALIDATION);
    version = get_schema_version(validation);
    if (get_schema_version(PCMK__COMPAT_ACL_2_MIN_INCL) > version) {
        return pcmk_rc_schema_validation;
    }

    target = copy_xml(xmlDocGetRootElement((xmlDoc *) cib_doc));
    if (target == NULL) {
        return EINVAL;
    }

    pcmk__enable_acl(target, target, cred);

    ret = pcmk__acl_annotate_permissions_recursive(target);

    if (ret == pcmk_rc_ok) {
        char *credentials = crm_strdup_printf("ACLs as evaluated for user %s",
                                              cred);

        comment = xmlNewDocComment(target->doc, (pcmkXmlStr) credentials);
        free(credentials);
        if (comment == NULL) {
            xmlFreeNode(target);
            return EINVAL;
        }
        xmlAddPrevSibling(xmlDocGetRootElement(target->doc), comment);
        *acl_evaled_doc = target->doc;
        return pcmk_rc_ok;
    } else {
        xmlFreeNode(target);
        return ret; // for now, it should be some kind of error
    }
}

int
pcmk__acl_evaled_render(xmlDoc *annotated_doc, enum pcmk__acl_render_how how,
                        xmlChar **doc_txt_ptr)
{
    xmlDoc *xslt_doc;
    xsltStylesheet *xslt;
    xsltTransformContext *xslt_ctxt;
    xmlDoc *res;
    char *sfile;
    static const char *params_namespace[] = {
        "accessrendercfg:c-writable", ACL_NS_Q_PREFIX "writable:",
        "accessrendercfg:c-readable", ACL_NS_Q_PREFIX "readable:",
        "accessrendercfg:c-denied", ACL_NS_Q_PREFIX "denied:",
        "accessrendercfg:c-reset", "",
        "accessrender:extra-spacing", "no",
        "accessrender:self-reproducing-prefix", ACL_NS_Q_PREFIX,
        NULL
    }, *params_useansi[] = {
        /* start with hard-coded defaults, then adapt per the template ones */
        "accessrendercfg:c-writable", "\x1b[32m",
        "accessrendercfg:c-readable", "\x1b[34m",
        "accessrendercfg:c-denied", "\x1b[31m",
        "accessrendercfg:c-reset", "\x1b[0m",
        "accessrender:extra-spacing", "no",
        "accessrender:self-reproducing-prefix", ACL_NS_Q_PREFIX,
        NULL
    }, *params_noansi[] = {
        "accessrendercfg:c-writable", "vvv---[ WRITABLE ]---vvv",
        "accessrendercfg:c-readable", "vvv---[ READABLE ]---vvv",
        "accessrendercfg:c-denied", "vvv---[ ~DENIED~ ]---vvv",
        "accessrendercfg:c-reset", "",
        "accessrender:extra-spacing", "yes",
        "accessrender:self-reproducing-prefix", "",
        NULL
    };
    const char **params;
    int ret;
    xmlParserCtxtPtr parser_ctxt;

    /* unfortunately, the input (coming from CIB originally) was parsed with
       blanks ignored, and since the output is a conversion of XML to text
       format (we would be covered otherwise thanks to implicit
       pretty-printing), we need to dump the tree to string output first,
       only to subsequently reparse it -- this time with blanks honoured */
    xmlChar *annotated_dump;
    int dump_size;

    CRM_ASSERT(how != pcmk__acl_render_none);

    // Color is the default render mode for terminals; text is default otherwise
    if (how == pcmk__acl_render_default) {
        if (isatty(STDOUT_FILENO)) {
            how = pcmk__acl_render_color;
        } else {
            how = pcmk__acl_render_text;
        }
    }

    xmlDocDumpFormatMemory(annotated_doc, &annotated_dump, &dump_size, 1);
    res = xmlReadDoc(annotated_dump, "on-the-fly-access-render", NULL,
                     XML_PARSE_NONET);
    CRM_ASSERT(res != NULL);
    xmlFree(annotated_dump);
    xmlFreeDoc(annotated_doc);
    annotated_doc = res;

    sfile = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_base_xslt,
                                    "access-render-2");
    parser_ctxt = xmlNewParserCtxt();

    CRM_ASSERT(sfile != NULL);
    CRM_ASSERT(parser_ctxt != NULL);

    xslt_doc = xmlCtxtReadFile(parser_ctxt, sfile, NULL, XML_PARSE_NONET);

    xslt = xsltParseStylesheetDoc(xslt_doc); /* acquires xslt_doc! */
*/ + if (xslt == NULL) { + crm_crit("Problem in parsing %s", sfile); + return EINVAL; + } + free(sfile); + sfile = NULL; + xmlFreeParserCtxt(parser_ctxt); + + xslt_ctxt = xsltNewTransformContext(xslt, annotated_doc); + CRM_ASSERT(xslt_ctxt != NULL); + + switch (how) { + case pcmk__acl_render_namespace: + params = params_namespace; + break; + case pcmk__acl_render_text: + params = params_noansi; + break; + default: + /* pcmk__acl_render_color is the only remaining option. + * The compiler complains about params possibly uninitialized if we + * don't use default here. + */ + params = params_useansi; + break; + } + + xsltQuoteUserParams(xslt_ctxt, params); + + res = xsltApplyStylesheetUser(xslt, annotated_doc, NULL, + NULL, NULL, xslt_ctxt); + + xmlFreeDoc(annotated_doc); + annotated_doc = NULL; + xsltFreeTransformContext(xslt_ctxt); + xslt_ctxt = NULL; + + if (how == pcmk__acl_render_color && params != params_useansi) { + char **param_i = (char **) params; + do { + free(*param_i); + } while (*param_i++ != NULL); + free(params); + } + + if (res == NULL) { + ret = EINVAL; + } else { + int doc_txt_len; + int temp = xsltSaveResultToString(doc_txt_ptr, &doc_txt_len, res, xslt); + xmlFreeDoc(res); + if (temp == 0) { + ret = pcmk_rc_ok; + } else { + ret = EINVAL; + } + } + xsltFreeStylesheet(xslt); + return ret; +} diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c new file mode 100644 index 0000000..6002cd4 --- /dev/null +++ b/lib/pacemaker/pcmk_cluster_queries.c @@ -0,0 +1,900 @@ +/* + * Copyright 2020-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <glib.h> // gboolean, GMainLoop, etc. +#include <libxml/tree.h> // xmlNode + +#include <pacemaker.h> +#include <pacemaker-internal.h> + +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/cib/internal.h> +#include <crm/msg_xml.h> +#include <crm/common/output_internal.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> +#include <crm/common/iso8601.h> +#include <crm/common/ipc_controld.h> +#include <crm/common/ipc_pacemakerd.h> + +//! Object to store node info from the controller API +typedef struct { + /* Adapted from pcmk_controld_api_reply_t:data:node_info. + * (char **) are convenient here for use within callbacks: we can skip + * copying strings unless the caller passes a non-NULL value. + */ + uint32_t id; + char **node_name; + char **uuid; + char **state; + bool have_quorum; + bool is_remote; +} node_info_t; + +//! Object to store API results, a timeout, and an output object +typedef struct { + pcmk__output_t *out; + bool show_output; + int rc; + unsigned int message_timeout_ms; + enum pcmk_pacemakerd_state pcmkd_state; + node_info_t node_info; +} data_t; + +/*! 
diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c
new file mode 100644
index 0000000..6002cd4
--- /dev/null
+++ b/lib/pacemaker/pcmk_cluster_queries.c
@@ -0,0 +1,900 @@
/*
 * Copyright 2020-2022 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <glib.h>               // gboolean, GMainLoop, etc.
#include <libxml/tree.h>        // xmlNode

#include <pacemaker.h>
#include <pacemaker-internal.h>

#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/cib/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/output_internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/iso8601.h>
#include <crm/common/ipc_controld.h>
#include <crm/common/ipc_pacemakerd.h>

//! Object to store node info from the controller API
typedef struct {
    /* Adapted from pcmk_controld_api_reply_t:data:node_info.
     * (char **) are convenient here for use within callbacks: we can skip
     * copying strings unless the caller passes a non-NULL value.
     */
    uint32_t id;
    char **node_name;
    char **uuid;
    char **state;
    bool have_quorum;
    bool is_remote;
} node_info_t;

//! Object to store API results, a timeout, and an output object
typedef struct {
    pcmk__output_t *out;
    bool show_output;
    int rc;
    unsigned int message_timeout_ms;
    enum pcmk_pacemakerd_state pcmkd_state;
    node_info_t node_info;
} data_t;

/*!
 * \internal
 * \brief Validate that an IPC API event is a good reply
 *
 * \param[in,out] data        API results and options
 * \param[in]     api         IPC API connection
 * \param[in]     event_type  Type of event that occurred
 * \param[in]     status      Event status
 *
 * \return Standard Pacemaker return code
 */
static int
validate_reply_event(data_t *data, const pcmk_ipc_api_t *api,
                     enum pcmk_ipc_event event_type, crm_exit_t status)
{
    pcmk__output_t *out = data->out;

    switch (event_type) {
        case pcmk_ipc_event_reply:
            break;

        case pcmk_ipc_event_disconnect:
            if (data->rc == ECONNRESET) { // Unexpected
                out->err(out, "error: Lost connection to %s",
                         pcmk_ipc_name(api, true));
            }
            // Nothing bad but not the reply we're looking for
            return ENOTSUP;

        default:
            // Ditto
            return ENOTSUP;
    }

    if (status != CRM_EX_OK) {
        out->err(out, "error: Bad reply from %s: %s",
                 pcmk_ipc_name(api, true), crm_exit_str(status));
        data->rc = EBADMSG;
        return data->rc;
    }
    return pcmk_rc_ok;
}

/*!
 * \internal
 * \brief Validate that a controller API event is a good reply of expected type
 *
 * \param[in,out] data           API results and options
 * \param[in]     api            Controller connection
 * \param[in]     event_type     Type of event that occurred
 * \param[in]     status         Event status
 * \param[in]     event_data     Event-specific data
 * \param[in]     expected_type  Expected reply type
 *
 * \return Standard Pacemaker return code
 */
static int
validate_controld_reply(data_t *data, const pcmk_ipc_api_t *api,
                        enum pcmk_ipc_event event_type, crm_exit_t status,
                        const void *event_data,
                        enum pcmk_controld_api_reply expected_type)
{
    pcmk__output_t *out = data->out;
    int rc = pcmk_rc_ok;
    const pcmk_controld_api_reply_t *reply = NULL;

    rc = validate_reply_event(data, api, event_type, status);
    if (rc != pcmk_rc_ok) {
        return rc;
    }

    reply = (const pcmk_controld_api_reply_t *) event_data;

    if (reply->reply_type != expected_type) {
        out->err(out, "error: Unexpected reply type '%s' from controller",
                 pcmk__controld_api_reply2str(reply->reply_type));
        data->rc = EBADMSG;
        return data->rc;
    }

    return pcmk_rc_ok;
}

/*!
 * \internal
 * \brief Validate that a \p pacemakerd API event is a good reply of expected
 *        type
 *
 * \param[in,out] data           API results and options
 * \param[in]     api            \p pacemakerd connection
 * \param[in]     event_type     Type of event that occurred
 * \param[in]     status         Event status
 * \param[in]     event_data     Event-specific data
 * \param[in]     expected_type  Expected reply type
 *
 * \return Standard Pacemaker return code
 */
static int
validate_pcmkd_reply(data_t *data, const pcmk_ipc_api_t *api,
                     enum pcmk_ipc_event event_type, crm_exit_t status,
                     const void *event_data,
                     enum pcmk_pacemakerd_api_reply expected_type)
{
    pcmk__output_t *out = data->out;
    const pcmk_pacemakerd_api_reply_t *reply = NULL;
    int rc = validate_reply_event(data, api, event_type, status);

    if (rc != pcmk_rc_ok) {
        return rc;
    }

    reply = (const pcmk_pacemakerd_api_reply_t *) event_data;

    if (reply->reply_type != expected_type) {
        out->err(out, "error: Unexpected reply type '%s' from pacemakerd",
                 pcmk__pcmkd_api_reply2str(reply->reply_type));
        data->rc = EBADMSG;
        return data->rc;
    }

    return pcmk_rc_ok;
}

/*!
 * \internal
 * \brief Process a controller status IPC event
 *
 * \param[in,out] controld_api  Controller connection
 * \param[in]     event_type    Type of event that occurred
 * \param[in]     status        Event status
 * \param[in,out] event_data    \p pcmk_controld_api_reply_t object containing
 *                              event-specific data
 * \param[in,out] user_data     \p data_t object for API results and options
 */
static void
controller_status_event_cb(pcmk_ipc_api_t *controld_api,
                           enum pcmk_ipc_event event_type, crm_exit_t status,
                           void *event_data, void *user_data)
{
    data_t *data = (data_t *) user_data;
    pcmk__output_t *out = data->out;
    const pcmk_controld_api_reply_t *reply = NULL;

    int rc = validate_controld_reply(data, controld_api, event_type, status,
                                     event_data, pcmk_controld_reply_ping);

    if (rc != pcmk_rc_ok) {
        return;
    }

    reply = (const pcmk_controld_api_reply_t *) event_data;
    out->message(out, "health",
                 reply->data.ping.sys_from, reply->host_from,
                 reply->data.ping.fsa_state, reply->data.ping.result);
    data->rc = pcmk_rc_ok;
}

/*!
 * \internal
 * \brief Process a designated controller IPC event
 *
 * \param[in,out] controld_api  Controller connection
 * \param[in]     event_type    Type of event that occurred
 * \param[in]     status        Event status
 * \param[in,out] event_data    \p pcmk_controld_api_reply_t object containing
 *                              event-specific data
 * \param[in,out] user_data     \p data_t object for API results and options
 */
static void
designated_controller_event_cb(pcmk_ipc_api_t *controld_api,
                               enum pcmk_ipc_event event_type,
                               crm_exit_t status, void *event_data,
                               void *user_data)
{
    data_t *data = (data_t *) user_data;
    pcmk__output_t *out = data->out;
    const pcmk_controld_api_reply_t *reply = NULL;

    int rc = validate_controld_reply(data, controld_api, event_type, status,
                                     event_data, pcmk_controld_reply_ping);

    if (rc != pcmk_rc_ok) {
        return;
    }

    reply = (const pcmk_controld_api_reply_t *) event_data;
    out->message(out, "dc", reply->host_from);
    data->rc = pcmk_rc_ok;
}

/*!
 * \internal
 * \brief Process a node info IPC event
 *
 * \param[in,out] controld_api  Controller connection
 * \param[in]     event_type    Type of event that occurred
 * \param[in]     status        Event status
 * \param[in,out] event_data    \p pcmk_controld_api_reply_t object containing
 *                              event-specific data
 * \param[in,out] user_data     \p data_t object for API results and options
 */
static void
node_info_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type,
                   crm_exit_t status, void *event_data, void *user_data)
{
    data_t *data = (data_t *) user_data;
    pcmk__output_t *out = data->out;

    const pcmk_controld_api_reply_t *reply = NULL;

    int rc = validate_controld_reply(data, controld_api, event_type, status,
                                     event_data, pcmk_controld_reply_info);

    if (rc != pcmk_rc_ok) {
        return;
    }

    reply = (const pcmk_controld_api_reply_t *) event_data;

    if (reply->data.node_info.uname == NULL) {
        out->err(out, "Node is not known to cluster");
        data->rc = pcmk_rc_node_unknown;
        return;
    }

    data->node_info.have_quorum = reply->data.node_info.have_quorum;
    data->node_info.is_remote = reply->data.node_info.is_remote;
    data->node_info.id = (uint32_t) reply->data.node_info.id;

    pcmk__str_update(data->node_info.node_name, reply->data.node_info.uname);
    pcmk__str_update(data->node_info.uuid, reply->data.node_info.uuid);
    pcmk__str_update(data->node_info.state, reply->data.node_info.state);

    if (data->show_output) {
        out->message(out, "node-info",
                     reply->data.node_info.id, reply->data.node_info.uname,
                     reply->data.node_info.uuid, reply->data.node_info.state,
                     reply->data.node_info.have_quorum,
                     reply->data.node_info.is_remote);
    }

    data->rc = pcmk_rc_ok;
}

/*!
 * \internal
 * \brief Process a \p pacemakerd status IPC event
 *
 * \param[in,out] pacemakerd_api  \p pacemakerd connection
 * \param[in]     event_type      Type of event that occurred
 * \param[in]     status          Event status
 * \param[in,out] event_data      \p pcmk_pacemakerd_api_reply_t object
 *                                containing event-specific data
 * \param[in,out] user_data       \p data_t object for API results and options
 */
static void
pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
                    enum pcmk_ipc_event event_type, crm_exit_t status,
                    void *event_data, void *user_data)
{
    data_t *data = user_data;
    pcmk__output_t *out = data->out;
    const pcmk_pacemakerd_api_reply_t *reply = NULL;

    int rc = validate_pcmkd_reply(data, pacemakerd_api, event_type, status,
                                  event_data, pcmk_pacemakerd_reply_ping);

    if (rc != pcmk_rc_ok) {
        return;
    }

    // Parse desired information from reply
    reply = (const pcmk_pacemakerd_api_reply_t *) event_data;

    data->pcmkd_state = reply->data.ping.state;
    data->rc = pcmk_rc_ok;

    if (!data->show_output) {
        return;
    }

    if (reply->data.ping.status == pcmk_rc_ok) {
        out->message(out, "pacemakerd-health",
                     reply->data.ping.sys_from, reply->data.ping.state, NULL,
                     reply->data.ping.last_good);
    } else {
        out->message(out, "pacemakerd-health",
                     reply->data.ping.sys_from, reply->data.ping.state,
                     "query failed", time(NULL));
    }
}

static pcmk_ipc_api_t *
ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb,
            enum pcmk_ipc_dispatch dispatch_type, bool eremoteio_ok)
{
    int rc;
    pcmk__output_t *out = data->out;
    pcmk_ipc_api_t *api = NULL;

    rc = pcmk_new_ipc_api(&api, server);
    if (api == NULL) {
        out->err(out, "error: Could not connect to %s: %s",
                 pcmk_ipc_name(api, true),
                 pcmk_rc_str(rc));
        data->rc = rc;
        return NULL;
    }
    if (cb != NULL) {
        pcmk_register_ipc_callback(api, cb, data);
    }

    rc = pcmk_connect_ipc(api, dispatch_type);

    if (rc != pcmk_rc_ok) {
        if (rc == EREMOTEIO) {
            data->pcmkd_state = pcmk_pacemakerd_state_remote;
            if (eremoteio_ok) {
                /* EREMOTEIO may be expected and acceptable for some callers
                 * on a Pacemaker Remote node
                 */
                rc = pcmk_rc_ok;
            } else {
                out->err(out, "error: Could not connect to %s: %s",
                         pcmk_ipc_name(api, true), pcmk_rc_str(rc));
            }
        }
        data->rc = rc;
        pcmk_free_ipc_api(api);
        return NULL;
    }

    return api;
}

/*!
 * \internal
 * \brief Poll an IPC API connection until timeout or a reply is received
 *
 * \param[in,out] data     API results and options
 * \param[in,out] api      IPC API connection
 * \param[in]     on_node  If not \p NULL, name of the node to poll (used only
 *                         for logging)
 *
 * \note Sets the \p rc member of \p data on error
 */
static void
poll_until_reply(data_t *data, pcmk_ipc_api_t *api, const char *on_node)
{
    pcmk__output_t *out = data->out;

    uint64_t start_nsec = qb_util_nano_current_get();
    uint64_t end_nsec = start_nsec;
    uint64_t elapsed_ms = 0;
    uint64_t remaining_ms = data->message_timeout_ms;

    while (remaining_ms > 0) {
        int rc = pcmk_poll_ipc(api, remaining_ms);

        if (rc == EAGAIN) {
            // Poll timed out
            break;
        }

        if (rc != pcmk_rc_ok) {
            out->err(out, "error: Failed to poll %s API%s%s: %s",
                     pcmk_ipc_name(api, true), (on_node != NULL)? " on " : "",
                     pcmk__s(on_node, ""), pcmk_rc_str(rc));
            data->rc = rc;
            return;
        }

        pcmk_dispatch_ipc(api);

        if (data->rc != EAGAIN) {
            // Received a reply
            return;
        }
        end_nsec = qb_util_nano_current_get();
        elapsed_ms = (end_nsec - start_nsec) / QB_TIME_NS_IN_MSEC;
        remaining_ms = data->message_timeout_ms - elapsed_ms;
    }

    out->err(out,
             "error: Timed out after %ums waiting for reply from %s API%s%s",
             data->message_timeout_ms, pcmk_ipc_name(api, true),
             (on_node != NULL)? " on " : "", pcmk__s(on_node, ""));
    data->rc = EAGAIN;
}

/*!
 * \internal
 * \brief Get and output controller status
 *
 * \param[in,out] out                 Output object
 * \param[in]     node_name           Name of node whose status is desired
 *                                    (\p NULL for DC)
 * \param[in]     message_timeout_ms  How long to wait for a reply from the
 *                                    \p pacemaker-controld API. If 0,
 *                                    \p pcmk_ipc_dispatch_sync will be used.
 *                                    Otherwise, \p pcmk_ipc_dispatch_poll will
 *                                    be used.
+ * + * \return Standard Pacemaker return code + */ +int +pcmk__controller_status(pcmk__output_t *out, const char *node_name, + unsigned int message_timeout_ms) +{ + data_t data = { + .out = out, + .rc = EAGAIN, + .message_timeout_ms = message_timeout_ms, + }; + enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; + pcmk_ipc_api_t *controld_api = NULL; + + if (message_timeout_ms == 0) { + dispatch_type = pcmk_ipc_dispatch_sync; + } + controld_api = ipc_connect(&data, pcmk_ipc_controld, + controller_status_event_cb, dispatch_type, + false); + + if (controld_api != NULL) { + int rc = pcmk_controld_api_ping(controld_api, node_name); + if (rc != pcmk_rc_ok) { + out->err(out, "error: Could not ping controller API on %s: %s", + pcmk__s(node_name, "DC"), pcmk_rc_str(rc)); + data.rc = rc; + } + + if (dispatch_type == pcmk_ipc_dispatch_poll) { + poll_until_reply(&data, controld_api, pcmk__s(node_name, "DC")); + } + pcmk_free_ipc_api(controld_api); + } + + return data.rc; +} + + +// Documented in header +int +pcmk_controller_status(xmlNodePtr *xml, const char *node_name, + unsigned int message_timeout_ms) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + pcmk__register_lib_messages(out); + + rc = pcmk__controller_status(out, node_name, message_timeout_ms); + pcmk__xml_output_finish(out, xml); + return rc; +} + +/*! + * \internal + * \brief Get and output designated controller node name + * + * \param[in,out] out Output object + * \param[in] message_timeout_ms How long to wait for a reply from the + * \p pacemaker-controld API. If 0, + * \p pcmk_ipc_dispatch_sync will be used. + * Otherwise, \p pcmk_ipc_dispatch_poll will + * be used. + * + * \return Standard Pacemaker return code + */ +int +pcmk__designated_controller(pcmk__output_t *out, + unsigned int message_timeout_ms) +{ + data_t data = { + .out = out, + .rc = EAGAIN, + .message_timeout_ms = message_timeout_ms, + }; + enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; + pcmk_ipc_api_t *controld_api = NULL; + + if (message_timeout_ms == 0) { + dispatch_type = pcmk_ipc_dispatch_sync; + } + controld_api = ipc_connect(&data, pcmk_ipc_controld, + designated_controller_event_cb, dispatch_type, + false); + + if (controld_api != NULL) { + int rc = pcmk_controld_api_ping(controld_api, NULL); + if (rc != pcmk_rc_ok) { + out->err(out, "error: Could not ping controller API on DC: %s", + pcmk_rc_str(rc)); + data.rc = rc; + } + + if (dispatch_type == pcmk_ipc_dispatch_poll) { + poll_until_reply(&data, controld_api, "DC"); + } + pcmk_free_ipc_api(controld_api); + } + + return data.rc; +} + +// Documented in header +int +pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + pcmk__register_lib_messages(out); + + rc = pcmk__designated_controller(out, message_timeout_ms); + pcmk__xml_output_finish(out, xml); + return rc; +} + +/*! + * \internal + * \brief Get and optionally output node info corresponding to a node ID from + * the controller + * + * \param[in,out] out Output object + * \param[in,out] node_id ID of node whose name to get. If \p NULL + * or 0, get the local node name. If not + * \p NULL, store the true node ID here on + * success. 
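+ * (Passing a pointer to 0 is thus how a caller + * discovers the local node's ID.) 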
+ * \param[out] node_name If not \p NULL, where to store the node + * name + * \param[out] uuid If not \p NULL, where to store the node + * UUID + * \param[out] state If not \p NULL, where to store the + * membership state + * \param[out] is_remote If not \p NULL, where to store whether the + * node is a Pacemaker Remote node + * \param[out] have_quorum If not \p NULL, where to store whether the + * node has quorum + * \param[in] show_output Whether to show the node info + * \param[in] message_timeout_ms How long to wait for a reply from the + * \p pacemaker-controld API. If 0, + * \p pcmk_ipc_dispatch_sync will be used. + * Otherwise, \p pcmk_ipc_dispatch_poll will + * be used. + * + * \return Standard Pacemaker return code + * + * \note The caller is responsible for freeing \p *node_name, \p *uuid, and + * \p *state using \p free(). + */ +int +pcmk__query_node_info(pcmk__output_t *out, uint32_t *node_id, char **node_name, + char **uuid, char **state, bool *have_quorum, + bool *is_remote, bool show_output, + unsigned int message_timeout_ms) +{ + data_t data = { + .out = out, + .show_output = show_output, + .rc = EAGAIN, + .message_timeout_ms = message_timeout_ms, + .node_info = { + .id = (node_id == NULL)? 0 : *node_id, + .node_name = node_name, + .uuid = uuid, + .state = state, + }, + }; + enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; + pcmk_ipc_api_t *controld_api = NULL; + + if (node_name != NULL) { + *node_name = NULL; + } + if (uuid != NULL) { + *uuid = NULL; + } + if (state != NULL) { + *state = NULL; + } + + if (message_timeout_ms == 0) { + dispatch_type = pcmk_ipc_dispatch_sync; + } + controld_api = ipc_connect(&data, pcmk_ipc_controld, node_info_event_cb, + dispatch_type, false); + + if (controld_api != NULL) { + int rc = pcmk_controld_api_node_info(controld_api, + (node_id != NULL)? *node_id : 0); + + if (rc != pcmk_rc_ok) { + out->err(out, + "error: Could not send request to controller API on local " + "node: %s", pcmk_rc_str(rc)); + data.rc = rc; + } + + if (dispatch_type == pcmk_ipc_dispatch_poll) { + poll_until_reply(&data, controld_api, "local node"); + } + pcmk_free_ipc_api(controld_api); + } + + if (data.rc != pcmk_rc_ok) { + return data.rc; + } + + // String outputs are set in callback + if (node_id != NULL) { + *node_id = data.node_info.id; + } + if (have_quorum != NULL) { + *have_quorum = data.node_info.have_quorum; + } + if (is_remote != NULL) { + *is_remote = data.node_info.is_remote; + } + + return data.rc; +} + +// Documented in header +int +pcmk_query_node_info(xmlNodePtr *xml, uint32_t *node_id, char **node_name, + char **uuid, char **state, bool *have_quorum, + bool *is_remote, bool show_output, + unsigned int message_timeout_ms) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + CRM_ASSERT(node_name != NULL); + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + pcmk__register_lib_messages(out); + + rc = pcmk__query_node_info(out, node_id, node_name, uuid, state, + have_quorum, is_remote, show_output, + message_timeout_ms); + pcmk__xml_output_finish(out, xml); + return rc; +} + +/*! + * \internal + * \brief Get and optionally output \p pacemakerd status + * + * \param[in,out] out Output object + * \param[in] ipc_name IPC name for request + * \param[in] message_timeout_ms How long to wait for a reply from the + * \p pacemakerd API. If 0, + * \p pcmk_ipc_dispatch_sync will be used. + * Otherwise, \p pcmk_ipc_dispatch_poll will + * be used. 
+ * \param[in] show_output Whether to output the \p pacemakerd state + * \param[out] state Where to store the \p pacemakerd state, if + * not \p NULL + * + * \return Standard Pacemaker return code + * + * \note This function sets \p state to \p pcmk_pacemakerd_state_remote and + * returns \p pcmk_rc_ok if the IPC connection attempt returns + * \p EREMOTEIO. That code indicates that this is a Pacemaker Remote node + * with \p pacemaker-remoted running. The node may be connected to the + * cluster. + */ +int +pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, + unsigned int message_timeout_ms, bool show_output, + enum pcmk_pacemakerd_state *state) +{ + data_t data = { + .out = out, + .show_output = show_output, + .rc = EAGAIN, + .message_timeout_ms = message_timeout_ms, + .pcmkd_state = pcmk_pacemakerd_state_invalid, + }; + enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; + pcmk_ipc_api_t *pacemakerd_api = NULL; + + if (message_timeout_ms == 0) { + dispatch_type = pcmk_ipc_dispatch_sync; + } + pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, + pacemakerd_event_cb, dispatch_type, true); + + if (pacemakerd_api != NULL) { + int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name); + if (rc != pcmk_rc_ok) { + out->err(out, "error: Could not ping launcher API: %s", + pcmk_rc_str(rc)); + data.rc = rc; + } + + if (dispatch_type == pcmk_ipc_dispatch_poll) { + poll_until_reply(&data, pacemakerd_api, NULL); + } + pcmk_free_ipc_api(pacemakerd_api); + + } else if ((data.pcmkd_state == pcmk_pacemakerd_state_remote) + && show_output) { + // No API connection so the callback wasn't run + out->message(out, "pacemakerd-health", + NULL, data.pcmkd_state, NULL, time(NULL)); + } + + if (state != NULL) { + *state = data.pcmkd_state; + } + return data.rc; +} + +// Documented in header +int +pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, + unsigned int message_timeout_ms) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + pcmk__register_lib_messages(out); + + rc = pcmk__pacemakerd_status(out, ipc_name, message_timeout_ms, true, NULL); + pcmk__xml_output_finish(out, xml); + return rc; +} + +/* user data for looping through remote node xpath searches */ +struct node_data { + pcmk__output_t *out; + int found; + const char *field; /* XML attribute to check for node name */ + const char *type; + gboolean bash_export; +}; + +static void +remote_node_print_helper(xmlNode *result, void *user_data) +{ + struct node_data *data = user_data; + pcmk__output_t *out = data->out; + const char *name = crm_element_value(result, XML_ATTR_UNAME); + const char *id = crm_element_value(result, data->field); + + // node name and node id are the same for remote/guest nodes + out->message(out, "crmadmin-node", data->type, + name ? 
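/* prefer the node name; fall back to the id */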
name : id, + id, + data->bash_export); + data->found++; +} + +// \return Standard Pacemaker return code +int +pcmk__list_nodes(pcmk__output_t *out, const char *node_types, + gboolean bash_export) +{ + xmlNode *xml_node = NULL; + int rc; + + rc = cib__signon_query(out, NULL, &xml_node); + + if (rc == pcmk_rc_ok) { + struct node_data data = { + .out = out, + .found = 0, + .bash_export = bash_export + }; + + out->begin_list(out, NULL, NULL, "nodes"); + + if (!pcmk__str_empty(node_types) && strstr(node_types, "all")) { + node_types = NULL; + } + + if (pcmk__str_empty(node_types) || strstr(node_types, "cluster")) { + data.field = "id"; + data.type = "cluster"; + crm_foreach_xpath_result(xml_node, PCMK__XP_MEMBER_NODE_CONFIG, + remote_node_print_helper, &data); + } + + if (pcmk__str_empty(node_types) || strstr(node_types, "guest")) { + data.field = "value"; + data.type = "guest"; + crm_foreach_xpath_result(xml_node, PCMK__XP_GUEST_NODE_CONFIG, + remote_node_print_helper, &data); + } + + if (pcmk__str_empty(node_types) || !pcmk__strcmp(node_types, ",|^remote", pcmk__str_regex)) { + data.field = "id"; + data.type = "remote"; + crm_foreach_xpath_result(xml_node, PCMK__XP_REMOTE_NODE_CONFIG, + remote_node_print_helper, &data); + } + + out->end_list(out); + + if (data.found == 0) { + out->info(out, "No nodes configured"); + } + + free_xml(xml_node); + } + + return rc; +} + +int +pcmk_list_nodes(xmlNodePtr *xml, const char *node_types) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + pcmk__register_lib_messages(out); + + rc = pcmk__list_nodes(out, node_types, FALSE); + pcmk__xml_output_finish(out, xml); + return rc; +} diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c new file mode 100644 index 0000000..7a0490f --- /dev/null +++ b/lib/pacemaker/pcmk_fence.c @@ -0,0 +1,626 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> +#include <crm/common/mainloop.h> +#include <crm/common/results.h> +#include <crm/common/output.h> +#include <crm/common/output_internal.h> +#include <crm/stonith-ng.h> +#include <crm/fencing/internal.h> + +#include <glib.h> +#include <libxml/tree.h> +#include <pacemaker.h> +#include <pacemaker-internal.h> + +static const int st_opts = st_opt_sync_call | st_opt_allow_suicide; + +static GMainLoop *mainloop = NULL; + +static struct { + stonith_t *st; + const char *target; + const char *action; + char *name; + unsigned int timeout; + unsigned int tolerance; + int delay; + pcmk__action_result_t result; +} async_fence_data = { NULL, }; + +static int +handle_level(stonith_t *st, const char *target, int fence_level, + const stonith_key_value_t *devices, bool added) +{ + const char *node = NULL; + const char *pattern = NULL; + const char *name = NULL; + char *value = NULL; + int rc = pcmk_rc_ok; + + if (target == NULL) { + // Not really possible, but makes static analysis happy + return EINVAL; + } + + /* Determine if targeting by attribute, node name pattern or node name */ + value = strchr(target, '='); + if (value != NULL) { + name = target; + *value++ = '\0'; + } else if (*target == '@') { + pattern = target + 1; + } else { + node = target; + } + + /* Register or unregister level as appropriate */ + if (added) { + rc = st->cmds->register_level_full(st, st_opts, node, pattern, + name, value, fence_level, + devices); + } else { + rc = st->cmds->remove_level_full(st, st_opts, node, pattern, + name, value, fence_level); + } + + return pcmk_legacy2rc(rc); +} + +static stonith_history_t * +reduce_fence_history(stonith_history_t *history) +{ + stonith_history_t *new, *hp, *np; + + if (!history) { + return history; + } + + new = history; + hp = new->next; + new->next = NULL; + + while (hp) { + stonith_history_t *hp_next = hp->next; + + hp->next = NULL; + + for (np = new; ; np = np->next) { + if ((hp->state == st_done) || (hp->state == st_failed)) { + /* action not in progress */ + if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) && + pcmk__str_eq(hp->action, np->action, pcmk__str_none) && + (hp->state == np->state) && + ((hp->state == st_done) || + pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) { + /* purge older hp */ + stonith_history_free(hp); + break; + } + } + + if (!np->next) { + np->next = hp; + break; + } + } + hp = hp_next; + } + + return new; +} + +static void +notify_callback(stonith_t * st, stonith_event_t * e) +{ + if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei) + && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) { + + pcmk__set_result(&async_fence_data.result, + stonith__event_exit_status(e), + stonith__event_execution_status(e), + stonith__event_exit_reason(e)); + g_main_loop_quit(mainloop); + } +} + +static void +fence_callback(stonith_t * stonith, stonith_callback_data_t * data) +{ + pcmk__set_result(&async_fence_data.result, stonith__exit_status(data), + stonith__execution_status(data), + stonith__exit_reason(data)); + g_main_loop_quit(mainloop); +} + +static gboolean +async_fence_helper(gpointer user_data) +{ + stonith_t *st = async_fence_data.st; + int call_id = 0; + int rc = stonith_api_connect_retry(st, async_fence_data.name, 10); + + if (rc != pcmk_ok) { + g_main_loop_quit(mainloop); + pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, + PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc)); + return TRUE; + } + + st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, 
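/* records the fence result and quits the main loop */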
notify_callback); + + call_id = st->cmds->fence_with_delay(st, + st_opt_allow_suicide, + async_fence_data.target, + async_fence_data.action, + async_fence_data.timeout/1000, + async_fence_data.tolerance/1000, + async_fence_data.delay); + + if (call_id < 0) { + g_main_loop_quit(mainloop); + pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, + PCMK_EXEC_ERROR, pcmk_strerror(call_id)); + return TRUE; + } + + st->cmds->register_callback(st, + call_id, + (async_fence_data.timeout/1000 + + (async_fence_data.delay > 0 ? async_fence_data.delay : 0)), + st_opt_timeout_updates, NULL, "callback", fence_callback); + + return TRUE; +} + +int +pcmk__request_fencing(stonith_t *st, const char *target, const char *action, + const char *name, unsigned int timeout, + unsigned int tolerance, int delay, char **reason) +{ + crm_trigger_t *trig; + int rc = pcmk_rc_ok; + + async_fence_data.st = st; + async_fence_data.name = strdup(name); + async_fence_data.target = target; + async_fence_data.action = action; + async_fence_data.timeout = timeout; + async_fence_data.tolerance = tolerance; + async_fence_data.delay = delay; + pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN, + NULL); + + trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL); + mainloop_set_trigger(trig); + + mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(mainloop); + + free(async_fence_data.name); + + if (reason != NULL) { + // Give the caller ownership of the exit reason + *reason = async_fence_data.result.exit_reason; + async_fence_data.result.exit_reason = NULL; + } + rc = stonith__result2rc(&async_fence_data.result); + pcmk__reset_result(&async_fence_data.result); + return rc; +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_request_fencing(stonith_t *st, const char *target, const char *action, + const char *name, unsigned int timeout, + unsigned int tolerance, int delay, char **reason) +{ + return pcmk__request_fencing(st, target, action, name, timeout, tolerance, + delay, reason); +} +#endif + +int +pcmk__fence_history(pcmk__output_t *out, stonith_t *st, const char *target, + unsigned int timeout, int verbose, bool broadcast, + bool cleanup) +{ + stonith_history_t *history = NULL, *hp, *latest = NULL; + int rc = pcmk_rc_ok; + int opts = 0; + + if (cleanup) { + out->info(out, "cleaning up fencing-history%s%s", + target ? " for node " : "", target ? target : ""); + } + if (broadcast) { + out->info(out, "gather fencing-history from all nodes"); + } + + stonith__set_call_options(opts, target, st_opts); + if (cleanup) { + stonith__set_call_options(opts, target, st_opt_cleanup); + } + if (broadcast) { + stonith__set_call_options(opts, target, st_opt_broadcast); + } + rc = st->cmds->history(st, opts, + pcmk__str_eq(target, "*", pcmk__str_none)? 
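/* a target of "*" means all nodes, which the history API takes as NULL */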
NULL : target, + &history, timeout/1000); + + if (cleanup) { + // Cleanup doesn't return a history list + stonith_history_free(history); + return pcmk_legacy2rc(rc); + } + + out->begin_list(out, "event", "events", "Fencing history"); + + history = stonith__sort_history(history); + for (hp = history; hp; hp = hp->next) { + if (hp->state == st_done) { + latest = hp; + } + + if (out->is_quiet(out) || !verbose) { + continue; + } + + out->message(out, "stonith-event", hp, true, false, + stonith__later_succeeded(hp, history), + (uint32_t) pcmk_show_failed_detail); + out->increment_list(out); + } + + if (latest) { + if (out->is_quiet(out)) { + out->message(out, "stonith-event", latest, false, true, NULL, + (uint32_t) pcmk_show_failed_detail); + } else if (!verbose) { // already printed if verbose + out->message(out, "stonith-event", latest, false, false, NULL, + (uint32_t) pcmk_show_failed_detail); + out->increment_list(out); + } + } + + out->end_list(out); + + stonith_history_free(history); + return pcmk_legacy2rc(rc); +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_history(xmlNodePtr *xml, stonith_t *st, const char *target, + unsigned int timeout, bool quiet, int verbose, + bool broadcast, bool cleanup) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + stonith__register_messages(out); + + out->quiet = quiet; + + rc = pcmk__fence_history(out, st, target, timeout, verbose, broadcast, cleanup); + pcmk__xml_output_finish(out, xml); + return rc; +} +#endif + +int +pcmk__fence_installed(pcmk__output_t *out, stonith_t *st, unsigned int timeout) +{ + stonith_key_value_t *devices = NULL; + int rc = pcmk_rc_ok; + + rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout/1000); + /* list_agents returns a negative error code or a positive number of agents. 
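* A non-negative value therefore indicates success here. 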
*/ + if (rc < 0) { + return pcmk_legacy2rc(rc); + } + + out->begin_list(out, "fence device", "fence devices", "Installed fence devices"); + for (stonith_key_value_t *dIter = devices; dIter; dIter = dIter->next) { + out->list_item(out, "device", "%s", dIter->value); + } + out->end_list(out); + + stonith_key_value_freeall(devices, 1, 1); + return pcmk_rc_ok; +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_installed(xmlNodePtr *xml, stonith_t *st, unsigned int timeout) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + stonith__register_messages(out); + + rc = pcmk__fence_installed(out, st, timeout); + pcmk__xml_output_finish(out, xml); + return rc; +} +#endif + +int +pcmk__fence_last(pcmk__output_t *out, const char *target, bool as_nodeid) +{ + time_t when = 0; + + if (target == NULL) { + return pcmk_rc_ok; + } + + if (as_nodeid) { + when = stonith_api_time(atol(target), NULL, FALSE); + } else { + when = stonith_api_time(0, target, FALSE); + } + + return out->message(out, "last-fenced", target, when); +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_last(xmlNodePtr *xml, const char *target, bool as_nodeid) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + stonith__register_messages(out); + + rc = pcmk__fence_last(out, target, as_nodeid); + pcmk__xml_output_finish(out, xml); + return rc; +} +#endif + +int +pcmk__fence_list_targets(pcmk__output_t *out, stonith_t *st, + const char *device_id, unsigned int timeout) +{ + GList *targets = NULL; + char *lists = NULL; + int rc = pcmk_rc_ok; + + rc = st->cmds->list(st, st_opts, device_id, &lists, timeout/1000); + if (rc != pcmk_rc_ok) { + return pcmk_legacy2rc(rc); + } + + targets = stonith__parse_targets(lists); + + out->begin_list(out, "fence target", "fence targets", "Fence Targets"); + while (targets != NULL) { + out->list_item(out, NULL, "%s", (const char *) targets->data); + targets = targets->next; + } + out->end_list(out); + + free(lists); + return rc; +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_list_targets(xmlNodePtr *xml, stonith_t *st, const char *device_id, + unsigned int timeout) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + stonith__register_messages(out); + + rc = pcmk__fence_list_targets(out, st, device_id, timeout); + pcmk__xml_output_finish(out, xml); + return rc; +} +#endif + +int +pcmk__fence_metadata(pcmk__output_t *out, stonith_t *st, const char *agent, + unsigned int timeout) +{ + char *buffer = NULL; + int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, + timeout/1000); + + if (rc != pcmk_rc_ok) { + return pcmk_legacy2rc(rc); + } + + out->output_xml(out, "metadata", buffer); + free(buffer); + return rc; +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_metadata(xmlNodePtr *xml, stonith_t *st, const char *agent, + unsigned int timeout) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + stonith__register_messages(out); + + rc = pcmk__fence_metadata(out, st, agent, timeout); + pcmk__xml_output_finish(out, xml); + return rc; +} +#endif + +int +pcmk__fence_registered(pcmk__output_t *out, stonith_t *st, const char *target, + unsigned int timeout) +{ + stonith_key_value_t *devices = NULL; + int rc = 
pcmk_rc_ok; + + rc = st->cmds->query(st, st_opts, target, &devices, timeout/1000); + /* query returns a negative error code or a positive number of results. */ + if (rc < 0) { + return pcmk_legacy2rc(rc); + } + + out->begin_list(out, "fence device", "fence devices", "Registered fence devices"); + for (stonith_key_value_t *dIter = devices; dIter; dIter = dIter->next) { + out->list_item(out, "device", "%s", dIter->value); + } + out->end_list(out); + + stonith_key_value_freeall(devices, 1, 1); + + /* Return pcmk_rc_ok here, not the number of results. Callers probably + * don't care. + */ + return pcmk_rc_ok; +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_registered(xmlNodePtr *xml, stonith_t *st, const char *target, + unsigned int timeout) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + stonith__register_messages(out); + + rc = pcmk__fence_registered(out, st, target, timeout); + pcmk__xml_output_finish(out, xml); + return rc; +} +#endif + +int +pcmk__fence_register_level(stonith_t *st, const char *target, int fence_level, + const stonith_key_value_t *devices) +{ + return handle_level(st, target, fence_level, devices, true); +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_register_level(stonith_t *st, const char *target, int fence_level, + const stonith_key_value_t *devices) +{ + return pcmk__fence_register_level(st, target, fence_level, devices); +} +#endif + +int +pcmk__fence_unregister_level(stonith_t *st, const char *target, int fence_level) +{ + return handle_level(st, target, fence_level, NULL, false); +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_unregister_level(stonith_t *st, const char *target, int fence_level) +{ + return pcmk__fence_unregister_level(st, target, fence_level); +} +#endif + +int +pcmk__fence_validate(pcmk__output_t *out, stonith_t *st, const char *agent, + const char *id, const stonith_key_value_t *params, + unsigned int timeout) +{ + char *output = NULL; + char *error_output = NULL; + int rc; + + rc = st->cmds->validate(st, st_opt_sync_call, id, NULL, agent, params, + timeout/1000, &output, &error_output); + out->message(out, "validate", agent, id, output, error_output, rc); + return pcmk_legacy2rc(rc); +} + +#ifdef BUILD_PUBLIC_LIBPACEMAKER +int +pcmk_fence_validate(xmlNodePtr *xml, stonith_t *st, const char *agent, + const char *id, const stonith_key_value_t *params, + unsigned int timeout) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + stonith__register_messages(out); + + rc = pcmk__fence_validate(out, st, agent, id, params, timeout); + pcmk__xml_output_finish(out, xml); + return rc; +} +#endif + +int +pcmk__get_fencing_history(stonith_t *st, stonith_history_t **stonith_history, + enum pcmk__fence_history fence_history) +{ + int rc = pcmk_rc_ok; + + if ((st == NULL) || (st->state == stonith_disconnected)) { + rc = ENOTCONN; + } else if (fence_history != pcmk__fence_history_none) { + rc = st->cmds->history(st, st_opt_sync_call, NULL, stonith_history, 120); + + rc = pcmk_legacy2rc(rc); + if (rc != pcmk_rc_ok) { + return rc; + } + + *stonith_history = stonith__sort_history(*stonith_history); + if (fence_history == pcmk__fence_history_reduced) { + *stonith_history = reduce_fence_history(*stonith_history); + } + } + + return rc; +} diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c new file mode 100644 index 
0000000..f2f172e --- /dev/null +++ b/lib/pacemaker/pcmk_graph_consumer.c @@ -0,0 +1,874 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <sys/stat.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> +#include <crm/lrmd_internal.h> +#include <pacemaker-internal.h> + + +/* + * Functions for updating graph + */ + +/*! + * \internal + * \brief Update synapse after completed prerequisite + * + * A synapse is ready to be executed once all its prerequisite actions (inputs) + * complete. Given a completed action, check whether it is an input for a given + * synapse, and if so, mark the input as confirmed, and mark the synapse as + * ready if appropriate. + * + * \param[in,out] synapse Transition graph synapse to update + * \param[in] action_id ID of an action that completed + * + * \note The only substantial effect here is confirming synapse inputs. + * should_fire_synapse() will recalculate pcmk__synapse_ready, so the only + * thing that uses the pcmk__synapse_ready from here is + * synapse_state_str(). + */ +static void +update_synapse_ready(pcmk__graph_synapse_t *synapse, int action_id) +{ + if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { + return; // All inputs have already been confirmed + } + pcmk__set_synapse_flags(synapse, pcmk__synapse_ready); // Presume ready until proven otherwise + for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { + pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data; + + if (prereq->id == action_id) { + crm_trace("Confirming input %d of synapse %d", + action_id, synapse->id); + pcmk__set_graph_action_flags(prereq, pcmk__graph_action_confirmed); + + } else if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) { + pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready); + crm_trace("Synapse %d still not ready after action %d", + synapse->id, action_id); + } + } + if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { + crm_trace("Synapse %d is now ready to execute", synapse->id); + } +} + +/*! + * \internal + * \brief Update action and synapse confirmation after action completion + * + * \param[in,out] synapse Transition graph synapse that action belongs to + * \param[in] action_id ID of action that completed + */ +static void +update_synapse_confirmed(pcmk__graph_synapse_t *synapse, int action_id) +{ + bool all_confirmed = true; + + for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { + pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data; + + if (action->id == action_id) { + crm_trace("Confirmed action %d of synapse %d", + action_id, synapse->id); + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + + } else if (all_confirmed && !(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { + all_confirmed = false; + crm_trace("Synapse %d still not confirmed after action %d", + synapse->id, action_id); + } + } + + if (all_confirmed && !(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) { + crm_trace("Confirmed synapse %d", synapse->id); + pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed); + } +} + +/*! 
+ * \internal + * \brief Update the transition graph with a completed action result + * + * \param[in,out] graph Transition graph to update + * \param[in] action Action that completed + */ +void +pcmk__update_graph(pcmk__graph_t *graph, const pcmk__graph_action_t *action) +{ + for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; + + if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) { + continue; // This synapse already completed + + } else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { + update_synapse_confirmed(synapse, action->id); + + } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_failed)) || (synapse->priority == INFINITY)) { + update_synapse_ready(synapse, action->id); + } + } +} + + +/* + * Functions for executing graph + */ + +/* A transition graph consists of various types of actions. The library caller + * registers execution functions for each action type, which will be stored + * here. + */ +static pcmk__graph_functions_t *graph_fns = NULL; + +/*! + * \internal + * \brief Set transition graph execution functions + * + * \param[in] fns Execution functions to use + */ +void +pcmk__set_graph_functions(pcmk__graph_functions_t *fns) +{ + crm_debug("Setting custom functions for executing transition graphs"); + graph_fns = fns; + + CRM_ASSERT(graph_fns != NULL); + CRM_ASSERT(graph_fns->rsc != NULL); + CRM_ASSERT(graph_fns->cluster != NULL); + CRM_ASSERT(graph_fns->pseudo != NULL); + CRM_ASSERT(graph_fns->fence != NULL); +} + +/*! + * \internal + * \brief Check whether a graph synapse is ready to be executed + * + * \param[in,out] graph Transition graph that synapse is part of + * \param[in,out] synapse Synapse to check + * + * \return true if synapse is ready, false otherwise + */ +static bool +should_fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse) +{ + GList *lpc = NULL; + + pcmk__set_synapse_flags(synapse, pcmk__synapse_ready); + for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { + pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data; + + if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) { + crm_trace("Input %d for synapse %d not yet confirmed", + prereq->id, synapse->id); + pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready); + break; + + } else if (pcmk_is_set(prereq->flags, pcmk__graph_action_failed) && !(pcmk_is_set(prereq->flags, pcmk__graph_action_can_fail))) { + crm_trace("Input %d for synapse %d confirmed but failed", + prereq->id, synapse->id); + pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready); + break; + } + } + if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { + crm_trace("Synapse %d is ready to execute", synapse->id); + } else { + return false; + } + + for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { + pcmk__graph_action_t *a = (pcmk__graph_action_t *) lpc->data; + + if (a->type == pcmk__pseudo_graph_action) { + /* None of the below applies to pseudo ops */ + + } else if (synapse->priority < graph->abort_priority) { + crm_trace("Skipping synapse %d: priority %d is less than " + "abort priority %d", + synapse->id, synapse->priority, graph->abort_priority); + graph->skipped++; + return false; + + } else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) { + crm_trace("Deferring synapse %d: not allowed", synapse->id); + return false; + } + } + + return true; +} + +/*! 
+ * \internal + * \brief Initiate an action from a transition graph + * + * \param[in,out] graph Transition graph containing action + * \param[in,out] action Action to execute + * + * \return Standard Pacemaker return code + */ +static int +initiate_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + const char *id = ID(action->xml); + + CRM_CHECK(id != NULL, return EINVAL); + CRM_CHECK(!pcmk_is_set(action->flags, pcmk__graph_action_executed), + return pcmk_rc_already); + + pcmk__set_graph_action_flags(action, pcmk__graph_action_executed); + switch (action->type) { + case pcmk__pseudo_graph_action: + crm_trace("Executing pseudo-action %d (%s)", action->id, id); + return graph_fns->pseudo(graph, action); + + case pcmk__rsc_graph_action: + crm_trace("Executing resource action %d (%s)", action->id, id); + return graph_fns->rsc(graph, action); + + case pcmk__cluster_graph_action: + if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK), + CRM_OP_FENCE, pcmk__str_casei)) { + crm_trace("Executing fencing action %d (%s)", + action->id, id); + return graph_fns->fence(graph, action); + } + crm_trace("Executing cluster action %d (%s)", action->id, id); + return graph_fns->cluster(graph, action); + + default: + crm_err("Unsupported graph action type <%s " XML_ATTR_ID "='%s'> " + "(bug?)", + crm_element_name(action->xml), id); + return EINVAL; + } +} + +/*! + * \internal + * \brief Execute a graph synapse + * + * \param[in,out] graph Transition graph with synapse to execute + * \param[in,out] synapse Synapse to execute + * + * \return Standard Pacemaker return value + */ +static int +fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse) +{ + pcmk__set_synapse_flags(synapse, pcmk__synapse_executed); + for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { + pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data; + int rc = initiate_action(graph, action); + + if (rc != pcmk_rc_ok) { + crm_err("Failed initiating <%s " XML_ATTR_ID "=%d> in synapse %d: " + "%s", + crm_element_name(action->xml), action->id, synapse->id, + pcmk_rc_str(rc)); + pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed); + pcmk__set_graph_action_flags(action, + pcmk__graph_action_confirmed + |pcmk__graph_action_failed); + return pcmk_rc_error; + } + } + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Dummy graph method that can be used with simulations + * + * \param[in,out] graph Transition graph containing action + * \param[in,out] action Graph action to be initiated + * + * \return Standard Pacemaker return code + * \note If the PE_fail environment variable is set to the action ID, + * then the graph action will be marked as failed. 
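+ * For example (hypothetical): running a simulation with PE_fail=3 in the + * environment would mark graph action 3 as failed and abort the remainder + * of the transition. 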
+ */ +static int +pseudo_action_dummy(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + static int fail = -1; + + if (fail < 0) { + long long fail_ll; + + if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok) + && (fail_ll > 0LL) && (fail_ll <= INT_MAX)) { + fail = (int) fail_ll; + } else { + fail = 0; + } + } + + if (action->id == fail) { + crm_err("Dummy event handler: pretending action %d failed", action->id); + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + graph->abort_priority = INFINITY; + } else { + crm_trace("Dummy event handler: action %d initiated", action->id); + } + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + pcmk__update_graph(graph, action); + return pcmk_rc_ok; +} + +static pcmk__graph_functions_t default_fns = { + pseudo_action_dummy, + pseudo_action_dummy, + pseudo_action_dummy, + pseudo_action_dummy +}; + +/*! + * \internal + * \brief Execute all actions in a transition graph + * + * \param[in,out] graph Transition graph to execute + * + * \return Status of transition after execution + */ +enum pcmk__graph_status +pcmk__execute_graph(pcmk__graph_t *graph) +{ + GList *lpc = NULL; + int log_level = LOG_DEBUG; + enum pcmk__graph_status pass_result = pcmk__graph_active; + const char *status = "In progress"; + + if (graph_fns == NULL) { + graph_fns = &default_fns; + } + if (graph == NULL) { + return pcmk__graph_complete; + } + + graph->fired = 0; + graph->pending = 0; + graph->skipped = 0; + graph->completed = 0; + graph->incomplete = 0; + + // Count completed and in-flight synapses + for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; + + if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { + graph->completed++; + + } else if (!(pcmk_is_set(synapse->flags, pcmk__synapse_failed)) && pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { + graph->pending++; + } + } + crm_trace("Executing graph %d (%d synapses already completed, %d pending)", + graph->id, graph->completed, graph->pending); + + // Execute any synapses that are ready + for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; + + if ((graph->batch_limit > 0) + && (graph->pending >= graph->batch_limit)) { + + crm_debug("Throttling graph execution: batch limit (%d) reached", + graph->batch_limit); + break; + + } else if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) { + graph->skipped++; + continue; + + } else if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_executed)) { + continue; // Already handled + + } else if (should_fire_synapse(graph, synapse)) { + graph->fired++; + if (fire_synapse(graph, synapse) != pcmk_rc_ok) { + crm_err("Synapse %d failed to fire", synapse->id); + log_level = LOG_ERR; + graph->abort_priority = INFINITY; + graph->incomplete++; + graph->fired--; + } + + if (!(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) { + graph->pending++; + } + + } else { + crm_trace("Synapse %d cannot fire", synapse->id); + graph->incomplete++; + } + } + + if ((graph->pending == 0) && (graph->fired == 0)) { + graph->complete = true; + + if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) { + log_level = LOG_WARNING; + pass_result = pcmk__graph_terminated; + status = "Terminated"; + + } else if (graph->skipped != 0) { + log_level = LOG_NOTICE; + pass_result = pcmk__graph_complete; + status = "Stopped"; + + } else { + log_level = LOG_NOTICE; + 
pass_result = pcmk__graph_complete; + status = "Complete"; + } + + } else if (graph->fired == 0) { + pass_result = pcmk__graph_pending; + } + + do_crm_log(log_level, + "Transition %d (Complete=%d, Pending=%d," + " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s", + graph->id, graph->completed, graph->pending, graph->fired, + graph->skipped, graph->incomplete, graph->source, status); + + return pass_result; +} + + +/* + * Functions for unpacking transition graph XML into structs + */ + +/*! + * \internal + * \brief Unpack a transition graph action from XML + * + * \param[in] parent Synapse that action is part of + * \param[in] xml_action Action XML to unpack + * + * \return Newly allocated action on success, or NULL otherwise + */ +static pcmk__graph_action_t * +unpack_action(pcmk__graph_synapse_t *parent, xmlNode *xml_action) +{ + enum pcmk__graph_action_type action_type; + pcmk__graph_action_t *action = NULL; + const char *element = TYPE(xml_action); + const char *value = ID(xml_action); + + if (value == NULL) { + crm_err("Ignoring transition graph action without id (bug?)"); + crm_log_xml_trace(xml_action, "invalid"); + return NULL; + } + + if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) { + action_type = pcmk__rsc_graph_action; + + } else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT, + pcmk__str_casei)) { + action_type = pcmk__pseudo_graph_action; + + } else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT, + pcmk__str_casei)) { + action_type = pcmk__cluster_graph_action; + + } else { + crm_err("Ignoring transition graph action of unknown type '%s' (bug?)", + element); + crm_log_xml_trace(xml_action, "invalid"); + return NULL; + } + + action = calloc(1, sizeof(pcmk__graph_action_t)); + if (action == NULL) { + crm_perror(LOG_CRIT, "Cannot unpack transition graph action"); + crm_log_xml_trace(xml_action, "lost"); + return NULL; + } + + pcmk__scan_min_int(value, &(action->id), -1); + action->type = pcmk__rsc_graph_action; + action->xml = copy_xml(xml_action); + action->synapse = parent; + action->type = action_type; + action->params = xml2list(action->xml); + + value = g_hash_table_lookup(action->params, "CRM_meta_timeout"); + pcmk__scan_min_int(value, &(action->timeout), 0); + + /* Take start-delay into account for the timeout of the action timer */ + value = g_hash_table_lookup(action->params, "CRM_meta_start_delay"); + { + int start_delay; + + pcmk__scan_min_int(value, &start_delay, 0); + action->timeout += start_delay; + } + + if (pcmk__guint_from_hash(action->params, + CRM_META "_" XML_LRM_ATTR_INTERVAL, 0, + &(action->interval_ms)) != pcmk_rc_ok) { + action->interval_ms = 0; + } + + value = g_hash_table_lookup(action->params, "CRM_meta_can_fail"); + if (value != NULL) { + + gboolean can_fail = FALSE; + crm_str_to_boolean(value, &can_fail); + if (can_fail) { + pcmk__set_graph_action_flags(action, pcmk__graph_action_can_fail); + } else { + pcmk__clear_graph_action_flags(action, pcmk__graph_action_can_fail); + } + +#ifndef PCMK__COMPAT_2_0 + if (pcmk_is_set(action->flags, pcmk__graph_action_can_fail)) { + crm_warn("Support for the can_fail meta-attribute is deprecated" + " and will be removed in a future release"); + } +#endif + } + + crm_trace("Action %d has timer set to %dms", action->id, action->timeout); + + return action; +} + +/*! 
+ * \internal + * \brief Unpack transition graph synapse from XML + * + * \param[in,out] new_graph Transition graph that synapse is part of + * \param[in] xml_synapse Synapse XML + * + * \return Newly allocated synapse on success, or NULL otherwise + */ +static pcmk__graph_synapse_t * +unpack_synapse(pcmk__graph_t *new_graph, const xmlNode *xml_synapse) +{ + const char *value = NULL; + xmlNode *action_set = NULL; + pcmk__graph_synapse_t *new_synapse = NULL; + + crm_trace("Unpacking synapse %s", ID(xml_synapse)); + + new_synapse = calloc(1, sizeof(pcmk__graph_synapse_t)); + if (new_synapse == NULL) { + return NULL; + } + + pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0); + + value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY); + pcmk__scan_min_int(value, &(new_synapse->priority), 0); + + CRM_CHECK(new_synapse->id >= 0, free(new_synapse); + return NULL); + + new_graph->num_synapses++; + + crm_trace("Unpacking synapse %s action sets", + crm_element_value(xml_synapse, XML_ATTR_ID)); + + for (action_set = first_named_child(xml_synapse, "action_set"); + action_set != NULL; action_set = crm_next_same_xml(action_set)) { + + for (xmlNode *action = pcmk__xml_first_child(action_set); + action != NULL; action = pcmk__xml_next(action)) { + + pcmk__graph_action_t *new_action = unpack_action(new_synapse, + action); + + if (new_action == NULL) { + continue; + } + + crm_trace("Adding action %d to synapse %d", + new_action->id, new_synapse->id); + new_graph->num_actions++; + new_synapse->actions = g_list_append(new_synapse->actions, + new_action); + } + } + + crm_trace("Unpacking synapse %s inputs", ID(xml_synapse)); + + for (xmlNode *inputs = first_named_child(xml_synapse, "inputs"); + inputs != NULL; inputs = crm_next_same_xml(inputs)) { + + for (xmlNode *trigger = first_named_child(inputs, "trigger"); + trigger != NULL; trigger = crm_next_same_xml(trigger)) { + + for (xmlNode *input = pcmk__xml_first_child(trigger); + input != NULL; input = pcmk__xml_next(input)) { + + pcmk__graph_action_t *new_input = unpack_action(new_synapse, + input); + + if (new_input == NULL) { + continue; + } + + crm_trace("Adding input %d to synapse %d", + new_input->id, new_synapse->id); + + new_synapse->inputs = g_list_append(new_synapse->inputs, + new_input); + } + } + } + + return new_synapse; +} + +/*! + * \internal + * \brief Unpack transition graph XML + * + * \param[in] xml_graph Transition graph XML to unpack + * \param[in] reference Where the XML came from (for logging) + * + * \return Newly allocated transition graph on success, NULL otherwise + * \note The caller is responsible for freeing the return value using + * pcmk__free_graph(). + * \note The XML is expected to be structured like: + <transition_graph ...> + <synapse id="0"> + <action_set> + <rsc_op id="2" ...> + ... + </action_set> + <inputs> + <rsc_op id="1" ... + ... + </inputs> + </synapse> + ... + </transition_graph> + */ +pcmk__graph_t * +pcmk__unpack_graph(const xmlNode *xml_graph, const char *reference) +{ + pcmk__graph_t *new_graph = NULL; + + new_graph = calloc(1, sizeof(pcmk__graph_t)); + if (new_graph == NULL) { + return NULL; + } + + new_graph->source = strdup((reference == NULL)? 
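/* fallback label when the caller supplied no reference */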
"unknown" : reference); + if (new_graph->source == NULL) { + free(new_graph); + return NULL; + } + + new_graph->id = -1; + new_graph->abort_priority = 0; + new_graph->network_delay = 0; + new_graph->stonith_timeout = 0; + new_graph->completion_action = pcmk__graph_done; + + // Parse top-level attributes from <transition_graph> + if (xml_graph != NULL) { + const char *buf = crm_element_value(xml_graph, "transition_id"); + + CRM_CHECK(buf != NULL, free(new_graph); + return NULL); + pcmk__scan_min_int(buf, &(new_graph->id), -1); + + buf = crm_element_value(xml_graph, "cluster-delay"); + CRM_CHECK(buf != NULL, free(new_graph); + return NULL); + new_graph->network_delay = crm_parse_interval_spec(buf); + + buf = crm_element_value(xml_graph, "stonith-timeout"); + if (buf == NULL) { + new_graph->stonith_timeout = new_graph->network_delay; + } else { + new_graph->stonith_timeout = crm_parse_interval_spec(buf); + } + + // Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum + buf = crm_element_value(xml_graph, "batch-limit"); + if ((buf == NULL) + || (pcmk__scan_min_int(buf, &(new_graph->batch_limit), + -1) != pcmk_rc_ok)) { + new_graph->batch_limit = 0; + } + + buf = crm_element_value(xml_graph, "migration-limit"); + pcmk__scan_min_int(buf, &(new_graph->migration_limit), -1); + + pcmk__str_update(&(new_graph->failed_stop_offset), + crm_element_value(xml_graph, "failed-stop-offset")); + pcmk__str_update(&(new_graph->failed_start_offset), + crm_element_value(xml_graph, "failed-start-offset")); + + if (crm_element_value_epoch(xml_graph, "recheck-by", + &(new_graph->recheck_by)) != pcmk_ok) { + new_graph->recheck_by = 0; + } + } + + // Unpack each child <synapse> element + for (const xmlNode *synapse_xml = first_named_child(xml_graph, "synapse"); + synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) { + + pcmk__graph_synapse_t *new_synapse = unpack_synapse(new_graph, + synapse_xml); + + if (new_synapse != NULL) { + new_graph->synapses = g_list_append(new_graph->synapses, + new_synapse); + } + } + + crm_debug("Unpacked transition %d from %s: %d actions in %d synapses", + new_graph->id, new_graph->source, new_graph->num_actions, + new_graph->num_synapses); + + return new_graph; +} + + +/* + * Functions for freeing transition graph objects + */ + +/*! + * \internal + * \brief Free a transition graph action object + * + * \param[in,out] user_data Action to free + */ +static void +free_graph_action(gpointer user_data) +{ + pcmk__graph_action_t *action = user_data; + + if (action->timer != 0) { + crm_warn("Cancelling timer for graph action %d", action->id); + g_source_remove(action->timer); + } + if (action->params != NULL) { + g_hash_table_destroy(action->params); + } + free_xml(action->xml); + free(action); +} + +/*! + * \internal + * \brief Free a transition graph synapse object + * + * \param[in,out] user_data Synapse to free + */ +static void +free_graph_synapse(gpointer user_data) +{ + pcmk__graph_synapse_t *synapse = user_data; + + g_list_free_full(synapse->actions, free_graph_action); + g_list_free_full(synapse->inputs, free_graph_action); + free(synapse); +} + +/*! 
+ * \internal + * \brief Free a transition graph object + * + * \param[in,out] graph Transition graph to free + */ +void +pcmk__free_graph(pcmk__graph_t *graph) +{ + if (graph != NULL) { + g_list_free_full(graph->synapses, free_graph_synapse); + free(graph->source); + free(graph->failed_stop_offset); + free(graph->failed_start_offset); + free(graph); + } +} + + +/* + * Other transition graph utilities + */ + +/*! + * \internal + * \brief Synthesize an executor event from a graph action + * + * \param[in] resource If not NULL, use greater call ID than in this XML + * \param[in] action Graph action + * \param[in] status What to use as event execution status + * \param[in] rc What to use as event exit status + * \param[in] exit_reason What to use as event exit reason + * + * \return Newly allocated executor event on success, or NULL otherwise + */ +lrmd_event_data_t * +pcmk__event_from_graph_action(const xmlNode *resource, + const pcmk__graph_action_t *action, + int status, int rc, const char *exit_reason) +{ + lrmd_event_data_t *op = NULL; + GHashTableIter iter; + const char *name = NULL; + const char *value = NULL; + xmlNode *action_resource = NULL; + + CRM_CHECK(action != NULL, return NULL); + CRM_CHECK(action->type == pcmk__rsc_graph_action, return NULL); + + action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); + CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid"); + return NULL); + + op = lrmd_new_event(ID(action_resource), + crm_element_value(action->xml, XML_LRM_ATTR_TASK), + action->interval_ms); + lrmd__set_result(op, rc, status, exit_reason); + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + op->params = pcmk__strkey_table(free, free); + + g_hash_table_iter_init(&iter, action->params); + while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) { + g_hash_table_insert(op->params, strdup(name), strdup(value)); + } + + for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL; + xop = pcmk__xml_next(xop)) { + int tmp = 0; + + crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); + crm_debug("Got call_id=%d for %s", tmp, ID(resource)); + if (tmp > op->call_id) { + op->call_id = tmp; + } + } + + op->call_id++; + return op; +} diff --git a/lib/pacemaker/pcmk_graph_logging.c b/lib/pacemaker/pcmk_graph_logging.c new file mode 100644 index 0000000..b922a3e --- /dev/null +++ b/lib/pacemaker/pcmk_graph_logging.c @@ -0,0 +1,234 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <pacemaker-internal.h> + +/*! 
+ * \internal + * \brief Return text equivalent of an enum pcmk__graph_status for logging + * + * \param[in] state Transition status + * + * \return Human-readable text equivalent of \p state + */ +const char * +pcmk__graph_status2text(enum pcmk__graph_status state) +{ + switch (state) { + case pcmk__graph_active: + return "active"; + case pcmk__graph_pending: + return "pending"; + case pcmk__graph_complete: + return "complete"; + case pcmk__graph_terminated: + return "terminated"; + } + return "unknown"; +} + +static const char * +actiontype2text(enum pcmk__graph_action_type type) +{ + switch (type) { + case pcmk__pseudo_graph_action: + return "pseudo"; + case pcmk__rsc_graph_action: + return "resource"; + case pcmk__cluster_graph_action: + return "cluster"; + } + return "invalid"; +} + +/*! + * \internal + * \brief Find a transition graph action by ID + * + * \param[in] graph Transition graph to search + * \param[in] id Action ID to search for + * + * \return Transition graph action corresponding to \p id, or NULL if none + */ +static const pcmk__graph_action_t * +find_graph_action_by_id(const pcmk__graph_t *graph, int id) +{ + if (graph == NULL) { + return NULL; + } + + for (const GList *sIter = graph->synapses; sIter != NULL; + sIter = sIter->next) { + + const pcmk__graph_synapse_t *synapse = NULL; + + synapse = (const pcmk__graph_synapse_t *) sIter->data; + for (const GList *aIter = synapse->actions; aIter != NULL; + aIter = aIter->next) { + + const pcmk__graph_action_t *action = NULL; + + action = (const pcmk__graph_action_t *) aIter->data; + if (action->id == id) { + return action; + } + } + } + return NULL; +} + +static const char * +synapse_state_str(pcmk__graph_synapse_t *synapse) +{ + if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) { + return "Failed"; + + } else if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { + return "Completed"; + + } else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { + return "In-flight"; + + } else if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { + return "Ready"; + } + return "Pending"; +} + +/*! + * \internal + * \brief List the action IDs of pending inputs to a transition graph synapse + * + * \param[in] graph Transition graph to which \p synapse belongs + * \param[in] synapse Synapse whose inputs to check + * + * \return A \p GString containing the space-delimited action IDs of inputs to + * \p synapse that haven't completed successfully + * + * \note The caller is responsible for freeing the return value using + * \p g_string_free(). 
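+ * + * \note \p NULL is returned when no inputs are pending. 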
+ */ +static GString * +synapse_pending_inputs(const pcmk__graph_t *graph, + const pcmk__graph_synapse_t *synapse) +{ + GString *pending = NULL; + + for (const GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { + const pcmk__graph_action_t *input = (pcmk__graph_action_t *) lpc->data; + + if (pcmk_is_set(input->flags, pcmk__graph_action_failed)) { + pcmk__add_word(&pending, 1024, ID(input->xml)); + + } else if (pcmk_is_set(input->flags, pcmk__graph_action_confirmed)) { + // Confirmed successful inputs are not pending + + } else if (find_graph_action_by_id(graph, input->id) != NULL) { + // In-flight or pending + pcmk__add_word(&pending, 1024, ID(input->xml)); + } + } + return pending; +} + +// Log synapse inputs that aren't in graph +static void +log_unresolved_inputs(unsigned int log_level, pcmk__graph_t *graph, + pcmk__graph_synapse_t *synapse) +{ + for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { + pcmk__graph_action_t *input = (pcmk__graph_action_t *) lpc->data; + const char *key = crm_element_value(input->xml, XML_LRM_ATTR_TASK_KEY); + const char *host = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); + + if (find_graph_action_by_id(graph, input->id) == NULL) { + do_crm_log(log_level, + " * [Input %2d]: Unresolved dependency %s op %s%s%s", + input->id, actiontype2text(input->type), key, + (host? " on " : ""), (host? host : "")); + } + } +} + +static void +log_synapse_action(unsigned int log_level, pcmk__graph_synapse_t *synapse, + pcmk__graph_action_t *action, const char *pending_inputs) +{ + const char *key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + const char *host = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + char *desc = crm_strdup_printf("%s %s op %s", + synapse_state_str(synapse), + actiontype2text(action->type), key); + + do_crm_log(log_level, + "[Action %4d]: %-50s%s%s (priority: %d, waiting: %s)", + action->id, desc, (host? " on " : ""), (host? 
host : ""), + synapse->priority, pending_inputs); + free(desc); +} + +static void +log_synapse(unsigned int log_level, pcmk__graph_t *graph, + pcmk__graph_synapse_t *synapse) +{ + GString *g_pending = NULL; + const char *pending = "none"; + + if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { + g_pending = synapse_pending_inputs(graph, synapse); + + if (g_pending != NULL) { + pending = (const char *) g_pending->str; + } + } + + for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { + log_synapse_action(log_level, synapse, + (pcmk__graph_action_t *) lpc->data, pending); + } + + if (g_pending != NULL) { + g_string_free(g_pending, TRUE); + } + + if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { + log_unresolved_inputs(log_level, graph, synapse); + } +} + +void +pcmk__log_graph_action(int log_level, pcmk__graph_action_t *action) +{ + log_synapse(log_level, NULL, action->synapse); +} + +void +pcmk__log_graph(unsigned int log_level, pcmk__graph_t *graph) +{ + if ((graph == NULL) || (graph->num_actions == 0)) { + if (log_level == LOG_TRACE) { + crm_debug("Empty transition graph"); + } + return; + } + + do_crm_log(log_level, "Graph %d with %d actions:" + " batch-limit=%d jobs, network-delay=%ums", + graph->id, graph->num_actions, + graph->batch_limit, graph->network_delay); + + for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + log_synapse(log_level, graph, (pcmk__graph_synapse_t *) lpc->data); + } +} diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c new file mode 100644 index 0000000..5484e8b --- /dev/null +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -0,0 +1,1078 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <glib.h> + +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +// Convenience macros for logging action properties + +#define action_type_str(flags) \ + (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action") + +#define action_optional_str(flags) \ + (pcmk_is_set((flags), pe_action_optional)? "optional" : "required") + +#define action_runnable_str(flags) \ + (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable") + +#define action_node_str(a) \ + (((a)->node == NULL)? "no node" : (a)->node->details->uname) + +/*! + * \internal + * \brief Add an XML node tag for a specified ID + * + * \param[in] id Node UUID to add + * \param[in,out] xml Parent XML tag to add to + */ +static xmlNode* +add_node_to_xml_by_id(const char *id, xmlNode *xml) +{ + xmlNode *node_xml; + + node_xml = create_xml_node(xml, XML_CIB_TAG_NODE); + crm_xml_add(node_xml, XML_ATTR_ID, id); + + return node_xml; +} + +/*! + * \internal + * \brief Add an XML node tag for a specified node + * + * \param[in] node Node to add + * \param[in,out] xml XML to add node to + */ +static void +add_node_to_xml(const pe_node_t *node, void *xml) +{ + add_node_to_xml_by_id(node->details->id, (xmlNode *) xml); +} + +/*! 
+ * \internal
+ * \brief Add XML with nodes that need an update of their maintenance state
+ *
+ * \param[in,out] xml       Parent XML tag to add to
+ * \param[in]     data_set  Working set for cluster
+ *
+ * \return Number of nodes whose maintenance state needs an update
+ */
+static int
+add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set)
+{
+    GList *gIter = NULL;
+    xmlNode *maintenance =
+        xml? create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE) : NULL;
+    int count = 0;
+
+    for (gIter = data_set->nodes; gIter != NULL;
+         gIter = gIter->next) {
+        pe_node_t *node = (pe_node_t *) gIter->data;
+        struct pe_node_shared_s *details = node->details;
+
+        if (!pe__is_guest_or_remote_node(node)) {
+            continue; /* currently, only remote and guest nodes need to know */
+        }
+
+        if (details->maintenance != details->remote_maintenance) {
+            if (maintenance) {
+                crm_xml_add(
+                    add_node_to_xml_by_id(node->details->id, maintenance),
+                    XML_NODE_IS_MAINTENANCE, details->maintenance? "1" : "0");
+            }
+            count++;
+        }
+    }
+    crm_trace("%s %d nodes that need a maintenance-state update "
+              "in the transition", maintenance? "Added" : "Counted", count);
+    return count;
+}
+
+/*!
+ * \internal
+ * \brief Add a pseudo-action with nodes needing a maintenance state update
+ *
+ * \param[in,out] data_set  Working set for cluster
+ */
+static void
+add_maintenance_update(pe_working_set_t *data_set)
+{
+    pe_action_t *action = NULL;
+
+    if (add_maintenance_nodes(NULL, data_set)) {
+        crm_trace("Adding maintenance-state update pseudo-action");
+        action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set);
+        pe__set_action_flags(action, pe_action_print_always);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Add XML with nodes that an action is expected to bring down
+ *
+ * If a specified action is expected to bring any nodes down, add an XML block
+ * with their UUIDs. When a node is lost, this allows the controller to
+ * determine whether it was expected.
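+ *
+ * As an illustrative sketch only (the node UUID shown is made up), and
+ * assuming the usual expansions of XML_GRAPH_TAG_DOWNED and XML_CIB_TAG_NODE
+ * used below, fencing a cluster node might add:
+ * \code
+ * <downed>
+ *   <node id="3232238180"/>
+ * </downed>
+ * \endcode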
+ * + * \param[in,out] xml Parent XML tag to add to + * \param[in] action Action to check for downed nodes + * \param[in] data_set Working set for cluster + */ +static void +add_downed_nodes(xmlNode *xml, const pe_action_t *action, + const pe_working_set_t *data_set) +{ + CRM_CHECK(xml && action && action->node && data_set, return); + + if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { + + /* Shutdown makes the action's node down */ + xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); + add_node_to_xml_by_id(action->node->details->id, downed); + + } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { + + /* Fencing makes the action's node and any hosted guest nodes down */ + const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); + + if (pcmk__is_fencing_action(fence)) { + xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); + add_node_to_xml_by_id(action->node->details->id, downed); + pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed); + } + + } else if (action->rsc && action->rsc->is_remote_node + && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { + + /* Stopping a remote connection resource makes connected node down, + * unless it's part of a migration + */ + GList *iter; + pe_action_t *input; + gboolean migrating = FALSE; + + for (iter = action->actions_before; iter != NULL; iter = iter->next) { + input = ((pe_action_wrapper_t *) iter->data)->action; + if (input->rsc && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_casei) + && pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { + migrating = TRUE; + break; + } + } + if (!migrating) { + xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); + add_node_to_xml_by_id(action->rsc->id, downed); + } + } +} + +/*! + * \internal + * \brief Create a transition graph operation key for a clone action + * + * \param[in] action Clone action + * \param[in] interval_ms Action interval in milliseconds + * + * \return Newly allocated string with transition graph operation key + */ +static char * +clone_op_key(const pe_action_t *action, guint interval_ms) +{ + if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_none)) { + const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); + const char *n_task = g_hash_table_lookup(action->meta, + "notify_operation"); + + CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL)); + return pcmk__notify_key(action->rsc->clone_name, n_type, n_task); + + } else if (action->cancel_task != NULL) { + return pcmk__op_key(action->rsc->clone_name, action->cancel_task, + interval_ms); + } else { + return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms); + } +} + +/*! + * \internal + * \brief Add node details to transition graph action XML + * + * \param[in] action Scheduled action + * \param[in,out] xml Transition graph action XML for \p action + */ +static void +add_node_details(const pe_action_t *action, xmlNode *xml) +{ + pe_node_t *router_node = pcmk__connection_host_for_action(action); + + crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname); + crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); + if (router_node != NULL) { + crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); + } +} + +/*! 
+ * \internal + * \brief Add resource details to transition graph action XML + * + * \param[in] action Scheduled action + * \param[in,out] action_xml Transition graph action XML for \p action + */ +static void +add_resource_details(const pe_action_t *action, xmlNode *action_xml) +{ + xmlNode *rsc_xml = NULL; + const char *attr_list[] = { + XML_AGENT_ATTR_CLASS, + XML_AGENT_ATTR_PROVIDER, + XML_ATTR_TYPE + }; + + /* If a resource is locked to a node via shutdown-lock, mark its actions + * so the controller can preserve the lock when the action completes. + */ + if (pcmk__action_locks_rsc_to_node(action)) { + crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + (long long) action->rsc->lock_time); + } + + // List affected resource + + rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); + if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan) + && (action->rsc->clone_name != NULL)) { + /* Use the numbered instance name here, because if there is more + * than one instance on a node, we need to make sure the command + * goes to the right one. + * + * This is important even for anonymous clones, because the clone's + * unique meta-attribute might have just been toggled from on to + * off. + */ + crm_debug("Using orphan clone name %s instead of %s", + action->rsc->id, action->rsc->clone_name); + crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); + crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); + + } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) { + const char *xml_id = ID(action->rsc->xml); + + crm_debug("Using anonymous clone name %s for %s (aka %s)", + xml_id, action->rsc->id, action->rsc->clone_name); + + /* ID is what we'd like client to use + * ID_LONG is what they might know it as instead + * + * ID_LONG is only strictly needed /here/ during the + * transition period until all nodes in the cluster + * are running the new software /and/ have rebooted + * once (meaning that they've only ever spoken to a DC + * supporting this feature). + * + * If anyone toggles the unique flag to 'on', the + * 'instance free' name will correspond to an orphan + * and fall into the clause above instead + */ + crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); + if ((action->rsc->clone_name != NULL) + && !pcmk__str_eq(xml_id, action->rsc->clone_name, + pcmk__str_none)) { + crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); + } else { + crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); + } + + } else { + CRM_ASSERT(action->rsc->clone_name == NULL); + crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); + } + + for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { + crm_xml_add(rsc_xml, attr_list[lpc], + g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); + } +} + +/*! + * \internal + * \brief Add action attributes to transition graph action XML + * + * \param[in,out] action Scheduled action + * \param[in,out] action_xml Transition graph action XML for \p action + */ +static void +add_action_attributes(pe_action_t *action, xmlNode *action_xml) +{ + xmlNode *args_xml = NULL; + + /* We create free-standing XML to start, so we can sort the attributes + * before adding it to action_xml, which keeps the scheduler regression + * test graphs comparable. 
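+     * (The sorted_xml() call at the end of this function performs that
+     * sorted copy into action_xml.)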
+ */ + args_xml = create_xml_node(NULL, XML_TAG_ATTRS); + + crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + g_hash_table_foreach(action->extra, hash2field, args_xml); + + if ((action->rsc != NULL) && (action->node != NULL)) { + // Get the resource instance attributes, evaluated properly for node + GHashTable *params = pe_rsc_params(action->rsc, action->node, + action->rsc->cluster); + + pcmk__substitute_remote_addr(action->rsc, params); + + g_hash_table_foreach(params, hash2smartfield, args_xml); + + } else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) { + GHashTable *params = pe_rsc_params(action->rsc, NULL, + action->rsc->cluster); + + g_hash_table_foreach(params, hash2smartfield, args_xml); + } + + g_hash_table_foreach(action->meta, hash2metafield, args_xml); + if (action->rsc != NULL) { + pe_resource_t *parent = action->rsc; + + while (parent != NULL) { + parent->cmds->add_graph_meta(parent, args_xml); + parent = parent->parent; + } + + pcmk__add_bundle_meta_to_xml(args_xml, action); + + } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none) + && (action->node != NULL)) { + /* Pass the node's attributes as meta-attributes. + * + * @TODO: Determine whether it is still necessary to do this. It was + * added in 33d99707, probably for the libfence-based implementation in + * c9a90bd, which is no longer used. + */ + g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); + } + + sorted_xml(args_xml, action_xml, FALSE); + free_xml(args_xml); +} + +/*! + * \internal + * \brief Create the transition graph XML for a scheduled action + * + * \param[in,out] parent Parent XML element to add action to + * \param[in,out] action Scheduled action + * \param[in] skip_details If false, add action details as sub-elements + * \param[in] data_set Cluster working set + */ +static void +create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, + const pe_working_set_t *data_set) +{ + bool needs_node_info = true; + bool needs_maintenance_info = false; + xmlNode *action_xml = NULL; + + if ((action == NULL) || (data_set == NULL)) { + return; + } + + // Create the top-level element based on task + + if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { + /* All fences need node info; guest node fences are pseudo-events */ + action_xml = create_xml_node(parent, + pcmk_is_set(action->flags, pe_action_pseudo)? 
+ XML_GRAPH_TAG_PSEUDO_EVENT : + XML_GRAPH_TAG_CRM_EVENT); + + } else if (pcmk__str_any_of(action->task, + CRM_OP_SHUTDOWN, + CRM_OP_CLEAR_FAILCOUNT, NULL)) { + action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); + + } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) { + // CIB-only clean-up for shutdown locks + action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); + crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); + + } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { + if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES, + pcmk__str_none)) { + needs_maintenance_info = true; + } + action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); + needs_node_info = false; + + } else { + action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP); + } + + crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); + crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); + + if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) { + char *clone_key = NULL; + guint interval_ms; + + if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, + &interval_ms) != pcmk_rc_ok) { + interval_ms = 0; + } + clone_key = clone_op_key(action, interval_ms); + crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); + crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); + free(clone_key); + } else { + crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); + } + + if (needs_node_info && (action->node != NULL)) { + add_node_details(action, action_xml); + g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), + strdup(action->node->details->uname)); + g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), + strdup(action->node->details->id)); + } + + if (skip_details) { + return; + } + + if ((action->rsc != NULL) + && !pcmk_is_set(action->flags, pe_action_pseudo)) { + + // This is a real resource action, so add resource details + add_resource_details(action, action_xml); + } + + /* List any attributes in effect */ + add_action_attributes(action, action_xml); + + /* List any nodes this action is expected to make down */ + if (needs_node_info && (action->node != NULL)) { + add_downed_nodes(action_xml, action, data_set); + } + + if (needs_maintenance_info) { + add_maintenance_nodes(action_xml, data_set); + } +} + +/*! + * \internal + * \brief Check whether an action should be added to the transition graph + * + * \param[in] action Action to check + * + * \return true if action should be added to graph, otherwise false + */ +static bool +should_add_action_to_graph(const pe_action_t *action) +{ + if (!pcmk_is_set(action->flags, pe_action_runnable)) { + crm_trace("Ignoring action %s (%d): unrunnable", + action->uuid, action->id); + return false; + } + + if (pcmk_is_set(action->flags, pe_action_optional) + && !pcmk_is_set(action->flags, pe_action_print_always)) { + crm_trace("Ignoring action %s (%d): optional", + action->uuid, action->id); + return false; + } + + /* Actions for unmanaged resources should be excluded from the graph, + * with the exception of monitors and cancellation of recurring monitors. + */ + if ((action->rsc != NULL) + && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) + && !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_none)) { + const char *interval_ms_s; + + /* A cancellation of a recurring monitor will get here because the task + * is cancel rather than monitor, but the interval can still be used to + * recognize it. 
The interval has been normalized to milliseconds by
+         * this point, so a string comparison is sufficient.
+         */
+        interval_ms_s = g_hash_table_lookup(action->meta,
+                                            XML_LRM_ATTR_INTERVAL_MS);
+        if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) {
+            crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
+                      action->uuid, action->id, action->rsc->id);
+            return false;
+        }
+    }
+
+    /* Always add pseudo-actions, fence actions, and shutdown actions (already
+     * determined to be required and runnable by this point)
+     */
+    if (pcmk_is_set(action->flags, pe_action_pseudo)
+        || pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN,
+                                NULL)) {
+        return true;
+    }
+
+    if (action->node == NULL) {
+        pe_err("Skipping action %s (%d) "
+               "because it was not allocated to a node (bug?)",
+               action->uuid, action->id);
+        pcmk__log_action("Unallocated", action, false);
+        return false;
+    }
+
+    if (pcmk_is_set(action->flags, pe_action_dc)) {
+        crm_trace("Action %s (%d) should be dumped: "
+                  "can run on DC instead of %s",
+                  action->uuid, action->id, pe__node_name(action->node));
+
+    } else if (pe__is_guest_node(action->node)
+               && !action->node->details->remote_requires_reset) {
+        crm_trace("Action %s (%d) should be dumped: "
+                  "assuming it will be runnable on guest %s",
+                  action->uuid, action->id, pe__node_name(action->node));
+
+    } else if (!action->node->details->online) {
+        pe_err("Skipping action %s (%d) "
+               "because it was scheduled for an offline node (bug?)",
+               action->uuid, action->id);
+        pcmk__log_action("Offline node", action, false);
+        return false;
+
+    } else if (action->node->details->unclean) {
+        pe_err("Skipping action %s (%d) "
+               "because it was scheduled for an unclean node (bug?)",
+               action->uuid, action->id);
+        pcmk__log_action("Unclean node", action, false);
+        return false;
+    }
+    return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an ordering's flags can change an action
+ *
+ * \param[in] ordering  Ordering to check
+ *
+ * \return true if ordering has flags that can change an action, false otherwise
+ */
+static bool
+ordering_can_change_actions(const pe_action_wrapper_t *ordering)
+{
+    return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed
+                                                |pe_order_implies_then_printed
+                                                |pe_order_optional));
+}
+
+/*!
+ * \internal
+ * \brief Check whether an action input should be in the transition graph
+ *
+ * \param[in]     action  Action to check
+ * \param[in,out] input   Action input to check
+ *
+ * \return true if input should be in graph, false otherwise
+ * \note This function may not only check an input, but also disable it under
+ *       certain circumstances (load or anti-colocation orderings that are not
+ *       needed).
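+ *       Here, "disabling" means resetting the input's ordering type to
+ *       pe_order_none, as the load and anti-colocation branches below do.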
+ */
+static bool
+should_add_input_to_graph(const pe_action_t *action, pe_action_wrapper_t *input)
+{
+    if (input->state == pe_link_dumped) {
+        return true;
+    }
+
+    if (input->type == pe_order_none) {
+        crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "ordering disabled",
+                  action->uuid, action->id,
+                  input->action->uuid, input->action->id);
+        return false;
+
+    } else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
+               && !ordering_can_change_actions(input)) {
+        crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "optional and input unrunnable",
+                  action->uuid, action->id,
+                  input->action->uuid, input->action->id);
+        return false;
+
+    } else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
+               && pcmk_is_set(input->type, pe_order_one_or_more)) {
+        crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "one-or-more and input unrunnable",
+                  action->uuid, action->id,
+                  input->action->uuid, input->action->id);
+        return false;
+
+    } else if (pcmk_is_set(input->type, pe_order_implies_first_migratable)
+               && !pcmk_is_set(input->action->flags, pe_action_runnable)) {
+        crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "implies input migratable but input unrunnable",
+                  action->uuid, action->id,
+                  input->action->uuid, input->action->id);
+        return false;
+
+    } else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable)
+               && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) {
+        crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "only if input unmigratable but input migratable",
+                  action->uuid, action->id,
+                  input->action->uuid, input->action->id);
+        return false;
+
+    } else if ((input->type == pe_order_optional)
+               && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)
+               && pcmk__ends_with(input->action->uuid, "_stop_0")) {
+        crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "optional but stop in migration",
+                  action->uuid, action->id,
+                  input->action->uuid, input->action->id);
+        return false;
+
+    } else if (input->type == pe_order_load) {
+        pe_node_t *input_node = input->action->node;
+
+        // load orderings are relevant only if actions are for the same node
+
+        if (action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)) {
+            pe_node_t *allocated = action->rsc->allocated_to;
+
+            /* For load_stopped -> migrate_to orderings, we care about where
+             * the resource has been allocated, not where it will be executed.
+             */
+            if ((input_node == NULL) || (allocated == NULL)
+                || (input_node->details != allocated->details)) {
+                crm_trace("Ignoring %s (%d) input %s (%d): "
+                          "load ordering node mismatch %s vs %s",
+                          action->uuid, action->id,
+                          input->action->uuid, input->action->id,
+                          (allocated? allocated->details->uname : "<none>"),
+                          (input_node? input_node->details->uname : "<none>"));
+                input->type = pe_order_none;
+                return false;
+            }
+
+        } else if ((input_node == NULL) || (action->node == NULL)
+                   || (input_node->details != action->node->details)) {
+            crm_trace("Ignoring %s (%d) input %s (%d): "
+                      "load ordering node mismatch %s vs %s",
+                      action->uuid, action->id,
+                      input->action->uuid, input->action->id,
+                      (action->node? action->node->details->uname : "<none>"),
+                      (input_node? input_node->details->uname : "<none>"));
+            input->type = pe_order_none;
+            return false;
+
+        } else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
+            crm_trace("Ignoring %s (%d) input %s (%d): "
+                      "load ordering input optional",
+                      action->uuid, action->id,
+                      input->action->uuid, input->action->id);
+            input->type = pe_order_none;
+            return false;
+        }
+
+    } else if (input->type == pe_order_anti_colocation) {
+        if (input->action->node && action->node
+            && (input->action->node->details != action->node->details)) {
+            crm_trace("Ignoring %s (%d) input %s (%d): "
+                      "anti-colocation node mismatch %s vs %s",
+                      action->uuid, action->id,
+                      input->action->uuid, input->action->id,
+                      pe__node_name(action->node),
+                      pe__node_name(input->action->node));
+            input->type = pe_order_none;
+            return false;
+
+        } else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
+            crm_trace("Ignoring %s (%d) input %s (%d): "
+                      "anti-colocation input optional",
+                      action->uuid, action->id,
+                      input->action->uuid, input->action->id);
+            input->type = pe_order_none;
+            return false;
+        }
+
+    } else if (input->action->rsc
+               && input->action->rsc != action->rsc
+               && pcmk_is_set(input->action->rsc->flags, pe_rsc_failed)
+               && !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed)
+               && pcmk__ends_with(input->action->uuid, "_stop_0")
+               && action->rsc && pe_rsc_is_clone(action->rsc)) {
+        crm_warn("Ignoring requirement that %s complete before %s:"
+                 " unmanaged failed resources cannot prevent clone shutdown",
+                 input->action->uuid, action->uuid);
+        return false;
+
+    } else if (pcmk_is_set(input->action->flags, pe_action_optional)
+               && !pcmk_any_flags_set(input->action->flags,
+                                      pe_action_print_always|pe_action_dumped)
+               && !should_add_action_to_graph(input->action)) {
+        crm_trace("Ignoring %s (%d) input %s (%d): "
+                  "input optional",
+                  action->uuid, action->id,
+                  input->action->uuid, input->action->id);
+        return false;
+    }
+
+    crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x",
+              action->uuid, action->id, action_type_str(input->action->flags),
+              input->action->uuid, input->action->id,
+              action_node_str(input->action),
+              action_runnable_str(input->action->flags),
+              action_optional_str(input->action->flags), input->type);
+    return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an ordering creates an ordering loop
+ *
+ * \param[in]     init_action  "First" action in ordering
+ * \param[in]     action       Callers should always set this the same as
+ *                             \p init_action (this function may use a different
+ *                             value for recursive calls)
+ * \param[in,out] input        Action wrapper for "then" action in ordering
+ *
+ * \return true if the ordering creates a loop, otherwise false
+ */
+bool
+pcmk__graph_has_loop(const pe_action_t *init_action, const pe_action_t *action,
+                     pe_action_wrapper_t *input)
+{
+    bool has_loop = false;
+
+    if (pcmk_is_set(input->action->flags, pe_action_tracking)) {
+        crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)",
+                  input->action->uuid,
+                  input->action->node? input->action->node->details->uname : "",
+                  action->uuid,
+                  action->node? action->node->details->uname : "",
+                  input->type);
+        return false;
+    }
+
+    // Don't need to check inputs that won't be used
+    if (!should_add_input_to_graph(action, input)) {
+        return false;
+    }
+
+    if (input->action == init_action) {
+        crm_debug("Input loop found in %s@%s ->...-> %s@%s",
+                  action->uuid,
+                  action->node? action->node->details->uname : "",
+                  init_action->uuid,
+                  init_action->node?
init_action->node->details->uname : "");
+        return true;
+    }
+
+    pe__set_action_flags(input->action, pe_action_tracking);
+
+    crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x) "
+              "for graph loop with %s@%s",
+              action->uuid,
+              action->node? action->node->details->uname : "",
+              input->action->uuid,
+              input->action->node? input->action->node->details->uname : "",
+              input->type,
+              init_action->uuid,
+              init_action->node? init_action->node->details->uname : "");
+
+    // Recursively check input itself for loops
+    for (GList *iter = input->action->actions_before;
+         iter != NULL; iter = iter->next) {
+
+        if (pcmk__graph_has_loop(init_action, input->action,
+                                 (pe_action_wrapper_t *) iter->data)) {
+            // Recursive call already logged a debug message
+            has_loop = true;
+            break;
+        }
+    }
+
+    pe__clear_action_flags(input->action, pe_action_tracking);
+
+    if (!has_loop) {
+        crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)",
+                  input->action->uuid,
+                  input->action->node? input->action->node->details->uname : "",
+                  action->uuid,
+                  action->node? action->node->details->uname : "",
+                  input->type);
+    }
+    return has_loop;
+}
+
+/*!
+ * \internal
+ * \brief Create a synapse XML element for a transition graph
+ *
+ * \param[in]     action    Action that synapse is for
+ * \param[in,out] data_set  Cluster working set containing graph
+ *
+ * \return Newly added XML element for new graph synapse
+ */
+static xmlNode *
+create_graph_synapse(const pe_action_t *action, pe_working_set_t *data_set)
+{
+    int synapse_priority = 0;
+    xmlNode *syn = create_xml_node(data_set->graph, "synapse");
+
+    crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse);
+    data_set->num_synapse++;
+
+    if (action->rsc != NULL) {
+        synapse_priority = action->rsc->priority;
+    }
+    if (action->priority > synapse_priority) {
+        synapse_priority = action->priority;
+    }
+    if (synapse_priority > 0) {
+        crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority);
+    }
+    return syn;
+}
+
+/*!
+ * \internal
+ * \brief Add an action to the transition graph XML if appropriate
+ *
+ * \param[in,out] data       Action to possibly add
+ * \param[in,out] user_data  Cluster working set
+ *
+ * \note This will de-duplicate the action inputs, meaning that the
+ *       pe_action_wrapper_t:type flags can no longer be relied on to retain
+ *       their original settings. That means this MUST be called after
+ *       pcmk__apply_orderings() is complete, and nothing after this should rely
+ *       on those type flags. (For example, some code looks for type equal to
+ *       some flag rather than whether the flag is set, and some code looks for
+ *       particular combinations of flags -- such code must be done before
+ *       pcmk__create_graph().)
+ */
+static void
+add_action_to_graph(gpointer data, gpointer user_data)
+{
+    pe_action_t *action = (pe_action_t *) data;
+    pe_working_set_t *data_set = (pe_working_set_t *) user_data;
+
+    xmlNode *syn = NULL;
+    xmlNode *set = NULL;
+    xmlNode *in = NULL;
+
+    /* If we haven't already, de-duplicate inputs (even if we won't be adding
+     * the action to the graph, so that crm_simulate's dot graphs don't have
+     * duplicates).
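+     * The pe_action_dedup flag set below guards against repeating that work
+     * when an action is visited more than once.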
+ */ + if (!pcmk_is_set(action->flags, pe_action_dedup)) { + pcmk__deduplicate_action_inputs(action); + pe__set_action_flags(action, pe_action_dedup); + } + + if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or + || !should_add_action_to_graph(action)) { // shouldn't be added + return; + } + pe__set_action_flags(action, pe_action_dumped); + + crm_trace("Adding action %d (%s%s%s) to graph", + action->id, action->uuid, + ((action->node == NULL)? "" : " on "), + ((action->node == NULL)? "" : action->node->details->uname)); + + syn = create_graph_synapse(action, data_set); + set = create_xml_node(syn, "action_set"); + in = create_xml_node(syn, "inputs"); + + create_graph_action(set, action, false, data_set); + + for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { + pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data; + + if (should_add_input_to_graph(action, input)) { + xmlNode *input_xml = create_xml_node(in, "trigger"); + + input->state = pe_link_dumped; + create_graph_action(input_xml, input->action, true, data_set); + } + } +} + +static int transition_id = -1; + +/*! + * \internal + * \brief Log a message after calculating a transition + * + * \param[in] filename Where transition input is stored + */ +void +pcmk__log_transition_summary(const char *filename) +{ + if (was_processing_error) { + crm_err("Calculated transition %d (with errors)%s%s", + transition_id, + (filename == NULL)? "" : ", saving inputs in ", + (filename == NULL)? "" : filename); + + } else if (was_processing_warning) { + crm_warn("Calculated transition %d (with warnings)%s%s", + transition_id, + (filename == NULL)? "" : ", saving inputs in ", + (filename == NULL)? "" : filename); + + } else { + crm_notice("Calculated transition %d%s%s", + transition_id, + (filename == NULL)? "" : ", saving inputs in ", + (filename == NULL)? "" : filename); + } + if (crm_config_error) { + crm_notice("Configuration errors found during scheduler processing," + " please run \"crm_verify -L\" to identify issues"); + } +} + +/*! + * \internal + * \brief Add a resource's actions to the transition graph + * + * \param[in,out] rsc Resource whose actions should be added + */ +void +pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc) +{ + GList *iter = NULL; + + CRM_ASSERT(rsc != NULL); + pe_rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); + + // First add the resource's own actions + g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster); + + // Then recursively add its children's actions (appropriate to variant) + for (iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) iter->data; + + child_rsc->cmds->add_actions_to_graph(child_rsc); + } +} + +/*! 
+ * \internal + * \brief Create a transition graph with all cluster actions needed + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__create_graph(pe_working_set_t *data_set) +{ + GList *iter = NULL; + const char *value = NULL; + long long limit = 0LL; + + transition_id++; + crm_trace("Creating transition graph %d", transition_id); + + data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); + + value = pe_pref(data_set->config_hash, "cluster-delay"); + crm_xml_add(data_set->graph, "cluster-delay", value); + + value = pe_pref(data_set->config_hash, "stonith-timeout"); + crm_xml_add(data_set->graph, "stonith-timeout", value); + + crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); + + if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { + crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); + } else { + crm_xml_add(data_set->graph, "failed-start-offset", "1"); + } + + value = pe_pref(data_set->config_hash, "batch-limit"); + crm_xml_add(data_set->graph, "batch-limit", value); + + crm_xml_add_int(data_set->graph, "transition_id", transition_id); + + value = pe_pref(data_set->config_hash, "migration-limit"); + if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { + crm_xml_add(data_set->graph, "migration-limit", value); + } + + if (data_set->recheck_by > 0) { + char *recheck_epoch = NULL; + + recheck_epoch = crm_strdup_printf("%llu", + (long long) data_set->recheck_by); + crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); + free(recheck_epoch); + } + + /* The following code will de-duplicate action inputs, so nothing past this + * should rely on the action input type flags retaining their original + * values. + */ + + // Add resource actions to graph + for (iter = data_set->resources; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + pe_rsc_trace(rsc, "Processing actions for %s", rsc->id); + rsc->cmds->add_actions_to_graph(rsc); + } + + // Add pseudo-action for list of nodes with maintenance state update + add_maintenance_update(data_set); + + // Add non-resource (node) actions + for (iter = data_set->actions; iter != NULL; iter = iter->next) { + pe_action_t *action = (pe_action_t *) iter->data; + + if ((action->rsc != NULL) + && (action->node != NULL) + && action->node->details->shutdown + && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) + && !pcmk_any_flags_set(action->flags, + pe_action_optional|pe_action_runnable) + && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)) { + /* Eventually we should just ignore the 'fence' case, but for now + * it's the best way to detect (in CTS) when CIB resource updates + * are being lost. + */ + if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) + || (data_set->no_quorum_policy == no_quorum_ignore)) { + crm_crit("Cannot %s %s because of %s:%s%s (%s)", + action->node->details->unclean? "fence" : "shut down", + pe__node_name(action->node), action->rsc->id, + pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged", + pcmk_is_set(action->rsc->flags, pe_rsc_failed)? 
" failed" : "", + action->uuid); + } + } + + add_action_to_graph((gpointer) action, (gpointer) data_set); + } + + crm_log_xml_trace(data_set->graph, "graph"); +} diff --git a/lib/pacemaker/pcmk_injections.c b/lib/pacemaker/pcmk_injections.c new file mode 100644 index 0000000..ea8fc17 --- /dev/null +++ b/lib/pacemaker/pcmk_injections.c @@ -0,0 +1,784 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> + +#include <sys/stat.h> +#include <sys/param.h> +#include <sys/types.h> +#include <dirent.h> + +#include <crm/crm.h> +#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event() +#include <crm/cib.h> +#include <crm/cib/internal.h> +#include <crm/common/util.h> +#include <crm/common/iso8601.h> +#include <crm/common/xml_internal.h> +#include <crm/lrmd_internal.h> +#include <crm/pengine/status.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +bool pcmk__simulate_node_config = false; + +#define XPATH_NODE_CONFIG "//" XML_CIB_TAG_NODE "[@" XML_ATTR_UNAME "='%s']" +#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" +#define XPATH_RSC_HISTORY XPATH_NODE_STATE \ + "//" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" + + +/*! + * \internal + * \brief Inject a fictitious transient node attribute into scheduler input + * + * \param[in,out] out Output object for displaying error messages + * \param[in,out] cib_node node_state XML to inject attribute into + * \param[in] name Transient node attribute name to inject + * \param[in] value Transient node attribute value to inject + */ +static void +inject_transient_attr(pcmk__output_t *out, xmlNode *cib_node, + const char *name, const char *value) +{ + xmlNode *attrs = NULL; + xmlNode *instance_attrs = NULL; + const char *node_uuid = ID(cib_node); + + out->message(out, "inject-attr", name, value, cib_node); + + attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); + if (attrs == NULL) { + attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS); + crm_xml_add(attrs, XML_ATTR_ID, node_uuid); + } + + instance_attrs = first_named_child(attrs, XML_TAG_ATTR_SETS); + if (instance_attrs == NULL) { + instance_attrs = create_xml_node(attrs, XML_TAG_ATTR_SETS); + crm_xml_add(instance_attrs, XML_ATTR_ID, node_uuid); + } + + crm_create_nvpair_xml(instance_attrs, NULL, name, value); +} + +/*! 
+ * \internal + * \brief Inject a fictitious fail count into a scheduler input + * + * \param[in,out] out Output object for displaying error messages + * \param[in,out] cib_node Node state XML to inject into + * \param[in] resource ID of resource for fail count to inject + * \param[in] task Action name for fail count to inject + * \param[in] interval_ms Action interval (in milliseconds) for fail count + * \param[in] rc Action result for fail count to inject (if 0, or + * 7 when interval_ms is 0, inject nothing) + */ +void +pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node, + const char *resource, const char *task, + guint interval_ms, int rc) +{ + if (rc == 0) { + return; + + } else if ((rc == 7) && (interval_ms == 0)) { + return; + + } else { + char *name = NULL; + char *now = pcmk__ttoa(time(NULL)); + + name = pcmk__failcount_name(resource, task, interval_ms); + inject_transient_attr(out, cib_node, name, "value++"); + free(name); + + name = pcmk__lastfailure_name(resource, task, interval_ms); + inject_transient_attr(out, cib_node, name, now); + free(name); + + free(now); + } +} + +/*! + * \internal + * \brief Create a CIB configuration entry for a fictitious node + * + * \param[in,out] cib_conn CIB object to use + * \param[in] node Node name to use + */ +static void +create_node_entry(cib_t *cib_conn, const char *node) +{ + int rc = pcmk_ok; + char *xpath = crm_strdup_printf(XPATH_NODE_CONFIG, node); + + rc = cib_conn->cmds->query(cib_conn, xpath, NULL, + cib_xpath|cib_sync_call|cib_scope_local); + + if (rc == -ENXIO) { // Only add if not already existing + xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE); + + crm_xml_add(cib_object, XML_ATTR_ID, node); // Use node name as ID + crm_xml_add(cib_object, XML_ATTR_UNAME, node); + cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object, + cib_sync_call|cib_scope_local); + /* Not bothering with subsequent query to see if it exists, + we'll bomb out later in the call to query_node_uuid()... */ + + free_xml(cib_object); + } + + free(xpath); +} + +/*! + * \internal + * \brief Synthesize a fake executor event for an action + * + * \param[in] cib_resource XML for any existing resource action history + * \param[in] task Name of action to synthesize + * \param[in] interval_ms Interval of action to synthesize + * \param[in] outcome Result of action to synthesize + * + * \return Newly allocated executor event + * \note It is the caller's responsibility to free the result with + * lrmd_free_event(). + */ +static lrmd_event_data_t * +create_op(const xmlNode *cib_resource, const char *task, guint interval_ms, + int outcome) +{ + lrmd_event_data_t *op = NULL; + xmlNode *xop = NULL; + + op = lrmd_new_event(ID(cib_resource), task, interval_ms); + lrmd__set_result(op, outcome, PCMK_EXEC_DONE, "Simulated action result"); + op->params = NULL; // Not needed for simulation purposes + op->t_run = (unsigned int) time(NULL); + op->t_rcchange = op->t_run; + + // Use a call ID higher than any existing history entries + op->call_id = 0; + for (xop = pcmk__xe_first_child(cib_resource); xop != NULL; + xop = pcmk__xe_next(xop)) { + + int tmp = 0; + + crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); + if (tmp > op->call_id) { + op->call_id = tmp; + } + } + op->call_id++; + + return op; +} + +/*! 
+ * \internal + * \brief Inject a fictitious resource history entry into a scheduler input + * + * \param[in,out] cib_resource Resource history XML to inject entry into + * \param[in,out] op Action result to inject + * \param[in] target_rc Expected result for action to inject + * + * \return XML of injected resource history entry + */ +xmlNode * +pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t *op, + int target_rc) +{ + return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET, + target_rc, NULL, crm_system_name); +} + +/*! + * \internal + * \brief Inject a fictitious node into a scheduler input + * + * \param[in,out] cib_conn Scheduler input CIB to inject node into + * \param[in] node Name of node to inject + * \param[in] uuid UUID of node to inject + * + * \return XML of node_state entry for new node + * \note If the global pcmk__simulate_node_config has been set to true, a + * node entry in the configuration section will be added, as well as a + * node state entry in the status section. + */ +xmlNode * +pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid) +{ + int rc = pcmk_ok; + xmlNode *cib_object = NULL; + char *xpath = crm_strdup_printf(XPATH_NODE_STATE, node); + bool duplicate = false; + char *found_uuid = NULL; + + if (pcmk__simulate_node_config) { + create_node_entry(cib_conn, node); + } + + rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, + cib_xpath|cib_sync_call|cib_scope_local); + + if ((cib_object != NULL) && (ID(cib_object) == NULL)) { + crm_err("Detected multiple node_state entries for xpath=%s, bailing", + xpath); + duplicate = true; + goto done; + } + + if (rc == -ENXIO) { + if (uuid == NULL) { + query_node_uuid(cib_conn, node, &found_uuid, NULL); + } else { + found_uuid = strdup(uuid); + } + + if (found_uuid) { + char *xpath_by_uuid = crm_strdup_printf("//" XML_CIB_TAG_STATE "[@" XML_ATTR_ID "='%s']", + found_uuid); + + // It's possible that a node_state entry doesn't have an uname yet. + rc = cib_conn->cmds->query(cib_conn, xpath_by_uuid, &cib_object, + cib_xpath|cib_sync_call|cib_scope_local); + + if ((cib_object != NULL) && (ID(cib_object) == NULL)) { + crm_err("Detected multiple node_state entries for xpath=%s, bailing", + xpath_by_uuid); + duplicate = true; + free(xpath_by_uuid); + goto done; + + } else if (cib_object != NULL) { + crm_xml_add(cib_object, XML_ATTR_UNAME, node); + + rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, cib_object, + cib_sync_call|cib_scope_local); + } + + free(xpath_by_uuid); + } + } + + if (rc == -ENXIO) { + cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE); + crm_xml_add(cib_object, XML_ATTR_ID, found_uuid); + crm_xml_add(cib_object, XML_ATTR_UNAME, node); + cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object, + cib_sync_call|cib_scope_local); + free_xml(cib_object); + + rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, + cib_xpath|cib_sync_call|cib_scope_local); + crm_trace("Injecting node state for %s (rc=%d)", node, rc); + } + +done: + free(found_uuid); + free(xpath); + + if (duplicate) { + crm_log_xml_warn(cib_object, "Duplicates"); + crm_exit(CRM_EX_SOFTWARE); + return NULL; // not reached, but makes static analysis happy + } + + CRM_ASSERT(rc == pcmk_ok); + return cib_object; +} + +/*! 
+ * \internal + * \brief Inject a fictitious node state change into a scheduler input + * + * \param[in,out] cib_conn Scheduler input CIB to inject into + * \param[in] node Name of node to inject change for + * \param[in] up If true, change state to online, otherwise offline + * + * \return XML of changed (or added) node state entry + */ +xmlNode * +pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up) +{ + xmlNode *cib_node = pcmk__inject_node(cib_conn, node, NULL); + + if (up) { + pcmk__xe_set_props(cib_node, + XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES, + XML_NODE_IS_PEER, ONLINESTATUS, + XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER, + XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER, + NULL); + } else { + pcmk__xe_set_props(cib_node, + XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO, + XML_NODE_IS_PEER, OFFLINESTATUS, + XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN, + XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN, + NULL); + } + crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name); + return cib_node; +} + +/*! + * \internal + * \brief Check whether a node has history for a given resource + * + * \param[in,out] cib_node Node state XML to check + * \param[in] resource Resource name to check for + * + * \return Resource's lrm_resource XML entry beneath \p cib_node if found, + * otherwise NULL + */ +static xmlNode * +find_resource_xml(xmlNode *cib_node, const char *resource) +{ + const char *node = crm_element_value(cib_node, XML_ATTR_UNAME); + char *xpath = crm_strdup_printf(XPATH_RSC_HISTORY, node, resource); + xmlNode *match = get_xpath_object(xpath, cib_node, LOG_TRACE); + + free(xpath); + return match; +} + +/*! + * \internal + * \brief Inject a resource history element into a scheduler input + * + * \param[in,out] out Output object for displaying error messages + * \param[in,out] cib_node Node state XML to inject resource history entry into + * \param[in] resource ID (in configuration) of resource to inject + * \param[in] lrm_name ID as used in history (could be clone instance) + * \param[in] rclass Resource agent class of resource to inject + * \param[in] rtype Resource agent type of resource to inject + * \param[in] rprovider Resource agent provider of resource to inject + * + * \return XML of injected resource history element + * \note If a history element already exists under either \p resource or + * \p lrm_name, this will return it rather than injecting a new one. + */ +xmlNode * +pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node, + const char *resource, const char *lrm_name, + const char *rclass, const char *rtype, + const char *rprovider) +{ + xmlNode *lrm = NULL; + xmlNode *container = NULL; + xmlNode *cib_resource = NULL; + + cib_resource = find_resource_xml(cib_node, resource); + if (cib_resource != NULL) { + /* If an existing LRM history entry uses the resource name, + * continue using it, even if lrm_name is different. + */ + return cib_resource; + } + + // Check for history entry under preferred name + if (strcmp(resource, lrm_name) != 0) { + cib_resource = find_resource_xml(cib_node, lrm_name); + if (cib_resource != NULL) { + return cib_resource; + } + } + + if ((rclass == NULL) || (rtype == NULL)) { + // @TODO query configuration for class, provider, type + out->err(out, "Resource %s not found in the status section of %s." 
+ " Please supply the class and type to continue", resource, ID(cib_node)); + return NULL; + + } else if (!pcmk__strcase_any_of(rclass, + PCMK_RESOURCE_CLASS_OCF, + PCMK_RESOURCE_CLASS_STONITH, + PCMK_RESOURCE_CLASS_SERVICE, + PCMK_RESOURCE_CLASS_UPSTART, + PCMK_RESOURCE_CLASS_SYSTEMD, + PCMK_RESOURCE_CLASS_LSB, NULL)) { + out->err(out, "Invalid class for %s: %s", resource, rclass); + return NULL; + + } else if (pcmk_is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider) + && (rprovider == NULL)) { + // @TODO query configuration for provider + out->err(out, "Please specify the provider for resource %s", resource); + return NULL; + } + + crm_info("Injecting new resource %s into node state '%s'", + lrm_name, ID(cib_node)); + + lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); + if (lrm == NULL) { + const char *node_uuid = ID(cib_node); + + lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); + crm_xml_add(lrm, XML_ATTR_ID, node_uuid); + } + + container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); + if (container == NULL) { + container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); + } + + cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); + + // If we're creating a new entry, use the preferred name + crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name); + + crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); + crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); + crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype); + + return cib_resource; +} + +static int +find_ticket_state(pcmk__output_t *out, cib_t *the_cib, const char *ticket_id, + xmlNode **ticket_state_xml) +{ + int rc = pcmk_ok; + xmlNode *xml_search = NULL; + + GString *xpath = g_string_sized_new(256); + + CRM_ASSERT(ticket_state_xml != NULL); + *ticket_state_xml = NULL; + + g_string_append(xpath, + "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS + "/" XML_CIB_TAG_TICKETS); + + if (ticket_id) { + pcmk__g_strcat(xpath, + "/" XML_CIB_TAG_TICKET_STATE + "[@" XML_ATTR_ID "=\"", ticket_id, "\"]", NULL); + } + rc = the_cib->cmds->query(the_cib, (const char *) xpath->str, &xml_search, + cib_sync_call|cib_scope_local|cib_xpath); + g_string_free(xpath, TRUE); + + if (rc != pcmk_ok) { + return rc; + } + + crm_log_xml_debug(xml_search, "Match"); + if (xml_has_children(xml_search) && (ticket_id != NULL)) { + out->err(out, "Multiple ticket_states match ticket_id=%s", ticket_id); + } + *ticket_state_xml = xml_search; + + return rc; +} + +/*! 
+ * \internal + * \brief Inject a ticket attribute into ticket state + * + * \param[in,out] out Output object for displaying error messages + * \param[in] ticket_id Ticket whose state should be changed + * \param[in] attr_name Ticket attribute name to inject + * \param[in] attr_value Boolean value of ticket attribute to inject + * \param[in,out] cib CIB object to use + * + * \return Standard Pacemaker return code + */ +static int +set_ticket_state_attr(pcmk__output_t *out, const char *ticket_id, + const char *attr_name, bool attr_value, cib_t *cib) +{ + int rc = pcmk_rc_ok; + xmlNode *xml_top = NULL; + xmlNode *ticket_state_xml = NULL; + + // Check for an existing ticket state entry + rc = find_ticket_state(out, cib, ticket_id, &ticket_state_xml); + rc = pcmk_legacy2rc(rc); + + if (rc == pcmk_rc_ok) { // Ticket state found, use it + crm_debug("Injecting attribute into existing ticket state %s", + ticket_id); + xml_top = ticket_state_xml; + + } else if (rc == ENXIO) { // No ticket state, create it + xmlNode *xml_obj = NULL; + + xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); + xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); + ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); + crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id); + + } else { // Error + return rc; + } + + // Add the attribute to the ticket state + pcmk__xe_set_bool_attr(ticket_state_xml, attr_name, attr_value); + crm_log_xml_debug(xml_top, "Update"); + + // Commit the change to the CIB + rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, + cib_sync_call|cib_scope_local); + rc = pcmk_legacy2rc(rc); + + free_xml(xml_top); + return rc; +} + +/*! + * \internal + * \brief Inject a fictitious action into the cluster + * + * \param[in,out] out Output object for displaying error messages + * \param[in] spec Action specification to inject + * \param[in,out] cib CIB object for scheduler input + * \param[in] data_set Cluster working set + */ +static void +inject_action(pcmk__output_t *out, const char *spec, cib_t *cib, + const pe_working_set_t *data_set) +{ + int rc; + int outcome = PCMK_OCF_OK; + guint interval_ms = 0; + + char *key = NULL; + char *node = NULL; + char *task = NULL; + char *resource = NULL; + + const char *rtype = NULL; + const char *rclass = NULL; + const char *rprovider = NULL; + + xmlNode *cib_op = NULL; + xmlNode *cib_node = NULL; + xmlNode *cib_resource = NULL; + const pe_resource_t *rsc = NULL; + lrmd_event_data_t *op = NULL; + + out->message(out, "inject-spec", spec); + + key = calloc(1, strlen(spec) + 1); + node = calloc(1, strlen(spec) + 1); + rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); + if (rc != 3) { + out->err(out, "Invalid operation spec: %s. 
Only found %d fields",
+                 spec, rc);
+        goto done;
+    }
+
+    parse_op_key(key, &resource, &task, &interval_ms);
+
+    rsc = pe_find_resource(data_set->resources, resource);
+    if (rsc == NULL) {
+        out->err(out, "Invalid resource name: %s", resource);
+        goto done;
+    }
+
+    rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+    rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
+    rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
+
+    cib_node = pcmk__inject_node(cib, node, NULL);
+    CRM_ASSERT(cib_node != NULL);
+
+    pcmk__inject_failcount(out, cib_node, resource, task, interval_ms, outcome);
+
+    cib_resource = pcmk__inject_resource_history(out, cib_node,
+                                                 resource, resource,
+                                                 rclass, rtype, rprovider);
+    CRM_ASSERT(cib_resource != NULL);
+
+    op = create_op(cib_resource, task, interval_ms, outcome);
+    CRM_ASSERT(op != NULL);
+
+    cib_op = pcmk__inject_action_result(cib_resource, op, 0);
+    CRM_ASSERT(cib_op != NULL);
+    lrmd_free_event(op);
+
+    rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+                           cib_sync_call|cib_scope_local);
+    CRM_ASSERT(rc == pcmk_ok);
+
+done:
+    free(resource); // allocated by parse_op_key()
+    free(task);
+    free(node);
+    free(key);
+}
+
+/*!
+ * \internal
+ * \brief Inject fictitious scheduler inputs
+ *
+ * \param[in,out] data_set    Cluster working set
+ * \param[in,out] cib         CIB object for scheduler input to modify
+ * \param[in]     injections  Injections to apply
+ */
+void
+pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
+                             const pcmk_injections_t *injections)
+{
+    int rc = pcmk_ok;
+    const GList *iter = NULL;
+    xmlNode *cib_node = NULL;
+    pcmk__output_t *out = data_set->priv;
+
+    out->message(out, "inject-modify-config", injections->quorum,
+                 injections->watchdog);
+    if (injections->quorum != NULL) {
+        xmlNode *top = create_xml_node(NULL, XML_TAG_CIB);
+
+        /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */
+        crm_xml_add(top, XML_ATTR_HAVE_QUORUM, injections->quorum);
+
+        rc = cib->cmds->modify(cib, NULL, top, cib_sync_call|cib_scope_local);
+        CRM_ASSERT(rc == pcmk_ok);
+        free_xml(top); // the CIB API copies the request, so free our copy
+    }
+
+    if (injections->watchdog != NULL) {
+        rc = cib__update_node_attr(out, cib, cib_sync_call|cib_scope_local,
+                                   XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
+                                   XML_ATTR_HAVE_WATCHDOG, injections->watchdog,
+                                   NULL, NULL);
+        CRM_ASSERT(rc == pcmk_rc_ok);
+    }
+
+    for (iter = injections->node_up; iter != NULL; iter = iter->next) {
+        const char *node = (const char *) iter->data;
+
+        out->message(out, "inject-modify-node", "Online", node);
+
+        cib_node = pcmk__inject_node_state_change(cib, node, true);
+        CRM_ASSERT(cib_node != NULL);
+
+        rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+                               cib_sync_call|cib_scope_local);
+        CRM_ASSERT(rc == pcmk_ok);
+        free_xml(cib_node);
+    }
+
+    for (iter = injections->node_down; iter != NULL; iter = iter->next) {
+        const char *node = (const char *) iter->data;
+        char *xpath = NULL;
+
+        out->message(out, "inject-modify-node", "Offline", node);
+
+        cib_node = pcmk__inject_node_state_change(cib, node, false);
+        CRM_ASSERT(cib_node != NULL);
+
+        rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+                               cib_sync_call|cib_scope_local);
+        CRM_ASSERT(rc == pcmk_ok);
+        free_xml(cib_node);
+
+        xpath = crm_strdup_printf("//node_state[@uname='%s']/%s",
+                                  node, XML_CIB_TAG_LRM);
+        cib->cmds->remove(cib, xpath, NULL,
+                          cib_xpath|cib_sync_call|cib_scope_local);
+        free(xpath);
+
+        xpath = crm_strdup_printf("//node_state[@uname='%s']/%s",
+                                  node, XML_TAG_TRANSIENT_NODEATTRS);
+        cib->cmds->remove(cib, xpath, NULL,
+                          cib_xpath|cib_sync_call|cib_scope_local);
+        free(xpath);
+    }
+
+    for (iter =
injections->node_fail; iter != NULL; iter = iter->next) {
+        const char *node = (const char *) iter->data;
+
+        out->message(out, "inject-modify-node", "Failing", node);
+
+        cib_node = pcmk__inject_node_state_change(cib, node, true);
+        CRM_ASSERT(cib_node != NULL);
+
+        /* Mark the node as up but no longer a cluster member (in_ccm=false),
+         * so the scheduler treats it as failed rather than cleanly stopped.
+         */
+        crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
+
+        rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
+                               cib_sync_call|cib_scope_local);
+        CRM_ASSERT(rc == pcmk_ok);
+        free_xml(cib_node);
+    }
+
+    for (iter = injections->ticket_grant; iter != NULL; iter = iter->next) {
+        const char *ticket_id = (const char *) iter->data;
+
+        out->message(out, "inject-modify-ticket", "Granting", ticket_id);
+
+        rc = set_ticket_state_attr(out, ticket_id, "granted", true, cib);
+        CRM_ASSERT(rc == pcmk_rc_ok);
+    }
+
+    for (iter = injections->ticket_revoke; iter != NULL; iter = iter->next) {
+        const char *ticket_id = (const char *) iter->data;
+
+        out->message(out, "inject-modify-ticket", "Revoking", ticket_id);
+
+        rc = set_ticket_state_attr(out, ticket_id, "granted", false, cib);
+        CRM_ASSERT(rc == pcmk_rc_ok);
+    }
+
+    for (iter = injections->ticket_standby; iter != NULL; iter = iter->next) {
+        const char *ticket_id = (const char *) iter->data;
+
+        out->message(out, "inject-modify-ticket", "Standby", ticket_id);
+
+        rc = set_ticket_state_attr(out, ticket_id, "standby", true, cib);
+        CRM_ASSERT(rc == pcmk_rc_ok);
+    }
+
+    for (iter = injections->ticket_activate; iter != NULL; iter = iter->next) {
+        const char *ticket_id = (const char *) iter->data;
+
+        out->message(out, "inject-modify-ticket", "Activating", ticket_id);
+
+        rc = set_ticket_state_attr(out, ticket_id, "standby", false, cib);
+        CRM_ASSERT(rc == pcmk_rc_ok);
+    }
+
+    for (iter = injections->op_inject; iter != NULL; iter = iter->next) {
+        inject_action(out, (const char *) iter->data, cib, data_set);
+    }
+
+    if (!out->is_quiet(out)) {
+        out->end_list(out);
+    }
+}
+
+void
+pcmk_free_injections(pcmk_injections_t *injections)
+{
+    if (injections == NULL) {
+        return;
+    }
+
+    g_list_free_full(injections->node_up, g_free);
+    g_list_free_full(injections->node_down, g_free);
+    g_list_free_full(injections->node_fail, g_free);
+    g_list_free_full(injections->op_fail, g_free);
+    g_list_free_full(injections->op_inject, g_free);
+    g_list_free_full(injections->ticket_grant, g_free);
+    g_list_free_full(injections->ticket_revoke, g_free);
+    g_list_free_full(injections->ticket_standby, g_free);
+    g_list_free_full(injections->ticket_activate, g_free);
+    free(injections->quorum);
+    free(injections->watchdog);
+
+    free(injections);
+}
diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c
new file mode 100644
index 0000000..7379516
--- /dev/null
+++ b/lib/pacemaker/pcmk_output.c
@@ -0,0 +1,2331 @@
+/*
+ * Copyright 2019-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */ + +#include <crm_internal.h> +#include <crm/common/output.h> +#include <crm/common/results.h> +#include <crm/msg_xml.h> +#include <crm/stonith-ng.h> +#include <crm/fencing/internal.h> +#include <crm/pengine/internal.h> +#include <libxml/tree.h> +#include <pacemaker-internal.h> + +#include <inttypes.h> +#include <stdint.h> + +static char * +colocations_header(pe_resource_t *rsc, pcmk__colocation_t *cons, + bool dependents) { + char *retval = NULL; + + if (cons->primary_role > RSC_ROLE_STARTED) { + retval = crm_strdup_printf("%s (score=%s, %s role=%s, id=%s)", + rsc->id, pcmk_readable_score(cons->score), + (dependents? "needs" : "with"), + role2text(cons->primary_role), cons->id); + } else { + retval = crm_strdup_printf("%s (score=%s, id=%s)", + rsc->id, pcmk_readable_score(cons->score), + cons->id); + } + return retval; +} + +static void +colocations_xml_node(pcmk__output_t *out, pe_resource_t *rsc, + pcmk__colocation_t *cons) { + xmlNodePtr node = NULL; + + node = pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_DEPEND, + "id", cons->id, + "rsc", cons->dependent->id, + "with-rsc", cons->primary->id, + "score", pcmk_readable_score(cons->score), + NULL); + + if (cons->node_attribute) { + xmlSetProp(node, (pcmkXmlStr) "node-attribute", (pcmkXmlStr) cons->node_attribute); + } + + if (cons->dependent_role != RSC_ROLE_UNKNOWN) { + xmlSetProp(node, (pcmkXmlStr) "rsc-role", + (pcmkXmlStr) role2text(cons->dependent_role)); + } + + if (cons->primary_role != RSC_ROLE_UNKNOWN) { + xmlSetProp(node, (pcmkXmlStr) "with-rsc-role", + (pcmkXmlStr) role2text(cons->primary_role)); + } +} + +static int +do_locations_list_xml(pcmk__output_t *out, pe_resource_t *rsc, bool add_header) +{ + GList *lpc = NULL; + GList *list = rsc->rsc_location; + int rc = pcmk_rc_no_output; + + for (lpc = list; lpc != NULL; lpc = lpc->next) { + pe__location_t *cons = lpc->data; + + GList *lpc2 = NULL; + + for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) { + pe_node_t *node = (pe_node_t *) lpc2->data; + + if (add_header) { + PCMK__OUTPUT_LIST_HEADER(out, false, rc, "locations"); + } + + pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_LOCATION, + "node", node->details->uname, + "rsc", rsc->id, + "id", cons->id, + "score", pcmk_readable_score(node->weight), + NULL); + } + } + + if (add_header) { + PCMK__OUTPUT_LIST_FOOTER(out, rc); + } + + return rc; +} + +PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *", + "pe_node_t *", "pe_node_t *", "pe_action_t *", + "pe_action_t *") +static int +rsc_action_item(pcmk__output_t *out, va_list args) +{ + const char *change = va_arg(args, const char *); + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + pe_node_t *origin = va_arg(args, pe_node_t *); + pe_node_t *destination = va_arg(args, pe_node_t *); + pe_action_t *action = va_arg(args, pe_action_t *); + pe_action_t *source = va_arg(args, pe_action_t *); + + int len = 0; + char *reason = NULL; + char *details = NULL; + bool same_host = false; + bool same_role = false; + bool need_role = false; + + static int rsc_width = 5; + static int detail_width = 5; + + CRM_ASSERT(action); + CRM_ASSERT(destination != NULL || origin != NULL); + + if(source == NULL) { + source = action; + } + + len = strlen(rsc->id); + if(len > rsc_width) { + rsc_width = len + 2; + } + + if ((rsc->role > RSC_ROLE_STARTED) + || (rsc->next_role > RSC_ROLE_UNPROMOTED)) { + need_role = true; + } + + if(origin != NULL && destination != NULL && origin->details == destination->details) { + same_host = true; + } + + if(rsc->role == 
rsc->next_role) { + same_role = true; + } + + if (need_role && (origin == NULL)) { + /* Starting and promoting a promotable clone instance */ + details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), + role2text(rsc->next_role), + pe__node_name(destination)); + + } else if (origin == NULL) { + /* Starting a resource */ + details = crm_strdup_printf("%s", pe__node_name(destination)); + + } else if (need_role && (destination == NULL)) { + /* Stopping a promotable clone instance */ + details = crm_strdup_printf("%s %s", role2text(rsc->role), + pe__node_name(origin)); + + } else if (destination == NULL) { + /* Stopping a resource */ + details = crm_strdup_printf("%s", pe__node_name(origin)); + + } else if (need_role && same_role && same_host) { + /* Recovering, restarting or re-promoting a promotable clone instance */ + details = crm_strdup_printf("%s %s", role2text(rsc->role), + pe__node_name(origin)); + + } else if (same_role && same_host) { + /* Recovering or Restarting a normal resource */ + details = crm_strdup_printf("%s", pe__node_name(origin)); + + } else if (need_role && same_role) { + /* Moving a promotable clone instance */ + details = crm_strdup_printf("%s -> %s %s", pe__node_name(origin), + pe__node_name(destination), + role2text(rsc->role)); + + } else if (same_role) { + /* Moving a normal resource */ + details = crm_strdup_printf("%s -> %s", pe__node_name(origin), + pe__node_name(destination)); + + } else if (same_host) { + /* Promoting or demoting a promotable clone instance */ + details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), + role2text(rsc->next_role), + pe__node_name(origin)); + + } else { + /* Moving and promoting/demoting */ + details = crm_strdup_printf("%s %s -> %s %s", role2text(rsc->role), + pe__node_name(origin), + role2text(rsc->next_role), + pe__node_name(destination)); + } + + len = strlen(details); + if(len > detail_width) { + detail_width = len; + } + + if(source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) { + reason = crm_strdup_printf("due to %s (blocked)", source->reason); + + } else if(source->reason) { + reason = crm_strdup_printf("due to %s", source->reason); + + } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { + reason = strdup("blocked"); + + } + + out->list_item(out, NULL, "%-8s %-*s ( %*s )%s%s", change, rsc_width, + rsc->id, detail_width, details, reason ? " " : "", reason ? 
reason : ""); + + free(details); + free(reason); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *", + "pe_node_t *", "pe_node_t *", "pe_action_t *", + "pe_action_t *") +static int +rsc_action_item_xml(pcmk__output_t *out, va_list args) +{ + const char *change = va_arg(args, const char *); + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + pe_node_t *origin = va_arg(args, pe_node_t *); + pe_node_t *destination = va_arg(args, pe_node_t *); + pe_action_t *action = va_arg(args, pe_action_t *); + pe_action_t *source = va_arg(args, pe_action_t *); + + char *change_str = NULL; + + bool same_host = false; + bool same_role = false; + bool need_role = false; + xmlNode *xml = NULL; + + CRM_ASSERT(action); + CRM_ASSERT(destination != NULL || origin != NULL); + + if (source == NULL) { + source = action; + } + + if ((rsc->role > RSC_ROLE_STARTED) + || (rsc->next_role > RSC_ROLE_UNPROMOTED)) { + need_role = true; + } + + if(origin != NULL && destination != NULL && origin->details == destination->details) { + same_host = true; + } + + if(rsc->role == rsc->next_role) { + same_role = true; + } + + change_str = g_ascii_strdown(change, -1); + xml = pcmk__output_create_xml_node(out, "rsc_action", + "action", change_str, + "resource", rsc->id, + NULL); + g_free(change_str); + + if (need_role && (origin == NULL)) { + /* Starting and promoting a promotable clone instance */ + pcmk__xe_set_props(xml, + "role", role2text(rsc->role), + "next-role", role2text(rsc->next_role), + "dest", destination->details->uname, + NULL); + + } else if (origin == NULL) { + /* Starting a resource */ + crm_xml_add(xml, "node", destination->details->uname); + + } else if (need_role && (destination == NULL)) { + /* Stopping a promotable clone instance */ + pcmk__xe_set_props(xml, + "role", role2text(rsc->role), + "node", origin->details->uname, + NULL); + + } else if (destination == NULL) { + /* Stopping a resource */ + crm_xml_add(xml, "node", origin->details->uname); + + } else if (need_role && same_role && same_host) { + /* Recovering, restarting or re-promoting a promotable clone instance */ + pcmk__xe_set_props(xml, + "role", role2text(rsc->role), + "source", origin->details->uname, + NULL); + + } else if (same_role && same_host) { + /* Recovering or Restarting a normal resource */ + crm_xml_add(xml, "source", origin->details->uname); + + } else if (need_role && same_role) { + /* Moving a promotable clone instance */ + pcmk__xe_set_props(xml, + "source", origin->details->uname, + "dest", destination->details->uname, + "role", role2text(rsc->role), + NULL); + + } else if (same_role) { + /* Moving a normal resource */ + pcmk__xe_set_props(xml, + "source", origin->details->uname, + "dest", destination->details->uname, + NULL); + + } else if (same_host) { + /* Promoting or demoting a promotable clone instance */ + pcmk__xe_set_props(xml, + "role", role2text(rsc->role), + "next-role", role2text(rsc->next_role), + "source", origin->details->uname, + NULL); + + } else { + /* Moving and promoting/demoting */ + pcmk__xe_set_props(xml, + "role", role2text(rsc->role), + "source", origin->details->uname, + "next-role", role2text(rsc->next_role), + "dest", destination->details->uname, + NULL); + } + + if (source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) { + pcmk__xe_set_props(xml, + "reason", source->reason, + "blocked", "true", + NULL); + + } else if(source->reason) { + crm_xml_add(xml, "reason", source->reason); + + } else if (!pcmk_is_set(action->flags, 
pe_action_runnable)) { + pcmk__xe_set_bool_attr(xml, "blocked", true); + + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "bool") +static int +rsc_is_colocated_with_list(pcmk__output_t *out, va_list args) { + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + bool recursive = va_arg(args, int); + + int rc = pcmk_rc_no_output; + + if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) { + return rc; + } + + /* We're listing constraints explicitly involving rsc, so use rsc->rsc_cons + * directly rather than rsc->cmds->this_with_colocations(). + */ + pe__set_resource_flags(rsc, pe_rsc_detect_loop); + for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) { + pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; + char *hdr = NULL; + + PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources %s is colocated with", rsc->id); + + if (pcmk_is_set(cons->primary->flags, pe_rsc_detect_loop)) { + out->list_item(out, NULL, "%s (id=%s - loop)", + cons->primary->id, cons->id); + continue; + } + + hdr = colocations_header(cons->primary, cons, false); + out->list_item(out, NULL, "%s", hdr); + free(hdr); + + /* Empty list header just for indentation of information about this resource. */ + out->begin_list(out, NULL, NULL, NULL); + + out->message(out, "locations-list", cons->primary); + if (recursive) { + out->message(out, "rsc-is-colocated-with-list", + cons->primary, recursive); + } + + out->end_list(out); + } + + PCMK__OUTPUT_LIST_FOOTER(out, rc); + return rc; +} + +PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "bool") +static int +rsc_is_colocated_with_list_xml(pcmk__output_t *out, va_list args) { + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + bool recursive = va_arg(args, int); + + int rc = pcmk_rc_no_output; + + if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) { + return rc; + } + + /* We're listing constraints explicitly involving rsc, so use rsc->rsc_cons + * directly rather than rsc->cmds->this_with_colocations(). + */ + pe__set_resource_flags(rsc, pe_rsc_detect_loop); + for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) { + pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; + + if (pcmk_is_set(cons->primary->flags, pe_rsc_detect_loop)) { + colocations_xml_node(out, cons->primary, cons); + continue; + } + + colocations_xml_node(out, cons->primary, cons); + do_locations_list_xml(out, cons->primary, false); + + if (recursive) { + out->message(out, "rsc-is-colocated-with-list", + cons->primary, recursive); + } + } + + return rc; +} + +PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "bool") +static int +rscs_colocated_with_list(pcmk__output_t *out, va_list args) { + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + bool recursive = va_arg(args, int); + + int rc = pcmk_rc_no_output; + + if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) { + return rc; + } + + /* We're listing constraints explicitly involving rsc, so use + * rsc->rsc_cons_lhs directly rather than + * rsc->cmds->with_this_colocations(). 
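+     * (Going through rsc_cons_lhs directly presumably limits the listing to
+     * constraints written in the configuration, rather than ones the
+     * resource's allocation methods would also pull in from its parents and
+     * children.)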
+ */ + pe__set_resource_flags(rsc, pe_rsc_detect_loop); + for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { + pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; + char *hdr = NULL; + + PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources colocated with %s", rsc->id); + + if (pcmk_is_set(cons->dependent->flags, pe_rsc_detect_loop)) { + out->list_item(out, NULL, "%s (id=%s - loop)", + cons->dependent->id, cons->id); + continue; + } + + hdr = colocations_header(cons->dependent, cons, true); + out->list_item(out, NULL, "%s", hdr); + free(hdr); + + /* Empty list header just for indentation of information about this resource. */ + out->begin_list(out, NULL, NULL, NULL); + + out->message(out, "locations-list", cons->dependent); + if (recursive) { + out->message(out, "rscs-colocated-with-list", + cons->dependent, recursive); + } + + out->end_list(out); + } + + PCMK__OUTPUT_LIST_FOOTER(out, rc); + return rc; +} + +PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "bool") +static int +rscs_colocated_with_list_xml(pcmk__output_t *out, va_list args) { + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + bool recursive = va_arg(args, int); + + int rc = pcmk_rc_no_output; + + if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) { + return rc; + } + + /* We're listing constraints explicitly involving rsc, so use + * rsc->rsc_cons_lhs directly rather than + * rsc->cmds->with_this_colocations(). + */ + pe__set_resource_flags(rsc, pe_rsc_detect_loop); + for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { + pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; + + if (pcmk_is_set(cons->dependent->flags, pe_rsc_detect_loop)) { + colocations_xml_node(out, cons->dependent, cons); + continue; + } + + colocations_xml_node(out, cons->dependent, cons); + do_locations_list_xml(out, cons->dependent, false); + + if (recursive) { + out->message(out, "rscs-colocated-with-list", + cons->dependent, recursive); + } + } + + return rc; +} + +PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *") +static int +locations_list(pcmk__output_t *out, va_list args) { + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + + GList *lpc = NULL; + GList *list = rsc->rsc_location; + int rc = pcmk_rc_no_output; + + for (lpc = list; lpc != NULL; lpc = lpc->next) { + pe__location_t *cons = lpc->data; + + GList *lpc2 = NULL; + + for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) { + pe_node_t *node = (pe_node_t *) lpc2->data; + + PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Locations"); + out->list_item(out, NULL, "Node %s (score=%s, id=%s, rsc=%s)", + pe__node_name(node), + pcmk_readable_score(node->weight), cons->id, + rsc->id); + } + } + + PCMK__OUTPUT_LIST_FOOTER(out, rc); + return rc; +} + +PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *") +static int +locations_list_xml(pcmk__output_t *out, va_list args) { + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + return do_locations_list_xml(out, rsc, true); +} + +PCMK__OUTPUT_ARGS("locations-and-colocations", "pe_resource_t *", + "pe_working_set_t *", "bool", "bool") +static int +locations_and_colocations(pcmk__output_t *out, va_list args) +{ + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); + bool recursive = va_arg(args, int); + bool force = va_arg(args, int); + + pcmk__unpack_constraints(data_set); + + // Constraints apply to group/clone, not member/instance + if (!force) { + rsc = uber_parent(rsc); + } + + out->message(out, 
"locations-list", rsc); + + pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop); + out->message(out, "rscs-colocated-with-list", rsc, recursive); + + pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop); + out->message(out, "rsc-is-colocated-with-list", rsc, recursive); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("locations-and-colocations", "pe_resource_t *", + "pe_working_set_t *", "bool", "bool") +static int +locations_and_colocations_xml(pcmk__output_t *out, va_list args) +{ + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); + bool recursive = va_arg(args, int); + bool force = va_arg(args, int); + + pcmk__unpack_constraints(data_set); + + // Constraints apply to group/clone, not member/instance + if (!force) { + rsc = uber_parent(rsc); + } + + pcmk__output_xml_create_parent(out, "constraints", NULL); + do_locations_list_xml(out, rsc, false); + + pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop); + out->message(out, "rscs-colocated-with-list", rsc, recursive); + + pe__clear_resource_flags_on_all(data_set, pe_rsc_detect_loop); + out->message(out, "rsc-is-colocated-with-list", rsc, recursive); + + pcmk__output_xml_pop_parent(out); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") +static int +health(pcmk__output_t *out, va_list args) +{ + const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); + const char *host_from = va_arg(args, const char *); + const char *fsa_state = va_arg(args, const char *); + const char *result = va_arg(args, const char *); + + return out->info(out, "Controller on %s in state %s: %s", + pcmk__s(host_from, "unknown node"), + pcmk__s(fsa_state, "unknown"), + pcmk__s(result, "unknown result")); +} + +PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") +static int +health_text(pcmk__output_t *out, va_list args) +{ + if (!out->is_quiet(out)) { + return health(out, args); + } else { + const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); + const char *host_from G_GNUC_UNUSED = va_arg(args, const char *); + const char *fsa_state = va_arg(args, const char *); + const char *result G_GNUC_UNUSED = va_arg(args, const char *); + + if (fsa_state != NULL) { + pcmk__formatted_printf(out, "%s\n", fsa_state); + return pcmk_rc_ok; + } + } + + return pcmk_rc_no_output; +} + +PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") +static int +health_xml(pcmk__output_t *out, va_list args) +{ + const char *sys_from = va_arg(args, const char *); + const char *host_from = va_arg(args, const char *); + const char *fsa_state = va_arg(args, const char *); + const char *result = va_arg(args, const char *); + + pcmk__output_create_xml_node(out, pcmk__s(sys_from, ""), + "node_name", pcmk__s(host_from, ""), + "state", pcmk__s(fsa_state, ""), + "result", pcmk__s(result, ""), + NULL); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", + "enum pcmk_pacemakerd_state", "const char *", "time_t") +static int +pacemakerd_health(pcmk__output_t *out, va_list args) +{ + const char *sys_from = va_arg(args, const char *); + enum pcmk_pacemakerd_state state = + (enum pcmk_pacemakerd_state) va_arg(args, int); + const char *state_s = va_arg(args, const char *); + time_t last_updated = va_arg(args, time_t); + + char *last_updated_s = NULL; + int rc = pcmk_rc_ok; + + if (sys_from == NULL) { + if (state == 
pcmk_pacemakerd_state_remote) { + sys_from = "pacemaker-remoted"; + } else { + sys_from = CRM_SYSTEM_MCP; + } + } + + if (state_s == NULL) { + state_s = pcmk__pcmkd_state_enum2friendly(state); + } + + if (last_updated != 0) { + last_updated_s = pcmk__epoch2str(&last_updated, + crm_time_log_date + |crm_time_log_timeofday + |crm_time_log_with_timezone); + } + + rc = out->info(out, "Status of %s: '%s' (last updated %s)", + sys_from, state_s, + pcmk__s(last_updated_s, "at unknown time")); + + free(last_updated_s); + return rc; +} + +PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", + "enum pcmk_pacemakerd_state", "const char *", "time_t") +static int +pacemakerd_health_html(pcmk__output_t *out, va_list args) +{ + const char *sys_from = va_arg(args, const char *); + enum pcmk_pacemakerd_state state = + (enum pcmk_pacemakerd_state) va_arg(args, int); + const char *state_s = va_arg(args, const char *); + time_t last_updated = va_arg(args, time_t); + + char *last_updated_s = NULL; + char *msg = NULL; + + if (sys_from == NULL) { + if (state == pcmk_pacemakerd_state_remote) { + sys_from = "pacemaker-remoted"; + } else { + sys_from = CRM_SYSTEM_MCP; + } + } + + if (state_s == NULL) { + state_s = pcmk__pcmkd_state_enum2friendly(state); + } + + if (last_updated != 0) { + last_updated_s = pcmk__epoch2str(&last_updated, + crm_time_log_date + |crm_time_log_timeofday + |crm_time_log_with_timezone); + } + + msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)", + sys_from, state_s, + pcmk__s(last_updated_s, "at unknown time")); + pcmk__output_create_html_node(out, "li", NULL, NULL, msg); + + free(msg); + free(last_updated_s); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", + "enum pcmk_pacemakerd_state", "const char *", "time_t") +static int +pacemakerd_health_text(pcmk__output_t *out, va_list args) +{ + if (!out->is_quiet(out)) { + return pacemakerd_health(out, args); + } else { + const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); + enum pcmk_pacemakerd_state state = + (enum pcmk_pacemakerd_state) va_arg(args, int); + const char *state_s = va_arg(args, const char *); + time_t last_updated G_GNUC_UNUSED = va_arg(args, time_t); + + if (state_s == NULL) { + state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); + } + pcmk__formatted_printf(out, "%s\n", state_s); + return pcmk_rc_ok; + } +} + +PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", + "enum pcmk_pacemakerd_state", "const char *", "time_t") +static int +pacemakerd_health_xml(pcmk__output_t *out, va_list args) +{ + const char *sys_from = va_arg(args, const char *); + enum pcmk_pacemakerd_state state = + (enum pcmk_pacemakerd_state) va_arg(args, int); + const char *state_s = va_arg(args, const char *); + time_t last_updated = va_arg(args, time_t); + + char *last_updated_s = NULL; + + if (sys_from == NULL) { + if (state == pcmk_pacemakerd_state_remote) { + sys_from = "pacemaker-remoted"; + } else { + sys_from = CRM_SYSTEM_MCP; + } + } + + if (state_s == NULL) { + state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); + } + + if (last_updated != 0) { + last_updated_s = pcmk__epoch2str(&last_updated, + crm_time_log_date + |crm_time_log_timeofday + |crm_time_log_with_timezone); + } + + pcmk__output_create_xml_node(out, "pacemakerd", + "sys_from", sys_from, + "state", state_s, + "last_updated", last_updated_s, + NULL); + free(last_updated_s); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") +static int +profile_default(pcmk__output_t 
*out, va_list args) { + const char *xml_file = va_arg(args, const char *); + clock_t start = va_arg(args, clock_t); + clock_t end = va_arg(args, clock_t); + + out->list_item(out, NULL, "Testing %s ... %.2f secs", xml_file, + (end - start) / (float) CLOCKS_PER_SEC); + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") +static int +profile_xml(pcmk__output_t *out, va_list args) { + const char *xml_file = va_arg(args, const char *); + clock_t start = va_arg(args, clock_t); + clock_t end = va_arg(args, clock_t); + + char *duration = pcmk__ftoa((end - start) / (float) CLOCKS_PER_SEC); + + pcmk__output_create_xml_node(out, "timing", + "file", xml_file, + "duration", duration, + NULL); + + free(duration); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("dc", "const char *") +static int +dc(pcmk__output_t *out, va_list args) +{ + const char *dc = va_arg(args, const char *); + + return out->info(out, "Designated Controller is: %s", + pcmk__s(dc, "not yet elected")); +} + +PCMK__OUTPUT_ARGS("dc", "const char *") +static int +dc_text(pcmk__output_t *out, va_list args) +{ + if (!out->is_quiet(out)) { + return dc(out, args); + } else { + const char *dc = va_arg(args, const char *); + + if (dc != NULL) { + pcmk__formatted_printf(out, "%s\n", pcmk__s(dc, "")); + return pcmk_rc_ok; + } + } + + return pcmk_rc_no_output; +} + +PCMK__OUTPUT_ARGS("dc", "const char *") +static int +dc_xml(pcmk__output_t *out, va_list args) +{ + const char *dc = va_arg(args, const char *); + + pcmk__output_create_xml_node(out, "dc", + "node_name", pcmk__s(dc, ""), + NULL); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool") +static int +crmadmin_node(pcmk__output_t *out, va_list args) +{ + const char *type = va_arg(args, const char *); + const char *name = va_arg(args, const char *); + const char *id = va_arg(args, const char *); + bool bash_export = va_arg(args, int); + + if (bash_export) { + return out->info(out, "export %s=%s", + pcmk__s(name, "<null>"), pcmk__s(id, "")); + } else { + return out->info(out, "%s node: %s (%s)", type ? type : "cluster", + pcmk__s(name, "<null>"), pcmk__s(id, "<null>")); + } +} + +PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool") +static int +crmadmin_node_text(pcmk__output_t *out, va_list args) +{ + if (!out->is_quiet(out)) { + return crmadmin_node(out, args); + } else { + const char *type G_GNUC_UNUSED = va_arg(args, const char *); + const char *name = va_arg(args, const char *); + const char *id G_GNUC_UNUSED = va_arg(args, const char *); + bool bash_export G_GNUC_UNUSED = va_arg(args, int); + + pcmk__formatted_printf(out, "%s\n", pcmk__s(name, "<null>")); + return pcmk_rc_ok; + } +} + +PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool") +static int +crmadmin_node_xml(pcmk__output_t *out, va_list args) +{ + const char *type = va_arg(args, const char *); + const char *name = va_arg(args, const char *); + const char *id = va_arg(args, const char *); + bool bash_export G_GNUC_UNUSED = va_arg(args, int); + + pcmk__output_create_xml_node(out, "node", + "type", type ? 
type : "cluster", + "name", pcmk__s(name, ""), + "id", pcmk__s(id, ""), + NULL); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("digests", "const pe_resource_t *", "const pe_node_t *", + "const char *", "guint", "const op_digest_cache_t *") +static int +digests_text(pcmk__output_t *out, va_list args) +{ + const pe_resource_t *rsc = va_arg(args, const pe_resource_t *); + const pe_node_t *node = va_arg(args, const pe_node_t *); + const char *task = va_arg(args, const char *); + guint interval_ms = va_arg(args, guint); + const op_digest_cache_t *digests = va_arg(args, const op_digest_cache_t *); + + char *action_desc = NULL; + const char *rsc_desc = "unknown resource"; + const char *node_desc = "unknown node"; + + if (interval_ms != 0) { + action_desc = crm_strdup_printf("%ums-interval %s action", interval_ms, + ((task == NULL)? "unknown" : task)); + } else if (pcmk__str_eq(task, "monitor", pcmk__str_none)) { + action_desc = strdup("probe action"); + } else { + action_desc = crm_strdup_printf("%s action", + ((task == NULL)? "unknown" : task)); + } + if ((rsc != NULL) && (rsc->id != NULL)) { + rsc_desc = rsc->id; + } + if ((node != NULL) && (node->details->uname != NULL)) { + node_desc = node->details->uname; + } + out->begin_list(out, NULL, NULL, "Digests for %s %s on %s", + rsc_desc, action_desc, node_desc); + free(action_desc); + + if (digests == NULL) { + out->list_item(out, NULL, "none"); + out->end_list(out); + return pcmk_rc_ok; + } + if (digests->digest_all_calc != NULL) { + out->list_item(out, NULL, "%s (all parameters)", + digests->digest_all_calc); + } + if (digests->digest_secure_calc != NULL) { + out->list_item(out, NULL, "%s (non-private parameters)", + digests->digest_secure_calc); + } + if (digests->digest_restart_calc != NULL) { + out->list_item(out, NULL, "%s (non-reloadable parameters)", + digests->digest_restart_calc); + } + out->end_list(out); + return pcmk_rc_ok; +} + +static void +add_digest_xml(xmlNode *parent, const char *type, const char *digest, + xmlNode *digest_source) +{ + if (digest != NULL) { + xmlNodePtr digest_xml = create_xml_node(parent, "digest"); + + crm_xml_add(digest_xml, "type", ((type == NULL)? 
"unspecified" : type)); + crm_xml_add(digest_xml, "hash", digest); + if (digest_source != NULL) { + add_node_copy(digest_xml, digest_source); + } + } +} + +PCMK__OUTPUT_ARGS("digests", "const pe_resource_t *", "const pe_node_t *", + "const char *", "guint", "const op_digest_cache_t *") +static int +digests_xml(pcmk__output_t *out, va_list args) +{ + const pe_resource_t *rsc = va_arg(args, const pe_resource_t *); + const pe_node_t *node = va_arg(args, const pe_node_t *); + const char *task = va_arg(args, const char *); + guint interval_ms = va_arg(args, guint); + const op_digest_cache_t *digests = va_arg(args, const op_digest_cache_t *); + + char *interval_s = crm_strdup_printf("%ums", interval_ms); + xmlNode *xml = NULL; + + xml = pcmk__output_create_xml_node(out, "digests", + "resource", pcmk__s(rsc->id, ""), + "node", pcmk__s(node->details->uname, ""), + "task", pcmk__s(task, ""), + "interval", interval_s, + NULL); + free(interval_s); + if (digests != NULL) { + add_digest_xml(xml, "all", digests->digest_all_calc, + digests->params_all); + add_digest_xml(xml, "nonprivate", digests->digest_secure_calc, + digests->params_secure); + add_digest_xml(xml, "nonreloadable", digests->digest_restart_calc, + digests->params_restart); + } + return pcmk_rc_ok; +} + +#define STOP_SANITY_ASSERT(lineno) do { \ + if(current && current->details->unclean) { \ + /* It will be a pseudo op */ \ + } else if(stop == NULL) { \ + crm_err("%s:%d: No stop action exists for %s", \ + __func__, lineno, rsc->id); \ + CRM_ASSERT(stop != NULL); \ + } else if (pcmk_is_set(stop->flags, pe_action_optional)) { \ + crm_err("%s:%d: Action %s is still optional", \ + __func__, lineno, stop->uuid); \ + CRM_ASSERT(!pcmk_is_set(stop->flags, pe_action_optional)); \ + } \ + } while(0) + +PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *") +static int +rsc_action_default(pcmk__output_t *out, va_list args) +{ + pe_resource_t *rsc = va_arg(args, pe_resource_t *); + pe_node_t *current = va_arg(args, pe_node_t *); + pe_node_t *next = va_arg(args, pe_node_t *); + + GList *possible_matches = NULL; + char *key = NULL; + int rc = pcmk_rc_no_output; + bool moving = false; + + pe_node_t *start_node = NULL; + pe_action_t *start = NULL; + pe_action_t *stop = NULL; + pe_action_t *promote = NULL; + pe_action_t *demote = NULL; + + if (!pcmk_is_set(rsc->flags, pe_rsc_managed) + || (current == NULL && next == NULL)) { + pe_rsc_info(rsc, "Leave %s\t(%s%s)", + rsc->id, role2text(rsc->role), + !pcmk_is_set(rsc->flags, pe_rsc_managed)? " unmanaged" : ""); + return rc; + } + + moving = (current != NULL) && (next != NULL) + && (current->details != next->details); + + possible_matches = pe__resource_actions(rsc, next, RSC_START, false); + if (possible_matches) { + start = possible_matches->data; + g_list_free(possible_matches); + } + + if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) { + start_node = NULL; + } else { + start_node = current; + } + possible_matches = pe__resource_actions(rsc, start_node, RSC_STOP, false); + if (possible_matches) { + stop = possible_matches->data; + g_list_free(possible_matches); + } else if (pcmk_is_set(rsc->flags, pe_rsc_stop_unexpected)) { + /* The resource is multiply active with multiple-active set to + * stop_unexpected, and not stopping on its current node, but it should + * be stopping elsewhere. 
+ */ + possible_matches = pe__resource_actions(rsc, NULL, RSC_STOP, false); + if (possible_matches != NULL) { + stop = possible_matches->data; + g_list_free(possible_matches); + } + } + + possible_matches = pe__resource_actions(rsc, next, RSC_PROMOTE, false); + if (possible_matches) { + promote = possible_matches->data; + g_list_free(possible_matches); + } + + possible_matches = pe__resource_actions(rsc, next, RSC_DEMOTE, false); + if (possible_matches) { + demote = possible_matches->data; + g_list_free(possible_matches); + } + + if (rsc->role == rsc->next_role) { + pe_action_t *migrate_op = NULL; + + CRM_CHECK(next != NULL, return rc); + + possible_matches = pe__resource_actions(rsc, next, RSC_MIGRATED, false); + if (possible_matches) { + migrate_op = possible_matches->data; + } + + if ((migrate_op != NULL) && (current != NULL) + && pcmk_is_set(migrate_op->flags, pe_action_runnable)) { + rc = out->message(out, "rsc-action-item", "Migrate", rsc, current, + next, start, NULL); + + } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) { + rc = out->message(out, "rsc-action-item", "Reload", rsc, current, + next, start, NULL); + + } else if (start == NULL || pcmk_is_set(start->flags, pe_action_optional)) { + if ((demote != NULL) && (promote != NULL) + && !pcmk_is_set(demote->flags, pe_action_optional) + && !pcmk_is_set(promote->flags, pe_action_optional)) { + rc = out->message(out, "rsc-action-item", "Re-promote", rsc, + current, next, promote, demote); + } else { + pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, + role2text(rsc->role), pe__node_name(next)); + } + + } else if (!pcmk_is_set(start->flags, pe_action_runnable)) { + rc = out->message(out, "rsc-action-item", "Stop", rsc, current, + NULL, stop, (stop && stop->reason)? stop : start); + STOP_SANITY_ASSERT(__LINE__); + + } else if (moving && current) { + rc = out->message(out, "rsc-action-item", pcmk_is_set(rsc->flags, pe_rsc_failed)? "Recover" : "Move", + rsc, current, next, stop, NULL); + + } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + rc = out->message(out, "rsc-action-item", "Recover", rsc, current, + NULL, stop, NULL); + STOP_SANITY_ASSERT(__LINE__); + + } else { + rc = out->message(out, "rsc-action-item", "Restart", rsc, current, + next, start, NULL); + /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */ + } + + g_list_free(possible_matches); + return rc; + } + + if(stop + && (rsc->next_role == RSC_ROLE_STOPPED + || (start && !pcmk_is_set(start->flags, pe_action_runnable)))) { + + GList *gIter = NULL; + + key = stop_key(rsc); + for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { + pe_node_t *node = (pe_node_t *) gIter->data; + pe_action_t *stop_op = NULL; + + possible_matches = find_actions(rsc->actions, key, node); + if (possible_matches) { + stop_op = possible_matches->data; + g_list_free(possible_matches); + } + + if (stop_op && (stop_op->flags & pe_action_runnable)) { + STOP_SANITY_ASSERT(__LINE__); + } + + if (out->message(out, "rsc-action-item", "Stop", rsc, node, NULL, + stop_op, (stop_op && stop_op->reason)? 
stop_op : start) == pcmk_rc_ok) { + rc = pcmk_rc_ok; + } + } + + free(key); + + } else if ((stop != NULL) + && pcmk_all_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_stop)) { + /* 'stop' may be NULL if the failure was ignored */ + rc = out->message(out, "rsc-action-item", "Recover", rsc, current, + next, stop, start); + STOP_SANITY_ASSERT(__LINE__); + + } else if (moving) { + rc = out->message(out, "rsc-action-item", "Move", rsc, current, next, + stop, NULL); + STOP_SANITY_ASSERT(__LINE__); + + } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) { + rc = out->message(out, "rsc-action-item", "Reload", rsc, current, next, + start, NULL); + + } else if (stop != NULL && !pcmk_is_set(stop->flags, pe_action_optional)) { + rc = out->message(out, "rsc-action-item", "Restart", rsc, current, + next, start, NULL); + STOP_SANITY_ASSERT(__LINE__); + + } else if (rsc->role == RSC_ROLE_PROMOTED) { + CRM_LOG_ASSERT(current != NULL); + rc = out->message(out, "rsc-action-item", "Demote", rsc, current, + next, demote, NULL); + + } else if (rsc->next_role == RSC_ROLE_PROMOTED) { + CRM_LOG_ASSERT(next); + rc = out->message(out, "rsc-action-item", "Promote", rsc, current, + next, promote, NULL); + + } else if (rsc->role == RSC_ROLE_STOPPED && rsc->next_role > RSC_ROLE_STOPPED) { + rc = out->message(out, "rsc-action-item", "Start", rsc, current, next, + start, NULL); + } + + return rc; +} + +PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *") +static int +node_action(pcmk__output_t *out, va_list args) +{ + const char *task = va_arg(args, const char *); + const char *node_name = va_arg(args, const char *); + const char *reason = va_arg(args, const char *); + + if (task == NULL) { + return pcmk_rc_no_output; + } else if (reason) { + out->list_item(out, NULL, "%s %s '%s'", task, node_name, reason); + } else { + crm_notice(" * %s %s", task, node_name); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *") +static int +node_action_xml(pcmk__output_t *out, va_list args) +{ + const char *task = va_arg(args, const char *); + const char *node_name = va_arg(args, const char *); + const char *reason = va_arg(args, const char *); + + if (task == NULL) { + return pcmk_rc_no_output; + } else if (reason) { + pcmk__output_create_xml_node(out, "node_action", + "task", task, + "node", node_name, + "reason", reason, + NULL); + } else { + crm_notice(" * %s %s", task, node_name); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("node-info", "int", "const char *", "const char *", + "const char *", "bool", "bool") +static int +node_info_default(pcmk__output_t *out, va_list args) +{ + int node_id = va_arg(args, int); + const char *node_name = va_arg(args, const char *); + const char *uuid = va_arg(args, const char *); + const char *state = va_arg(args, const char *); + bool have_quorum = (bool) va_arg(args, int); + bool is_remote = (bool) va_arg(args, int); + + return out->info(out, + "Node %d: %s " + "(uuid=%s, state=%s, have_quorum=%s, is_remote=%s)", + node_id, pcmk__s(node_name, "unknown"), + pcmk__s(uuid, "unknown"), pcmk__s(state, "unknown"), + pcmk__btoa(have_quorum), pcmk__btoa(is_remote)); +} + +PCMK__OUTPUT_ARGS("node-info", "int", "const char *", "const char *", + "const char *", "bool", "bool") +static int +node_info_xml(pcmk__output_t *out, va_list args) +{ + int node_id = va_arg(args, int); + const char *node_name = va_arg(args, const char *); + const char *uuid = va_arg(args, const char *); + const char *state = va_arg(args, const char 
*); + bool have_quorum = (bool) va_arg(args, int); + bool is_remote = (bool) va_arg(args, int); + + char *id_s = crm_strdup_printf("%d", node_id); + + pcmk__output_create_xml_node(out, "node-info", + "nodeid", id_s, + XML_ATTR_UNAME, node_name, + XML_ATTR_ID, uuid, + XML_NODE_IS_PEER, state, + XML_ATTR_HAVE_QUORUM, pcmk__btoa(have_quorum), + XML_NODE_IS_REMOTE, pcmk__btoa(is_remote), + NULL); + free(id_s); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNodePtr") +static int +inject_cluster_action(pcmk__output_t *out, va_list args) +{ + const char *node = va_arg(args, const char *); + const char *task = va_arg(args, const char *); + xmlNodePtr rsc = va_arg(args, xmlNodePtr); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + if(rsc) { + out->list_item(out, NULL, "Cluster action: %s for %s on %s", task, ID(rsc), node); + } else { + out->list_item(out, NULL, "Cluster action: %s on %s", task, node); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNodePtr") +static int +inject_cluster_action_xml(pcmk__output_t *out, va_list args) +{ + const char *node = va_arg(args, const char *); + const char *task = va_arg(args, const char *); + xmlNodePtr rsc = va_arg(args, xmlNodePtr); + + xmlNodePtr xml_node = NULL; + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + xml_node = pcmk__output_create_xml_node(out, "cluster_action", + "task", task, + "node", node, + NULL); + + if (rsc) { + crm_xml_add(xml_node, "id", ID(rsc)); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *") +static int +inject_fencing_action(pcmk__output_t *out, va_list args) +{ + const char *target = va_arg(args, const char *); + const char *op = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + out->list_item(out, NULL, "Fencing %s (%s)", target, op); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *") +static int +inject_fencing_action_xml(pcmk__output_t *out, va_list args) +{ + const char *target = va_arg(args, const char *); + const char *op = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + pcmk__output_create_xml_node(out, "fencing_action", + "target", target, + "op", op, + NULL); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr") +static int +inject_attr(pcmk__output_t *out, va_list args) +{ + const char *name = va_arg(args, const char *); + const char *value = va_arg(args, const char *); + xmlNodePtr cib_node = va_arg(args, xmlNodePtr); + + xmlChar *node_path = NULL; + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + node_path = xmlGetNodePath(cib_node); + + out->list_item(out, NULL, "Injecting attribute %s=%s into %s '%s'", + name, value, node_path, ID(cib_node)); + + free(node_path); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr") +static int +inject_attr_xml(pcmk__output_t *out, va_list args) +{ + const char *name = va_arg(args, const char *); + const char *value = va_arg(args, const char *); + xmlNodePtr cib_node = va_arg(args, xmlNodePtr); + + xmlChar *node_path = NULL; + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + node_path = xmlGetNodePath(cib_node); + + pcmk__output_create_xml_node(out, "inject_attr", + "name", name, + "value", value, + 
"node_path", node_path, + "cib_node", ID(cib_node), + NULL); + free(node_path); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-spec", "const char *") +static int +inject_spec(pcmk__output_t *out, va_list args) +{ + const char *spec = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + out->list_item(out, NULL, "Injecting %s into the configuration", spec); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-spec", "const char *") +static int +inject_spec_xml(pcmk__output_t *out, va_list args) +{ + const char *spec = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + pcmk__output_create_xml_node(out, "inject_spec", + "spec", spec, + NULL); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *") +static int +inject_modify_config(pcmk__output_t *out, va_list args) +{ + const char *quorum = va_arg(args, const char *); + const char *watchdog = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + out->begin_list(out, NULL, NULL, "Performing Requested Modifications"); + + if (quorum) { + out->list_item(out, NULL, "Setting quorum: %s", quorum); + } + + if (watchdog) { + out->list_item(out, NULL, "Setting watchdog: %s", watchdog); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *") +static int +inject_modify_config_xml(pcmk__output_t *out, va_list args) +{ + const char *quorum = va_arg(args, const char *); + const char *watchdog = va_arg(args, const char *); + + xmlNodePtr node = NULL; + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + node = pcmk__output_xml_create_parent(out, "modifications", NULL); + + if (quorum) { + crm_xml_add(node, "quorum", quorum); + } + + if (watchdog) { + crm_xml_add(node, "watchdog", watchdog); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *") +static int +inject_modify_node(pcmk__output_t *out, va_list args) +{ + const char *action = va_arg(args, const char *); + const char *node = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + if (pcmk__str_eq(action, "Online", pcmk__str_none)) { + out->list_item(out, NULL, "Bringing node %s online", node); + return pcmk_rc_ok; + } else if (pcmk__str_eq(action, "Offline", pcmk__str_none)) { + out->list_item(out, NULL, "Taking node %s offline", node); + return pcmk_rc_ok; + } else if (pcmk__str_eq(action, "Failing", pcmk__str_none)) { + out->list_item(out, NULL, "Failing node %s", node); + return pcmk_rc_ok; + } + + return pcmk_rc_no_output; +} + +PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *") +static int +inject_modify_node_xml(pcmk__output_t *out, va_list args) +{ + const char *action = va_arg(args, const char *); + const char *node = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + pcmk__output_create_xml_node(out, "modify_node", + "action", action, + "node", node, + NULL); + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *") +static int +inject_modify_ticket(pcmk__output_t *out, va_list args) +{ + const char *action = va_arg(args, const char *); + const char *ticket = va_arg(args, const char *); + + if (out->is_quiet(out)) { + return pcmk_rc_no_output; + } + + if (pcmk__str_eq(action, "Standby", pcmk__str_none)) { + out->list_item(out, NULL, "Making ticket %s standby", 
ticket);
+    } else {
+        out->list_item(out, NULL, "%s ticket %s", action, ticket);
+    }
+
+    return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *")
+static int
+inject_modify_ticket_xml(pcmk__output_t *out, va_list args)
+{
+    const char *action = va_arg(args, const char *);
+    const char *ticket = va_arg(args, const char *);
+
+    if (out->is_quiet(out)) {
+        return pcmk_rc_no_output;
+    }
+
+    pcmk__output_create_xml_node(out, "modify_ticket",
+                                 "action", action,
+                                 "ticket", ticket,
+                                 NULL);
+    return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *")
+static int
+inject_pseudo_action(pcmk__output_t *out, va_list args)
+{
+    const char *node = va_arg(args, const char *);
+    const char *task = va_arg(args, const char *);
+
+    if (out->is_quiet(out)) {
+        return pcmk_rc_no_output;
+    }
+
+    out->list_item(out, NULL, "Pseudo action: %s%s%s", task, node ? " on " : "",
+                   node ? node : "");
+    return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *")
+static int
+inject_pseudo_action_xml(pcmk__output_t *out, va_list args)
+{
+    const char *node = va_arg(args, const char *);
+    const char *task = va_arg(args, const char *);
+
+    xmlNodePtr xml_node = NULL;
+
+    if (out->is_quiet(out)) {
+        return pcmk_rc_no_output;
+    }
+
+    xml_node = pcmk__output_create_xml_node(out, "pseudo_action",
+                                            "task", task,
+                                            NULL);
+    if (node) {
+        crm_xml_add(xml_node, "node", node);
+    }
+
+    return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *",
+                  "const char *", "guint")
+static int
+inject_rsc_action(pcmk__output_t *out, va_list args)
+{
+    const char *rsc = va_arg(args, const char *);
+    const char *operation = va_arg(args, const char *);
+    const char *node = va_arg(args, const char *);
+    guint interval_ms = va_arg(args, guint);
+
+    if (out->is_quiet(out)) {
+        return pcmk_rc_no_output;
+    }
+
+    if (interval_ms) {
+        out->list_item(out, NULL, "Resource action: %-15s %s=%u on %s",
+                       rsc, operation, interval_ms, node);
+    } else {
+        out->list_item(out, NULL, "Resource action: %-15s %s on %s",
+                       rsc, operation, node);
+    }
+
+    return pcmk_rc_ok;
+}
+
+PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *",
+                  "const char *", "guint")
+static int
+inject_rsc_action_xml(pcmk__output_t *out, va_list args)
+{
+    const char *rsc = va_arg(args, const char *);
+    const char *operation = va_arg(args, const char *);
+    const char *node = va_arg(args, const char *);
+    guint interval_ms = va_arg(args, guint);
+
+    xmlNodePtr xml_node = NULL;
+
+    if (out->is_quiet(out)) {
+        return pcmk_rc_no_output;
+    }
+
+    xml_node = pcmk__output_create_xml_node(out, "rsc_action",
+                                            "resource", rsc,
+                                            "op", operation,
+                                            "node", node,
+                                            NULL);
+
+    if (interval_ms) {
+        char *interval_s = pcmk__itoa(interval_ms);
+
+        crm_xml_add(xml_node, "interval", interval_s);
+        free(interval_s);
+    }
+
+    return pcmk_rc_ok;
+}
+
+/* Remember when any status section has produced output: once retval is
+ * pcmk_rc_ok, keep the running return code at pcmk_rc_ok (it is also used
+ * below to decide whether a spacer is needed before the next section).
+ */
+#define CHECK_RC(retcode, retval) do {  \
+        if (retval == pcmk_rc_ok) {     \
+            retcode = pcmk_rc_ok;       \
+        }                               \
+    } while (0)
+
+PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *",
+                  "enum pcmk_pacemakerd_state", "crm_exit_t",
+                  "stonith_history_t *", "enum pcmk__fence_history", "uint32_t",
+                  "uint32_t", "const char *", "GList *", "GList *")
+int
+pcmk__cluster_status_text(pcmk__output_t *out, va_list args)
+{
+    pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
+    enum pcmk_pacemakerd_state pcmkd_state =
+        (enum pcmk_pacemakerd_state) va_arg(args, int);
+    crm_exit_t history_rc = va_arg(args, crm_exit_t);
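+    /* Note: enum values passed through a variadic argument list undergo
+     * default argument promotion, which is why the enum arguments here are
+     * read back with va_arg(args, int) and converted instead of being read
+     * as the enum types directly.
+     */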
+ stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); + enum pcmk__fence_history fence_history = va_arg(args, int); + uint32_t section_opts = va_arg(args, uint32_t); + uint32_t show_opts = va_arg(args, uint32_t); + const char *prefix = va_arg(args, const char *); + GList *unames = va_arg(args, GList *); + GList *resources = va_arg(args, GList *); + + int rc = pcmk_rc_no_output; + bool already_printed_failure = false; + + CHECK_RC(rc, out->message(out, "cluster-summary", data_set, pcmkd_state, + section_opts, show_opts)); + + if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) { + CHECK_RC(rc, out->message(out, "node-list", data_set->nodes, unames, + resources, show_opts, rc == pcmk_rc_ok)); + } + + /* Print resources section, if needed */ + if (pcmk_is_set(section_opts, pcmk_section_resources)) { + CHECK_RC(rc, out->message(out, "resource-list", data_set, show_opts, + true, unames, resources, rc == pcmk_rc_ok)); + } + + /* print Node Attributes section if requested */ + if (pcmk_is_set(section_opts, pcmk_section_attributes)) { + CHECK_RC(rc, out->message(out, "node-attribute-list", data_set, + show_opts, rc == pcmk_rc_ok, unames, resources)); + } + + /* If requested, print resource operations (which includes failcounts) + * or just failcounts + */ + if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) { + CHECK_RC(rc, out->message(out, "node-summary", data_set, unames, + resources, section_opts, show_opts, rc == pcmk_rc_ok)); + } + + /* If there were any failed actions, print them */ + if (pcmk_is_set(section_opts, pcmk_section_failures) + && xml_has_children(data_set->failed)) { + + CHECK_RC(rc, out->message(out, "failed-action-list", data_set, unames, + resources, show_opts, rc == pcmk_rc_ok)); + } + + /* Print failed stonith actions */ + if (pcmk_is_set(section_opts, pcmk_section_fence_failed) && + fence_history != pcmk__fence_history_none) { + if (history_rc == 0) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, + GINT_TO_POINTER(st_failed)); + + if (hp) { + CHECK_RC(rc, out->message(out, "failed-fencing-list", + stonith_history, unames, section_opts, + show_opts, rc == pcmk_rc_ok)); + } + } else { + PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); + + already_printed_failure = true; + } + } + + /* Print tickets if requested */ + if (pcmk_is_set(section_opts, pcmk_section_tickets)) { + CHECK_RC(rc, out->message(out, "ticket-list", data_set, rc == pcmk_rc_ok)); + } + + /* Print negative location constraints if requested */ + if (pcmk_is_set(section_opts, pcmk_section_bans)) { + CHECK_RC(rc, out->message(out, "ban-list", data_set, prefix, resources, + show_opts, rc == pcmk_rc_ok)); + } + + /* Print stonith history */ + if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) && + fence_history != pcmk__fence_history_none) { + if (history_rc != 0) { + if (!already_printed_failure) { + PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); + } + } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, + 
GINT_TO_POINTER(st_failed)); + + if (hp) { + CHECK_RC(rc, out->message(out, "fencing-list", hp, unames, + section_opts, show_opts, + rc == pcmk_rc_ok)); + } + } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); + + if (hp) { + CHECK_RC(rc, out->message(out, "pending-fencing-list", hp, + unames, section_opts, show_opts, + rc == pcmk_rc_ok)); + } + } + } + + return rc; +} + +PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *", + "enum pcmk_pacemakerd_state", "crm_exit_t", + "stonith_history_t *", "enum pcmk__fence_history", "uint32_t", + "uint32_t", "const char *", "GList *", "GList *") +static int +cluster_status_xml(pcmk__output_t *out, va_list args) +{ + pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); + enum pcmk_pacemakerd_state pcmkd_state = + (enum pcmk_pacemakerd_state) va_arg(args, int); + crm_exit_t history_rc = va_arg(args, crm_exit_t); + stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); + enum pcmk__fence_history fence_history = va_arg(args, int); + uint32_t section_opts = va_arg(args, uint32_t); + uint32_t show_opts = va_arg(args, uint32_t); + const char *prefix = va_arg(args, const char *); + GList *unames = va_arg(args, GList *); + GList *resources = va_arg(args, GList *); + + out->message(out, "cluster-summary", data_set, pcmkd_state, section_opts, + show_opts); + + /*** NODES ***/ + if (pcmk_is_set(section_opts, pcmk_section_nodes)) { + out->message(out, "node-list", data_set->nodes, unames, resources, + show_opts, false); + } + + /* Print resources section, if needed */ + if (pcmk_is_set(section_opts, pcmk_section_resources)) { + /* XML output always displays full details. 
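+         * so clear the brief flag below; the remaining sections pass
+         * show_opts through unchanged.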
*/ + uint32_t full_show_opts = show_opts & ~pcmk_show_brief; + + out->message(out, "resource-list", data_set, full_show_opts, + false, unames, resources, false); + } + + /* print Node Attributes section if requested */ + if (pcmk_is_set(section_opts, pcmk_section_attributes)) { + out->message(out, "node-attribute-list", data_set, show_opts, false, + unames, resources); + } + + /* If requested, print resource operations (which includes failcounts) + * or just failcounts + */ + if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) { + out->message(out, "node-summary", data_set, unames, + resources, section_opts, show_opts, false); + } + + /* If there were any failed actions, print them */ + if (pcmk_is_set(section_opts, pcmk_section_failures) + && xml_has_children(data_set->failed)) { + + out->message(out, "failed-action-list", data_set, unames, resources, + show_opts, false); + } + + /* Print stonith history */ + if (pcmk_is_set(section_opts, pcmk_section_fencing_all) && + fence_history != pcmk__fence_history_none) { + out->message(out, "full-fencing-list", history_rc, stonith_history, + unames, section_opts, show_opts, false); + } + + /* Print tickets if requested */ + if (pcmk_is_set(section_opts, pcmk_section_tickets)) { + out->message(out, "ticket-list", data_set, false); + } + + /* Print negative location constraints if requested */ + if (pcmk_is_set(section_opts, pcmk_section_bans)) { + out->message(out, "ban-list", data_set, prefix, resources, show_opts, + false); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *", + "enum pcmk_pacemakerd_state", "crm_exit_t", + "stonith_history_t *", "enum pcmk__fence_history", "uint32_t", + "uint32_t", "const char *", "GList *", "GList *") +static int +cluster_status_html(pcmk__output_t *out, va_list args) +{ + pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); + enum pcmk_pacemakerd_state pcmkd_state = + (enum pcmk_pacemakerd_state) va_arg(args, int); + crm_exit_t history_rc = va_arg(args, crm_exit_t); + stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); + enum pcmk__fence_history fence_history = va_arg(args, int); + uint32_t section_opts = va_arg(args, uint32_t); + uint32_t show_opts = va_arg(args, uint32_t); + const char *prefix = va_arg(args, const char *); + GList *unames = va_arg(args, GList *); + GList *resources = va_arg(args, GList *); + bool already_printed_failure = false; + + out->message(out, "cluster-summary", data_set, pcmkd_state, section_opts, + show_opts); + + /*** NODE LIST ***/ + if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) { + out->message(out, "node-list", data_set->nodes, unames, resources, + show_opts, false); + } + + /* Print resources section, if needed */ + if (pcmk_is_set(section_opts, pcmk_section_resources)) { + out->message(out, "resource-list", data_set, show_opts, true, unames, + resources, false); + } + + /* print Node Attributes section if requested */ + if (pcmk_is_set(section_opts, pcmk_section_attributes)) { + out->message(out, "node-attribute-list", data_set, show_opts, false, + unames, resources); + } + + /* If requested, print resource operations (which includes failcounts) + * or just failcounts + */ + if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) { + out->message(out, "node-summary", data_set, unames, + resources, section_opts, show_opts, false); + } + + /* If there were any failed actions, print them */ + if (pcmk_is_set(section_opts, 
pcmk_section_failures) + && xml_has_children(data_set->failed)) { + + out->message(out, "failed-action-list", data_set, unames, resources, + show_opts, false); + } + + /* Print failed stonith actions */ + if (pcmk_is_set(section_opts, pcmk_section_fence_failed) && + fence_history != pcmk__fence_history_none) { + if (history_rc == 0) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, + GINT_TO_POINTER(st_failed)); + + if (hp) { + out->message(out, "failed-fencing-list", stonith_history, unames, + section_opts, show_opts, false); + } + } else { + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); + } + } + + /* Print stonith history */ + if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) && + fence_history != pcmk__fence_history_none) { + if (history_rc != 0) { + if (!already_printed_failure) { + out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); + out->list_item(out, NULL, "Failed to get fencing history: %s", + crm_exit_str(history_rc)); + out->end_list(out); + } + } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, + GINT_TO_POINTER(st_failed)); + + if (hp) { + out->message(out, "fencing-list", hp, unames, section_opts, + show_opts, false); + } + } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) { + stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); + + if (hp) { + out->message(out, "pending-fencing-list", hp, unames, + section_opts, show_opts, false); + } + } + } + + /* Print tickets if requested */ + if (pcmk_is_set(section_opts, pcmk_section_tickets)) { + out->message(out, "ticket-list", data_set, false); + } + + /* Print negative location constraints if requested */ + if (pcmk_is_set(section_opts, pcmk_section_bans)) { + out->message(out, "ban-list", data_set, prefix, resources, show_opts, + false); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *", + "const char *", "const char *") +static int +attribute_default(pcmk__output_t *out, va_list args) +{ + const char *scope = va_arg(args, const char *); + const char *instance = va_arg(args, const char *); + const char *name = va_arg(args, const char *); + const char *value = va_arg(args, const char *); + const char *host = va_arg(args, const char *); + + GString *s = g_string_sized_new(50); + + if (!pcmk__str_empty(scope)) { + pcmk__g_strcat(s, "scope=\"", scope, "\" ", NULL); + } + + if (!pcmk__str_empty(instance)) { + pcmk__g_strcat(s, "id=\"", instance, "\" ", NULL); + } + + pcmk__g_strcat(s, "name=\"", pcmk__s(name, ""), "\" ", NULL); + + if (!pcmk__str_empty(host)) { + pcmk__g_strcat(s, "host=\"", host, "\" ", NULL); + } + + pcmk__g_strcat(s, "value=\"", pcmk__s(value, ""), "\"", NULL); + + out->info(out, "%s", s->str); + g_string_free(s, TRUE); + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *", + "const char *", "const char *") +static int +attribute_xml(pcmk__output_t *out, va_list args) +{ + const char *scope = va_arg(args, const char *); + const char *instance = va_arg(args, const char *); + const char *name = va_arg(args, const char *); + const char *value = va_arg(args, const char *); + const char *host = va_arg(args, const char *); + + 
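+ /* As an illustrative example (the attribute values are hypothetical), a
+  * call with scope=NULL, instance=NULL, name="ping", value="100", and
+  * host="node1" produces XML like:
+  *
+  *   <attribute name="ping" value="100" host="node1"/>
+  */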
xmlNodePtr node = NULL; + + node = pcmk__output_create_xml_node(out, "attribute", + "name", name, + "value", value ? value : "", + NULL); + + if (!pcmk__str_empty(scope)) { + crm_xml_add(node, "scope", scope); + } + + if (!pcmk__str_empty(instance)) { + crm_xml_add(node, "id", instance); + } + + if (!pcmk__str_empty(host)) { + crm_xml_add(node, "host", host); + } + + return pcmk_rc_ok; +} + +PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *") +static int +rule_check_default(pcmk__output_t *out, va_list args) +{ + const char *rule_id = va_arg(args, const char *); + int result = va_arg(args, int); + const char *error = va_arg(args, const char *); + + switch (result) { + case pcmk_rc_within_range: + return out->info(out, "Rule %s is still in effect", rule_id); + case pcmk_rc_ok: + return out->info(out, "Rule %s satisfies conditions", rule_id); + case pcmk_rc_after_range: + return out->info(out, "Rule %s is expired", rule_id); + case pcmk_rc_before_range: + return out->info(out, "Rule %s has not yet taken effect", rule_id); + case pcmk_rc_op_unsatisfied: + return out->info(out, "Rule %s does not satisfy conditions", + rule_id); + default: + out->err(out, + "Could not determine whether rule %s is in effect: %s", + rule_id, ((error != NULL)? error : "unexpected error")); + return pcmk_rc_ok; + } +} + +PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *") +static int +rule_check_xml(pcmk__output_t *out, va_list args) +{ + const char *rule_id = va_arg(args, const char *); + int result = va_arg(args, int); + const char *error = va_arg(args, const char *); + + char *rc_str = pcmk__itoa(pcmk_rc2exitc(result)); + + pcmk__output_create_xml_node(out, "rule-check", + "rule-id", rule_id, + "rc", rc_str, + NULL); + free(rc_str); + + switch (result) { + case pcmk_rc_within_range: + case pcmk_rc_ok: + case pcmk_rc_after_range: + case pcmk_rc_before_range: + case pcmk_rc_op_unsatisfied: + return pcmk_rc_ok; + default: + out->err(out, + "Could not determine whether rule %s is in effect: %s", + rule_id, ((error != NULL)? error : "unexpected error")); + return pcmk_rc_ok; + } +} + +PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *") +static int +result_code_none(pcmk__output_t *out, va_list args) +{ + return pcmk_rc_no_output; +} + +PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *") +static int +result_code_text(pcmk__output_t *out, va_list args) +{ + int code = va_arg(args, int); + const char *name = va_arg(args, const char *); + const char *desc = va_arg(args, const char *); + + static int code_width = 0; + + if (out->is_quiet(out)) { + /* If out->is_quiet(), don't print the code. Print name and/or desc in a + * compact format for text output, or print nothing at all for none-type + * output. + */ + if ((name != NULL) && (desc != NULL)) { + pcmk__formatted_printf(out, "%s - %s\n", name, desc); + + } else if ((name != NULL) || (desc != NULL)) { + pcmk__formatted_printf(out, "%s\n", ((name != NULL)? name : desc)); + } + return pcmk_rc_ok; + } + + /* Get length of longest (most negative) standard Pacemaker return code + * This should be longer than all the values of any other type of return + * code. 
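+ * As a worked example with a hypothetical count: given pcmk_rc_error of
+ * -1001 and a pcmk__n_rc of 40, most_negative below is -1040, and
+ * snprintf(NULL, 0, "%lld", -1040LL) returns 5, so every code is printed
+ * right-aligned in a 5-character field.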
+ */ + if (code_width == 0) { + long long most_negative = pcmk_rc_error - (long long) pcmk__n_rc + 1; + code_width = (int) snprintf(NULL, 0, "%lld", most_negative); + } + + if ((name != NULL) && (desc != NULL)) { + static int name_width = 0; + + if (name_width == 0) { + // Get length of longest standard Pacemaker return code name + for (int lpc = 0; lpc < pcmk__n_rc; lpc++) { + int len = (int) strlen(pcmk_rc_name(pcmk_rc_error - lpc)); + name_width = QB_MAX(name_width, len); + } + } + return out->info(out, "% *d: %-*s %s", code_width, code, name_width, + name, desc); + } + + if ((name != NULL) || (desc != NULL)) { + return out->info(out, "% *d: %s", code_width, code, + ((name != NULL)? name : desc)); + } + + return out->info(out, "% *d", code_width, code); +} + +PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *") +static int +result_code_xml(pcmk__output_t *out, va_list args) +{ + int code = va_arg(args, int); + const char *name = va_arg(args, const char *); + const char *desc = va_arg(args, const char *); + + char *code_str = pcmk__itoa(code); + + pcmk__output_create_xml_node(out, "result-code", + "code", code_str, + XML_ATTR_NAME, name, + XML_ATTR_DESC, desc, + NULL); + free(code_str); + return pcmk_rc_ok; +} + +static pcmk__message_entry_t fmt_functions[] = { + { "attribute", "default", attribute_default }, + { "attribute", "xml", attribute_xml }, + { "cluster-status", "default", pcmk__cluster_status_text }, + { "cluster-status", "html", cluster_status_html }, + { "cluster-status", "xml", cluster_status_xml }, + { "crmadmin-node", "default", crmadmin_node }, + { "crmadmin-node", "text", crmadmin_node_text }, + { "crmadmin-node", "xml", crmadmin_node_xml }, + { "dc", "default", dc }, + { "dc", "text", dc_text }, + { "dc", "xml", dc_xml }, + { "digests", "default", digests_text }, + { "digests", "xml", digests_xml }, + { "health", "default", health }, + { "health", "text", health_text }, + { "health", "xml", health_xml }, + { "inject-attr", "default", inject_attr }, + { "inject-attr", "xml", inject_attr_xml }, + { "inject-cluster-action", "default", inject_cluster_action }, + { "inject-cluster-action", "xml", inject_cluster_action_xml }, + { "inject-fencing-action", "default", inject_fencing_action }, + { "inject-fencing-action", "xml", inject_fencing_action_xml }, + { "inject-modify-config", "default", inject_modify_config }, + { "inject-modify-config", "xml", inject_modify_config_xml }, + { "inject-modify-node", "default", inject_modify_node }, + { "inject-modify-node", "xml", inject_modify_node_xml }, + { "inject-modify-ticket", "default", inject_modify_ticket }, + { "inject-modify-ticket", "xml", inject_modify_ticket_xml }, + { "inject-pseudo-action", "default", inject_pseudo_action }, + { "inject-pseudo-action", "xml", inject_pseudo_action_xml }, + { "inject-rsc-action", "default", inject_rsc_action }, + { "inject-rsc-action", "xml", inject_rsc_action_xml }, + { "inject-spec", "default", inject_spec }, + { "inject-spec", "xml", inject_spec_xml }, + { "locations-list", "default", locations_list }, + { "locations-list", "xml", locations_list_xml }, + { "node-action", "default", node_action }, + { "node-action", "xml", node_action_xml }, + { "node-info", "default", node_info_default }, + { "node-info", "xml", node_info_xml }, + { "pacemakerd-health", "default", pacemakerd_health }, + { "pacemakerd-health", "html", pacemakerd_health_html }, + { "pacemakerd-health", "text", pacemakerd_health_text }, + { "pacemakerd-health", "xml", pacemakerd_health_xml }, + { 
"profile", "default", profile_default, }, + { "profile", "xml", profile_xml }, + { "result-code", "none", result_code_none }, + { "result-code", "text", result_code_text }, + { "result-code", "xml", result_code_xml }, + { "rsc-action", "default", rsc_action_default }, + { "rsc-action-item", "default", rsc_action_item }, + { "rsc-action-item", "xml", rsc_action_item_xml }, + { "rsc-is-colocated-with-list", "default", rsc_is_colocated_with_list }, + { "rsc-is-colocated-with-list", "xml", rsc_is_colocated_with_list_xml }, + { "rscs-colocated-with-list", "default", rscs_colocated_with_list }, + { "rscs-colocated-with-list", "xml", rscs_colocated_with_list_xml }, + { "rule-check", "default", rule_check_default }, + { "rule-check", "xml", rule_check_xml }, + { "locations-and-colocations", "default", locations_and_colocations }, + { "locations-and-colocations", "xml", locations_and_colocations_xml }, + + { NULL, NULL, NULL } +}; + +void +pcmk__register_lib_messages(pcmk__output_t *out) { + pcmk__register_messages(out, fmt_functions); +} diff --git a/lib/pacemaker/pcmk_resource.c b/lib/pacemaker/pcmk_resource.c new file mode 100644 index 0000000..ee4c904 --- /dev/null +++ b/lib/pacemaker/pcmk_resource.c @@ -0,0 +1,173 @@ +/* + * Copyright 2021-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <errno.h> +#include <glib.h> +#include <libxml/tree.h> + +#include <crm/common/mainloop.h> +#include <crm/common/results.h> +#include <crm/common/output_internal.h> +#include <crm/pengine/internal.h> + +#include <pacemaker.h> +#include <pacemaker-internal.h> + +// Search path for resource operation history (takes node name and resource ID) +#define XPATH_OP_HISTORY "//" XML_CIB_TAG_STATUS \ + "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ + "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" + +static xmlNode * +best_op(const pe_resource_t *rsc, const pe_node_t *node, + pe_working_set_t *data_set) +{ + char *xpath = NULL; + xmlNode *history = NULL; + xmlNode *best = NULL; + bool best_effective_op = false; + guint best_interval = 0; + bool best_failure = false; + const char *best_digest = NULL; + + // Find node's resource history + xpath = crm_strdup_printf(XPATH_OP_HISTORY, node->details->uname, rsc->id); + history = get_xpath_object(xpath, data_set->input, LOG_NEVER); + free(xpath); + + // Examine each history entry + for (xmlNode *lrm_rsc_op = first_named_child(history, XML_LRM_TAG_RSC_OP); + lrm_rsc_op != NULL; lrm_rsc_op = crm_next_same_xml(lrm_rsc_op)) { + + const char *digest = crm_element_value(lrm_rsc_op, + XML_LRM_ATTR_RESTART_DIGEST); + guint interval_ms = 0; + const char *task = crm_element_value(lrm_rsc_op, XML_LRM_ATTR_TASK); + bool effective_op = false; + bool failure = pcmk__ends_with(ID(lrm_rsc_op), "_last_failure_0"); + + + crm_element_value_ms(lrm_rsc_op, XML_LRM_ATTR_INTERVAL, &interval_ms); + effective_op = interval_ms == 0 + && pcmk__strcase_any_of(task, RSC_STATUS, + RSC_START, RSC_PROMOTE, + RSC_MIGRATED, NULL); + + if (best == NULL) { + goto is_best; + } + + if (best_effective_op) { + // Do not use an ineffective op if there's an effective one. + if (!effective_op) { + continue; + } + // Do not use an ineffective non-recurring op if there's a recurring one. 
+ } else if (best_interval != 0 + && !effective_op + && interval_ms == 0) { + continue; + } + + // Do not use last failure if there's a successful one. + if (!best_failure && failure) { + continue; + } + + // Do not use an op without a restart digest if there's one with. + if (best_digest != NULL && digest == NULL) { + continue; + } + + // Do not use an older op if there's a newer one. + if (pe__is_newer_op(best, lrm_rsc_op, true) > 0) { + continue; + } + +is_best: + best = lrm_rsc_op; + best_effective_op = effective_op; + best_interval = interval_ms; + best_failure = failure; + best_digest = digest; + } + return best; +} + +/*! + * \internal + * \brief Calculate and output resource operation digests + * + * \param[in,out] out Output object + * \param[in,out] rsc Resource to calculate digests for + * \param[in] node Node whose operation history should be used + * \param[in] overrides Hash table of configuration parameters to override + * + * \return Standard Pacemaker return code + */ +int +pcmk__resource_digests(pcmk__output_t *out, pe_resource_t *rsc, + const pe_node_t *node, GHashTable *overrides) +{ + const char *task = NULL; + xmlNode *xml_op = NULL; + op_digest_cache_t *digests = NULL; + guint interval_ms = 0; + int rc = pcmk_rc_ok; + + if ((out == NULL) || (rsc == NULL) || (node == NULL)) { + return EINVAL; + } + if (rsc->variant != pe_native) { + // Only primitives get operation digests + return EOPNOTSUPP; + } + + // Find XML of operation history to use + xml_op = best_op(rsc, node, rsc->cluster); + + // Generate an operation key + if (xml_op != NULL) { + task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); + } + if (task == NULL) { // Assume start if no history is available + task = RSC_START; + interval_ms = 0; + } + + // Calculate and show digests + digests = pe__calculate_digests(rsc, task, &interval_ms, node, xml_op, + overrides, true, rsc->cluster); + rc = out->message(out, "digests", rsc, node, task, interval_ms, digests); + + pe__free_digests(digests); + return rc; +} + +int +pcmk_resource_digests(xmlNodePtr *xml, pe_resource_t *rsc, + const pe_node_t *node, GHashTable *overrides, + pe_working_set_t *data_set) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + pcmk__register_lib_messages(out); + rc = pcmk__resource_digests(out, rsc, node, overrides); + pcmk__xml_output_finish(out, xml); + return rc; +} diff --git a/lib/pacemaker/pcmk_result_code.c b/lib/pacemaker/pcmk_result_code.c new file mode 100644 index 0000000..4f50276 --- /dev/null +++ b/lib/pacemaker/pcmk_result_code.c @@ -0,0 +1,167 @@ +/* + * Copyright 2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/cib/internal.h> +#include <crm/crm.h> + +#include <pacemaker.h> +#include <pacemaker-internal.h> + +#include <inttypes.h> // PRIx32 +#include <stdint.h> // uint32_t + +/*! + * \internal + * \brief Display the name and/or description of a result code + * + * \param[in,out] out Output object + * \param[in] code The result code + * \param[in] type Interpret \c code as this type of result code. + * Supported values: \c pcmk_result_legacy, + * \c pcmk_result_rc, \c pcmk_result_exitcode. 
+ * \param[in] flags Group of \c pcmk_rc_disp_flags
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__show_result_code(pcmk__output_t *out, int code,
+ enum pcmk_result_type type, uint32_t flags)
+{
+ int rc = pcmk_rc_ok;
+ bool quiet_orig = out->quiet;
+ const char *name = NULL;
+ const char *desc = NULL;
+
+ rc = pcmk_result_get_strings(code, type, &name, &desc);
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "Error looking up result code %d", code);
+ return rc;
+ }
+
+ // out->quiet controls whether the code is shown (if quiet is supported)
+ out->quiet = !pcmk_is_set(flags, pcmk_rc_disp_code);
+
+ out->message(out, "result-code", code,
+ pcmk_is_set(flags, pcmk_rc_disp_name)? name : NULL,
+ pcmk_is_set(flags, pcmk_rc_disp_desc)? desc : NULL);
+ out->quiet = quiet_orig;
+
+ return rc;
+}
+
+// Documented in header
+int
+pcmk_show_result_code(xmlNodePtr *xml, int code, enum pcmk_result_type type,
+ uint32_t flags)
+{
+ pcmk__output_t *out = NULL;
+ int rc = pcmk_rc_ok;
+
+ rc = pcmk__xml_output_new(&out, xml);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+
+ pcmk__register_lib_messages(out);
+
+ rc = pcmk__show_result_code(out, code, type, flags);
+ pcmk__xml_output_finish(out, xml);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief List all valid result codes in a particular family
+ *
+ * \param[in,out] out Output object
+ * \param[in] type The family of result codes to list. Supported
+ * values: \c pcmk_result_legacy, \c pcmk_result_rc,
+ * \c pcmk_result_exitcode.
+ * \param[in] flags Group of \c pcmk_rc_disp_flags
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__list_result_codes(pcmk__output_t *out, enum pcmk_result_type type,
+ uint32_t flags)
+{
+ int rc = pcmk_rc_ok;
+ int start = 0;
+ int end = 0;
+ int code = 0;
+
+ bool quiet_orig = out->quiet;
+ const char *name = NULL;
+ const char *desc = NULL;
+
+ rc = pcmk__result_bounds(type, &start, &end);
+ if (rc != pcmk_rc_ok) {
+ out->err(out,
+ "Failed to get result code bounds for result code type "
+ "%#010" PRIx32, (uint32_t) type);
+ return rc;
+ }
+
+ code = start;
+ while (code <= end) {
+ int local_rc = pcmk_rc_ok;
+
+ if (code == (pcmk_rc_error + 1)) {
+ /* Values between pcmk_rc_error and pcmk_rc_ok are reserved for
+ * callers, so skip them
+ */
+ code = pcmk_rc_ok;
+ continue;
+ }
+
+ // Shouldn't affect the return code of the whole list operation
+ local_rc = pcmk_result_get_strings(code, type, &name, &desc);
+
+ if ((local_rc != pcmk_rc_ok) || (name == NULL)
+ || pcmk__str_any_of(name, "Unknown", "CRM_EX_UNKNOWN", NULL)) {
+
+ code++;
+ continue;
+ }
+
+ // out->quiet controls whether the code is shown (if quiet is supported)
+ out->quiet = !pcmk_is_set(flags, pcmk_rc_disp_code);
+
+ out->message(out, "result-code", code,
+ pcmk_is_set(flags, pcmk_rc_disp_name)? name : NULL,
+ pcmk_is_set(flags, pcmk_rc_disp_desc)?
desc : NULL); + out->quiet = quiet_orig; + + code++; + } + + return rc; +} + +// Documented in header +int +pcmk_list_result_codes(xmlNodePtr *xml, enum pcmk_result_type type, + uint32_t flags) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + pcmk__register_lib_messages(out); + + rc = pcmk__list_result_codes(out, type, flags); + pcmk__xml_output_finish(out, xml); + return rc; +} diff --git a/lib/pacemaker/pcmk_rule.c b/lib/pacemaker/pcmk_rule.c new file mode 100644 index 0000000..b8ca453 --- /dev/null +++ b/lib/pacemaker/pcmk_rule.c @@ -0,0 +1,295 @@ +/* + * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/cib/internal.h> +#include <crm/common/cib.h> +#include <crm/common/iso8601.h> +#include <crm/msg_xml.h> +#include <crm/pengine/rules_internal.h> +#include <pacemaker-internal.h> + +/*! + * \internal + * \brief Evaluate a date expression for a specific time + * + * \param[in] expr date_expression XML + * \param[in] now Time for which to evaluate expression + * + * \return Standard Pacemaker return code + */ +static int +eval_date_expression(const xmlNode *expr, crm_time_t *now) +{ + pe_rule_eval_data_t rule_data = { + .node_hash = NULL, + .role = RSC_ROLE_UNKNOWN, + .now = now, + .match_data = NULL, + .rsc_data = NULL, + .op_data = NULL + }; + + return pe__eval_date_expr(expr, &rule_data, NULL); +} + +/*! + * \internal + * \brief Initialize the cluster working set for checking rules + * + * Make our own copies of the CIB XML and date/time object, if they're not + * \c NULL. This way we don't have to take ownership of the objects passed via + * the API. 
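+ * For example, a caller may pass the live CIB XML and a fixed crm_time_t;
+ * neither is modified or freed here, because the working set operates
+ * only on its own copies.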
+ * + * \param[in,out] out Output object + * \param[in] input The CIB XML to check (if \c NULL, use current CIB) + * \param[in] date Check whether the rule is in effect at this date + * and time (if \c NULL, use current date and time) + * \param[out] data_set Where to store the cluster working set + * + * \return Standard Pacemaker return code + */ +static int +init_rule_check(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date, + pe_working_set_t **data_set) +{ + // Allows for cleaner syntax than dereferencing the data_set argument + pe_working_set_t *new_data_set = NULL; + + new_data_set = pe_new_working_set(); + if (new_data_set == NULL) { + return ENOMEM; + } + + pe__set_working_set_flags(new_data_set, + pe_flag_no_counts|pe_flag_no_compat); + + // Populate the working set instance + + // Make our own copy of the given input or fetch the CIB and use that + if (input != NULL) { + new_data_set->input = copy_xml(input); + if (new_data_set->input == NULL) { + out->err(out, "Failed to copy input XML"); + pe_free_working_set(new_data_set); + return ENOMEM; + } + + } else { + int rc = cib__signon_query(out, NULL, &(new_data_set->input)); + + if (rc != pcmk_rc_ok) { + pe_free_working_set(new_data_set); + return rc; + } + } + + // Make our own copy of the given crm_time_t object; otherwise + // cluster_status() populates with the current time + if (date != NULL) { + // pcmk_copy_time() guarantees non-NULL + new_data_set->now = pcmk_copy_time(date); + } + + // Unpack everything + cluster_status(new_data_set); + *data_set = new_data_set; + + return pcmk_rc_ok; +} + +#define XPATH_NODE_RULE "//" XML_TAG_RULE "[@" XML_ATTR_ID "='%s']" + +/*! + * \internal + * \brief Check whether a given rule is in effect + * + * \param[in] data_set Cluster working set + * \param[in] rule_id The ID of the rule to check + * \param[out] error Where to store a rule evaluation error message + * + * \return Standard Pacemaker return code + */ +static int +eval_rule(pe_working_set_t *data_set, const char *rule_id, const char **error) +{ + xmlNodePtr cib_constraints = NULL; + xmlNodePtr match = NULL; + xmlXPathObjectPtr xpath_obj = NULL; + char *xpath = NULL; + int rc = pcmk_rc_ok; + int num_results = 0; + + *error = NULL; + + /* Rules are under the constraints node in the XML, so first find that. */ + cib_constraints = pcmk_find_cib_element(data_set->input, + XML_CIB_TAG_CONSTRAINTS); + + /* Get all rules matching the given ID that are also simple enough for us + * to check. For the moment, these rules must only have a single + * date_expression child and: + * - Do not have a date_spec operation, or + * - Have a date_spec operation that contains years= but does not contain + * moon=. + * + * We do this in steps to provide better error messages. First, check that + * there's any rule with the given ID. + */ + xpath = crm_strdup_printf(XPATH_NODE_RULE, rule_id); + xpath_obj = xpath_search(cib_constraints, xpath); + num_results = numXpathResults(xpath_obj); + + free(xpath); + freeXpathObject(xpath_obj); + + if (num_results == 0) { + *error = "Rule not found"; + return ENXIO; + } + + if (num_results > 1) { + // Should not be possible; schema prevents this + *error = "Found more than one rule with matching ID"; + return pcmk_rc_duplicate_id; + } + + /* Next, make sure it has exactly one date_expression. 
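+ * As an illustration, for a rule_id of "rule1" the XPath built below
+ * evaluates to //rule[@id='rule1']//date_expression (given the usual
+ * expansions of XML_TAG_RULE and XML_ATTR_ID).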
*/ + xpath = crm_strdup_printf(XPATH_NODE_RULE "//date_expression", rule_id); + xpath_obj = xpath_search(cib_constraints, xpath); + num_results = numXpathResults(xpath_obj); + + free(xpath); + freeXpathObject(xpath_obj); + + if (num_results != 1) { + if (num_results == 0) { + *error = "Rule does not have a date expression"; + } else { + *error = "Rule has more than one date expression"; + } + return EOPNOTSUPP; + } + + /* Then, check that it's something we actually support. */ + xpath = crm_strdup_printf(XPATH_NODE_RULE "//date_expression[" + "@" XML_EXPR_ATTR_OPERATION "!='date_spec']", + rule_id); + xpath_obj = xpath_search(cib_constraints, xpath); + num_results = numXpathResults(xpath_obj); + + free(xpath); + + if (num_results == 0) { + freeXpathObject(xpath_obj); + + xpath = crm_strdup_printf(XPATH_NODE_RULE "//date_expression[" + "@" XML_EXPR_ATTR_OPERATION "='date_spec' " + "and date_spec/@years " + "and not(date_spec/@moon)]", rule_id); + xpath_obj = xpath_search(cib_constraints, xpath); + num_results = numXpathResults(xpath_obj); + + free(xpath); + + if (num_results == 0) { + freeXpathObject(xpath_obj); + *error = "Rule must either not use date_spec, or use date_spec " + "with years= but not moon="; + return EOPNOTSUPP; + } + } + + match = getXpathResult(xpath_obj, 0); + + /* We should have ensured this with the xpath query above, but double- + * checking can't hurt. + */ + CRM_ASSERT(match != NULL); + CRM_ASSERT(find_expression_type(match) == time_expr); + + rc = eval_date_expression(match, data_set->now); + if (rc == pcmk_rc_undetermined) { + /* pe__eval_date_expr() should return this only if something is + * malformed or missing + */ + *error = "Error parsing rule"; + } + + freeXpathObject(xpath_obj); + return rc; +} + +/*! + * \internal + * \brief Check whether each rule in a list is in effect + * + * \param[in,out] out Output object + * \param[in] input The CIB XML to check (if \c NULL, use current CIB) + * \param[in] date Check whether the rule is in effect at this date and + * time (if \c NULL, use current date and time) + * \param[in] rule_ids The IDs of the rules to check, as a <tt>NULL</tt>- + * terminated list. 
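+ *
+ * A minimal calling sketch via the public wrapper pcmk_check_rules()
+ * defined below (the rule IDs here are hypothetical):
+ *
+ *   const char *ids[] = { "rule1", "rule2", NULL };
+ *   xmlNodePtr out_xml = NULL;
+ *   int rc = pcmk_check_rules(&out_xml, NULL, NULL, ids); // live CIB, now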
+ * + * \return Standard Pacemaker return code + */ +int +pcmk__check_rules(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date, + const char **rule_ids) +{ + pe_working_set_t *data_set = NULL; + int rc = pcmk_rc_ok; + + CRM_ASSERT(out != NULL); + + if (rule_ids == NULL) { + // Trivial case; every rule specified is in effect + return pcmk_rc_ok; + } + + rc = init_rule_check(out, input, date, &data_set); + if (rc != pcmk_rc_ok) { + return rc; + } + + for (const char **rule_id = rule_ids; *rule_id != NULL; rule_id++) { + const char *error = NULL; + int last_rc = eval_rule(data_set, *rule_id, &error); + + out->message(out, "rule-check", *rule_id, last_rc, error); + + if (last_rc != pcmk_rc_ok) { + rc = last_rc; + } + } + + pe_free_working_set(data_set); + return rc; +} + +// Documented in pacemaker.h +int +pcmk_check_rules(xmlNodePtr *xml, xmlNodePtr input, const crm_time_t *date, + const char **rule_ids) +{ + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; + + rc = pcmk__xml_output_new(&out, xml); + if (rc != pcmk_rc_ok) { + return rc; + } + + pcmk__register_lib_messages(out); + + rc = pcmk__check_rules(out, input, date, rule_ids); + pcmk__xml_output_finish(out, xml); + return rc; +} diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c new file mode 100644 index 0000000..06d7f00 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_actions.c @@ -0,0 +1,1919 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdio.h> +#include <sys/param.h> +#include <glib.h> + +#include <crm/lrmd_internal.h> +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +/*! + * \internal + * \brief Get the action flags relevant to ordering constraints + * + * \param[in,out] action Action to check + * \param[in] node Node that *other* action in the ordering is on + * (used only for clone resource actions) + * + * \return Action flags that should be used for orderings + */ +static enum pe_action_flags +action_flags_for_ordering(pe_action_t *action, const pe_node_t *node) +{ + bool runnable = false; + enum pe_action_flags flags; + + // For non-resource actions, return the action flags + if (action->rsc == NULL) { + return action->flags; + } + + /* For non-clone resources, or a clone action not assigned to a node, + * return the flags as determined by the resource method without a node + * specified. + */ + flags = action->rsc->cmds->action_flags(action, NULL); + if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) { + return flags; + } + + /* Otherwise (i.e., for clone resource actions on a specific node), first + * remember whether the non-node-specific action is runnable. + */ + runnable = pcmk_is_set(flags, pe_action_runnable); + + // Then recheck the resource method with the node + flags = action->rsc->cmds->action_flags(action, node); + + /* For clones in ordering constraints, the node-specific "runnable" doesn't + * matter, just the non-node-specific setting (i.e., is the action runnable + * anywhere). + * + * This applies only to runnable, and only for ordering constraints. This + * function shouldn't be used for other types of constraints without + * changes. Not very satisfying, but it's logical and appears to work well. 
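+ * For example, if a clone instance's action is unrunnable on this node
+ * but some instance of the clone can run it elsewhere, the flags returned
+ * here keep pe_action_runnable set, so orderings against the clone are
+ * not needlessly blocked.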
+ */ + if (runnable && !pcmk_is_set(flags, pe_action_runnable)) { + pe__set_raw_action_flags(flags, action->rsc->id, + pe_action_runnable); + } + return flags; +} + +/*! + * \internal + * \brief Get action UUID that should be used with a resource ordering + * + * When an action is ordered relative to an action for a collective resource + * (clone, group, or bundle), it actually needs to be ordered after all + * instances of the collective have completed the relevant action (for example, + * given "start CLONE then start RSC", RSC must wait until all instances of + * CLONE have started). Given the UUID and resource of the first action in an + * ordering, this returns the UUID of the action that should actually be used + * for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0"). + * + * \param[in] first_uuid UUID of first action in ordering + * \param[in] first_rsc Resource of first action in ordering + * + * \return Newly allocated copy of UUID to use with ordering + * \note It is the caller's responsibility to free the return value. + */ +static char * +action_uuid_for_ordering(const char *first_uuid, const pe_resource_t *first_rsc) +{ + guint interval_ms = 0; + char *uuid = NULL; + char *rid = NULL; + char *first_task_str = NULL; + enum action_tasks first_task = no_action; + enum action_tasks remapped_task = no_action; + + // Only non-notify actions for collective resources need remapping + if ((strstr(first_uuid, "notify") != NULL) + || (first_rsc->variant < pe_group)) { + goto done; + } + + // Only non-recurring actions need remapping + CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms)); + if (interval_ms > 0) { + goto done; + } + + first_task = text2task(first_task_str); + switch (first_task) { + case stop_rsc: + case start_rsc: + case action_notify: + case action_promote: + case action_demote: + remapped_task = first_task + 1; + break; + case stopped_rsc: + case started_rsc: + case action_notified: + case action_promoted: + case action_demoted: + remapped_task = first_task; + break; + case monitor_rsc: + case shutdown_crm: + case stonith_node: + break; + default: + crm_err("Unknown action '%s' in ordering", first_task_str); + break; + } + + if (remapped_task != no_action) { + /* If a (clone) resource has notifications enabled, we want to order + * relative to when all notifications have been sent for the remapped + * task. Only outermost resources or those in bundles have + * notifications. + */ + if (pcmk_is_set(first_rsc->flags, pe_rsc_notify) + && ((first_rsc->parent == NULL) + || (pe_rsc_is_clone(first_rsc) + && (first_rsc->parent->variant == pe_container)))) { + uuid = pcmk__notify_key(rid, "confirmed-post", + task2text(remapped_task)); + } else { + uuid = pcmk__op_key(rid, task2text(remapped_task), 0); + } + pe_rsc_trace(first_rsc, + "Remapped action UUID %s to %s for ordering purposes", + first_uuid, uuid); + } + +done: + if (uuid == NULL) { + uuid = strdup(first_uuid); + CRM_ASSERT(uuid != NULL); + } + free(first_task_str); + free(rid); + return uuid; +} + +/*! + * \internal + * \brief Get actual action that should be used with an ordering + * + * When an action is ordered relative to an action for a collective resource + * (clone, group, or bundle), it actually needs to be ordered after all + * instances of the collective have completed the relevant action (for example, + * given "start CLONE then start RSC", RSC must wait until all instances of + * CLONE have started). 
Given the first action in an ordering, this returns the + * the action that should actually be used for ordering (for example, the + * started action instead of the start action). + * + * \param[in] action First action in an ordering + * + * \return Actual action that should be used for the ordering + */ +static pe_action_t * +action_for_ordering(pe_action_t *action) +{ + pe_action_t *result = action; + pe_resource_t *rsc = action->rsc; + + if ((rsc != NULL) && (rsc->variant >= pe_group) && (action->uuid != NULL)) { + char *uuid = action_uuid_for_ordering(action->uuid, rsc); + + result = find_first_action(rsc->actions, uuid, NULL, NULL); + if (result == NULL) { + crm_warn("Not remapping %s to %s because %s does not have " + "remapped action", action->uuid, uuid, rsc->id); + result = action; + } + free(uuid); + } + return result; +} + +/*! + * \internal + * \brief Update flags for ordering's actions appropriately for ordering's flags + * + * \param[in,out] first First action in an ordering + * \param[in,out] then Then action in an ordering + * \param[in] first_flags Action flags for \p first for ordering purposes + * \param[in] then_flags Action flags for \p then for ordering purposes + * \param[in,out] order Action wrapper for \p first in ordering + * \param[in,out] data_set Cluster working set + * + * \return Group of enum pcmk__updated flags + */ +static uint32_t +update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then, + enum pe_action_flags first_flags, + enum pe_action_flags then_flags, + pe_action_wrapper_t *order, + pe_working_set_t *data_set) +{ + uint32_t changed = pcmk__updated_none; + + /* The node will only be used for clones. If interleaved, node will be NULL, + * otherwise the ordering scope will be limited to the node. Normally, the + * whole 'then' clone should restart if 'first' is restarted, so then->node + * is needed. + */ + pe_node_t *node = then->node; + + if (pcmk_is_set(order->type, pe_order_implies_then_on_node)) { + /* For unfencing, only instances of 'then' on the same node as 'first' + * (the unfencing operation) should restart, so reset node to + * first->node, at which point this case is handled like a normal + * pe_order_implies_then. + */ + pe__clear_order_flags(order->type, pe_order_implies_then_on_node); + pe__set_order_flags(order->type, pe_order_implies_then); + node = first->node; + pe_rsc_trace(then->rsc, + "%s then %s: mapped pe_order_implies_then_on_node to " + "pe_order_implies_then on %s", + first->uuid, then->uuid, pe__node_name(node)); + } + + if (pcmk_is_set(order->type, pe_order_implies_then)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags & pe_action_optional, + pe_action_optional, + pe_order_implies_then, + data_set); + } else if (!pcmk_is_set(first_flags, pe_action_optional) + && pcmk_is_set(then->flags, pe_action_optional)) { + pe__clear_action_flags(then, pe_action_optional); + pcmk__set_updated_flags(changed, first, pcmk__updated_then); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_then", + first->uuid, then->uuid, + (changed? 
"changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_restart) && (then->rsc != NULL)) { + enum pe_action_flags restart = pe_action_optional|pe_action_runnable; + + changed |= then->rsc->cmds->update_ordered_actions(first, then, node, + first_flags, restart, + pe_order_restart, + data_set); + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_restart", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_implies_first)) { + if (first->rsc != NULL) { + changed |= first->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_optional, + pe_order_implies_first, + data_set); + } else if (!pcmk_is_set(first_flags, pe_action_optional) + && pcmk_is_set(first->flags, pe_action_runnable)) { + pe__clear_action_flags(first, pe_action_runnable); + pcmk__set_updated_flags(changed, first, pcmk__updated_first); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_first", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_promoted_implies_first)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags & pe_action_optional, + pe_action_optional, + pe_order_promoted_implies_first, + data_set); + } + pe_rsc_trace(then->rsc, + "%s then %s: %s after pe_order_promoted_implies_first", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_one_or_more)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_runnable, + pe_order_one_or_more, + data_set); + + } else if (pcmk_is_set(first_flags, pe_action_runnable)) { + // We have another runnable instance of "first" + then->runnable_before++; + + /* Mark "then" as runnable if it requires a certain number of + * "before" instances to be runnable, and they now are. + */ + if ((then->runnable_before >= then->required_runnable_before) + && !pcmk_is_set(then->flags, pe_action_runnable)) { + + pe__set_action_flags(then, pe_action_runnable); + pcmk__set_updated_flags(changed, first, pcmk__updated_then); + } + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_probe) && (then->rsc != NULL)) { + if (!pcmk_is_set(first_flags, pe_action_runnable) + && (first->rsc->running_on != NULL)) { + + pe_rsc_trace(then->rsc, + "%s then %s: ignoring because first is stopping", + first->uuid, then->uuid); + order->type = pe_order_none; + } else { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_runnable, + pe_order_runnable_left, + data_set); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_probe", + first->uuid, then->uuid, + (changed? 
"changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_runnable_left)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_runnable, + pe_order_runnable_left, + data_set); + + } else if (!pcmk_is_set(first_flags, pe_action_runnable) + && pcmk_is_set(then->flags, pe_action_runnable)) { + + pe__clear_action_flags(then, pe_action_runnable); + pcmk__set_updated_flags(changed, first, pcmk__updated_then); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_runnable_left", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_implies_first_migratable)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_optional, + pe_order_implies_first_migratable, + data_set); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after " + "pe_order_implies_first_migratable", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_pseudo_left)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_optional, + pe_order_pseudo_left, + data_set); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_pseudo_left", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_optional)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_runnable, + pe_order_optional, + data_set); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_optional", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + if (pcmk_is_set(order->type, pe_order_asymmetrical)) { + if (then->rsc != NULL) { + changed |= then->rsc->cmds->update_ordered_actions(first, then, + node, + first_flags, + pe_action_runnable, + pe_order_asymmetrical, + data_set); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical", + first->uuid, then->uuid, + (changed? 
"changed" : "unchanged")); + } + + if (pcmk_is_set(first->flags, pe_action_runnable) + && pcmk_is_set(order->type, pe_order_implies_then_printed) + && !pcmk_is_set(first_flags, pe_action_optional)) { + + pe_rsc_trace(then->rsc, "%s will be in graph because %s is required", + then->uuid, first->uuid); + pe__set_action_flags(then, pe_action_print_always); + // Don't bother marking 'then' as changed just for this + } + + if (pcmk_is_set(order->type, pe_order_implies_first_printed) + && !pcmk_is_set(then_flags, pe_action_optional)) { + + pe_rsc_trace(then->rsc, "%s will be in graph because %s is required", + first->uuid, then->uuid); + pe__set_action_flags(first, pe_action_print_always); + // Don't bother marking 'first' as changed just for this + } + + if (pcmk_any_flags_set(order->type, pe_order_implies_then + |pe_order_implies_first + |pe_order_restart) + && (first->rsc != NULL) + && !pcmk_is_set(first->rsc->flags, pe_rsc_managed) + && pcmk_is_set(first->rsc->flags, pe_rsc_block) + && !pcmk_is_set(first->flags, pe_action_runnable) + && pcmk__str_eq(first->task, RSC_STOP, pcmk__str_casei)) { + + if (pcmk_is_set(then->flags, pe_action_runnable)) { + pe__clear_action_flags(then, pe_action_runnable); + pcmk__set_updated_flags(changed, first, pcmk__updated_then); + } + pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first " + "is blocked, unmanaged, unrunnable stop", + first->uuid, then->uuid, + (changed? "changed" : "unchanged")); + } + + return changed; +} + +// Convenience macros for logging action properties + +#define action_type_str(flags) \ + (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action") + +#define action_optional_str(flags) \ + (pcmk_is_set((flags), pe_action_optional)? "optional" : "required") + +#define action_runnable_str(flags) \ + (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable") + +#define action_node_str(a) \ + (((a)->node == NULL)? "no node" : (a)->node->details->uname) + +/*! + * \internal + * \brief Update an action's flags for all orderings where it is "then" + * + * \param[in,out] then Action to update + * \param[in,out] data_set Cluster working set + */ +void +pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set) +{ + GList *lpc = NULL; + uint32_t changed = pcmk__updated_none; + int last_flags = then->flags; + + pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s", + action_type_str(then->flags), then->uuid, + action_optional_str(then->flags), + action_runnable_str(then->flags), action_node_str(then)); + + if (pcmk_is_set(then->flags, pe_action_requires_any)) { + /* Initialize current known "runnable before" actions. As + * update_action_for_ordering_flags() is called for each of then's + * before actions, this number will increment as runnable 'first' + * actions are encountered. + */ + then->runnable_before = 0; + + if (then->required_runnable_before == 0) { + /* @COMPAT This ordering constraint uses the deprecated + * "require-all=false" attribute. Treat it like "clone-min=1". + */ + then->required_runnable_before = 1; + } + + /* The pe_order_one_or_more clause of update_action_for_ordering_flags() + * (called below) will reset runnable if appropriate. 
+ */ + pe__clear_action_flags(then, pe_action_runnable); + } + + for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { + pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data; + pe_action_t *first = other->action; + + pe_node_t *then_node = then->node; + pe_node_t *first_node = first->node; + + if ((first->rsc != NULL) + && (first->rsc->variant == pe_group) + && pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) { + + first_node = first->rsc->fns->location(first->rsc, NULL, FALSE); + if (first_node != NULL) { + pe_rsc_trace(first->rsc, "Found %s for 'first' %s", + pe__node_name(first_node), first->uuid); + } + } + + if ((then->rsc != NULL) + && (then->rsc->variant == pe_group) + && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) { + + then_node = then->rsc->fns->location(then->rsc, NULL, FALSE); + if (then_node != NULL) { + pe_rsc_trace(then->rsc, "Found %s for 'then' %s", + pe__node_name(then_node), then->uuid); + } + } + + // Disable constraint if it only applies when on same node, but isn't + if (pcmk_is_set(other->type, pe_order_same_node) + && (first_node != NULL) && (then_node != NULL) + && (first_node->details != then_node->details)) { + + pe_rsc_trace(then->rsc, + "Disabled ordering %s on %s then %s on %s: not same node", + other->action->uuid, pe__node_name(first_node), + then->uuid, pe__node_name(then_node)); + other->type = pe_order_none; + continue; + } + + pcmk__clear_updated_flags(changed, then, pcmk__updated_first); + + if ((first->rsc != NULL) + && pcmk_is_set(other->type, pe_order_then_cancels_first) + && !pcmk_is_set(then->flags, pe_action_optional)) { + + /* 'then' is required, so we must abandon 'first' + * (e.g. a required stop cancels any agent reload). + */ + pe__set_action_flags(other->action, pe_action_optional); + if (!strcmp(first->task, CRMD_ACTION_RELOAD_AGENT)) { + pe__clear_resource_flags(first->rsc, pe_rsc_reload); + } + } + + if ((first->rsc != NULL) && (then->rsc != NULL) + && (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) { + first = action_for_ordering(first); + } + if (first != other->action) { + pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s", + then->uuid, first->uuid, other->action->uuid); + } + + pe_rsc_trace(then->rsc, + "%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s", + first->uuid, first->flags, then->uuid, then->flags, + other->type, action_node_str(first)); + + if (first == other->action) { + /* 'first' was not remapped (e.g. from 'start' to 'running'), which + * could mean it is a non-resource action, a primitive resource + * action, or already expanded. 
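+ * (When remapping does happen, it is performed by action_for_ordering()
+ * earlier in this loop.)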
+ */ + enum pe_action_flags first_flags, then_flags; + + first_flags = action_flags_for_ordering(first, then_node); + then_flags = action_flags_for_ordering(then, first_node); + + changed |= update_action_for_ordering_flags(first, then, + first_flags, then_flags, + other, data_set); + + /* 'first' was for a complex resource (clone, group, etc), + * create a new dependency if necessary + */ + } else if (order_actions(first, then, other->type)) { + /* This was the first time 'first' and 'then' were associated, + * start again to get the new actions_before list + */ + pcmk__set_updated_flags(changed, then, pcmk__updated_then); + pe_rsc_trace(then->rsc, + "Disabled ordering %s then %s in favor of %s then %s", + other->action->uuid, then->uuid, first->uuid, + then->uuid); + other->type = pe_order_none; + } + + + if (pcmk_is_set(changed, pcmk__updated_first)) { + crm_trace("Re-processing %s and its 'after' actions " + "because it changed", first->uuid); + for (GList *lpc2 = first->actions_after; lpc2 != NULL; + lpc2 = lpc2->next) { + pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data; + + pcmk__update_action_for_orderings(other->action, data_set); + } + pcmk__update_action_for_orderings(first, data_set); + } + } + + if (pcmk_is_set(then->flags, pe_action_requires_any)) { + if (last_flags == then->flags) { + pcmk__clear_updated_flags(changed, then, pcmk__updated_then); + } else { + pcmk__set_updated_flags(changed, then, pcmk__updated_then); + } + } + + if (pcmk_is_set(changed, pcmk__updated_then)) { + crm_trace("Re-processing %s and its 'after' actions because it changed", + then->uuid); + if (pcmk_is_set(last_flags, pe_action_runnable) + && !pcmk_is_set(then->flags, pe_action_runnable)) { + pcmk__block_colocation_dependents(then, data_set); + } + pcmk__update_action_for_orderings(then, data_set); + for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { + pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data; + + pcmk__update_action_for_orderings(other->action, data_set); + } + } +} + +static inline bool +is_primitive_action(const pe_action_t *action) +{ + return action && action->rsc && (action->rsc->variant == pe_native); +} + +/*! + * \internal + * \brief Clear a single action flag and set reason text + * + * \param[in,out] action Action whose flag should be cleared + * \param[in] flag Action flag that should be cleared + * \param[in] reason Action that is the reason why flag is being cleared + */ +#define clear_action_flag_because(action, flag, reason) do { \ + if (pcmk_is_set((action)->flags, (flag))) { \ + pe__clear_action_flags(action, flag); \ + if ((action)->rsc != (reason)->rsc) { \ + char *reason_text = pe__action2reason((reason), (flag)); \ + pe_action_set_reason((action), reason_text, \ + ((flag) == pe_action_migrate_runnable)); \ + free(reason_text); \ + } \ + } \ + } while (0) + +/*! + * \internal + * \brief Update actions in an asymmetric ordering + * + * If the "first" action in an asymmetric ordering is unrunnable, make the + * "second" action unrunnable as well, if appropriate. + * + * \param[in] first 'First' action in an asymmetric ordering + * \param[in,out] then 'Then' action in an asymmetric ordering + */ +static void +handle_asymmetric_ordering(const pe_action_t *first, pe_action_t *then) +{ + /* Only resource actions after an unrunnable 'first' action need updates for + * asymmetric ordering. 
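+ * For example, given an asymmetric "start A then start B" where A's start
+ * is unrunnable, B's start is marked unrunnable below, unless B is
+ * already in the state that the action would produce.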
+ */ + if ((then->rsc == NULL) || pcmk_is_set(first->flags, pe_action_runnable)) { + return; + } + + // Certain optional 'then' actions are unaffected by unrunnable 'first' + if (pcmk_is_set(then->flags, pe_action_optional)) { + enum rsc_role_e then_rsc_role = then->rsc->fns->state(then->rsc, TRUE); + + if ((then_rsc_role == RSC_ROLE_STOPPED) + && pcmk__str_eq(then->task, RSC_STOP, pcmk__str_none)) { + /* If 'then' should stop after 'first' but is already stopped, the + * ordering is irrelevant. + */ + return; + } else if ((then_rsc_role >= RSC_ROLE_STARTED) + && pcmk__str_eq(then->task, RSC_START, pcmk__str_none) + && pe__rsc_running_on_only(then->rsc, then->node)) { + /* Similarly if 'then' should start after 'first' but is already + * started on a single node. + */ + return; + } + } + + // 'First' can't run, so 'then' can't either + clear_action_flag_because(then, pe_action_optional, first); + clear_action_flag_because(then, pe_action_runnable, first); +} + +/*! + * \internal + * \brief Set action bits appropriately when pe_restart_order is used + * + * \param[in,out] first 'First' action in an ordering with pe_restart_order + * \param[in,out] then 'Then' action in an ordering with pe_restart_order + * \param[in] filter What action flags to care about + * + * \note pe_restart_order is set for "stop resource before starting it" and + * "stop later group member before stopping earlier group member" + */ +static void +handle_restart_ordering(pe_action_t *first, pe_action_t *then, uint32_t filter) +{ + const char *reason = NULL; + + CRM_ASSERT(is_primitive_action(first)); + CRM_ASSERT(is_primitive_action(then)); + + // We need to update the action in two cases: + + // ... if 'then' is required + if (pcmk_is_set(filter, pe_action_optional) + && !pcmk_is_set(then->flags, pe_action_optional)) { + reason = "restart"; + } + + /* ... if 'then' is unrunnable action on same resource (if a resource + * should restart but can't start, we still want to stop) + */ + if (pcmk_is_set(filter, pe_action_runnable) + && !pcmk_is_set(then->flags, pe_action_runnable) + && pcmk_is_set(then->rsc->flags, pe_rsc_managed) + && (first->rsc == then->rsc)) { + reason = "stop"; + } + + if (reason == NULL) { + return; + } + + pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", + first->uuid, then->uuid, reason); + + // Make 'first' required if it is runnable + if (pcmk_is_set(first->flags, pe_action_runnable)) { + clear_action_flag_because(first, pe_action_optional, then); + } + + // Make 'first' required if 'then' is required + if (!pcmk_is_set(then->flags, pe_action_optional)) { + clear_action_flag_because(first, pe_action_optional, then); + } + + // Make 'first' unmigratable if 'then' is unmigratable + if (!pcmk_is_set(then->flags, pe_action_migrate_runnable)) { + clear_action_flag_because(first, pe_action_migrate_runnable, then); + } + + // Make 'then' unrunnable if 'first' is required but unrunnable + if (!pcmk_is_set(first->flags, pe_action_optional) + && !pcmk_is_set(first->flags, pe_action_runnable)) { + clear_action_flag_because(then, pe_action_runnable, first); + } +} + +/*! + * \internal + * \brief Update two actions according to an ordering between them + * + * Given information about an ordering of two actions, update the actions' flags + * (and runnable_before members if appropriate) as appropriate for the ordering. + * Effects may cascade to other orderings involving the actions as well. 
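+ * (For example, when a cloned group member's flags change here, its
+ * orderings are re-evaluated via pcmk__update_action_for_orderings(), as
+ * noted near the end of this function.)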
+ * + * \param[in,out] first 'First' action in an ordering + * \param[in,out] then 'Then' action in an ordering + * \param[in] node If not NULL, limit scope of ordering to this node + * (ignored) + * \param[in] flags Action flags for \p first for ordering purposes + * \param[in] filter Action flags to limit scope of certain updates (may + * include pe_action_optional to affect only mandatory + * actions, and pe_action_runnable to affect only + * runnable actions) + * \param[in] type Group of enum pe_ordering flags to apply + * \param[in,out] data_set Cluster working set + * + * \return Group of enum pcmk__updated flags indicating what was updated + */ +uint32_t +pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then, + const pe_node_t *node, uint32_t flags, + uint32_t filter, uint32_t type, + pe_working_set_t *data_set) +{ + uint32_t changed = pcmk__updated_none; + uint32_t then_flags = then->flags; + uint32_t first_flags = first->flags; + + if (pcmk_is_set(type, pe_order_asymmetrical)) { + handle_asymmetric_ordering(first, then); + } + + if (pcmk_is_set(type, pe_order_implies_first) + && !pcmk_is_set(then_flags, pe_action_optional)) { + // Then is required, and implies first should be, too + + if (pcmk_is_set(filter, pe_action_optional) + && !pcmk_is_set(flags, pe_action_optional) + && pcmk_is_set(first_flags, pe_action_optional)) { + clear_action_flag_because(first, pe_action_optional, then); + } + + if (pcmk_is_set(flags, pe_action_migrate_runnable) + && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) { + clear_action_flag_because(first, pe_action_migrate_runnable, then); + } + } + + if (pcmk_is_set(type, pe_order_promoted_implies_first) + && (then->rsc != NULL) && (then->rsc->role == RSC_ROLE_PROMOTED) + && pcmk_is_set(filter, pe_action_optional) + && !pcmk_is_set(then->flags, pe_action_optional)) { + + clear_action_flag_because(first, pe_action_optional, then); + + if (pcmk_is_set(first->flags, pe_action_migrate_runnable) + && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) { + clear_action_flag_because(first, pe_action_migrate_runnable, + then); + } + } + + if (pcmk_is_set(type, pe_order_implies_first_migratable) + && pcmk_is_set(filter, pe_action_optional)) { + + if (!pcmk_all_flags_set(then->flags, + pe_action_migrate_runnable|pe_action_runnable)) { + clear_action_flag_because(first, pe_action_runnable, then); + } + + if (!pcmk_is_set(then->flags, pe_action_optional)) { + clear_action_flag_because(first, pe_action_optional, then); + } + } + + if (pcmk_is_set(type, pe_order_pseudo_left) + && pcmk_is_set(filter, pe_action_optional) + && !pcmk_is_set(first->flags, pe_action_runnable)) { + + clear_action_flag_because(then, pe_action_migrate_runnable, first); + pe__clear_action_flags(then, pe_action_pseudo); + } + + if (pcmk_is_set(type, pe_order_runnable_left) + && pcmk_is_set(filter, pe_action_runnable) + && pcmk_is_set(then->flags, pe_action_runnable) + && !pcmk_is_set(flags, pe_action_runnable)) { + + clear_action_flag_because(then, pe_action_runnable, first); + clear_action_flag_because(then, pe_action_migrate_runnable, first); + } + + if (pcmk_is_set(type, pe_order_implies_then) + && pcmk_is_set(filter, pe_action_optional) + && pcmk_is_set(then->flags, pe_action_optional) + && !pcmk_is_set(flags, pe_action_optional) + && !pcmk_is_set(first->flags, pe_action_migrate_runnable)) { + + clear_action_flag_because(then, pe_action_optional, first); + } + + if (pcmk_is_set(type, pe_order_restart)) { + handle_restart_ordering(first, then, filter); + } + + if 
(then_flags != then->flags) { + pcmk__set_updated_flags(changed, first, pcmk__updated_then); + pe_rsc_trace(then->rsc, + "%s on %s: flags are now %#.6x (was %#.6x) " + "because of 'first' %s (%#.6x)", + then->uuid, pe__node_name(then->node), + then->flags, then_flags, first->uuid, first->flags); + + if ((then->rsc != NULL) && (then->rsc->parent != NULL)) { + // Required to handle "X_stop then X_start" for cloned groups + pcmk__update_action_for_orderings(then, data_set); + } + } + + if (first_flags != first->flags) { + pcmk__set_updated_flags(changed, first, pcmk__updated_first); + pe_rsc_trace(first->rsc, + "%s on %s: flags are now %#.6x (was %#.6x) " + "because of 'then' %s (%#.6x)", + first->uuid, pe__node_name(first->node), + first->flags, first_flags, then->uuid, then->flags); + } + + return changed; +} + +/*! + * \internal + * \brief Trace-log an action (optionally with its dependent actions) + * + * \param[in] pre_text If not NULL, prefix the log with this plus ": " + * \param[in] action Action to log + * \param[in] details If true, recursively log dependent actions + */ +void +pcmk__log_action(const char *pre_text, const pe_action_t *action, bool details) +{ + const char *node_uname = NULL; + const char *node_uuid = NULL; + const char *desc = NULL; + + CRM_CHECK(action != NULL, return); + + if (!pcmk_is_set(action->flags, pe_action_pseudo)) { + if (action->node != NULL) { + node_uname = action->node->details->uname; + node_uuid = action->node->details->id; + } else { + node_uname = "<none>"; + } + } + + switch (text2task(action->task)) { + case stonith_node: + case shutdown_crm: + if (pcmk_is_set(action->flags, pe_action_pseudo)) { + desc = "Pseudo "; + } else if (pcmk_is_set(action->flags, pe_action_optional)) { + desc = "Optional "; + } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { + desc = "!!Non-Startable!! "; + } else if (pcmk_is_set(action->flags, pe_action_processed)) { + desc = ""; + } else { + desc = "(Provisional) "; + } + crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", + ((pre_text == NULL)? "" : pre_text), + ((pre_text == NULL)? "" : ": "), + desc, action->id, action->uuid, + (node_uname? "\ton " : ""), (node_uname? node_uname : ""), + (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), + (node_uuid? ")" : "")); + break; + default: + if (pcmk_is_set(action->flags, pe_action_optional)) { + desc = "Optional "; + } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { + desc = "Pseudo "; + } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { + desc = "!!Non-Startable!! "; + } else if (pcmk_is_set(action->flags, pe_action_processed)) { + desc = ""; + } else { + desc = "(Provisional) "; + } + crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", + ((pre_text == NULL)? "" : pre_text), + ((pre_text == NULL)? "" : ": "), + desc, action->id, action->uuid, + (action->rsc? action->rsc->id : "<none>"), + (node_uname? "\ton " : ""), (node_uname? node_uname : ""), + (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), + (node_uuid? 
")" : "")); + break; + } + + if (details) { + const GList *iter = NULL; + const pe_action_wrapper_t *other = NULL; + + crm_trace("\t\t====== Preceding Actions"); + for (iter = action->actions_before; iter != NULL; iter = iter->next) { + other = (const pe_action_wrapper_t *) iter->data; + pcmk__log_action("\t\t", other->action, false); + } + crm_trace("\t\t====== Subsequent Actions"); + for (iter = action->actions_after; iter != NULL; iter = iter->next) { + other = (const pe_action_wrapper_t *) iter->data; + pcmk__log_action("\t\t", other->action, false); + } + crm_trace("\t\t====== End"); + + } else { + crm_trace("\t\t(before=%d, after=%d)", + g_list_length(action->actions_before), + g_list_length(action->actions_after)); + } +} + +/*! + * \internal + * \brief Create a new shutdown action for a node + * + * \param[in,out] node Node being shut down + * + * \return Newly created shutdown action for \p node + */ +pe_action_t * +pcmk__new_shutdown_action(pe_node_t *node) +{ + char *shutdown_id = NULL; + pe_action_t *shutdown_op = NULL; + + CRM_ASSERT(node != NULL); + + shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, + node->details->uname); + + shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN, node, FALSE, + TRUE, node->details->data_set); + + pcmk__order_stops_before_shutdown(node, shutdown_op); + add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); + return shutdown_op; +} + +/*! + * \internal + * \brief Calculate and add an operation digest to XML + * + * Calculate an operation digest, which enables us to later determine when a + * restart is needed due to the resource's parameters being changed, and add it + * to given XML. + * + * \param[in] op Operation result from executor + * \param[in,out] update XML to add digest to + */ +static void +add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update) +{ + char *digest = NULL; + xmlNode *args_xml = NULL; + + if (op->params == NULL) { + return; + } + args_xml = create_xml_node(NULL, XML_TAG_PARAMS); + g_hash_table_foreach(op->params, hash2field, args_xml); + pcmk__filter_op_for_digest(args_xml); + digest = calculate_operation_digest(args_xml, NULL); + crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); + free_xml(args_xml); + free(digest); +} + +#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + +/*! + * \internal + * \brief Create XML for resource operation history update + * + * \param[in,out] parent Parent XML node to add to + * \param[in,out] op Operation event data + * \param[in] caller_version DC feature set + * \param[in] target_rc Expected result of operation + * \param[in] node Name of node on which operation was performed + * \param[in] origin Arbitrary description of update source + * + * \return Newly created XML node for history update + */ +xmlNode * +pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, + const char *caller_version, int target_rc, + const char *node, const char *origin) +{ + char *key = NULL; + char *magic = NULL; + char *op_id = NULL; + char *op_id_additional = NULL; + char *local_user_data = NULL; + const char *exit_reason = NULL; + + xmlNode *xml_op = NULL; + const char *task = NULL; + + CRM_CHECK(op != NULL, return NULL); + crm_trace("Creating history XML for %s-interval %s action for %s on %s " + "(DC version: %s, origin: %s)", + pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, + ((node == NULL)? 
"no node" : node), caller_version, origin); + + task = op->op_type; + + /* Record a successful agent reload as a start, and a failed one as a + * monitor, to make life easier for the scheduler when determining the + * current state. + * + * @COMPAT We should check "reload" here only if the operation was for a + * pre-OCF-1.1 resource agent, but we don't know that here, and we should + * only ever get results for actions scheduled by us, so we can reasonably + * assume any "reload" is actually a pre-1.1 agent reload. + */ + if (pcmk__str_any_of(task, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT, + NULL)) { + if (op->op_status == PCMK_EXEC_DONE) { + task = CRMD_ACTION_START; + } else { + task = CRMD_ACTION_STATUS; + } + } + + key = pcmk__op_key(op->rsc_id, task, op->interval_ms); + if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_none)) { + const char *n_type = crm_meta_value(op->params, "notify_type"); + const char *n_task = crm_meta_value(op->params, "notify_operation"); + + CRM_LOG_ASSERT(n_type != NULL); + CRM_LOG_ASSERT(n_task != NULL); + op_id = pcmk__notify_key(op->rsc_id, n_type, n_task); + + if (op->op_status != PCMK_EXEC_PENDING) { + /* Ignore notify errors. + * + * @TODO It might be better to keep the correct result here, and + * ignore it in process_graph_event(). + */ + lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + } + + /* Migration history is preserved separately, which usually matters for + * multiple nodes and is important for future cluster transitions. + */ + } else if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL)) { + op_id = strdup(key); + + } else if (did_rsc_op_fail(op, target_rc)) { + op_id = pcmk__op_key(op->rsc_id, "last_failure", 0); + if (op->interval_ms == 0) { + // Ensure 'last' gets updated, in case record-pending is true + op_id_additional = pcmk__op_key(op->rsc_id, "last", 0); + } + exit_reason = op->exit_reason; + + } else if (op->interval_ms > 0) { + op_id = strdup(key); + + } else { + op_id = pcmk__op_key(op->rsc_id, "last", 0); + } + + again: + xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, XML_ATTR_ID, op_id); + if (xml_op == NULL) { + xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); + } + + if (op->user_data == NULL) { + crm_debug("Generating fake transition key for: " PCMK__OP_FMT + " %d from %s", op->rsc_id, op->op_type, op->interval_ms, + op->call_id, origin); + local_user_data = pcmk__transition_key(-1, op->call_id, target_rc, + FAKE_TE_ID); + op->user_data = local_user_data; + } + + if (magic == NULL) { + magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc, + (const char *) op->user_data); + } + + crm_xml_add(xml_op, XML_ATTR_ID, op_id); + crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key); + crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); + crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); + crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); + crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); + crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); + crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? 
"" : exit_reason); + crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */ + + crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); + crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); + crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); + crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms); + + if (compare_version("2.1", caller_version) <= 0) { + if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { + crm_trace("Timing data (" PCMK__OP_FMT + "): last=%u change=%u exec=%u queue=%u", + op->rsc_id, op->op_type, op->interval_ms, + op->t_run, op->t_rcchange, op->exec_time, op->queue_time); + + if ((op->interval_ms != 0) && (op->t_rcchange != 0)) { + // Recurring ops may have changed rc after initial run + crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, + (long long) op->t_rcchange); + } else { + crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, + (long long) op->t_run); + } + + crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); + crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); + } + } + + if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { + /* + * Record migrate_source and migrate_target always for migrate ops. + */ + const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; + + crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); + + name = XML_LRM_ATTR_MIGRATE_TARGET; + crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); + } + + add_op_digest_to_xml(op, xml_op); + + if (op_id_additional) { + free(op_id); + op_id = op_id_additional; + op_id_additional = NULL; + goto again; + } + + if (local_user_data) { + free(local_user_data); + op->user_data = NULL; + } + free(magic); + free(op_id); + free(key); + return xml_op; +} + +/*! + * \internal + * \brief Check whether an action shutdown-locks a resource to a node + * + * If the shutdown-lock cluster property is set, resources will not be recovered + * on a different node if cleanly stopped, and may start only on that same node. + * This function checks whether that applies to a given action, so that the + * transition graph can be marked appropriately. + * + * \param[in] action Action to check + * + * \return true if \p action locks its resource to the action's node, + * otherwise false + */ +bool +pcmk__action_locks_rsc_to_node(const pe_action_t *action) +{ + // Only resource actions taking place on resource's lock node are locked + if ((action == NULL) || (action->rsc == NULL) + || (action->rsc->lock_node == NULL) || (action->node == NULL) + || (action->node->details != action->rsc->lock_node->details)) { + return false; + } + + /* During shutdown, only stops are locked (otherwise, another action such as + * a demote would cause the controller to clear the lock) + */ + if (action->node->details->shutdown && (action->task != NULL) + && (strcmp(action->task, RSC_STOP) != 0)) { + return false; + } + + return true; +} + +/* lowest to highest */ +static gint +sort_action_id(gconstpointer a, gconstpointer b) +{ + const pe_action_wrapper_t *action_wrapper2 = (const pe_action_wrapper_t *)a; + const pe_action_wrapper_t *action_wrapper1 = (const pe_action_wrapper_t *)b; + + if (a == NULL) { + return 1; + } + if (b == NULL) { + return -1; + } + if (action_wrapper1->action->id < action_wrapper2->action->id) { + return 1; + } + if (action_wrapper1->action->id > action_wrapper2->action->id) { + return -1; + } + return 0; +} + +/*! 
+ * \internal + * \brief Remove any duplicate action inputs, merging action flags + * + * \param[in,out] action Action whose inputs should be checked + */ +void +pcmk__deduplicate_action_inputs(pe_action_t *action) +{ + GList *item = NULL; + GList *next = NULL; + pe_action_wrapper_t *last_input = NULL; + + action->actions_before = g_list_sort(action->actions_before, + sort_action_id); + for (item = action->actions_before; item != NULL; item = next) { + pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data; + + next = item->next; + if ((last_input != NULL) + && (input->action->id == last_input->action->id)) { + crm_trace("Input %s (%d) duplicate skipped for action %s (%d)", + input->action->uuid, input->action->id, + action->uuid, action->id); + + /* For the purposes of scheduling, the ordering flags no longer + * matter, but crm_simulate looks at certain ones when creating a + * dot graph. Combining the flags is sufficient for that purpose. + */ + last_input->type |= input->type; + if (input->state == pe_link_dumped) { + last_input->state = pe_link_dumped; + } + + free(item->data); + action->actions_before = g_list_delete_link(action->actions_before, + item); + } else { + last_input = input; + input->state = pe_link_not_dumped; + } + } +} + +/*! + * \internal + * \brief Output all scheduled actions + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__output_actions(pe_working_set_t *data_set) +{ + pcmk__output_t *out = data_set->priv; + + // Output node (non-resource) actions + for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) { + char *node_name = NULL; + char *task = NULL; + pe_action_t *action = (pe_action_t *) iter->data; + + if (action->rsc != NULL) { + continue; // Resource actions will be output later + + } else if (pcmk_is_set(action->flags, pe_action_optional)) { + continue; // This action was not scheduled + } + + if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { + task = strdup("Shutdown"); + + } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { + const char *op = g_hash_table_lookup(action->meta, "stonith_action"); + + task = crm_strdup_printf("Fence (%s)", op); + + } else { + continue; // Don't display other node action types + } + + if (pe__is_guest_node(action->node)) { + node_name = crm_strdup_printf("%s (resource: %s)", + pe__node_name(action->node), + action->node->details->remote_rsc->container->id); + } else if (action->node != NULL) { + node_name = crm_strdup_printf("%s", pe__node_name(action->node)); + } + + out->message(out, "node-action", task, node_name, action->reason); + + free(node_name); + free(task); + } + + // Output resource actions + for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + rsc->cmds->output_actions(rsc); + } +} + +/*! + * \internal + * \brief Check whether action from resource history is still in configuration + * + * \param[in] rsc Resource that action is for + * \param[in] task Action's name + * \param[in] interval_ms Action's interval (in milliseconds) + * + * \return true if action is still in resource configuration, otherwise false + */ +static bool +action_in_config(const pe_resource_t *rsc, const char *task, guint interval_ms) +{ + char *key = pcmk__op_key(rsc->id, task, interval_ms); + bool config = (find_rsc_op_entry(rsc, key) != NULL); + + free(key); + return config; +} + +/*! 
+ * \internal + * \brief Get action name needed to compare digest for configuration changes + * + * \param[in] task Action name from history + * \param[in] interval_ms Action interval (in milliseconds) + * + * \return Action name whose digest should be compared + */ +static const char * +task_for_digest(const char *task, guint interval_ms) +{ + /* Certain actions need to be compared against the parameters used to start + * the resource. + */ + if ((interval_ms == 0) + && pcmk__str_any_of(task, RSC_STATUS, RSC_MIGRATED, RSC_PROMOTE, NULL)) { + task = RSC_START; + } + return task; +} + +/*! + * \internal + * \brief Check whether only sanitized parameters to an action changed + * + * When collecting CIB files for troubleshooting, crm_report will mask + * sensitive resource parameters. If simulations were run using that, affected + * resources would appear to need a restart, which would complicate + * troubleshooting. To avoid that, we save a "secure digest" of non-sensitive + * parameters. This function uses that digest to check whether only masked + * parameters are different. + * + * \param[in] xml_op Resource history entry with secure digest + * \param[in] digest_data Operation digest information being compared + * \param[in] data_set Cluster working set + * + * \return true if only sanitized parameters changed, otherwise false + */ +static bool +only_sanitized_changed(const xmlNode *xml_op, + const op_digest_cache_t *digest_data, + const pe_working_set_t *data_set) +{ + const char *digest_secure = NULL; + + if (!pcmk_is_set(data_set->flags, pe_flag_sanitized)) { + // The scheduler is not being run as a simulation + return false; + } + + digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST); + + return (digest_data->rc != RSC_DIGEST_MATCH) && (digest_secure != NULL) + && (digest_data->digest_secure_calc != NULL) + && (strcmp(digest_data->digest_secure_calc, digest_secure) == 0); +} + +/*! + * \internal + * \brief Force a restart due to a configuration change + * + * \param[in,out] rsc Resource that action is for + * \param[in] task Name of action whose configuration changed + * \param[in] interval_ms Action interval (in milliseconds) + * \param[in,out] node Node where resource should be restarted + */ +static void +force_restart(pe_resource_t *rsc, const char *task, guint interval_ms, + pe_node_t *node) +{ + char *key = pcmk__op_key(rsc->id, task, interval_ms); + pe_action_t *required = custom_action(rsc, key, task, NULL, FALSE, TRUE, + rsc->cluster); + + pe_action_set_reason(required, "resource definition change", true); + trigger_unfencing(rsc, node, "Device parameters changed", NULL, + rsc->cluster); +} + +/*! + * \internal + * \brief Schedule a reload of a resource on a node + * + * \param[in,out] rsc Resource to reload + * \param[in] node Where resource should be reloaded + */ +static void +schedule_reload(pe_resource_t *rsc, const pe_node_t *node) +{ + pe_action_t *reload = NULL; + + // For collective resources, just call recursively for children + if (rsc->variant > pe_native) { + g_list_foreach(rsc->children, (GFunc) schedule_reload, (gpointer) node); + return; + } + + // Skip the reload in certain situations + if ((node == NULL) + || !pcmk_is_set(rsc->flags, pe_rsc_managed) + || pcmk_is_set(rsc->flags, pe_rsc_failed)) { + pe_rsc_trace(rsc, "Skip reload of %s:%s%s %s", + rsc->id, + pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " unmanaged", + pcmk_is_set(rsc->flags, pe_rsc_failed)? " failed" : "", + (node == NULL)? 
"inactive" : node->details->uname); + return; + } + + /* If a resource's configuration changed while a start was pending, + * force a full restart instead of a reload. + */ + if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { + pe_rsc_trace(rsc, "%s: preventing agent reload because start pending", + rsc->id); + custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE, + rsc->cluster); + return; + } + + // Schedule the reload + pe__set_resource_flags(rsc, pe_rsc_reload); + reload = custom_action(rsc, reload_key(rsc), CRMD_ACTION_RELOAD_AGENT, node, + FALSE, TRUE, rsc->cluster); + pe_action_set_reason(reload, "resource definition change", FALSE); + + // Set orderings so that a required stop or demote cancels the reload + pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL, + pe_order_optional|pe_order_then_cancels_first, + rsc->cluster); + pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL, + pe_order_optional|pe_order_then_cancels_first, + rsc->cluster); +} + +/*! + * \internal + * \brief Handle any configuration change for an action + * + * Given an action from resource history, if the resource's configuration + * changed since the action was done, schedule any actions needed (restart, + * reload, unfencing, rescheduling recurring actions, etc.). + * + * \param[in,out] rsc Resource that action is for + * \param[in,out] node Node that action was on + * \param[in] xml_op Action XML from resource history + * + * \return true if action configuration changed, otherwise false + */ +bool +pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node, + const xmlNode *xml_op) +{ + guint interval_ms = 0; + const char *task = NULL; + const op_digest_cache_t *digest_data = NULL; + + CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL), + return false); + + task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + CRM_CHECK(task != NULL, return false); + + crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); + + // If this is a recurring action, check whether it has been orphaned + if (interval_ms > 0) { + if (action_in_config(rsc, task, interval_ms)) { + pe_rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration", + pcmk__readable_interval(interval_ms), task, rsc->id, + pe__node_name(node)); + } else if (pcmk_is_set(rsc->cluster->flags, + pe_flag_stop_action_orphans)) { + pcmk__schedule_cancel(rsc, + crm_element_value(xml_op, XML_LRM_ATTR_CALLID), + task, interval_ms, node, "orphan"); + return true; + } else { + pe_rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned", + pcmk__readable_interval(interval_ms), task, rsc->id, + pe__node_name(node)); + return true; + } + } + + crm_trace("Checking %s-interval %s for %s on %s for configuration changes", + pcmk__readable_interval(interval_ms), task, rsc->id, + pe__node_name(node)); + task = task_for_digest(task, interval_ms); + digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster); + + if (only_sanitized_changed(xml_op, digest_data, rsc->cluster)) { + if (!pcmk__is_daemon && (rsc->cluster->priv != NULL)) { + pcmk__output_t *out = rsc->cluster->priv; + + out->info(out, + "Only 'private' parameters to %s-interval %s for %s " + "on %s changed: %s", + pcmk__readable_interval(interval_ms), task, rsc->id, + pe__node_name(node), + crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); + } + return false; + } + + switch (digest_data->rc) { + case RSC_DIGEST_RESTART: + crm_log_xml_debug(digest_data->params_restart, "params:restart"); + force_restart(rsc, task, interval_ms, 
node); + return true; + + case RSC_DIGEST_ALL: + case RSC_DIGEST_UNKNOWN: + // Changes that can potentially be handled by an agent reload + + if (interval_ms > 0) { + /* Recurring actions aren't reloaded per se, they are just + * re-scheduled so the next run uses the new parameters. + * The old instance will be cancelled automatically. + */ + crm_log_xml_debug(digest_data->params_all, "params:reschedule"); + pcmk__reschedule_recurring(rsc, task, interval_ms, node); + + } else if (crm_element_value(xml_op, + XML_LRM_ATTR_RESTART_DIGEST) != NULL) { + // Agent supports reload, so use it + trigger_unfencing(rsc, node, + "Device parameters changed (reload)", NULL, + rsc->cluster); + crm_log_xml_debug(digest_data->params_all, "params:reload"); + schedule_reload(rsc, node); + + } else { + pe_rsc_trace(rsc, + "Restarting %s because agent doesn't support reload", + rsc->id); + crm_log_xml_debug(digest_data->params_restart, + "params:restart"); + force_restart(rsc, task, interval_ms, node); + } + return true; + + default: + break; + } + return false; +} + +/*! + * \internal + * \brief Create a list of resource's action history entries, sorted by call ID + * + * \param[in] rsc_entry Resource's <lrm_rsc_op> status XML + * \param[out] start_index Where to store index of start-like action, if any + * \param[out] stop_index Where to store index of stop action, if any + */ +static GList * +rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index) +{ + GList *ops = NULL; + + for (xmlNode *rsc_op = first_named_child(rsc_entry, XML_LRM_TAG_RSC_OP); + rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) { + ops = g_list_prepend(ops, rsc_op); + } + ops = g_list_sort(ops, sort_op_by_callid); + calculate_active_ops(ops, start_index, stop_index); + return ops; +} + +/*! + * \internal + * \brief Process a resource's action history from the CIB status + * + * Given a resource's action history, if the resource's configuration + * changed since the actions were done, schedule any actions needed (restart, + * reload, unfencing, rescheduling recurring actions, clean-up, etc.). + * (This also cancels recurring actions for maintenance mode, which is not + * entirely related but convenient to do here.) 
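+ * The per-entry flow, as implemented below: skip orphans and resources no
+ * longer active on the node, schedule clean-up if the agent changed, then
+ * walk the sorted history calling pcmk__check_action_config() on relevant
+ * actions and clearing fail counts where a failed action's definition
+ * changed.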
+ * + * \param[in] rsc_entry Resource's <lrm_rsc_op> status XML + * \param[in,out] rsc Resource whose history is being processed + * \param[in,out] node Node whose history is being processed + */ +static void +process_rsc_history(const xmlNode *rsc_entry, pe_resource_t *rsc, + pe_node_t *node) +{ + int offset = -1; + int stop_index = 0; + int start_index = 0; + GList *sorted_op_list = NULL; + + if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { + if (pe_rsc_is_anon_clone(pe__const_top_resource(rsc, false))) { + pe_rsc_trace(rsc, + "Skipping configuration check " + "for orphaned clone instance %s", + rsc->id); + } else { + pe_rsc_trace(rsc, + "Skipping configuration check and scheduling clean-up " + "for orphaned resource %s", rsc->id); + pcmk__schedule_cleanup(rsc, node, false); + } + return; + } + + if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { + if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) { + pcmk__schedule_cleanup(rsc, node, false); + } + pe_rsc_trace(rsc, + "Skipping configuration check for %s " + "because no longer active on %s", + rsc->id, pe__node_name(node)); + return; + } + + pe_rsc_trace(rsc, "Checking for configuration changes for %s on %s", + rsc->id, pe__node_name(node)); + + if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) { + pcmk__schedule_cleanup(rsc, node, false); + } + + sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index); + if (start_index < stop_index) { + return; // Resource is stopped + } + + for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { + xmlNode *rsc_op = (xmlNode *) iter->data; + const char *task = NULL; + guint interval_ms = 0; + + if (++offset < start_index) { + // Skip actions that happened before a start + continue; + } + + task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); + crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); + + if ((interval_ms > 0) + && (pcmk_is_set(rsc->flags, pe_rsc_maintenance) + || node->details->maintenance)) { + // Maintenance mode cancels recurring operations + pcmk__schedule_cancel(rsc, + crm_element_value(rsc_op, XML_LRM_ATTR_CALLID), + task, interval_ms, node, "maintenance mode"); + + } else if ((interval_ms > 0) + || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START, + RSC_PROMOTE, RSC_MIGRATED, NULL)) { + /* If a resource operation failed, and the operation's definition + * has changed, clear any fail count so they can be retried fresh. + */ + + if (pe__bundle_needs_remote_name(rsc)) { + /* We haven't allocated resources to nodes yet, so if the + * REMOTE_CONTAINER_HACK is used, we may calculate the digest + * based on the literal "#uname" value rather than the properly + * substituted value. That would mistakenly make the action + * definition appear to have been changed. Defer the check until + * later in this case. + */ + pe__add_param_check(rsc_op, rsc, node, pe_check_active, + rsc->cluster); + + } else if (pcmk__check_action_config(rsc, node, rsc_op) + && (pe_get_failcount(node, rsc, NULL, pe_fc_effective, + NULL) != 0)) { + pe__clear_failcount(rsc, node, "action definition changed", + rsc->cluster); + } + } + } + g_list_free(sorted_op_list); +} + +/*! + * \internal + * \brief Process a node's action history from the CIB status + * + * Given a node's resource history, if the resource's configuration changed + * since the actions were done, schedule any actions needed (restart, + * reload, unfencing, rescheduling recurring actions, clean-up, etc.). 
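+ * Only entries that correspond to a primitive resource are examined; each
+ * matching entry is handed to process_rsc_history() below.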
+ * (This also cancels recurring actions for maintenance mode, which is not + * entirely related but convenient to do here.) + * + * \param[in,out] node Node whose history is being processed + * \param[in] lrm_rscs Node's <lrm_resources> from CIB status XML + */ +static void +process_node_history(pe_node_t *node, const xmlNode *lrm_rscs) +{ + crm_trace("Processing node history for %s", pe__node_name(node)); + for (const xmlNode *rsc_entry = first_named_child(lrm_rscs, + XML_LRM_TAG_RESOURCE); + rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { + + if (xml_has_children(rsc_entry)) { + GList *result = pcmk__rscs_matching_id(ID(rsc_entry), + node->details->data_set); + + for (GList *iter = result; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + if (rsc->variant == pe_native) { + process_rsc_history(rsc_entry, rsc, node); + } + } + g_list_free(result); + } + } +} + +// XPath to find a node's resource history +#define XPATH_NODE_HISTORY "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ + "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ + "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES + +/*! + * \internal + * \brief Process any resource configuration changes in the CIB status + * + * Go through all nodes' resource history, and if a resource's configuration + * changed since its actions were done, schedule any actions needed (restart, + * reload, unfencing, rescheduling recurring actions, clean-up, etc.). + * (This also cancels recurring actions for maintenance mode, which is not + * entirely related but convenient to do here.) + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__handle_rsc_config_changes(pe_working_set_t *data_set) +{ + crm_trace("Check resource and action configuration for changes"); + + /* Rather than iterate through the status section, iterate through the nodes + * and search for the appropriate status subsection for each. This skips + * orphaned nodes and lets us eliminate some cases before searching the XML. + */ + for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { + pe_node_t *node = (pe_node_t *) iter->data; + + /* Don't bother checking actions for a node that can't run actions ... + * unless it's in maintenance mode, in which case we still need to + * cancel any existing recurring monitors. + */ + if (node->details->maintenance + || pcmk__node_available(node, false, false)) { + + char *xpath = NULL; + xmlNode *history = NULL; + + xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->details->uname); + history = get_xpath_object(xpath, data_set->input, LOG_NEVER); + free(xpath); + + process_node_history(node, history); + } + } +} diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c new file mode 100644 index 0000000..5682744 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -0,0 +1,876 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <stdbool.h> + +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +#define PE__VARIANT_BUNDLE 1 +#include <lib/pengine/variant.h> + +static bool +is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node) +{ + for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + if (node->details == replica->node->details) { + return TRUE; + } + } + return FALSE; +} + +/*! + * \internal + * \brief Assign a bundle resource to a node + * + * \param[in,out] rsc Resource to assign to a node + * \param[in] prefer Node to prefer, if all else is equal + * + * \return Node that \p rsc is assigned to, if assigned entirely to one node + */ +pe_node_t * +pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) +{ + GList *containers = NULL; + pe__bundle_variant_data_t *bundle_data = NULL; + + CRM_CHECK(rsc != NULL, return NULL); + + get_bundle_variant_data(bundle_data, rsc); + + pe__set_resource_flags(rsc, pe_rsc_allocating); + containers = pe__bundle_containers(rsc); + + pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), + rsc, __func__, rsc->allowed_nodes, rsc->cluster); + + containers = g_list_sort(containers, pcmk__cmp_instance); + pcmk__assign_instances(rsc, containers, bundle_data->nreplicas, + bundle_data->nreplicas_per_host); + g_list_free(containers); + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + pe_node_t *container_host = NULL; + + CRM_ASSERT(replica); + if (replica->ip) { + pe_rsc_trace(rsc, "Allocating bundle %s IP %s", + rsc->id, replica->ip->id); + replica->ip->cmds->assign(replica->ip, prefer); + } + + container_host = replica->container->allocated_to; + if (replica->remote && pe__is_guest_or_remote_node(container_host)) { + /* We need 'nested' connection resources to be on the same + * host because pacemaker-remoted only supports a single + * active connection + */ + pcmk__new_colocation("child-remote-with-docker-remote", NULL, + INFINITY, replica->remote, + container_host->details->remote_rsc, NULL, + NULL, true, rsc->cluster); + } + + if (replica->remote) { + pe_rsc_trace(rsc, "Allocating bundle %s connection %s", + rsc->id, replica->remote->id); + replica->remote->cmds->assign(replica->remote, prefer); + } + + // Explicitly allocate replicas' children before bundle child + if (replica->child) { + pe_node_t *node = NULL; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, replica->child->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { + if (node->details != replica->node->details) { + node->weight = -INFINITY; + } else if (!pcmk__threshold_reached(replica->child, node, + NULL)) { + node->weight = INFINITY; + } + } + + pe__set_resource_flags(replica->child->parent, pe_rsc_allocating); + pe_rsc_trace(rsc, "Allocating bundle %s replica child %s", + rsc->id, replica->child->id); + replica->child->cmds->assign(replica->child, replica->node); + pe__clear_resource_flags(replica->child->parent, + pe_rsc_allocating); + } + } + + if (bundle_data->child) { + pe_node_t *node = NULL; + GHashTableIter iter; + g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { + if (is_bundle_node(bundle_data, node)) { + node->weight = 0; + } else { + node->weight = -INFINITY; + } + } + pe_rsc_trace(rsc, 
"Allocating bundle %s child %s", + rsc->id, bundle_data->child->id); + bundle_data->child->cmds->assign(bundle_data->child, prefer); + } + + pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional); + return NULL; +} + + +void +pcmk__bundle_create_actions(pe_resource_t *rsc) +{ + pe_action_t *action = NULL; + GList *containers = NULL; + pe__bundle_variant_data_t *bundle_data = NULL; + + CRM_CHECK(rsc != NULL, return); + + containers = pe__bundle_containers(rsc); + get_bundle_variant_data(bundle_data, rsc); + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + CRM_ASSERT(replica); + if (replica->ip) { + replica->ip->cmds->create_actions(replica->ip); + } + if (replica->container) { + replica->container->cmds->create_actions(replica->container); + } + if (replica->remote) { + replica->remote->cmds->create_actions(replica->remote); + } + } + + pcmk__create_instance_actions(rsc, containers); + + if (bundle_data->child) { + bundle_data->child->cmds->create_actions(bundle_data->child); + + if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { + /* promote */ + pe__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true); + action = pe__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true); + action->priority = INFINITY; + + /* demote */ + pe__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true); + action = pe__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true); + action->priority = INFINITY; + } + } + + g_list_free(containers); +} + +void +pcmk__bundle_internal_constraints(pe_resource_t *rsc) +{ + pe__bundle_variant_data_t *bundle_data = NULL; + + CRM_CHECK(rsc != NULL, return); + + get_bundle_variant_data(bundle_data, rsc); + + if (bundle_data->child) { + pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child, + RSC_START, pe_order_implies_first_printed); + pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child, + RSC_STOP, pe_order_implies_first_printed); + + if (bundle_data->child->children) { + pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc, + RSC_STARTED, + pe_order_implies_then_printed); + pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc, + RSC_STOPPED, + pe_order_implies_then_printed); + } else { + pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc, + RSC_STARTED, + pe_order_implies_then_printed); + pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc, + RSC_STOPPED, + pe_order_implies_then_printed); + } + } + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + CRM_ASSERT(replica); + CRM_ASSERT(replica->container); + + replica->container->cmds->internal_constraints(replica->container); + + pcmk__order_starts(rsc, replica->container, + pe_order_runnable_left|pe_order_implies_first_printed); + + if (replica->child) { + pcmk__order_stops(rsc, replica->child, + pe_order_implies_first_printed); + } + pcmk__order_stops(rsc, replica->container, + pe_order_implies_first_printed); + pcmk__order_resource_actions(replica->container, RSC_START, rsc, + RSC_STARTED, + pe_order_implies_then_printed); + pcmk__order_resource_actions(replica->container, RSC_STOP, rsc, + RSC_STOPPED, + pe_order_implies_then_printed); + + if (replica->ip) { + replica->ip->cmds->internal_constraints(replica->ip); + + // Start IP then container + pcmk__order_starts(replica->ip, replica->container, + pe_order_runnable_left|pe_order_preserve); + 
pcmk__order_stops(replica->container, replica->ip, + pe_order_implies_first|pe_order_preserve); + + pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip, + replica->container, NULL, NULL, true, + rsc->cluster); + } + + if (replica->remote) { + /* This handles ordering and colocating remote relative to container + * (via "resource-with-container"). Since IP is also ordered and + * colocated relative to the container, we don't need to do anything + * explicit here with IP. + */ + replica->remote->cmds->internal_constraints(replica->remote); + } + + if (replica->child) { + CRM_ASSERT(replica->remote); + + // "Start remote then child" is implicit in scheduler's remote logic + } + + } + + if (bundle_data->child) { + bundle_data->child->cmds->internal_constraints(bundle_data->child); + if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { + pcmk__promotable_restart_ordering(rsc); + + /* child demoted before global demoted */ + pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc, + RSC_DEMOTED, + pe_order_implies_then_printed); + + /* global demote before child demote */ + pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child, + RSC_DEMOTE, + pe_order_implies_first_printed); + + /* child promoted before global promoted */ + pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc, + RSC_PROMOTED, + pe_order_implies_then_printed); + + /* global promote before child promote */ + pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child, + RSC_PROMOTE, + pe_order_implies_first_printed); + } + } +} + +static pe_resource_t * +compatible_replica_for_node(const pe_resource_t *rsc_lh, + const pe_node_t *candidate, + const pe_resource_t *rsc, enum rsc_role_e filter, + gboolean current) +{ + pe__bundle_variant_data_t *bundle_data = NULL; + + CRM_CHECK(candidate != NULL, return NULL); + get_bundle_variant_data(bundle_data, rsc); + + crm_trace("Looking for compatible child from %s for %s on %s", + rsc_lh->id, rsc->id, pe__node_name(candidate)); + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + if (pcmk__instance_matches(replica->container, candidate, filter, + current)) { + crm_trace("Pairing %s with %s on %s", + rsc_lh->id, replica->container->id, + pe__node_name(candidate)); + return replica->container; + } + } + + crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id); + return NULL; +} + +static pe_resource_t * +compatible_replica(const pe_resource_t *rsc_lh, const pe_resource_t *rsc, + enum rsc_role_e filter, gboolean current, + pe_working_set_t *data_set) +{ + GList *scratch = NULL; + pe_resource_t *pair = NULL; + pe_node_t *active_node_lh = NULL; + + active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current); + if (active_node_lh) { + return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter, + current); + } + + scratch = g_hash_table_get_values(rsc_lh->allowed_nodes); + scratch = pcmk__sort_nodes(scratch, NULL); + + for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) { + pe_node_t *node = (pe_node_t *) gIter->data; + + pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current); + if (pair) { + goto done; + } + } + + pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? 
rsc->id : "none")); + done: + g_list_free(scratch); + return pair; +} + +int copies_per_node(pe_resource_t * rsc) +{ + /* Strictly speaking, there should be a 'copies_per_node' addition + * to the resource function table and each case would be a + * function. However that would be serious overkill to return an + * int. In fact, it seems to me that both function tables + * could/should be replaced by resources.{c,h} full of + * rsc_{some_operation} functions containing a switch as below + * which calls out to functions named {variant}_{some_operation} + * as needed. + */ + switch(rsc->variant) { + case pe_unknown: + return 0; + case pe_native: + case pe_group: + return 1; + case pe_clone: + { + const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); + + if (max_clones_node == NULL) { + return 1; + + } else { + int max_i; + + pcmk__scan_min_int(max_clones_node, &max_i, 0); + return max_i; + } + } + case pe_container: + { + pe__bundle_variant_data_t *data = NULL; + get_bundle_variant_data(data, rsc); + return data->nreplicas_per_host; + } + } + return 0; +} + +/*! + * \internal + * \brief Apply a colocation's score to node weights or resource priority + * + * Given a colocation constraint, apply its score to the dependent's + * allowed node weights (if we are still placing resources) or priority (if + * we are choosing promotable clone instance roles). + * + * \param[in,out] dependent Dependent resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint to apply + * \param[in] for_dependent true if called on behalf of dependent + */ +void +pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation, + bool for_dependent) +{ + GList *allocated_primaries = NULL; + pe__bundle_variant_data_t *bundle_data = NULL; + + /* This should never be called for the bundle itself as a dependent. + * Instead, we add its colocation constraints to its replicas and call the + * apply_coloc_score() for the replicas as dependents. 
+ */ + CRM_ASSERT(!for_dependent); + + CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), + return); + CRM_ASSERT(dependent->variant == pe_native); + + if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { + pe_rsc_trace(primary, "%s is still provisional", primary->id); + return; + + } else if (colocation->dependent->variant > pe_group) { + pe_resource_t *primary_replica = compatible_replica(dependent, primary, + RSC_ROLE_UNKNOWN, + FALSE, + dependent->cluster); + + if (primary_replica) { + pe_rsc_debug(primary, "Pairing %s with %s", + dependent->id, primary_replica->id); + dependent->cmds->apply_coloc_score(dependent, primary_replica, + colocation, true); + + } else if (colocation->score >= INFINITY) { + crm_notice("Cannot pair %s with instance of %s", + dependent->id, primary->id); + pcmk__assign_resource(dependent, NULL, true); + + } else { + pe_rsc_debug(primary, "Cannot pair %s with instance of %s", + dependent->id, primary->id); + } + + return; + } + + get_bundle_variant_data(bundle_data, primary); + pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", + colocation->id, dependent->id, primary->id, colocation->score); + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + if (colocation->score < INFINITY) { + replica->container->cmds->apply_coloc_score(dependent, + replica->container, + colocation, false); + + } else { + pe_node_t *chosen = replica->container->fns->location(replica->container, + NULL, FALSE); + + if ((chosen == NULL) + || is_set_recursive(replica->container, pe_rsc_block, TRUE)) { + continue; + } + if ((colocation->primary_role >= RSC_ROLE_PROMOTED) + && (replica->child == NULL)) { + continue; + } + if ((colocation->primary_role >= RSC_ROLE_PROMOTED) + && (replica->child->next_role < RSC_ROLE_PROMOTED)) { + continue; + } + + pe_rsc_trace(primary, "Allowing %s: %s %d", + colocation->id, pe__node_name(chosen), chosen->weight); + allocated_primaries = g_list_prepend(allocated_primaries, chosen); + } + } + + if (colocation->score >= INFINITY) { + node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE); + } + g_list_free(allocated_primaries); +} + +// Bundle implementation of resource_alloc_functions_t:with_this_colocations() +void +pcmk__with_bundle_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) +{ + CRM_CHECK((rsc != NULL) && (rsc->variant == pe_container) + && (orig_rsc != NULL) && (list != NULL), + return); + + if (rsc == orig_rsc) { // Colocations are wanted for bundle itself + pcmk__add_with_this_list(list, rsc->rsc_cons_lhs); + + // Only the bundle replicas' containers get the bundle's constraints + } else if (pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) { + pcmk__add_collective_constraints(list, orig_rsc, rsc, true); + } +} + +// Bundle implementation of resource_alloc_functions_t:this_with_colocations() +void +pcmk__bundle_with_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) +{ + CRM_CHECK((rsc != NULL) && (rsc->variant == pe_container) + && (orig_rsc != NULL) && (list != NULL), + return); + + if (rsc == orig_rsc) { // Colocations are wanted for bundle itself + pcmk__add_this_with_list(list, rsc->rsc_cons); + + // Only the bundle replicas' containers get the bundle's constraints + } else if (pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) { + pcmk__add_collective_constraints(list, orig_rsc, rsc, false); + } +} + +enum pe_action_flags 
+pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node) +{ + GList *containers = NULL; + enum pe_action_flags flags = 0; + pe__bundle_variant_data_t *data = NULL; + + get_bundle_variant_data(data, action->rsc); + if(data->child) { + enum action_tasks task = get_complex_task(data->child, action->task); + switch(task) { + case no_action: + case action_notify: + case action_notified: + case action_promote: + case action_promoted: + case action_demote: + case action_demoted: + return pcmk__collective_action_flags(action, + data->child->children, + node); + default: + break; + } + } + + containers = pe__bundle_containers(action->rsc); + flags = pcmk__collective_action_flags(action, containers, node); + g_list_free(containers); + return flags; +} + +/*! + * \internal + * \brief Get containerized resource corresponding to a given bundle container + * + * \param[in] instance Collective instance that might be a bundle container + * + * \return Bundled resource instance inside \p instance if it is a bundle + * container instance, otherwise NULL + */ +const pe_resource_t * +pcmk__get_rsc_in_container(const pe_resource_t *instance) +{ + const pe__bundle_variant_data_t *data = NULL; + const pe_resource_t *top = pe__const_top_resource(instance, true); + + if ((top == NULL) || (top->variant != pe_container)) { + return NULL; + } + get_bundle_variant_data(data, top); + + for (const GList *iter = data->replicas; iter != NULL; iter = iter->next) { + const pe__bundle_replica_t *replica = iter->data; + + if (instance == replica->container) { + return replica->child; + } + } + return NULL; +} + +void +pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) +{ + pe__bundle_variant_data_t *bundle_data = NULL; + get_bundle_variant_data(bundle_data, rsc); + + pcmk__apply_location(rsc, constraint); + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + if (replica->container) { + replica->container->cmds->apply_location(replica->container, + constraint); + } + if (replica->ip) { + replica->ip->cmds->apply_location(replica->ip, constraint); + } + } + + if (bundle_data->child + && ((constraint->role_filter == RSC_ROLE_UNPROMOTED) + || (constraint->role_filter == RSC_ROLE_PROMOTED))) { + bundle_data->child->cmds->apply_location(bundle_data->child, + constraint); + bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location, + constraint); + } +} + +/*! + * \internal + * \brief Add a resource's actions to the transition graph + * + * \param[in,out] rsc Resource whose actions should be added + */ +void +pcmk__bundle_expand(pe_resource_t *rsc) +{ + pe__bundle_variant_data_t *bundle_data = NULL; + + CRM_CHECK(rsc != NULL, return); + + get_bundle_variant_data(bundle_data, rsc); + + if (bundle_data->child) { + bundle_data->child->cmds->add_actions_to_graph(bundle_data->child); + } + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + CRM_ASSERT(replica); + if (replica->remote && replica->container + && pe__bundle_needs_remote_name(replica->remote)) { + + /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that + * run pacemaker-remoted inside, without needing a separate IP for + * the container. This is done by configuring the inner remote's + * connection host as the magic string "#uname", then + * replacing it with the underlying host when needed. 
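+ * The connection resource's parameters are expected to contain an entry
+ * like the following (illustrative XML; the id is made up):
+ *   <nvpair id="R-addr" name="addr" value="#uname"/>
+ * The XPath lookup just below locates that nvpair so its value can be
+ * replaced with the real host.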
+ */ + xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']", + replica->remote->xml, LOG_ERR); + const char *calculated_addr = NULL; + + // Replace the value in replica->remote->xml (if appropriate) + calculated_addr = pe__add_bundle_remote_name(replica->remote, + rsc->cluster, + nvpair, "value"); + if (calculated_addr) { + /* Since this is for the bundle as a resource, and not any + * particular action, replace the value in the default + * parameters (not evaluated for node). create_graph_action() + * will grab it from there to replace it in node-evaluated + * parameters. + */ + GHashTable *params = pe_rsc_params(replica->remote, + NULL, rsc->cluster); + + g_hash_table_replace(params, + strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), + strdup(calculated_addr)); + } else { + /* The only way to get here is if the remote connection is + * neither currently running nor scheduled to run. That means we + * won't be doing any operations that require addr (only start + * requires it; we additionally use it to compare digests when + * unpacking status, promote, and migrate_from history, but + * that's already happened by this point). + */ + crm_info("Unable to determine address for bundle %s remote connection", + rsc->id); + } + } + if (replica->ip) { + replica->ip->cmds->add_actions_to_graph(replica->ip); + } + if (replica->container) { + replica->container->cmds->add_actions_to_graph(replica->container); + } + if (replica->remote) { + replica->remote->cmds->add_actions_to_graph(replica->remote); + } + } +} + +/*! + * \internal + * + * \brief Schedule any probes needed for a resource on a node + * + * \param[in,out] rsc Resource to create probe for + * \param[in,out] node Node to create probe on + * + * \return true if any probe was created, otherwise false + */ +bool +pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node) +{ + bool any_created = false; + pe__bundle_variant_data_t *bundle_data = NULL; + + CRM_CHECK(rsc != NULL, return false); + + get_bundle_variant_data(bundle_data, rsc); + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + CRM_ASSERT(replica); + if ((replica->ip != NULL) + && replica->ip->cmds->create_probe(replica->ip, node)) { + any_created = true; + } + if ((replica->child != NULL) && (node->details == replica->node->details) + && replica->child->cmds->create_probe(replica->child, node)) { + any_created = true; + } + if ((replica->container != NULL) + && replica->container->cmds->create_probe(replica->container, + node)) { + any_created = true; + + /* If we're limited to one replica per host (due to + * the lack of an IP range probably), then we don't + * want any of our peer containers starting until + * we've established that no other copies are already + * running. 
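+ * (Concretely, the loop below orders every peer container's start after
+ * this container's probe whenever nreplicas_per_host is 1.)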
+ * + * Partly this is to ensure that nreplicas_per_host is + * observed, but also to ensure that the containers + * don't fail to start because the necessary port + * mappings (which won't include an IP for uniqueness) + * are already taken + */ + + for (GList *tIter = bundle_data->replicas; + tIter && (bundle_data->nreplicas_per_host == 1); + tIter = tIter->next) { + pe__bundle_replica_t *other = tIter->data; + + if ((other != replica) && (other != NULL) + && (other->container != NULL)) { + + pcmk__new_ordering(replica->container, + pcmk__op_key(replica->container->id, RSC_STATUS, 0), + NULL, other->container, + pcmk__op_key(other->container->id, RSC_START, 0), + NULL, + pe_order_optional|pe_order_same_node, + rsc->cluster); + } + } + } + if ((replica->container != NULL) && (replica->remote != NULL) + && replica->remote->cmds->create_probe(replica->remote, node)) { + + /* Do not probe the remote resource until we know where the + * container is running. This is required for REMOTE_CONTAINER_HACK + * to correctly probe remote resources. + */ + char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS, + 0); + pe_action_t *probe = find_first_action(replica->remote->actions, + probe_uuid, NULL, node); + + free(probe_uuid); + if (probe != NULL) { + any_created = true; + crm_trace("Ordering %s probe on %s", + replica->remote->id, pe__node_name(node)); + pcmk__new_ordering(replica->container, + pcmk__op_key(replica->container->id, RSC_START, 0), + NULL, replica->remote, NULL, probe, + pe_order_probe, rsc->cluster); + } + } + } + return any_created; +} + +void +pcmk__output_bundle_actions(pe_resource_t *rsc) +{ + pe__bundle_variant_data_t *bundle_data = NULL; + + CRM_CHECK(rsc != NULL, return); + + get_bundle_variant_data(bundle_data, rsc); + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { + pe__bundle_replica_t *replica = gIter->data; + + CRM_ASSERT(replica); + if (replica->ip != NULL) { + replica->ip->cmds->output_actions(replica->ip); + } + if (replica->container != NULL) { + replica->container->cmds->output_actions(replica->container); + } + if (replica->remote != NULL) { + replica->remote->cmds->output_actions(replica->remote); + } + if (replica->child != NULL) { + replica->child->cmds->output_actions(replica->child); + } + } +} + +// Bundle implementation of resource_alloc_functions_t:add_utilization() +void +pcmk__bundle_add_utilization(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList *all_rscs, + GHashTable *utilization) +{ + pe__bundle_variant_data_t *bundle_data = NULL; + pe__bundle_replica_t *replica = NULL; + + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { + return; + } + + get_bundle_variant_data(bundle_data, rsc); + if (bundle_data->replicas == NULL) { + return; + } + + /* All bundle replicas are identical, so using the utilization of the first + * is sufficient for any. Only the implicit container resource can have + * utilization values. 
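+     * (Editor's note: accordingly, only the first replica's container is
+     * consulted below, and its values stand in for every replica.)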
+ */ + replica = (pe__bundle_replica_t *) bundle_data->replicas->data; + if (replica->container != NULL) { + replica->container->cmds->add_utilization(replica->container, orig_rsc, + all_rscs, utilization); + } +} + +// Bundle implementation of resource_alloc_functions_t:shutdown_lock() +void +pcmk__bundle_shutdown_lock(pe_resource_t *rsc) +{ + return; // Bundles currently don't support shutdown locks +} diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c new file mode 100644 index 0000000..934f512 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -0,0 +1,643 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +/*! + * \internal + * \brief Assign a clone resource's instances to nodes + * + * \param[in,out] rsc Clone resource to assign + * \param[in] prefer Node to prefer, if all else is equal + * + * \return NULL (clones are not assigned to a single node) + */ +pe_node_t * +pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer) +{ + CRM_ASSERT(pe_rsc_is_clone(rsc)); + + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { + return NULL; // Assignment has already been done + } + + // Detect assignment loops + if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { + pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id); + return NULL; + } + pe__set_resource_flags(rsc, pe_rsc_allocating); + + // If this clone is promotable, consider nodes' promotion scores + if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { + pcmk__add_promotion_scores(rsc); + } + + /* If this clone is colocated with any other resources, assign those first. + * Since the this_with_colocations() method boils down to a copy of rsc_cons + * for clones, we can use that here directly for efficiency. + */ + for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) { + pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data; + + pe_rsc_trace(rsc, "%s: Assigning colocation %s primary %s first", + rsc->id, constraint->id, constraint->primary->id); + constraint->primary->cmds->assign(constraint->primary, prefer); + } + + /* If any resources are colocated with this one, consider their preferences. + * Because the with_this_colocations() method boils down to a copy of + * rsc_cons_lhs for clones, we can use that here directly for efficiency. 
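+     *
+     * (Editor's note: pcmk__add_dependent_scores below merges each
+     * dependent's node preferences into this clone's allowed-node scores
+     * before the instances are assigned.)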
+ */ + g_list_foreach(rsc->rsc_cons_lhs, pcmk__add_dependent_scores, rsc); + + pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), + rsc, __func__, rsc->allowed_nodes, rsc->cluster); + + rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance); + pcmk__assign_instances(rsc, rsc->children, pe__clone_max(rsc), + pe__clone_node_max(rsc)); + + if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { + pcmk__set_instance_roles(rsc); + } + + pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating); + pe_rsc_trace(rsc, "Assigned clone %s", rsc->id); + return NULL; +} + +static pe_action_t * +find_rsc_action(pe_resource_t *rsc, const char *task) +{ + pe_action_t *match = NULL; + GList *actions = pe__resource_actions(rsc, NULL, task, FALSE); + + for (GList *item = actions; item != NULL; item = item->next) { + pe_action_t *op = (pe_action_t *) item->data; + + if (!pcmk_is_set(op->flags, pe_action_optional)) { + if (match != NULL) { + // More than one match, don't return any + match = NULL; + break; + } + match = op; + } + } + g_list_free(actions); + return match; +} + +/*! + * \internal + * \brief Order starts and stops of an ordered clone's instances + * + * \param[in,out] rsc Clone resource + */ +static void +order_instance_starts_stops(pe_resource_t *rsc) +{ + pe_action_t *last_stop = NULL; + pe_action_t *last_start = NULL; + + // Instances must be ordered by ascending instance number, so sort them + rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); + + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child = (pe_resource_t *) iter->data; + pe_action_t *action = NULL; + + // Order this instance's stop after previous instance's stop + // @TODO: Should instances be stopped in reverse order instead? 
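+        /* Editor's illustrative note: for an ordered clone with instances
+         * rsc:0, rsc:1 and rsc:2, the two blocks below chain consecutive
+         * instances' starts (start of rsc:0, then rsc:1, then rsc:2) and
+         * likewise chain consecutive instances' stops, so instances are
+         * never started or stopped in parallel.
+         */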
+ action = find_rsc_action(child, RSC_STOP); + if (action != NULL) { + if (last_stop != NULL) { + order_actions(action, last_stop, pe_order_optional); + } + last_stop = action; + } + + // Order this instance's start after previous instance's start + action = find_rsc_action(child, RSC_START); + if (action != NULL) { + if (last_start != NULL) { + order_actions(last_start, action, pe_order_optional); + } + last_start = action; + } + } +} + +void +clone_create_actions(pe_resource_t *rsc) +{ + pe_rsc_debug(rsc, "Creating actions for clone %s", rsc->id); + pcmk__create_instance_actions(rsc, rsc->children); + if (pe__clone_is_ordered(rsc)) { + order_instance_starts_stops(rsc); + } + if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { + pcmk__create_promotable_actions(rsc); + } +} + +void +clone_internal_constraints(pe_resource_t *rsc) +{ + pe_resource_t *last_rsc = NULL; + GList *gIter; + bool ordered = pe__clone_is_ordered(rsc); + + pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); + pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, + pe_order_optional); + pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED, + pe_order_runnable_left); + pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED, + pe_order_runnable_left); + + if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { + pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP, + pe_order_optional); + pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE, + pe_order_runnable_left); + } + + if (ordered) { + /* we have to maintain a consistent sorted child list when building order constraints */ + rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); + } + for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; + + child_rsc->cmds->internal_constraints(child_rsc); + + pcmk__order_starts(rsc, child_rsc, + pe_order_runnable_left|pe_order_implies_first_printed); + pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED, + pe_order_implies_then_printed); + if (ordered && (last_rsc != NULL)) { + pcmk__order_starts(last_rsc, child_rsc, pe_order_optional); + } + + pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed); + pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED, + pe_order_implies_then_printed); + if (ordered && (last_rsc != NULL)) { + pcmk__order_stops(child_rsc, last_rsc, pe_order_optional); + } + + last_rsc = child_rsc; + } + if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { + pcmk__order_promotable_instances(rsc); + } +} + +/*! + * \internal + * \brief Apply a colocation's score to node weights or resource priority + * + * Given a colocation constraint, apply its score to the dependent's + * allowed node weights (if we are still placing resources) or priority (if + * we are choosing promotable clone instance roles). + * + * \param[in,out] dependent Dependent resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint to apply + * \param[in] for_dependent true if called on behalf of dependent + */ +void +pcmk__clone_apply_coloc_score(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation, + bool for_dependent) +{ + GList *gIter = NULL; + gboolean do_interleave = FALSE; + const char *interleave_s = NULL; + + /* This should never be called for the clone itself as a dependent. 
Instead,
+     * we add its colocation constraints to its instances and call the
+     * apply_coloc_score() for the instances as dependents.
+     */
+    CRM_ASSERT(!for_dependent);
+
+    CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
+              return);
+    CRM_CHECK(dependent->variant == pe_native, return);
+
+    pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
+                 colocation->id, dependent->id, primary->id, colocation->score);
+
+    if (pcmk_is_set(primary->flags, pe_rsc_promotable)) {
+        if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+            // We haven't placed the primary yet, so we can't apply colocation
+            pe_rsc_trace(primary, "%s is still provisional", primary->id);
+            return;
+
+        } else if (colocation->primary_role == RSC_ROLE_UNKNOWN) {
+            // This isn't a role-specific colocation, so handle normally
+            pe_rsc_trace(primary, "Handling %s as a clone colocation",
+                         colocation->id);
+
+        } else if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
+            // We're placing the dependent
+            pcmk__update_dependent_with_promotable(primary, dependent,
+                                                   colocation);
+            return;
+
+        } else if (colocation->dependent_role == RSC_ROLE_PROMOTED) {
+            // We're choosing roles for the dependent
+            pcmk__update_promotable_dependent_priority(primary, dependent,
+                                                       colocation);
+            return;
+        }
+    }
+
+    // Only the dependent needs to be marked for interleave
+    interleave_s = g_hash_table_lookup(colocation->dependent->meta,
+                                       XML_RSC_ATTR_INTERLEAVE);
+    if (crm_is_true(interleave_s)
+        && (colocation->dependent->variant > pe_group)) {
+        /* @TODO Do we actually care about multiple primary copies sharing a
+         * dependent copy anymore?
+         */
+        if (copies_per_node(colocation->dependent) != copies_per_node(colocation->primary)) {
+            pcmk__config_err("Cannot interleave %s and %s because they do not "
+                             "support the same number of instances per node",
+                             colocation->dependent->id,
+                             colocation->primary->id);
+
+        } else {
+            do_interleave = TRUE;
+        }
+    }
+
+    if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+        pe_rsc_trace(primary, "%s is still provisional", primary->id);
+        return;
+
+    } else if (do_interleave) {
+        pe_resource_t *primary_instance = NULL;
+
+        primary_instance = pcmk__find_compatible_instance(dependent, primary,
+                                                          RSC_ROLE_UNKNOWN,
+                                                          false);
+        if (primary_instance != NULL) {
+            pe_rsc_debug(primary, "Pairing %s with %s",
+                         dependent->id, primary_instance->id);
+            dependent->cmds->apply_coloc_score(dependent, primary_instance,
+                                               colocation, true);
+
+        } else if (colocation->score >= INFINITY) {
+            crm_notice("Cannot pair %s with instance of %s",
+                       dependent->id, primary->id);
+            pcmk__assign_resource(dependent, NULL, true);
+
+        } else {
+            pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
+                         dependent->id, primary->id);
+        }
+
+        return;
+
+    } else if (colocation->score >= INFINITY) {
+        GList *affected_nodes = NULL;
+
+        gIter = primary->children;
+        for (; gIter != NULL; gIter = gIter->next) {
+            pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+            pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
+
+            if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
+                pe_rsc_trace(primary, "Allowing %s: %s %d",
+                             colocation->id, pe__node_name(chosen),
+                             chosen->weight);
+                affected_nodes = g_list_prepend(affected_nodes, chosen);
+            }
+        }
+
+        node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE);
+        g_list_free(affected_nodes);
+        return;
+    }
+
+    gIter = primary->children;
+    for (; gIter != NULL; gIter = gIter->next) {
+        pe_resource_t *child_rsc =
(pe_resource_t *) gIter->data;
+
+        child_rsc->cmds->apply_coloc_score(dependent, child_rsc, colocation,
+                                           false);
+    }
+}
+
+// Clone implementation of resource_alloc_functions_t:with_this_colocations()
+void
+pcmk__with_clone_colocations(const pe_resource_t *rsc,
+                             const pe_resource_t *orig_rsc, GList **list)
+{
+    CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
+
+    if (rsc == orig_rsc) { // Colocations are wanted for clone itself
+        pcmk__add_with_this_list(list, rsc->rsc_cons_lhs);
+    } else {
+        pcmk__add_collective_constraints(list, orig_rsc, rsc, true);
+    }
+}
+
+// Clone implementation of resource_alloc_functions_t:this_with_colocations()
+void
+pcmk__clone_with_colocations(const pe_resource_t *rsc,
+                             const pe_resource_t *orig_rsc, GList **list)
+{
+    CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
+
+    if (rsc == orig_rsc) { // Colocations are wanted for clone itself
+        pcmk__add_this_with_list(list, rsc->rsc_cons);
+    } else {
+        pcmk__add_collective_constraints(list, orig_rsc, rsc, false);
+    }
+}
+
+enum pe_action_flags
+clone_action_flags(pe_action_t *action, const pe_node_t *node)
+{
+    return pcmk__collective_action_flags(action, action->rsc->children, node);
+}
+
+void
+clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
+{
+    GList *gIter = rsc->children;
+
+    pe_rsc_trace(rsc, "Processing location constraint %s for %s",
+                 constraint->id, rsc->id);
+
+    pcmk__apply_location(rsc, constraint);
+
+    for (; gIter != NULL; gIter = gIter->next) {
+        pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+        child_rsc->cmds->apply_location(child_rsc, constraint);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Add a resource's actions to the transition graph
+ *
+ * \param[in,out] rsc  Resource whose actions should be added
+ */
+void
+clone_expand(pe_resource_t *rsc)
+{
+    GList *gIter = NULL;
+
+    g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL);
+
+    pe__create_clone_notifications(rsc);
+
+    /* Now that the notifications have been created, we can expand the children */
+
+    gIter = rsc->children;
+    for (; gIter != NULL; gIter = gIter->next) {
+        pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+        child_rsc->cmds->add_actions_to_graph(child_rsc);
+    }
+
+    pcmk__add_rsc_actions_to_graph(rsc);
+
+    /* The notifications are in the graph now, so we can destroy the notify_data */
+    pe__free_clone_notification_data(rsc);
+}
+
+// Check whether a resource or any of its children is known on node
+static bool
+rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node)
+{
+    if (rsc->children) {
+        for (GList *child_iter = rsc->children; child_iter != NULL;
+             child_iter = child_iter->next) {
+
+            pe_resource_t *child = (pe_resource_t *) child_iter->data;
+
+            if (rsc_known_on(child, node)) {
+                return TRUE;
+            }
+        }
+
+    } else if (rsc->known_on) {
+        GHashTableIter iter;
+        pe_node_t *known_node = NULL;
+
+        g_hash_table_iter_init(&iter, rsc->known_on);
+        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
+            if (node->details == known_node->details) {
+                return TRUE;
+            }
+        }
+    }
+    return FALSE;
+}
+
+// Look for an instance of clone that is known on node
+static pe_resource_t *
+find_instance_on(const pe_resource_t *clone, const pe_node_t *node)
+{
+    for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) {
+        pe_resource_t *child = (pe_resource_t *) gIter->data;
+
+        if (rsc_known_on(child, node)) {
+            return child;
+        }
+    }
+    return NULL;
+}
+
+// For anonymous clones, only a single instance needs to be
probed +static bool +probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, + pe_working_set_t *data_set) +{ + // First, check if we probed an instance on this node last time + pe_resource_t *child = find_instance_on(rsc, node); + + // Otherwise, check if we plan to start an instance on this node + if (child == NULL) { + for (GList *child_iter = rsc->children; child_iter && !child; + child_iter = child_iter->next) { + + pe_node_t *local_node = NULL; + pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; + + if (child_rsc) { /* make clang analyzer happy */ + local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); + if (local_node && (local_node->details == node->details)) { + child = child_rsc; + } + } + } + } + + // Otherwise, use the first clone instance + if (child == NULL) { + child = rsc->children->data; + } + CRM_ASSERT(child); + return child->cmds->create_probe(child, node); +} + +/*! + * \internal + * + * \brief Schedule any probes needed for a resource on a node + * + * \param[in,out] rsc Resource to create probe for + * \param[in,out] node Node to create probe on + * + * \return true if any probe was created, otherwise false + */ +bool +clone_create_probe(pe_resource_t *rsc, pe_node_t *node) +{ + CRM_ASSERT(rsc); + + rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); + if (rsc->children == NULL) { + pe_warn("Clone %s has no children", rsc->id); + return false; + } + + if (rsc->exclusive_discover) { + pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); + if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { + /* exclusive discover is enabled and this node is not marked + * as a node this resource should be discovered on + * + * remove the node from allowed_nodes so that the + * notification contains only nodes that we might ever run + * on + */ + g_hash_table_remove(rsc->allowed_nodes, node->details->id); + + /* Bit of a shortcut - might as well take it */ + return false; + } + } + + if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { + return pcmk__probe_resource_list(rsc->children, node); + } else { + return probe_anonymous_clone(rsc, node, rsc->cluster); + } +} + +void +clone_append_meta(const pe_resource_t *rsc, xmlNode *xml) +{ + char *name = NULL; + + name = crm_meta_name(XML_RSC_ATTR_UNIQUE); + crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique)); + free(name); + + name = crm_meta_name(XML_RSC_ATTR_NOTIFY); + crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify)); + free(name); + + name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); + crm_xml_add_int(xml, name, pe__clone_max(rsc)); + free(name); + + name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); + crm_xml_add_int(xml, name, pe__clone_node_max(rsc)); + free(name); + + if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { + int promoted_max = pe__clone_promoted_max(rsc); + int promoted_node_max = pe__clone_promoted_node_max(rsc); + + name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); + crm_xml_add_int(xml, name, promoted_max); + free(name); + + name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); + crm_xml_add_int(xml, name, promoted_node_max); + free(name); + + /* @COMPAT Maintain backward compatibility with resource agents that + * expect the old names (deprecated since 2.0.0). 
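+     * (Editor's note: resource agents see these as CRM_meta_* environment
+     * variables, so both the promoted-* names and their legacy equivalents
+     * are emitted below.)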
+ */ + name = crm_meta_name(PCMK_XA_PROMOTED_MAX_LEGACY); + crm_xml_add_int(xml, name, promoted_max); + free(name); + + name = crm_meta_name(PCMK_XA_PROMOTED_NODE_MAX_LEGACY); + crm_xml_add_int(xml, name, promoted_node_max); + free(name); + } +} + +// Clone implementation of resource_alloc_functions_t:add_utilization() +void +pcmk__clone_add_utilization(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList *all_rscs, + GHashTable *utilization) +{ + bool existing = false; + pe_resource_t *child = NULL; + + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { + return; + } + + // Look for any child already existing in the list + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + child = (pe_resource_t *) iter->data; + if (g_list_find(all_rscs, child)) { + existing = true; // Keep checking remaining children + } else { + // If this is a clone of a group, look for group's members + for (GList *member_iter = child->children; member_iter != NULL; + member_iter = member_iter->next) { + + pe_resource_t *member = (pe_resource_t *) member_iter->data; + + if (g_list_find(all_rscs, member) != NULL) { + // Add *child's* utilization, not group member's + child->cmds->add_utilization(child, orig_rsc, all_rscs, + utilization); + existing = true; + break; + } + } + } + } + + if (!existing && (rsc->children != NULL)) { + // If nothing was found, still add first child's utilization + child = (pe_resource_t *) rsc->children->data; + + child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); + } +} + +// Clone implementation of resource_alloc_functions_t:shutdown_lock() +void +pcmk__clone_shutdown_lock(pe_resource_t *rsc) +{ + return; // Clones currently don't support shutdown locks +} diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c new file mode 100644 index 0000000..eeef4f1 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_colocation.c @@ -0,0 +1,1595 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <stdbool.h> +#include <glib.h> + +#include <crm/crm.h> +#include <crm/pengine/status.h> +#include <pacemaker-internal.h> + +#include "crm/common/util.h" +#include "crm/common/xml_internal.h" +#include "crm/msg_xml.h" +#include "libpacemaker_private.h" + +#define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ + __rsc = pcmk__find_constraint_resource(data_set->resources, __name); \ + if (__rsc == NULL) { \ + pcmk__config_err("%s: No resource found for %s", __set, __name); \ + return; \ + } \ + } while(0) + +// Used to temporarily mark a node as unusable +#define INFINITY_HACK (INFINITY * -100) + +static gint +cmp_dependent_priority(gconstpointer a, gconstpointer b) +{ + const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a; + const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b; + + if (a == NULL) { + return 1; + } + if (b == NULL) { + return -1; + } + + CRM_ASSERT(rsc_constraint1->dependent != NULL); + CRM_ASSERT(rsc_constraint1->primary != NULL); + + if (rsc_constraint1->dependent->priority > rsc_constraint2->dependent->priority) { + return -1; + } + + if (rsc_constraint1->dependent->priority < rsc_constraint2->dependent->priority) { + return 1; + } + + /* Process clones before primitives and groups */ + if (rsc_constraint1->dependent->variant > rsc_constraint2->dependent->variant) { + return -1; + } + if (rsc_constraint1->dependent->variant < rsc_constraint2->dependent->variant) { + return 1; + } + + /* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable + * clones (probably unnecessary, but avoids having to update regression + * tests) + */ + if (rsc_constraint1->dependent->variant == pe_clone) { + if (pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable) + && !pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) { + return -1; + } else if (!pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable) + && pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) { + return 1; + } + } + + return strcmp(rsc_constraint1->dependent->id, + rsc_constraint2->dependent->id); +} + +static gint +cmp_primary_priority(gconstpointer a, gconstpointer b) +{ + const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a; + const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b; + + if (a == NULL) { + return 1; + } + if (b == NULL) { + return -1; + } + + CRM_ASSERT(rsc_constraint1->dependent != NULL); + CRM_ASSERT(rsc_constraint1->primary != NULL); + + if (rsc_constraint1->primary->priority > rsc_constraint2->primary->priority) { + return -1; + } + + if (rsc_constraint1->primary->priority < rsc_constraint2->primary->priority) { + return 1; + } + + /* Process clones before primitives and groups */ + if (rsc_constraint1->primary->variant > rsc_constraint2->primary->variant) { + return -1; + } else if (rsc_constraint1->primary->variant < rsc_constraint2->primary->variant) { + return 1; + } + + /* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable + * clones (probably unnecessary, but avoids having to update regression + * tests) + */ + if (rsc_constraint1->primary->variant == pe_clone) { + if (pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable) + && !pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) { + return -1; + } else if (!pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable) + && pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) { + return 
1; + } + } + + return strcmp(rsc_constraint1->primary->id, rsc_constraint2->primary->id); +} + +/*! + * \internal + * \brief Add a "this with" colocation constraint to a sorted list + * + * \param[in,out] list List of constraints to add \p colocation to + * \param[in] colocation Colocation constraint to add to \p list + * + * \note The list will be sorted using cmp_primary_priority(). + */ +void +pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation) +{ + CRM_ASSERT((list != NULL) && (colocation != NULL)); + + crm_trace("Adding colocation %s (%s with %s%s%s @%d) " + "to 'this with' list", + colocation->id, colocation->dependent->id, + colocation->primary->id, + (colocation->node_attribute == NULL)? "" : " using ", + pcmk__s(colocation->node_attribute, ""), + colocation->score); + *list = g_list_insert_sorted(*list, (gpointer) colocation, + cmp_primary_priority); +} + +/*! + * \internal + * \brief Add a list of "this with" colocation constraints to a list + * + * \param[in,out] list List of constraints to add \p addition to + * \param[in] addition List of colocation constraints to add to \p list + * + * \note The lists must be pre-sorted by cmp_primary_priority(). + */ +void +pcmk__add_this_with_list(GList **list, GList *addition) +{ + CRM_CHECK((list != NULL), return); + + if (*list == NULL) { // Trivial case for efficiency + crm_trace("Copying %u 'this with' colocations to new list", + g_list_length(addition)); + *list = g_list_copy(addition); + } else { + while (addition != NULL) { + pcmk__add_this_with(list, addition->data); + addition = addition->next; + } + } +} + +/*! + * \internal + * \brief Add a "with this" colocation constraint to a sorted list + * + * \param[in,out] list List of constraints to add \p colocation to + * \param[in] colocation Colocation constraint to add to \p list + * + * \note The list will be sorted using cmp_dependent_priority(). + */ +void +pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation) +{ + CRM_ASSERT((list != NULL) && (colocation != NULL)); + + crm_trace("Adding colocation %s (%s with %s%s%s @%d) " + "to 'with this' list", + colocation->id, colocation->dependent->id, + colocation->primary->id, + (colocation->node_attribute == NULL)? "" : " using ", + pcmk__s(colocation->node_attribute, ""), + colocation->score); + *list = g_list_insert_sorted(*list, (gpointer) colocation, + cmp_dependent_priority); +} + +/*! + * \internal + * \brief Add a list of "with this" colocation constraints to a list + * + * \param[in,out] list List of constraints to add \p addition to + * \param[in] addition List of colocation constraints to add to \p list + * + * \note The lists must be pre-sorted by cmp_dependent_priority(). + */ +void +pcmk__add_with_this_list(GList **list, GList *addition) +{ + CRM_CHECK((list != NULL), return); + + if (*list == NULL) { // Trivial case for efficiency + crm_trace("Copying %u 'with this' colocations to new list", + g_list_length(addition)); + *list = g_list_copy(addition); + } else { + while (addition != NULL) { + pcmk__add_with_this(list, addition->data); + addition = addition->next; + } + } +} + +/*! 
+ * \internal + * \brief Add orderings necessary for an anti-colocation constraint + * + * \param[in,out] first_rsc One resource in an anti-colocation + * \param[in] first_role Anti-colocation role of \p first_rsc + * \param[in] then_rsc Other resource in the anti-colocation + * \param[in] then_role Anti-colocation role of \p then_rsc + */ +static void +anti_colocation_order(pe_resource_t *first_rsc, int first_role, + pe_resource_t *then_rsc, int then_role) +{ + const char *first_tasks[] = { NULL, NULL }; + const char *then_tasks[] = { NULL, NULL }; + + /* Actions to make first_rsc lose first_role */ + if (first_role == RSC_ROLE_PROMOTED) { + first_tasks[0] = CRMD_ACTION_DEMOTE; + + } else { + first_tasks[0] = CRMD_ACTION_STOP; + + if (first_role == RSC_ROLE_UNPROMOTED) { + first_tasks[1] = CRMD_ACTION_PROMOTE; + } + } + + /* Actions to make then_rsc gain then_role */ + if (then_role == RSC_ROLE_PROMOTED) { + then_tasks[0] = CRMD_ACTION_PROMOTE; + + } else { + then_tasks[0] = CRMD_ACTION_START; + + if (then_role == RSC_ROLE_UNPROMOTED) { + then_tasks[1] = CRMD_ACTION_DEMOTE; + } + } + + for (int first_lpc = 0; + (first_lpc <= 1) && (first_tasks[first_lpc] != NULL); first_lpc++) { + + for (int then_lpc = 0; + (then_lpc <= 1) && (then_tasks[then_lpc] != NULL); then_lpc++) { + + pcmk__order_resource_actions(first_rsc, first_tasks[first_lpc], + then_rsc, then_tasks[then_lpc], + pe_order_anti_colocation); + } + } +} + +/*! + * \internal + * \brief Add a new colocation constraint to a cluster working set + * + * \param[in] id XML ID for this constraint + * \param[in] node_attr Colocate by this attribute (NULL for #uname) + * \param[in] score Constraint score + * \param[in,out] dependent Resource to be colocated + * \param[in,out] primary Resource to colocate \p dependent with + * \param[in] dependent_role Current role of \p dependent + * \param[in] primary_role Current role of \p primary + * \param[in] influence Whether colocation constraint has influence + * \param[in,out] data_set Cluster working set to add constraint to + */ +void +pcmk__new_colocation(const char *id, const char *node_attr, int score, + pe_resource_t *dependent, pe_resource_t *primary, + const char *dependent_role, const char *primary_role, + bool influence, pe_working_set_t *data_set) +{ + pcmk__colocation_t *new_con = NULL; + + if (score == 0) { + crm_trace("Ignoring colocation '%s' because score is 0", id); + return; + } + if ((dependent == NULL) || (primary == NULL)) { + pcmk__config_err("Ignoring colocation '%s' because resource " + "does not exist", id); + return; + } + + new_con = calloc(1, sizeof(pcmk__colocation_t)); + if (new_con == NULL) { + return; + } + + if (pcmk__str_eq(dependent_role, RSC_ROLE_STARTED_S, + pcmk__str_null_matches|pcmk__str_casei)) { + dependent_role = RSC_ROLE_UNKNOWN_S; + } + + if (pcmk__str_eq(primary_role, RSC_ROLE_STARTED_S, + pcmk__str_null_matches|pcmk__str_casei)) { + primary_role = RSC_ROLE_UNKNOWN_S; + } + + new_con->id = id; + new_con->dependent = dependent; + new_con->primary = primary; + new_con->score = score; + new_con->dependent_role = text2role(dependent_role); + new_con->primary_role = text2role(primary_role); + new_con->node_attribute = node_attr; + new_con->influence = influence; + + if (node_attr == NULL) { + node_attr = CRM_ATTR_UNAME; + } + + pe_rsc_trace(dependent, "%s ==> %s (%s %d)", + dependent->id, primary->id, node_attr, score); + + pcmk__add_this_with(&(dependent->rsc_cons), new_con); + pcmk__add_with_this(&(primary->rsc_cons_lhs), new_con); + + 
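+
+    /* Editor's illustrative sketch: a configuration fragment such as
+     *   <rsc_colocation id="c1" rsc="A" with-rsc="B" score="INFINITY"/>
+     * arrives here with A as the dependent and B as the primary; the new
+     * constraint has just been added to A's "this with" and B's "with this"
+     * lists, and is appended to the working set below.
+     */
+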
data_set->colocation_constraints = g_list_append(data_set->colocation_constraints, + new_con); + + if (score <= -INFINITY) { + anti_colocation_order(dependent, new_con->dependent_role, primary, + new_con->primary_role); + anti_colocation_order(primary, new_con->primary_role, dependent, + new_con->dependent_role); + } +} + +/*! + * \internal + * \brief Return the boolean influence corresponding to configuration + * + * \param[in] coloc_id Colocation XML ID (for error logging) + * \param[in] rsc Resource involved in constraint (for default) + * \param[in] influence_s String value of influence option + * + * \return true if string evaluates true, false if string evaluates false, + * or value of resource's critical option if string is NULL or invalid + */ +static bool +unpack_influence(const char *coloc_id, const pe_resource_t *rsc, + const char *influence_s) +{ + if (influence_s != NULL) { + int influence_i = 0; + + if (crm_str_to_boolean(influence_s, &influence_i) < 0) { + pcmk__config_err("Constraint '%s' has invalid value for " + XML_COLOC_ATTR_INFLUENCE " (using default)", + coloc_id); + } else { + return (influence_i != 0); + } + } + return pcmk_is_set(rsc->flags, pe_rsc_critical); +} + +static void +unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + const char *influence_s, pe_working_set_t *data_set) +{ + xmlNode *xml_rsc = NULL; + pe_resource_t *with = NULL; + pe_resource_t *resource = NULL; + const char *set_id = ID(set); + const char *role = crm_element_value(set, "role"); + const char *ordering = crm_element_value(set, "ordering"); + int local_score = score; + bool sequential = false; + + const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); + + if (score_s) { + local_score = char2score(score_s); + } + if (local_score == 0) { + crm_trace("Ignoring colocation '%s' for set '%s' because score is 0", + coloc_id, set_id); + return; + } + + if (ordering == NULL) { + ordering = "group"; + } + + if (pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok && !sequential) { + return; + + } else if ((local_score > 0) + && pcmk__str_eq(ordering, "group", pcmk__str_casei)) { + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); + if (with != NULL) { + pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id); + pcmk__new_colocation(set_id, NULL, local_score, resource, + with, role, role, + unpack_influence(coloc_id, resource, + influence_s), data_set); + } + with = resource; + } + + } else if (local_score > 0) { + pe_resource_t *last = NULL; + + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); + if (last != NULL) { + pe_rsc_trace(resource, "Colocating %s with %s", + last->id, resource->id); + pcmk__new_colocation(set_id, NULL, local_score, last, + resource, role, role, + unpack_influence(coloc_id, last, + influence_s), data_set); + } + + last = resource; + } + + } else { + /* Anti-colocating with every prior resource is + * the only way to ensure the intuitive result + * (i.e. 
that no one in the set can run with anyone else in the set) + */ + + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + xmlNode *xml_rsc_with = NULL; + bool influence = true; + + EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); + influence = unpack_influence(coloc_id, resource, influence_s); + + for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc_with != NULL; + xml_rsc_with = crm_next_same_xml(xml_rsc_with)) { + + if (pcmk__str_eq(resource->id, ID(xml_rsc_with), + pcmk__str_casei)) { + break; + } + EXPAND_CONSTRAINT_IDREF(set_id, with, ID(xml_rsc_with)); + pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, + with->id); + pcmk__new_colocation(set_id, NULL, local_score, + resource, with, role, role, + influence, data_set); + } + } + } +} + +static void +colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + const char *influence_s, pe_working_set_t *data_set) +{ + xmlNode *xml_rsc = NULL; + pe_resource_t *rsc_1 = NULL; + pe_resource_t *rsc_2 = NULL; + + const char *role_1 = crm_element_value(set1, "role"); + const char *role_2 = crm_element_value(set2, "role"); + + int rc = pcmk_rc_ok; + bool sequential = false; + + if (score == 0) { + crm_trace("Ignoring colocation '%s' between sets because score is 0", + id); + return; + } + + rc = pcmk__xe_get_bool_attr(set1, "sequential", &sequential); + if (rc != pcmk_rc_ok || sequential) { + // Get the first one + xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + if (xml_rsc != NULL) { + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); + } + } + + rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential); + if (rc != pcmk_rc_ok || sequential) { + // Get the last one + const char *rid = NULL; + + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + rid = ID(xml_rsc); + } + EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); + } + + if ((rsc_1 != NULL) && (rsc_2 != NULL)) { + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, + unpack_influence(id, rsc_1, influence_s), + data_set); + + } else if (rsc_1 != NULL) { + bool influence = unpack_influence(id, rsc_1, influence_s); + + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, influence, data_set); + } + + } else if (rsc_2 != NULL) { + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, + unpack_influence(id, rsc_1, influence_s), + data_set); + } + + } else { + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + xmlNode *xml_rsc_2 = NULL; + bool influence = true; + + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); + influence = unpack_influence(id, rsc_1, influence_s); + + for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc_2 != NULL; + xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, + role_1, role_2, influence, + data_set); + } + } + } +} + +static void +unpack_simple_colocation(xmlNode *xml_obj, 
const char *id,
+                         const char *influence_s, pe_working_set_t *data_set)
+{
+    int score_i = 0;
+
+    const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
+    const char *dependent_id = crm_element_value(xml_obj,
+                                                 XML_COLOC_ATTR_SOURCE);
+    const char *primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET);
+    const char *dependent_role = crm_element_value(xml_obj,
+                                                   XML_COLOC_ATTR_SOURCE_ROLE);
+    const char *primary_role = crm_element_value(xml_obj,
+                                                 XML_COLOC_ATTR_TARGET_ROLE);
+    const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR);
+
+    // @COMPAT: Deprecated since 2.1.5
+    const char *dependent_instance = crm_element_value(xml_obj,
+                                                       XML_COLOC_ATTR_SOURCE_INSTANCE);
+    // @COMPAT: Deprecated since 2.1.5
+    const char *primary_instance = crm_element_value(xml_obj,
+                                                     XML_COLOC_ATTR_TARGET_INSTANCE);
+
+    pe_resource_t *dependent = pcmk__find_constraint_resource(data_set->resources,
+                                                              dependent_id);
+    pe_resource_t *primary = pcmk__find_constraint_resource(data_set->resources,
+                                                            primary_id);
+
+    if (dependent_instance != NULL) {
+        pe_warn_once(pe_wo_coloc_inst,
+                     "Support for " XML_COLOC_ATTR_SOURCE_INSTANCE " is "
+                     "deprecated and will be removed in a future release.");
+    }
+
+    if (primary_instance != NULL) {
+        pe_warn_once(pe_wo_coloc_inst,
+                     "Support for " XML_COLOC_ATTR_TARGET_INSTANCE " is "
+                     "deprecated and will be removed in a future release.");
+    }
+
+    if (dependent == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                         "does not exist", id, dependent_id);
+        return;
+
+    } else if (primary == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                         "does not exist", id, primary_id);
+        return;
+
+    } else if ((dependent_instance != NULL) && !pe_rsc_is_clone(dependent)) {
+        pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                         "is not a clone but instance '%s' was requested",
+                         id, dependent_id, dependent_instance);
+        return;
+
+    } else if ((primary_instance != NULL) && !pe_rsc_is_clone(primary)) {
+        pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                         "is not a clone but instance '%s' was requested",
+                         id, primary_id, primary_instance);
+        return;
+    }
+
+    if (dependent_instance != NULL) {
+        dependent = find_clone_instance(dependent, dependent_instance);
+        if (dependent == NULL) {
+            pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
+                              "does not have an instance '%s'",
+                              id, dependent_id, dependent_instance);
+            return;
+        }
+    }
+
+    if (primary_instance != NULL) {
+        primary = find_clone_instance(primary, primary_instance);
+        if (primary == NULL) {
+            pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
+                              "does not have an instance '%s'",
+                              id, primary_id, primary_instance);
+            return;
+        }
+    }
+
+    if (pcmk__xe_attr_is_true(xml_obj, XML_CONS_ATTR_SYMMETRICAL)) {
+        pcmk__config_warn("The colocation constraint '"
+                          XML_CONS_ATTR_SYMMETRICAL
+                          "' attribute has been removed");
+    }
+
+    if (score) {
+        score_i = char2score(score);
+    }
+
+    pcmk__new_colocation(id, attr, score_i, dependent, primary,
+                         dependent_role, primary_role,
+                         unpack_influence(id, dependent, influence_s), data_set);
+}
+
+// \return Standard Pacemaker return code
+static int
+unpack_colocation_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
+                       pe_working_set_t *data_set)
+{
+    const char *id = NULL;
+    const char *dependent_id = NULL;
+    const char *primary_id = NULL;
+    const char *dependent_role = NULL;
+    const char *primary_role = NULL;
+
+    pe_resource_t *dependent = NULL;
+
pe_resource_t *primary = NULL; + + pe_tag_t *dependent_tag = NULL; + pe_tag_t *primary_tag = NULL; + + xmlNode *dependent_set = NULL; + xmlNode *primary_set = NULL; + bool any_sets = false; + + *expanded_xml = NULL; + + CRM_CHECK(xml_obj != NULL, return EINVAL); + + id = ID(xml_obj); + if (id == NULL) { + pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, + crm_element_name(xml_obj)); + return pcmk_rc_unpack_error; + } + + // Check whether there are any resource sets with template or tag references + *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set); + if (*expanded_xml != NULL) { + crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation"); + return pcmk_rc_ok; + } + + dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); + primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); + if ((dependent_id == NULL) || (primary_id == NULL)) { + return pcmk_rc_ok; + } + + if (!pcmk__valid_resource_or_tag(data_set, dependent_id, &dependent, + &dependent_tag)) { + pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " + "valid resource or tag", id, dependent_id); + return pcmk_rc_unpack_error; + } + + if (!pcmk__valid_resource_or_tag(data_set, primary_id, &primary, + &primary_tag)) { + pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " + "valid resource or tag", id, primary_id); + return pcmk_rc_unpack_error; + } + + if ((dependent != NULL) && (primary != NULL)) { + /* Neither side references any template/tag. */ + return pcmk_rc_ok; + } + + if ((dependent_tag != NULL) && (primary_tag != NULL)) { + // A colocation constraint between two templates/tags makes no sense + pcmk__config_err("Ignoring constraint '%s' because two templates or " + "tags cannot be colocated", id); + return pcmk_rc_unpack_error; + } + + dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); + primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); + + *expanded_xml = copy_xml(xml_obj); + + // Convert template/tag reference in "rsc" into resource_set under constraint + if (!pcmk__tag_to_set(*expanded_xml, &dependent_set, XML_COLOC_ATTR_SOURCE, + true, data_set)) { + free_xml(*expanded_xml); + *expanded_xml = NULL; + return pcmk_rc_unpack_error; + } + + if (dependent_set != NULL) { + if (dependent_role != NULL) { + // Move "rsc-role" into converted resource_set as "role" + crm_xml_add(dependent_set, "role", dependent_role); + xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE); + } + any_sets = true; + } + + // Convert template/tag reference in "with-rsc" into resource_set under constraint + if (!pcmk__tag_to_set(*expanded_xml, &primary_set, XML_COLOC_ATTR_TARGET, + true, data_set)) { + free_xml(*expanded_xml); + *expanded_xml = NULL; + return pcmk_rc_unpack_error; + } + + if (primary_set != NULL) { + if (primary_role != NULL) { + // Move "with-rsc-role" into converted resource_set as "role" + crm_xml_add(primary_set, "role", primary_role); + xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_TARGET_ROLE); + } + any_sets = true; + } + + if (any_sets) { + crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation"); + } else { + free_xml(*expanded_xml); + *expanded_xml = NULL; + } + + return pcmk_rc_ok; +} + +/*! 
+ * \internal + * \brief Parse a colocation constraint from XML into a cluster working set + * + * \param[in,out] xml_obj Colocation constraint XML to unpack + * \param[in,out] data_set Cluster working set to add constraint to + */ +void +pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set) +{ + int score_i = 0; + xmlNode *set = NULL; + xmlNode *last = NULL; + + xmlNode *orig_xml = NULL; + xmlNode *expanded_xml = NULL; + + const char *id = crm_element_value(xml_obj, XML_ATTR_ID); + const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); + const char *influence_s = crm_element_value(xml_obj, + XML_COLOC_ATTR_INFLUENCE); + + if (score) { + score_i = char2score(score); + } + + if (unpack_colocation_tags(xml_obj, &expanded_xml, + data_set) != pcmk_rc_ok) { + return; + } + if (expanded_xml) { + orig_xml = xml_obj; + xml_obj = expanded_xml; + } + + for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; + set = crm_next_same_xml(set)) { + + set = expand_idref(set, data_set->input); + if (set == NULL) { // Configuration error, message already logged + if (expanded_xml != NULL) { + free_xml(expanded_xml); + } + return; + } + + unpack_colocation_set(set, score_i, id, influence_s, data_set); + + if (last != NULL) { + colocate_rsc_sets(id, last, set, score_i, influence_s, data_set); + } + last = set; + } + + if (expanded_xml) { + free_xml(expanded_xml); + xml_obj = orig_xml; + } + + if (last == NULL) { + unpack_simple_colocation(xml_obj, id, influence_s, data_set); + } +} + +/*! + * \internal + * \brief Make actions of a given type unrunnable for a given resource + * + * \param[in,out] rsc Resource whose actions should be blocked + * \param[in] task Name of action to block + * \param[in] reason Unrunnable start action causing the block + */ +static void +mark_action_blocked(pe_resource_t *rsc, const char *task, + const pe_resource_t *reason) +{ + char *reason_text = crm_strdup_printf("colocation with %s", reason->id); + + for (GList *gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + + if (pcmk_is_set(action->flags, pe_action_runnable) + && pcmk__str_eq(action->task, task, pcmk__str_casei)) { + + pe__clear_action_flags(action, pe_action_runnable); + pe_action_set_reason(action, reason_text, false); + pcmk__block_colocation_dependents(action, rsc->cluster); + pcmk__update_action_for_orderings(action, rsc->cluster); + } + } + + // If parent resource can't perform an action, neither can any children + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + mark_action_blocked((pe_resource_t *) (iter->data), task, reason); + } + free(reason_text); +} + +/*! + * \internal + * \brief If an action is unrunnable, block any relevant dependent actions + * + * If a given action is an unrunnable start or promote, block the start or + * promote actions of resources colocated with it, as appropriate to the + * colocations' configured roles. 
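+ *
+ * For instance (editor's illustration): if B has a mandatory colocation with
+ * A and A's start turns out to be unrunnable, B's start is blocked here too;
+ * when the colocation ties B's promoted role to A, B's promote is blocked
+ * instead.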
+ * + * \param[in,out] action Action to check + * \param[in] data_set Cluster working set (ignored) + */ +void +pcmk__block_colocation_dependents(pe_action_t *action, + pe_working_set_t *data_set) +{ + GList *gIter = NULL; + GList *colocations = NULL; + pe_resource_t *rsc = NULL; + bool is_start = false; + + if (pcmk_is_set(action->flags, pe_action_runnable)) { + return; // Only unrunnable actions block dependents + } + + is_start = pcmk__str_eq(action->task, RSC_START, pcmk__str_none); + if (!is_start && !pcmk__str_eq(action->task, RSC_PROMOTE, pcmk__str_none)) { + return; // Only unrunnable starts and promotes block dependents + } + + CRM_ASSERT(action->rsc != NULL); // Start and promote are resource actions + + /* If this resource is part of a collective resource, dependents are blocked + * only if all instances of the collective are unrunnable, so check the + * collective resource. + */ + rsc = uber_parent(action->rsc); + if (rsc->parent != NULL) { + rsc = rsc->parent; // Bundle + } + + // Colocation fails only if entire primary can't reach desired role + for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child = (pe_resource_t *) gIter->data; + pe_action_t *child_action = find_first_action(child->actions, NULL, + action->task, NULL); + + if ((child_action == NULL) + || pcmk_is_set(child_action->flags, pe_action_runnable)) { + crm_trace("Not blocking %s colocation dependents because " + "at least %s has runnable %s", + rsc->id, child->id, action->task); + return; // At least one child can reach desired role + } + } + + crm_trace("Blocking %s colocation dependents due to unrunnable %s %s", + rsc->id, action->rsc->id, action->task); + + // Check each colocation where this resource is primary + colocations = pcmk__with_this_colocations(rsc); + for (gIter = colocations; gIter != NULL; gIter = gIter->next) { + pcmk__colocation_t *colocation = (pcmk__colocation_t *) gIter->data; + + if (colocation->score < INFINITY) { + continue; // Only mandatory colocations block dependent + } + + /* If the primary can't start, the dependent can't reach its colocated + * role, regardless of what the primary or dependent colocation role is. + * + * If the primary can't be promoted, the dependent can't reach its + * colocated role if the primary's colocation role is promoted. + */ + if (!is_start && (colocation->primary_role != RSC_ROLE_PROMOTED)) { + continue; + } + + // Block the dependent from reaching its colocated role + if (colocation->dependent_role == RSC_ROLE_PROMOTED) { + mark_action_blocked(colocation->dependent, RSC_PROMOTE, + action->rsc); + } else { + mark_action_blocked(colocation->dependent, RSC_START, action->rsc); + } + } + g_list_free(colocations); +} + +/*! + * \internal + * \brief Determine how a colocation constraint should affect a resource + * + * Colocation constraints have different effects at different points in the + * scheduler sequence. Initially, they affect a resource's location; once that + * is determined, then for promotable clones they can affect a resource + * instance's role; after both are determined, the constraints no longer matter. + * Given a specific colocation constraint, check what has been done so far to + * determine what should be affected at the current point in the scheduler. 
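+ *
+ * Rough sketch (editor's summary): while the dependent is still being
+ * placed, the constraint affects its location; once a promotable instance
+ * has been placed but roles are still being chosen, a role-based colocation
+ * affects its priority; after both decisions are made, it affects nothing.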
+ * + * \param[in] dependent Dependent resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint + * \param[in] preview If true, pretend resources have already been allocated + * + * \return How colocation constraint should be applied at this point + */ +enum pcmk__coloc_affects +pcmk__colocation_affects(const pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation, bool preview) +{ + if (!preview && pcmk_is_set(primary->flags, pe_rsc_provisional)) { + // Primary resource has not been allocated yet, so we can't do anything + return pcmk__coloc_affects_nothing; + } + + if ((colocation->dependent_role >= RSC_ROLE_UNPROMOTED) + && (dependent->parent != NULL) + && pcmk_is_set(dependent->parent->flags, pe_rsc_promotable) + && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) { + + /* This is a colocation by role, and the dependent is a promotable clone + * that has already been allocated, so the colocation should now affect + * the role. + */ + return pcmk__coloc_affects_role; + } + + if (!preview && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) { + /* The dependent resource has already been through allocation, so the + * constraint no longer has any effect. Log an error if a mandatory + * colocation constraint has been violated. + */ + + const pe_node_t *primary_node = primary->allocated_to; + + if (dependent->allocated_to == NULL) { + crm_trace("Skipping colocation '%s': %s will not run anywhere", + colocation->id, dependent->id); + + } else if (colocation->score >= INFINITY) { + // Dependent resource must colocate with primary resource + + if ((primary_node == NULL) || + (primary_node->details != dependent->allocated_to->details)) { + crm_err("%s must be colocated with %s but is not (%s vs. 
%s)", + dependent->id, primary->id, + pe__node_name(dependent->allocated_to), + pe__node_name(primary_node)); + } + + } else if (colocation->score <= -CRM_SCORE_INFINITY) { + // Dependent resource must anti-colocate with primary resource + + if ((primary_node != NULL) && + (dependent->allocated_to->details == primary_node->details)) { + crm_err("%s and %s must be anti-colocated but are allocated " + "to the same node (%s)", + dependent->id, primary->id, pe__node_name(primary_node)); + } + } + return pcmk__coloc_affects_nothing; + } + + if ((colocation->score > 0) + && (colocation->dependent_role != RSC_ROLE_UNKNOWN) + && (colocation->dependent_role != dependent->next_role)) { + + crm_trace("Skipping colocation '%s': dependent limited to %s role " + "but %s next role is %s", + colocation->id, role2text(colocation->dependent_role), + dependent->id, role2text(dependent->next_role)); + return pcmk__coloc_affects_nothing; + } + + if ((colocation->score > 0) + && (colocation->primary_role != RSC_ROLE_UNKNOWN) + && (colocation->primary_role != primary->next_role)) { + + crm_trace("Skipping colocation '%s': primary limited to %s role " + "but %s next role is %s", + colocation->id, role2text(colocation->primary_role), + primary->id, role2text(primary->next_role)); + return pcmk__coloc_affects_nothing; + } + + if ((colocation->score < 0) + && (colocation->dependent_role != RSC_ROLE_UNKNOWN) + && (colocation->dependent_role == dependent->next_role)) { + crm_trace("Skipping anti-colocation '%s': dependent role %s matches", + colocation->id, role2text(colocation->dependent_role)); + return pcmk__coloc_affects_nothing; + } + + if ((colocation->score < 0) + && (colocation->primary_role != RSC_ROLE_UNKNOWN) + && (colocation->primary_role == primary->next_role)) { + crm_trace("Skipping anti-colocation '%s': primary role %s matches", + colocation->id, role2text(colocation->primary_role)); + return pcmk__coloc_affects_nothing; + } + + return pcmk__coloc_affects_location; +} + +/*! 
+ * \internal + * \brief Apply colocation to dependent for allocation purposes + * + * Update the allowed node weights of the dependent resource in a colocation, + * for the purposes of allocating it to a node + * + * \param[in,out] dependent Dependent resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint + */ +void +pcmk__apply_coloc_to_weights(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation) +{ + const char *attribute = CRM_ATTR_ID; + const char *value = NULL; + GHashTable *work = NULL; + GHashTableIter iter; + pe_node_t *node = NULL; + + if (colocation->node_attribute != NULL) { + attribute = colocation->node_attribute; + } + + if (primary->allocated_to != NULL) { + value = pe_node_attribute_raw(primary->allocated_to, attribute); + + } else if (colocation->score < 0) { + // Nothing to do (anti-colocation with something that is not running) + return; + } + + work = pcmk__copy_node_table(dependent->allowed_nodes); + + g_hash_table_iter_init(&iter, work); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + if (primary->allocated_to == NULL) { + node->weight = pcmk__add_scores(-colocation->score, node->weight); + pe_rsc_trace(dependent, + "Applied %s to %s score on %s (now %s after " + "subtracting %s because primary %s inactive)", + colocation->id, dependent->id, pe__node_name(node), + pcmk_readable_score(node->weight), + pcmk_readable_score(colocation->score), primary->id); + + } else if (pcmk__str_eq(pe_node_attribute_raw(node, attribute), value, + pcmk__str_casei)) { + /* Add colocation score only if optional (or minus infinity). A + * mandatory colocation is a requirement rather than a preference, + * so we don't need to consider it for relative assignment purposes. + * The resource will simply be forbidden from running on the node if + * the primary isn't active there (via the condition above). + */ + if (colocation->score < CRM_SCORE_INFINITY) { + node->weight = pcmk__add_scores(colocation->score, + node->weight); + pe_rsc_trace(dependent, + "Applied %s to %s score on %s (now %s after " + "adding %s)", + colocation->id, dependent->id, pe__node_name(node), + pcmk_readable_score(node->weight), + pcmk_readable_score(colocation->score)); + } + + } else if (colocation->score >= CRM_SCORE_INFINITY) { + /* Only mandatory colocations are relevant when the colocation + * attribute doesn't match, because an attribute not matching is not + * a negative preference -- the colocation is simply relevant only + * where it matches. + */ + node->weight = -CRM_SCORE_INFINITY; + pe_rsc_trace(dependent, + "Banned %s from %s because colocation %s attribute %s " + "does not match", + dependent->id, pe__node_name(node), colocation->id, + attribute); + } + } + + if ((colocation->score <= -INFINITY) || (colocation->score >= INFINITY) + || pcmk__any_node_available(work)) { + + g_hash_table_destroy(dependent->allowed_nodes); + dependent->allowed_nodes = work; + work = NULL; + + } else { + pe_rsc_info(dependent, + "%s: Rolling back scores from %s (no available nodes)", + dependent->id, primary->id); + } + + if (work != NULL) { + g_hash_table_destroy(work); + } +} + +/*! 
+ * \internal + * \brief Apply colocation to dependent for role purposes + * + * Update the priority of the dependent resource in a colocation, for the + * purposes of selecting its role + * + * \param[in,out] dependent Dependent resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint + */ +void +pcmk__apply_coloc_to_priority(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation) +{ + const char *dependent_value = NULL; + const char *primary_value = NULL; + const char *attribute = CRM_ATTR_ID; + int score_multiplier = 1; + + if ((primary->allocated_to == NULL) || (dependent->allocated_to == NULL)) { + return; + } + + if (colocation->node_attribute != NULL) { + attribute = colocation->node_attribute; + } + + dependent_value = pe_node_attribute_raw(dependent->allocated_to, attribute); + primary_value = pe_node_attribute_raw(primary->allocated_to, attribute); + + if (!pcmk__str_eq(dependent_value, primary_value, pcmk__str_casei)) { + if ((colocation->score == INFINITY) + && (colocation->dependent_role == RSC_ROLE_PROMOTED)) { + dependent->priority = -INFINITY; + } + return; + } + + if ((colocation->primary_role != RSC_ROLE_UNKNOWN) + && (colocation->primary_role != primary->next_role)) { + return; + } + + if (colocation->dependent_role == RSC_ROLE_UNPROMOTED) { + score_multiplier = -1; + } + + dependent->priority = pcmk__add_scores(score_multiplier * colocation->score, + dependent->priority); + pe_rsc_trace(dependent, + "Applied %s to %s promotion priority (now %s after %s %s)", + colocation->id, dependent->id, + pcmk_readable_score(dependent->priority), + ((score_multiplier == 1)? "adding" : "subtracting"), + pcmk_readable_score(colocation->score)); +} + +/*! + * \internal + * \brief Find score of highest-scored node that matches colocation attribute + * + * \param[in] rsc Resource whose allowed nodes should be searched + * \param[in] attr Colocation attribute name (must not be NULL) + * \param[in] value Colocation attribute value to require + */ +static int +best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr, + const char *value) +{ + GHashTableIter iter; + pe_node_t *node = NULL; + int best_score = -INFINITY; + const char *best_node = NULL; + + // Find best allowed node with matching attribute + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + + if ((node->weight > best_score) && pcmk__node_available(node, false, false) + && pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) { + + best_score = node->weight; + best_node = node->details->uname; + } + } + + if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_casei)) { + if (best_node == NULL) { + crm_info("No allowed node for %s matches node attribute %s=%s", + rsc->id, attr, value); + } else { + crm_info("Allowed node %s for %s had best score (%d) " + "of those matching node attribute %s=%s", + best_node, rsc->id, best_score, attr, value); + } + } + return best_score; +} + +/*! + * \internal + * \brief Add resource's colocation matches to current node allocation scores + * + * For each node in a given table, if any of a given resource's allowed nodes + * have a matching value for the colocation attribute, add the highest of those + * nodes' scores to the node's score. 
+ * + * \param[in,out] nodes Hash table of nodes with allocation scores so far + * \param[in] rsc Resource whose allowed nodes should be compared + * \param[in] attr Colocation attribute that must match (NULL for default) + * \param[in] factor Factor by which to multiply scores being added + * \param[in] only_positive Whether to add only positive scores + */ +static void +add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc, + const char *attr, float factor, + bool only_positive) +{ + GHashTableIter iter; + pe_node_t *node = NULL; + + if (attr == NULL) { + attr = CRM_ATTR_UNAME; + } + + // Iterate through each node + g_hash_table_iter_init(&iter, nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + float weight_f = 0; + int weight = 0; + int score = 0; + int new_score = 0; + + score = best_node_score_matching_attr(rsc, attr, + pe_node_attribute_raw(node, attr)); + + if ((factor < 0) && (score < 0)) { + /* Negative preference for a node with a negative score + * should not become a positive preference. + * + * @TODO Consider filtering only if weight is -INFINITY + */ + crm_trace("%s: Filtering %d + %f * %d (double negative disallowed)", + pe__node_name(node), node->weight, factor, score); + continue; + } + + if (node->weight == INFINITY_HACK) { + crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)", + pe__node_name(node), node->weight, factor, score); + continue; + } + + weight_f = factor * score; + + // Round the number; see http://c-faq.com/fp/round.html + weight = (int) ((weight_f < 0)? (weight_f - 0.5) : (weight_f + 0.5)); + + /* Small factors can obliterate the small scores that are often actually + * used in configurations. If the score and factor are nonzero, ensure + * that the result is nonzero as well. + */ + if ((weight == 0) && (score != 0)) { + if (factor > 0.0) { + weight = 1; + } else if (factor < 0.0) { + weight = -1; + } + } + + new_score = pcmk__add_scores(weight, node->weight); + + if (only_positive && (new_score < 0) && (node->weight > 0)) { + crm_trace("%s: Filtering %d + %f * %d = %d " + "(negative disallowed, marking node unusable)", + pe__node_name(node), node->weight, factor, score, + new_score); + node->weight = INFINITY_HACK; + continue; + } + + if (only_positive && (new_score < 0) && (node->weight == 0)) { + crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)", + pe__node_name(node), node->weight, factor, score, + new_score); + continue; + } + + crm_trace("%s: %d + %f * %d = %d", pe__node_name(node), + node->weight, factor, score, new_score); + node->weight = new_score; + } +} + +/*! + * \internal + * \brief Update nodes with scores of colocated resources' nodes + * + * Given a table of nodes and a resource, update the nodes' scores with the + * scores of the best nodes matching the attribute used for each of the + * resource's relevant colocations. + * + * \param[in,out] rsc Resource to check colocations for + * \param[in] log_id Resource ID to use in logs (if NULL, use \p rsc ID) + * \param[in,out] nodes Nodes to update + * \param[in] attr Colocation attribute (NULL to use default) + * \param[in] factor Incorporate scores multiplied by this factor + * \param[in] flags Bitmask of enum pcmk__coloc_select values + * + * \note The caller remains responsible for freeing \p *nodes. 
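+ *
+ * \par
+ * A minimal usage sketch (illustrative only; \c rsc is assumed to be a valid
+ * resource). Passing a NULL table starts from the resource's own allowed
+ * node scores:
+ * \code
+ * GHashTable *scores = NULL;
+ *
+ * pcmk__add_colocated_node_scores(rsc, NULL, &scores, NULL, 1.0,
+ *                                 pcmk__coloc_select_this_with);
+ * // ... compare candidate nodes using the merged scores ...
+ * g_hash_table_destroy(scores);  // caller must free the result
+ * \endcode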
+ */ +void +pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id, + GHashTable **nodes, const char *attr, + float factor, uint32_t flags) +{ + GHashTable *work = NULL; + + CRM_CHECK((rsc != NULL) && (nodes != NULL), return); + + if (log_id == NULL) { + log_id = rsc->id; + } + + // Avoid infinite recursion + if (pcmk_is_set(rsc->flags, pe_rsc_merging)) { + pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", + log_id, rsc->id); + return; + } + pe__set_resource_flags(rsc, pe_rsc_merging); + + if (*nodes == NULL) { + /* Only cmp_resources() passes a NULL nodes table, which indicates we + * should initialize it with the resource's allowed node scores. + */ + work = pcmk__copy_node_table(rsc->allowed_nodes); + } else { + pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)", + log_id, rsc->id, factor); + work = pcmk__copy_node_table(*nodes); + add_node_scores_matching_attr(work, rsc, attr, factor, + pcmk_is_set(flags, + pcmk__coloc_select_nonnegative)); + } + + if (work == NULL) { + pe__clear_resource_flags(rsc, pe_rsc_merging); + return; + } + + if (pcmk__any_node_available(work)) { + GList *colocations = NULL; + + if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) { + colocations = pcmk__this_with_colocations(rsc); + pe_rsc_trace(rsc, + "Checking additional %d optional '%s with' constraints", + g_list_length(colocations), rsc->id); + } else { + colocations = pcmk__with_this_colocations(rsc); + pe_rsc_trace(rsc, + "Checking additional %d optional 'with %s' constraints", + g_list_length(colocations), rsc->id); + } + flags |= pcmk__coloc_select_active; + + for (GList *iter = colocations; iter != NULL; iter = iter->next) { + pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data; + + pe_resource_t *other = NULL; + float other_factor = factor * constraint->score / (float) INFINITY; + + if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) { + other = constraint->primary; + } else if (!pcmk__colocation_has_influence(constraint, NULL)) { + continue; + } else { + other = constraint->dependent; + } + + pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint (%s with %s)", + constraint->id, constraint->dependent->id, + constraint->primary->id); + other->cmds->add_colocated_node_scores(other, log_id, &work, + constraint->node_attribute, + other_factor, flags); + pe__show_node_weights(true, NULL, log_id, work, rsc->cluster); + } + g_list_free(colocations); + + } else if (pcmk_is_set(flags, pcmk__coloc_select_active)) { + pe_rsc_info(rsc, "%s: Rolling back optional scores from %s", + log_id, rsc->id); + g_hash_table_destroy(work); + pe__clear_resource_flags(rsc, pe_rsc_merging); + return; + } + + + if (pcmk_is_set(flags, pcmk__coloc_select_nonnegative)) { + pe_node_t *node = NULL; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, work); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + if (node->weight == INFINITY_HACK) { + node->weight = 1; + } + } + } + + if (*nodes != NULL) { + g_hash_table_destroy(*nodes); + } + *nodes = work; + + pe__clear_resource_flags(rsc, pe_rsc_merging); +} + +/*! 
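+ * \internal
+ * \brief Example: applying all "with this" colocations of a resource
+ *
+ * Because pcmk__add_dependent_scores() below is shaped as a GFunc, an
+ * assignment step can apply every "with this" colocation in one pass. A
+ * sketch (illustrative only; \c rsc is assumed to be the resource being
+ * assigned):
+ * \code
+ * GList *colocations = pcmk__with_this_colocations(rsc);
+ *
+ * g_list_foreach(colocations, pcmk__add_dependent_scores, rsc);
+ * g_list_free(colocations);
+ * \endcode
+ */
+
+/*!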
+ * \internal + * \brief Apply a "with this" colocation to a resource's allowed node scores + * + * \param[in,out] data Colocation to apply + * \param[in,out] user_data Resource being assigned + */ +void +pcmk__add_dependent_scores(gpointer data, gpointer user_data) +{ + pcmk__colocation_t *colocation = (pcmk__colocation_t *) data; + pe_resource_t *rsc = (pe_resource_t *) user_data; + + pe_resource_t *other = colocation->dependent; + const float factor = colocation->score / (float) INFINITY; + uint32_t flags = pcmk__coloc_select_active; + + if (!pcmk__colocation_has_influence(colocation, NULL)) { + return; + } + if (rsc->variant == pe_clone) { + flags |= pcmk__coloc_select_nonnegative; + } + pe_rsc_trace(rsc, + "%s: Incorporating attenuated %s assignment scores due " + "to colocation %s", rsc->id, other->id, colocation->id); + other->cmds->add_colocated_node_scores(other, rsc->id, &rsc->allowed_nodes, + colocation->node_attribute, factor, + flags); +} + +/*! + * \internal + * \brief Get all colocations affecting a resource as the primary + * + * \param[in] rsc Resource to get colocations for + * + * \return Newly allocated list of colocations affecting \p rsc as primary + * + * \note This is a convenience wrapper for the with_this_colocations() method. + */ +GList * +pcmk__with_this_colocations(const pe_resource_t *rsc) +{ + GList *list = NULL; + + rsc->cmds->with_this_colocations(rsc, rsc, &list); + return list; +} + +/*! + * \internal + * \brief Get all colocations affecting a resource as the dependent + * + * \param[in] rsc Resource to get colocations for + * + * \return Newly allocated list of colocations affecting \p rsc as dependent + * + * \note This is a convenience wrapper for the this_with_colocations() method. + */ +GList * +pcmk__this_with_colocations(const pe_resource_t *rsc) +{ + GList *list = NULL; + + rsc->cmds->this_with_colocations(rsc, rsc, &list); + return list; +} diff --git a/lib/pacemaker/pcmk_sched_constraints.c b/lib/pacemaker/pcmk_sched_constraints.c new file mode 100644 index 0000000..bae6827 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_constraints.c @@ -0,0 +1,421 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <sys/param.h> +#include <sys/types.h> +#include <stdbool.h> +#include <regex.h> +#include <glib.h> + +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> +#include <crm/common/iso8601.h> +#include <crm/pengine/status.h> +#include <crm/pengine/internal.h> +#include <crm/pengine/rules.h> +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +static bool +evaluate_lifetime(xmlNode *lifetime, pe_working_set_t *data_set) +{ + bool result = FALSE; + crm_time_t *next_change = crm_time_new_undefined(); + + result = pe_evaluate_rules(lifetime, NULL, data_set->now, next_change); + if (crm_time_is_defined(next_change)) { + time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); + + pe__update_recheck_time(recheck, data_set); + } + crm_time_free(next_change); + return result; +} + +/*! + * \internal + * \brief Unpack constraints from XML + * + * Given a cluster working set, unpack all constraints from its input XML into + * data structures. 
+ * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__unpack_constraints(pe_working_set_t *data_set) +{ + xmlNode *xml_constraints = pcmk_find_cib_element(data_set->input, + XML_CIB_TAG_CONSTRAINTS); + + for (xmlNode *xml_obj = pcmk__xe_first_child(xml_constraints); + xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { + + xmlNode *lifetime = NULL; + const char *id = crm_element_value(xml_obj, XML_ATTR_ID); + const char *tag = crm_element_name(xml_obj); + + if (id == NULL) { + pcmk__config_err("Ignoring <%s> constraint without " + XML_ATTR_ID, tag); + continue; + } + + crm_trace("Unpacking %s constraint '%s'", tag, id); + + lifetime = first_named_child(xml_obj, "lifetime"); + if (lifetime != NULL) { + pcmk__config_warn("Support for 'lifetime' attribute (in %s) is " + "deprecated (the rules it contains should " + "instead be direct descendants of the " + "constraint object)", id); + } + + if ((lifetime != NULL) && !evaluate_lifetime(lifetime, data_set)) { + crm_info("Constraint %s %s is not active", tag, id); + + } else if (pcmk__str_eq(XML_CONS_TAG_RSC_ORDER, tag, pcmk__str_casei)) { + pcmk__unpack_ordering(xml_obj, data_set); + + } else if (pcmk__str_eq(XML_CONS_TAG_RSC_DEPEND, tag, pcmk__str_casei)) { + pcmk__unpack_colocation(xml_obj, data_set); + + } else if (pcmk__str_eq(XML_CONS_TAG_RSC_LOCATION, tag, pcmk__str_casei)) { + pcmk__unpack_location(xml_obj, data_set); + + } else if (pcmk__str_eq(XML_CONS_TAG_RSC_TICKET, tag, pcmk__str_casei)) { + pcmk__unpack_rsc_ticket(xml_obj, data_set); + + } else { + pe_err("Unsupported constraint type: %s", tag); + } + } +} + +pe_resource_t * +pcmk__find_constraint_resource(GList *rsc_list, const char *id) +{ + GList *rIter = NULL; + + for (rIter = rsc_list; id && rIter; rIter = rIter->next) { + pe_resource_t *parent = rIter->data; + pe_resource_t *match = parent->fns->find_rsc(parent, id, NULL, + pe_find_renamed); + + if (match != NULL) { + if(!pcmk__str_eq(match->id, id, pcmk__str_casei)) { + /* We found an instance of a clone instead */ + match = uber_parent(match); + crm_debug("Found %s for %s", match->id, id); + } + return match; + } + } + crm_trace("No match for %s", id); + return NULL; +} + +/*! + * \internal + * \brief Check whether an ID references a resource tag + * + * \param[in] data_set Cluster working set + * \param[in] id Tag ID to search for + * \param[out] tag Where to store tag, if found + * + * \return true if ID refers to a tagged resource or resource set template, + * otherwise false + */ +static bool +find_constraint_tag(const pe_working_set_t *data_set, const char *id, + pe_tag_t **tag) +{ + *tag = NULL; + + // Check whether id refers to a resource set template + if (g_hash_table_lookup_extended(data_set->template_rsc_sets, id, + NULL, (gpointer *) tag)) { + if (*tag == NULL) { + crm_warn("No resource is derived from template '%s'", id); + return false; + } + return true; + } + + // If not, check whether id refers to a tag + if (g_hash_table_lookup_extended(data_set->tags, id, + NULL, (gpointer *) tag)) { + if (*tag == NULL) { + crm_warn("No resource is tagged with '%s'", id); + return false; + } + return true; + } + + crm_warn("No template or tag named '%s'", id); + return false; +} + +/*! 
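+ * \internal
+ * \brief Example: resolving a constraint's resource reference
+ *
+ * A sketch (illustrative only; "A" is a hypothetical resource ID) of
+ * resolving a reference against the working set, as the unpackers above do:
+ * \code
+ * pe_resource_t *rsc = pcmk__find_constraint_resource(data_set->resources,
+ *                                                     "A");
+ *
+ * if (rsc == NULL) {
+ *     pcmk__config_err("Ignoring constraint: 'A' is not a valid resource");
+ * }
+ * \endcode
+ */
+
+/*!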
+ * \internal
+ * \brief Check whether an ID refers to a valid resource or tag
+ *
+ * \param[in]  data_set  Cluster working set
+ * \param[in]  id        ID to search for
+ * \param[out] rsc       Where to store resource, if found (or NULL to skip
+ *                       searching resources)
+ * \param[out] tag       Where to store tag, if found (or NULL to skip searching
+ *                       tags)
+ *
+ * \return true if id refers to a resource (possibly indirectly via a tag)
+ */
+bool
+pcmk__valid_resource_or_tag(const pe_working_set_t *data_set, const char *id,
+                            pe_resource_t **rsc, pe_tag_t **tag)
+{
+    if (rsc != NULL) {
+        *rsc = pcmk__find_constraint_resource(data_set->resources, id);
+        if (*rsc != NULL) {
+            return true;
+        }
+    }
+
+    if ((tag != NULL) && find_constraint_tag(data_set, id, tag)) {
+        return true;
+    }
+
+    return false;
+}
+
+/*!
+ * \internal
+ * \brief Replace any resource tags with equivalent resource_ref entries
+ *
+ * If a given constraint has resource sets, check each set for resource_ref
+ * entries that list tags rather than resource IDs, and replace any found with
+ * resource_ref entries for the corresponding resource IDs.
+ *
+ * \param[in,out] xml_obj   Constraint XML
+ * \param[in]     data_set  Cluster working set
+ *
+ * \return Equivalent XML with resource tags replaced (or NULL if none)
+ * \note It is the caller's responsibility to free the result with free_xml().
+ */
+xmlNode *
+pcmk__expand_tags_in_sets(xmlNode *xml_obj, const pe_working_set_t *data_set)
+{
+    xmlNode *new_xml = NULL;
+    bool any_refs = false;
+
+    // Short-circuit if there are no sets
+    if (first_named_child(xml_obj, XML_CONS_TAG_RSC_SET) == NULL) {
+        return NULL;
+    }
+
+    new_xml = copy_xml(xml_obj);
+
+    for (xmlNode *set = first_named_child(new_xml, XML_CONS_TAG_RSC_SET);
+         set != NULL; set = crm_next_same_xml(set)) {
+
+        GList *tag_refs = NULL;
+        GList *gIter = NULL;
+
+        for (xmlNode *xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
+             xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+            pe_resource_t *rsc = NULL;
+            pe_tag_t *tag = NULL;
+
+            if (!pcmk__valid_resource_or_tag(data_set, ID(xml_rsc), &rsc,
+                                             &tag)) {
+                pcmk__config_err("Ignoring resource sets for constraint '%s' "
+                                 "because '%s' is not a valid resource or tag",
+                                 ID(xml_obj), ID(xml_rsc));
+                free_xml(new_xml);
+                return NULL;
+
+            } else if (rsc) {
+                continue;
+
+            } else if (tag) {
+                /* The resource_ref under the resource_set references a template/tag */
+                xmlNode *last_ref = xml_rsc;
+
+                /* A sample:
+
+                   Original XML:
+
+                   <resource_set id="tag1-colocation-0" sequential="true">
+                     <resource_ref id="rsc1"/>
+                     <resource_ref id="tag1"/>
+                     <resource_ref id="rsc4"/>
+                   </resource_set>
+
+                   Now we are appending rsc2 and rsc3 which are tagged with tag1 right after it:
+
+                   <resource_set id="tag1-colocation-0" sequential="true">
+                     <resource_ref id="rsc1"/>
+                     <resource_ref id="tag1"/>
+                     <resource_ref id="rsc2"/>
+                     <resource_ref id="rsc3"/>
+                     <resource_ref id="rsc4"/>
+                   </resource_set>
+
+                 */
+
+                for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
+                    const char *obj_ref = (const char *) gIter->data;
+                    xmlNode *new_rsc_ref = NULL;
+
+                    new_rsc_ref = xmlNewDocRawNode(getDocPtr(set), NULL,
+                                                   (pcmkXmlStr) XML_TAG_RESOURCE_REF, NULL);
+                    crm_xml_add(new_rsc_ref, XML_ATTR_ID, obj_ref);
+                    xmlAddNextSibling(last_ref, new_rsc_ref);
+
+                    last_ref = new_rsc_ref;
+                }
+
+                any_refs = true;
+
+                /* Freeing the resource_ref now would break the XML child
+                 * iteration, so just remember it for freeing later.
+ */ + tag_refs = g_list_append(tag_refs, xml_rsc); + } + } + + /* Now free '<resource_ref id="tag1"/>', and finally get: + + <resource_set id="tag1-colocation-0" sequential="true"> + <resource_ref id="rsc1"/> + <resource_ref id="rsc2"/> + <resource_ref id="rsc3"/> + <resource_ref id="rsc4"/> + </resource_set> + + */ + for (gIter = tag_refs; gIter != NULL; gIter = gIter->next) { + xmlNode *tag_ref = gIter->data; + + free_xml(tag_ref); + } + g_list_free(tag_refs); + } + + if (!any_refs) { + free_xml(new_xml); + new_xml = NULL; + } + return new_xml; +} + +/*! + * \internal + * \brief Convert a tag into a resource set of tagged resources + * + * \param[in,out] xml_obj Constraint XML + * \param[out] rsc_set Where to store resource set XML created based on tag + * \param[in] attr Name of XML attribute containing resource or tag ID + * \param[in] convert_rsc Convert to set even if \p attr references a resource + * \param[in] data_set Cluster working set + */ +bool +pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, + bool convert_rsc, const pe_working_set_t *data_set) +{ + const char *cons_id = NULL; + const char *id = NULL; + + pe_resource_t *rsc = NULL; + pe_tag_t *tag = NULL; + + *rsc_set = NULL; + + CRM_CHECK((xml_obj != NULL) && (attr != NULL), return false); + + cons_id = ID(xml_obj); + if (cons_id == NULL) { + pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, + crm_element_name(xml_obj)); + return false; + } + + id = crm_element_value(xml_obj, attr); + if (id == NULL) { + return true; + } + + if (!pcmk__valid_resource_or_tag(data_set, id, &rsc, &tag)) { + pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " + "valid resource or tag", cons_id, id); + return false; + + } else if (tag) { + GList *gIter = NULL; + + /* A template/tag is referenced by the "attr" attribute (first, then, rsc or with-rsc). + Add the template/tag's corresponding "resource_set" which contains the resources derived + from it or tagged with it under the constraint. */ + *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET); + crm_xml_add(*rsc_set, XML_ATTR_ID, id); + + for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { + const char *obj_ref = (const char *) gIter->data; + xmlNode *rsc_ref = NULL; + + rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF); + crm_xml_add(rsc_ref, XML_ATTR_ID, obj_ref); + } + + /* Set sequential="false" for the resource_set */ + pcmk__xe_set_bool_attr(*rsc_set, "sequential", false); + + } else if ((rsc != NULL) && convert_rsc) { + /* Even a regular resource is referenced by "attr", convert it into a resource_set. + Because the other side of the constraint could be a template/tag reference. */ + xmlNode *rsc_ref = NULL; + + *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET); + crm_xml_add(*rsc_set, XML_ATTR_ID, id); + + rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF); + crm_xml_add(rsc_ref, XML_ATTR_ID, id); + + } else { + return true; + } + + /* Remove the "attr" attribute referencing the template/tag */ + if (*rsc_set != NULL) { + xml_remove_prop(xml_obj, attr); + } + + return true; +} + +/*! 
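+ * \internal
+ * \brief Example: expanding tags before unpacking resource sets
+ *
+ * A sketch (illustrative only) of the calling pattern intended for
+ * pcmk__expand_tags_in_sets() above: a non-NULL result is a tag-free copy
+ * that is processed instead of the original, then freed.
+ * \code
+ * xmlNode *expanded = pcmk__expand_tags_in_sets(xml_obj, data_set);
+ *
+ * if (expanded != NULL) {
+ *     // ... unpack resource sets from the expanded copy ...
+ *     free_xml(expanded);
+ * } else {
+ *     // ... no tags to expand, so unpack xml_obj as-is ...
+ * }
+ * \endcode
+ */
+
+/*!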
+ * \internal + * \brief Create constraints inherent to resource types + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__create_internal_constraints(pe_working_set_t *data_set) +{ + crm_trace("Create internal constraints"); + for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + rsc->cmds->internal_constraints(rsc); + } +} diff --git a/lib/pacemaker/pcmk_sched_fencing.c b/lib/pacemaker/pcmk_sched_fencing.c new file mode 100644 index 0000000..c912640 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_fencing.c @@ -0,0 +1,493 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <glib.h> + +#include <crm/crm.h> +#include <crm/pengine/status.h> +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +/*! + * \internal + * \brief Check whether a resource is known on a particular node + * + * \param[in] rsc Resource to check + * \param[in] node Node to check + * + * \return TRUE if resource (or parent if an anonymous clone) is known + */ +static bool +rsc_is_known_on(const pe_resource_t *rsc, const pe_node_t *node) +{ + if (pe_hash_table_lookup(rsc->known_on, node->details->id)) { + return TRUE; + + } else if ((rsc->variant == pe_native) + && pe_rsc_is_anon_clone(rsc->parent) + && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) { + /* We check only the parent, not the uber-parent, because we cannot + * assume that the resource is known if it is in an anonymously cloned + * group (which may be only partially known). + */ + return TRUE; + } + return FALSE; +} + +/*! + * \internal + * \brief Order a resource's start and promote actions relative to fencing + * + * \param[in,out] rsc Resource to be ordered + * \param[in,out] stonith_op Fence action + */ +static void +order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op) +{ + pe_node_t *target; + GList *gIter = NULL; + + CRM_CHECK(stonith_op && stonith_op->node, return); + target = stonith_op->node; + + for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + + switch (action->needs) { + case rsc_req_nothing: + // Anything other than start or promote requires nothing + break; + + case rsc_req_stonith: + order_actions(stonith_op, action, pe_order_optional); + break; + + case rsc_req_quorum: + if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei) + && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) + && !rsc_is_known_on(rsc, target)) { + + /* If we don't know the status of the resource on the node + * we're about to shoot, we have to assume it may be active + * there. Order the resource start after the fencing. This + * is analogous to waiting for all the probes for a resource + * to complete before starting it. + * + * The most likely explanation is that the DC died and took + * its status with it. + */ + pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, + pe__node_name(target)); + order_actions(stonith_op, action, + pe_order_optional | pe_order_runnable_left); + } + break; + } + } +} + +/*! 
+ * \internal + * \brief Order a resource's stop and demote actions relative to fencing + * + * \param[in,out] rsc Resource to be ordered + * \param[in,out] stonith_op Fence action + */ +static void +order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op) +{ + GList *gIter = NULL; + GList *action_list = NULL; + bool order_implicit = false; + + pe_resource_t *top = uber_parent(rsc); + pe_action_t *parent_stop = NULL; + pe_node_t *target; + + CRM_CHECK(stonith_op && stonith_op->node, return); + target = stonith_op->node; + + /* Get a list of stop actions potentially implied by the fencing */ + action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE); + + /* If resource requires fencing, implicit actions must occur after fencing. + * + * Implied stops and demotes of resources running on guest nodes are always + * ordered after fencing, even if the resource does not require fencing, + * because guest node "fencing" is actually just a resource stop. + */ + if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing) + || pe__is_guest_node(target)) { + + order_implicit = true; + } + + if (action_list && order_implicit) { + parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); + } + + for (gIter = action_list; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + + // The stop would never complete, so convert it into a pseudo-action. + pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable); + + if (order_implicit) { + pe__set_action_flags(action, pe_action_implied_by_stonith); + + /* Order the stonith before the parent stop (if any). + * + * Also order the stonith before the resource stop, unless the + * resource is inside a bundle -- that would cause a graph loop. + * We can rely on the parent stop's ordering instead. + * + * User constraints must not order a resource in a guest node + * relative to the guest node container resource. The + * pe_order_preserve flag marks constraints as generated by the + * cluster and thus immune to that check (and is irrelevant if + * target is not a guest). + */ + if (!pe_rsc_is_bundled(rsc)) { + order_actions(stonith_op, action, pe_order_preserve); + } + order_actions(stonith_op, parent_stop, pe_order_preserve); + } + + if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + crm_notice("Stop of failed resource %s is implicit %s %s is fenced", + rsc->id, (order_implicit? "after" : "because"), + pe__node_name(target)); + } else { + crm_info("%s is implicit %s %s is fenced", + action->uuid, (order_implicit? "after" : "because"), + pe__node_name(target)); + } + + if (pcmk_is_set(rsc->flags, pe_rsc_notify)) { + pe__order_notifs_after_fencing(action, rsc, stonith_op); + } + +#if 0 + /* It might be a good idea to stop healthy resources on a node about to + * be fenced, when possible. + * + * However, fencing must be done before a failed resource's + * (pseudo-)stop action, so that could create a loop. For example, given + * a group of A and B running on node N with a failed stop of B: + * + * fence N -> stop B (pseudo-op) -> stop A -> fence N + * + * The block below creates the stop A -> fence N ordering and therefore + * must (at least for now) be disabled. Instead, run the block above and + * treat all resources on N as B would be (i.e., as a pseudo-op after + * the fencing). + * + * @TODO Maybe break the "A requires B" dependency in + * pcmk__update_action_for_orderings() and use this block for healthy + * resources instead of the above. 
+         */
+        crm_info("Moving healthy resource %s off %s before fencing",
+                 rsc->id, pe__node_name(target));
+        pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
+                           strdup(CRM_OP_FENCE), stonith_op,
+                           pe_order_optional, rsc->cluster);
+#endif
+    }
+
+    g_list_free(action_list);
+
+    /* Get a list of demote actions potentially implied by the fencing */
+    action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE);
+
+    for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
+        pe_action_t *action = (pe_action_t *) gIter->data;
+
+        if (!(action->node->details->online) || action->node->details->unclean
+            || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+
+            if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
+                pe_rsc_info(rsc,
+                            "Demote of failed resource %s is implicit after %s is fenced",
+                            rsc->id, pe__node_name(target));
+            } else {
+                pe_rsc_info(rsc, "%s is implicit after %s is fenced",
+                            action->uuid, pe__node_name(target));
+            }
+
+            /* The demote would never complete and is now implied by the
+             * fencing, so convert it into a pseudo-action.
+             */
+            pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable);
+
+            if (pe_rsc_is_bundled(rsc)) {
+                // Do nothing, let recovery be ordered after parent's implied stop
+
+            } else if (order_implicit) {
+                order_actions(stonith_op, action, pe_order_preserve|pe_order_optional);
+            }
+        }
+    }
+
+    g_list_free(action_list);
+}
+
+/*!
+ * \internal
+ * \brief Order resource actions properly relative to fencing
+ *
+ * \param[in,out] rsc         Resource whose actions should be ordered
+ * \param[in,out] stonith_op  Fencing operation to be ordered against
+ */
+static void
+rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op)
+{
+    if (rsc->children) {
+        GList *gIter = NULL;
+
+        for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
+            pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+
+            rsc_stonith_ordering(child_rsc, stonith_op);
+        }
+
+    } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+        pe_rsc_trace(rsc,
+                     "Skipping fencing constraints for unmanaged resource: %s",
+                     rsc->id);
+
+    } else {
+        order_start_vs_fencing(rsc, stonith_op);
+        order_stop_vs_fencing(rsc, stonith_op);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Order all actions appropriately relative to a fencing operation
+ *
+ * Ensure start operations of affected resources are ordered after fencing,
+ * imply stop and demote operations of affected resources by marking them as
+ * pseudo-actions, etc.
+ *
+ * \param[in,out] stonith_op  Fencing operation
+ * \param[in,out] data_set    Working set of cluster
+ */
+void
+pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
+{
+    CRM_CHECK(stonith_op && data_set, return);
+    for (GList *r = data_set->resources; r != NULL; r = r->next) {
+        rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Order an action after unfencing
+ *
+ * \param[in]     rsc     Resource that action is for
+ * \param[in,out] node    Node that action is on
+ * \param[in,out] action  Action to be ordered after unfencing
+ * \param[in]     order   Ordering flags
+ */
+void
+pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node,
+                       pe_action_t *action, enum pe_ordering order)
+{
+    /* When unfencing is in use, we order unfence actions before any probe or
+     * start of resources that require unfencing, and also of fence devices.
+     *
+     * This might seem to violate the principle that fence devices require
+     * only quorum.
However, fence agents that unfence often don't have enough + * information to even probe or start unless the node is first unfenced. + */ + if ((pcmk_is_set(rsc->flags, pe_rsc_fence_device) + && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)) + || pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { + + /* Start with an optional ordering. Requiring unfencing would result in + * the node being unfenced, and all its resources being stopped, + * whenever a new resource is added -- which would be highly suboptimal. + */ + pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, + node->details->data_set); + + order_actions(unfence, action, order); + + if (!pcmk__node_unfenced(node)) { + // But unfencing is required if it has never been done + char *reason = crm_strdup_printf("required by %s %s", + rsc->id, action->task); + + trigger_unfencing(NULL, node, reason, NULL, + node->details->data_set); + free(reason); + } + } +} + +/*! + * \internal + * \brief Create pseudo-op for guest node fence, and order relative to it + * + * \param[in,out] node Guest node to fence + */ +void +pcmk__fence_guest(pe_node_t *node) +{ + pe_resource_t *container = NULL; + pe_action_t *stop = NULL; + pe_action_t *stonith_op = NULL; + + /* The fence action is just a label; we don't do anything differently for + * off vs. reboot. We specify it explicitly, rather than let it default to + * cluster's default action, because we are not _initiating_ fencing -- we + * are creating a pseudo-event to describe fencing that is already occurring + * by other means (container recovery). + */ + const char *fence_action = "off"; + + CRM_ASSERT(node != NULL); + + /* Check whether guest's container resource has any explicit stop or + * start (the stop may be implied by fencing of the guest's host). + */ + container = node->details->remote_rsc->container; + if (container) { + stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, + NULL); + + if (find_first_action(container->actions, NULL, CRMD_ACTION_START, + NULL)) { + fence_action = "reboot"; + } + } + + /* Create a fence pseudo-event, so we have an event to order actions + * against, and the controller can always detect it. + */ + stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", + FALSE, node->details->data_set); + pe__set_action_flags(stonith_op, pe_action_pseudo|pe_action_runnable); + + /* We want to imply stops/demotes after the guest is stopped, not wait until + * it is restarted, so we always order pseudo-fencing after stop, not start + * (even though start might be closer to what is done for a real reboot). + */ + if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) { + pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, + NULL, FALSE, + node->details->data_set); + + crm_info("Implying guest %s is down (action %d) after %s fencing", + pe__node_name(node), stonith_op->id, + pe__node_name(stop->node)); + order_actions(parent_stonith_op, stonith_op, + pe_order_runnable_left|pe_order_implies_then); + + } else if (stop) { + order_actions(stop, stonith_op, + pe_order_runnable_left|pe_order_implies_then); + crm_info("Implying guest %s is down (action %d) " + "after container %s is stopped (action %d)", + pe__node_name(node), stonith_op->id, + container->id, stop->id); + } else { + /* If we're fencing the guest node but there's no stop for the guest + * resource, we must think the guest is already stopped. However, we may + * think so because its resource history was just cleaned. 
To avoid + * unnecessarily considering the guest node down if it's really up, + * order the pseudo-fencing after any stop of the connection resource, + * which will be ordered after any container (re-)probe. + */ + stop = find_first_action(node->details->remote_rsc->actions, NULL, + RSC_STOP, NULL); + + if (stop) { + order_actions(stop, stonith_op, pe_order_optional); + crm_info("Implying guest %s is down (action %d) " + "after connection is stopped (action %d)", + pe__node_name(node), stonith_op->id, stop->id); + } else { + /* Not sure why we're fencing, but everything must already be + * cleanly stopped. + */ + crm_info("Implying guest %s is down (action %d) ", + pe__node_name(node), stonith_op->id); + } + } + + // Order/imply other actions relative to pseudo-fence as with real fence + pcmk__order_vs_fence(stonith_op, node->details->data_set); +} + +/*! + * \internal + * \brief Check whether node has already been unfenced + * + * \param[in] node Node to check + * + * \return true if node has a nonzero #node-unfenced attribute (or none), + * otherwise false + */ +bool +pcmk__node_unfenced(const pe_node_t *node) +{ + const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED); + + return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches); +} + +/*! + * \internal + * \brief Order a resource's start and stop relative to unfencing of a node + * + * \param[in,out] data Node that could be unfenced + * \param[in,out] user_data Resource to order + */ +void +pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data) +{ + pe_node_t *node = (pe_node_t *) data; + pe_resource_t *rsc = (pe_resource_t *) user_data; + + pe_action_t *unfence = pe_fence_op(node, "on", true, NULL, false, + rsc->cluster); + + crm_debug("Ordering any stops of %s before %s, and any starts after", + rsc->id, unfence->uuid); + + /* + * It would be more efficient to order clone resources once, + * rather than order each instance, but ordering the instance + * allows us to avoid unnecessary dependencies that might conflict + * with user constraints. + * + * @TODO: This constraint can still produce a transition loop if the + * resource has a stop scheduled on the node being unfenced, and + * there is a user ordering constraint to start some other resource + * (which will be ordered after the unfence) before stopping this + * resource. An example is "start some slow-starting cloned service + * before stopping an associated virtual IP that may be moving to + * it": + * stop this -> unfencing -> start that -> stop this + */ + pcmk__new_ordering(rsc, stop_key(rsc), NULL, + NULL, strdup(unfence->uuid), unfence, + pe_order_optional|pe_order_same_node, + rsc->cluster); + + pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence, + rsc, start_key(rsc), NULL, + pe_order_implies_then_on_node|pe_order_same_node, + rsc->cluster); +} diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c new file mode 100644 index 0000000..cb139f7 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_group.c @@ -0,0 +1,865 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdbool.h> + +#include <crm/msg_xml.h> + +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +/*! 
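+ * \internal
+ * \brief Example: assigning a group via its method table
+ *
+ * A sketch (illustrative only; \c group is assumed to be a group resource in
+ * the working set). The return value identifies a single node only when the
+ * whole group lands on one node, as with colocated groups:
+ * \code
+ * pe_node_t *node = group->cmds->assign(group, NULL);
+ *
+ * if (node == NULL) {
+ *     // group is non-colocated, empty, or could not be fully assigned
+ * }
+ * \endcode
+ */
+
+/*!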
+ * \internal + * \brief Assign a group resource to a node + * + * \param[in,out] rsc Group resource to assign to a node + * \param[in] prefer Node to prefer, if all else is equal + * + * \return Node that \p rsc is assigned to, if assigned entirely to one node + */ +pe_node_t * +pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer) +{ + pe_node_t *first_assigned_node = NULL; + pe_resource_t *first_member = NULL; + + CRM_ASSERT(rsc != NULL); + + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { + return rsc->allocated_to; // Assignment already done + } + if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { + pe_rsc_debug(rsc, "Assignment dependency loop detected involving %s", + rsc->id); + return NULL; + } + + if (rsc->children == NULL) { + // No members to assign + pe__clear_resource_flags(rsc, pe_rsc_provisional); + return NULL; + } + + pe__set_resource_flags(rsc, pe_rsc_allocating); + first_member = (pe_resource_t *) rsc->children->data; + rsc->role = first_member->role; + + pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), + rsc, __func__, rsc->allowed_nodes, rsc->cluster); + + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *member = (pe_resource_t *) iter->data; + pe_node_t *node = NULL; + + pe_rsc_trace(rsc, "Assigning group %s member %s", + rsc->id, member->id); + node = member->cmds->assign(member, prefer); + if (first_assigned_node == NULL) { + first_assigned_node = node; + } + } + + pe__set_next_role(rsc, first_member->next_role, "first group member"); + pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional); + + if (!pe__group_flag_is_set(rsc, pe__group_colocated)) { + return NULL; + } + return first_assigned_node; +} + +/*! + * \internal + * \brief Create a pseudo-operation for a group as an ordering point + * + * \param[in,out] group Group resource to create action for + * \param[in] action Action name + * + * \return Newly created pseudo-operation + */ +static pe_action_t * +create_group_pseudo_op(pe_resource_t *group, const char *action) +{ + pe_action_t *op = custom_action(group, pcmk__op_key(group->id, action, 0), + action, NULL, TRUE, TRUE, group->cluster); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); + return op; +} + +/*! 
+ * \internal + * \brief Create all actions needed for a given group resource + * + * \param[in,out] rsc Group resource to create actions for + */ +void +pcmk__group_create_actions(pe_resource_t *rsc) +{ + CRM_ASSERT(rsc != NULL); + + pe_rsc_trace(rsc, "Creating actions for group %s", rsc->id); + + // Create actions for individual group members + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *member = (pe_resource_t *) iter->data; + + member->cmds->create_actions(member); + } + + // Create pseudo-actions for group itself to serve as ordering points + create_group_pseudo_op(rsc, RSC_START); + create_group_pseudo_op(rsc, RSC_STARTED); + create_group_pseudo_op(rsc, RSC_STOP); + create_group_pseudo_op(rsc, RSC_STOPPED); + if (crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTABLE))) { + create_group_pseudo_op(rsc, RSC_DEMOTE); + create_group_pseudo_op(rsc, RSC_DEMOTED); + create_group_pseudo_op(rsc, RSC_PROMOTE); + create_group_pseudo_op(rsc, RSC_PROMOTED); + } +} + +// User data for member_internal_constraints() +struct member_data { + // These could be derived from member but this avoids some function calls + bool ordered; + bool colocated; + bool promotable; + + pe_resource_t *last_active; + pe_resource_t *previous_member; +}; + +/*! + * \internal + * \brief Create implicit constraints needed for a group member + * + * \param[in,out] data Group member to create implicit constraints for + * \param[in,out] user_data Member data (struct member_data *) + */ +static void +member_internal_constraints(gpointer data, gpointer user_data) +{ + pe_resource_t *member = (pe_resource_t *) data; + struct member_data *member_data = (struct member_data *) user_data; + + // For ordering demote vs demote or stop vs stop + uint32_t down_flags = pe_order_implies_first_printed; + + // For ordering demote vs demoted or stop vs stopped + uint32_t post_down_flags = pe_order_implies_then_printed; + + // Create the individual member's implicit constraints + member->cmds->internal_constraints(member); + + if (member_data->previous_member == NULL) { + // This is first member + if (member_data->ordered) { + pe__set_order_flags(down_flags, pe_order_optional); + post_down_flags = pe_order_implies_then; + } + + } else if (member_data->colocated) { + // Colocate this member with the previous one + pcmk__new_colocation("group:internal_colocation", NULL, INFINITY, + member, member_data->previous_member, NULL, NULL, + pcmk_is_set(member->flags, pe_rsc_critical), + member->cluster); + } + + if (member_data->promotable) { + // Demote group -> demote member -> group is demoted + pcmk__order_resource_actions(member->parent, RSC_DEMOTE, + member, RSC_DEMOTE, down_flags); + pcmk__order_resource_actions(member, RSC_DEMOTE, + member->parent, RSC_DEMOTED, + post_down_flags); + + // Promote group -> promote member -> group is promoted + pcmk__order_resource_actions(member, RSC_PROMOTE, + member->parent, RSC_PROMOTED, + pe_order_runnable_left + |pe_order_implies_then + |pe_order_implies_then_printed); + pcmk__order_resource_actions(member->parent, RSC_PROMOTE, + member, RSC_PROMOTE, + pe_order_implies_first_printed); + } + + // Stop group -> stop member -> group is stopped + pcmk__order_stops(member->parent, member, down_flags); + pcmk__order_resource_actions(member, RSC_STOP, member->parent, RSC_STOPPED, + post_down_flags); + + // Start group -> start member -> group is started + pcmk__order_starts(member->parent, member, pe_order_implies_first_printed); + 
pcmk__order_resource_actions(member, RSC_START, member->parent, RSC_STARTED, + pe_order_runnable_left + |pe_order_implies_then + |pe_order_implies_then_printed); + + if (!member_data->ordered) { + pcmk__order_starts(member->parent, member, + pe_order_implies_then + |pe_order_runnable_left + |pe_order_implies_first_printed); + if (member_data->promotable) { + pcmk__order_resource_actions(member->parent, RSC_PROMOTE, member, + RSC_PROMOTE, + pe_order_implies_then + |pe_order_runnable_left + |pe_order_implies_first_printed); + } + + } else if (member_data->previous_member == NULL) { + pcmk__order_starts(member->parent, member, pe_order_none); + if (member_data->promotable) { + pcmk__order_resource_actions(member->parent, RSC_PROMOTE, member, + RSC_PROMOTE, pe_order_none); + } + + } else { + // Order this member relative to the previous one + + pcmk__order_starts(member_data->previous_member, member, + pe_order_implies_then|pe_order_runnable_left); + pcmk__order_stops(member, member_data->previous_member, + pe_order_optional|pe_order_restart); + + /* In unusual circumstances (such as adding a new member to the middle + * of a group with unmanaged later members), this member may be active + * while the previous (new) member is inactive. In this situation, the + * usual restart orderings will be irrelevant, so we need to order this + * member's stop before the previous member's start. + */ + if ((member->running_on != NULL) + && (member_data->previous_member->running_on == NULL)) { + pcmk__order_resource_actions(member, RSC_STOP, + member_data->previous_member, RSC_START, + pe_order_implies_first + |pe_order_runnable_left); + } + + if (member_data->promotable) { + pcmk__order_resource_actions(member_data->previous_member, + RSC_PROMOTE, member, RSC_PROMOTE, + pe_order_implies_then + |pe_order_runnable_left); + pcmk__order_resource_actions(member, RSC_DEMOTE, + member_data->previous_member, + RSC_DEMOTE, pe_order_optional); + } + } + + // Make sure partially active groups shut down in sequence + if (member->running_on != NULL) { + if (member_data->ordered && (member_data->previous_member != NULL) + && (member_data->previous_member->running_on == NULL) + && (member_data->last_active != NULL) + && (member_data->last_active->running_on != NULL)) { + pcmk__order_stops(member, member_data->last_active, pe_order_optional); + } + member_data->last_active = member; + } + + member_data->previous_member = member; +} + +/*! + * \internal + * \brief Create implicit constraints needed for a group resource + * + * \param[in,out] rsc Group resource to create implicit constraints for + */ +void +pcmk__group_internal_constraints(pe_resource_t *rsc) +{ + struct member_data member_data = { false, }; + + CRM_ASSERT(rsc != NULL); + + /* Order group pseudo-actions relative to each other for restarting: + * stop group -> group is stopped -> start group -> group is started + */ + pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED, + pe_order_runnable_left); + pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, + pe_order_optional); + pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED, + pe_order_runnable_left); + + member_data.ordered = pe__group_flag_is_set(rsc, pe__group_ordered); + member_data.colocated = pe__group_flag_is_set(rsc, pe__group_colocated); + member_data.promotable = pcmk_is_set(pe__const_top_resource(rsc, false)->flags, + pe_rsc_promotable); + g_list_foreach(rsc->children, member_internal_constraints, &member_data); +} + +/*! 
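+ * \internal
+ * \brief Example: orderings implied for an ordered, colocated group
+ *
+ * As an illustration of the constraints created above, an ordered, colocated
+ * group with members (A, B) yields chains like these (sketch only; optional
+ * and "printed" flag details omitted):
+ * \code
+ * // start group -> start A -> start B -> group started
+ * // stop  group -> stop  B -> stop  A -> group stopped
+ * \endcode
+ */
+
+/*!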
+ * \internal + * \brief Apply a colocation's score to node weights or resource priority + * + * Given a colocation constraint for a group with some other resource, apply the + * score to the dependent's allowed node weights (if we are still placing + * resources) or priority (if we are choosing promotable clone instance roles). + * + * \param[in,out] dependent Dependent group resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint to apply + */ +static void +colocate_group_with(pe_resource_t *dependent, const pe_resource_t *primary, + const pcmk__colocation_t *colocation) +{ + pe_resource_t *member = NULL; + + if (dependent->children == NULL) { + return; + } + + pe_rsc_trace(primary, "Processing %s (group %s with %s) for dependent", + colocation->id, dependent->id, primary->id); + + if (pe__group_flag_is_set(dependent, pe__group_colocated)) { + // Colocate first member (internal colocations will handle the rest) + member = (pe_resource_t *) dependent->children->data; + member->cmds->apply_coloc_score(member, primary, colocation, true); + return; + } + + if (colocation->score >= INFINITY) { + pcmk__config_err("%s: Cannot perform mandatory colocation between " + "non-colocated group and %s", + dependent->id, primary->id); + return; + } + + // Colocate each member individually + for (GList *iter = dependent->children; iter != NULL; iter = iter->next) { + member = (pe_resource_t *) iter->data; + member->cmds->apply_coloc_score(member, primary, colocation, true); + } +} + +/*! + * \internal + * \brief Apply a colocation's score to node weights or resource priority + * + * Given a colocation constraint for some other resource with a group, apply the + * score to the dependent's allowed node weights (if we are still placing + * resources) or priority (if we are choosing promotable clone instance roles). + * + * \param[in,out] dependent Dependent resource in colocation + * \param[in] primary Primary group resource in colocation + * \param[in] colocation Colocation constraint to apply + */ +static void +colocate_with_group(pe_resource_t *dependent, const pe_resource_t *primary, + const pcmk__colocation_t *colocation) +{ + pe_resource_t *member = NULL; + + pe_rsc_trace(primary, + "Processing colocation %s (%s with group %s) for primary", + colocation->id, dependent->id, primary->id); + + if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { + return; + } + + if (pe__group_flag_is_set(primary, pe__group_colocated)) { + + if (colocation->score >= INFINITY) { + /* For mandatory colocations, the entire group must be assignable + * (and in the specified role if any), so apply the colocation based + * on the last member. + */ + member = pe__last_group_member(primary); + } else if (primary->children != NULL) { + /* For optional colocations, whether the group is partially or fully + * up doesn't matter, so apply the colocation based on the first + * member. 
+ */ + member = (pe_resource_t *) primary->children->data; + } + if (member == NULL) { + return; // Nothing to colocate with + } + + member->cmds->apply_coloc_score(dependent, member, colocation, false); + return; + } + + if (colocation->score >= INFINITY) { + pcmk__config_err("%s: Cannot perform mandatory colocation with" + " non-colocated group %s", + dependent->id, primary->id); + return; + } + + // Colocate dependent with each member individually + for (GList *iter = primary->children; iter != NULL; iter = iter->next) { + member = (pe_resource_t *) iter->data; + member->cmds->apply_coloc_score(dependent, member, colocation, false); + } +} + +/*! + * \internal + * \brief Apply a colocation's score to node weights or resource priority + * + * Given a colocation constraint, apply its score to the dependent's + * allowed node weights (if we are still placing resources) or priority (if + * we are choosing promotable clone instance roles). + * + * \param[in,out] dependent Dependent resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint to apply + * \param[in] for_dependent true if called on behalf of dependent + */ +void +pcmk__group_apply_coloc_score(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation, + bool for_dependent) +{ + CRM_ASSERT((dependent != NULL) && (primary != NULL) + && (colocation != NULL)); + + if (for_dependent) { + colocate_group_with(dependent, primary, colocation); + + } else { + // Method should only be called for primitive dependents + CRM_ASSERT(dependent->variant == pe_native); + + colocate_with_group(dependent, primary, colocation); + } +} + +/*! + * \internal + * \brief Return action flags for a given group resource action + * + * \param[in,out] action Group action to get flags for + * \param[in] node If not NULL, limit effects to this node + * + * \return Flags appropriate to \p action on \p node + */ +enum pe_action_flags +pcmk__group_action_flags(pe_action_t *action, const pe_node_t *node) +{ + // Default flags for a group action + enum pe_action_flags flags = pe_action_optional + |pe_action_runnable + |pe_action_pseudo; + + CRM_ASSERT(action != NULL); + + // Update flags considering each member's own flags for same action + for (GList *iter = action->rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *member = (pe_resource_t *) iter->data; + + // Check whether member has the same action + enum action_tasks task = get_complex_task(member, action->task); + const char *task_s = task2text(task); + pe_action_t *member_action = find_first_action(member->actions, NULL, + task_s, node); + + if (member_action != NULL) { + enum pe_action_flags member_flags; + + member_flags = member->cmds->action_flags(member_action, node); + + // Group action is mandatory if any member action is + if (pcmk_is_set(flags, pe_action_optional) + && !pcmk_is_set(member_flags, pe_action_optional)) { + pe_rsc_trace(action->rsc, "%s is mandatory because %s is", + action->uuid, member_action->uuid); + pe__clear_raw_action_flags(flags, "group action", + pe_action_optional); + pe__clear_action_flags(action, pe_action_optional); + } + + // Group action is unrunnable if any member action is + if (!pcmk__str_eq(task_s, action->task, pcmk__str_none) + && pcmk_is_set(flags, pe_action_runnable) + && !pcmk_is_set(member_flags, pe_action_runnable)) { + + pe_rsc_trace(action->rsc, "%s is unrunnable because %s is", + action->uuid, member_action->uuid); + 
pe__clear_raw_action_flags(flags, "group action", + pe_action_runnable); + pe__clear_action_flags(action, pe_action_runnable); + } + + /* Group (pseudo-)actions other than stop or demote are unrunnable + * unless every member will do it. + */ + } else if ((task != stop_rsc) && (task != action_demote)) { + pe_rsc_trace(action->rsc, + "%s is not runnable because %s will not %s", + action->uuid, member->id, task_s); + pe__clear_raw_action_flags(flags, "group action", + pe_action_runnable); + } + } + + return flags; +} + +/*! + * \internal + * \brief Update two actions according to an ordering between them + * + * Given information about an ordering of two actions, update the actions' flags + * (and runnable_before members if appropriate) as appropriate for the ordering. + * Effects may cascade to other orderings involving the actions as well. + * + * \param[in,out] first 'First' action in an ordering + * \param[in,out] then 'Then' action in an ordering + * \param[in] node If not NULL, limit scope of ordering to this node + * (only used when interleaving instances) + * \param[in] flags Action flags for \p first for ordering purposes + * \param[in] filter Action flags to limit scope of certain updates (may + * include pe_action_optional to affect only mandatory + * actions, and pe_action_runnable to affect only + * runnable actions) + * \param[in] type Group of enum pe_ordering flags to apply + * \param[in,out] data_set Cluster working set + * + * \return Group of enum pcmk__updated flags indicating what was updated + */ +uint32_t +pcmk__group_update_ordered_actions(pe_action_t *first, pe_action_t *then, + const pe_node_t *node, uint32_t flags, + uint32_t filter, uint32_t type, + pe_working_set_t *data_set) +{ + uint32_t changed = pcmk__updated_none; + + CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL)); + + // Group method can be called only for group action as "then" action + CRM_ASSERT(then->rsc != NULL); + + // Update the actions for the group itself + changed |= pcmk__update_ordered_actions(first, then, node, flags, filter, + type, data_set); + + // Update the actions for each group member + for (GList *iter = then->rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *member = (pe_resource_t *) iter->data; + + pe_action_t *member_action = find_first_action(member->actions, NULL, + then->task, node); + + if (member_action != NULL) { + changed |= member->cmds->update_ordered_actions(first, + member_action, node, + flags, filter, type, + data_set); + } + } + return changed; +} + +/*! 
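+ * Example (illustrative sketch only, not part of the upstream code): a
+ * caller processing an ordering whose "then" action belongs to a group
+ * would normally dispatch through the resource's method table, which
+ * resolves to the function above. The variables first, then, flags,
+ * filter, type, and data_set are assumed to come from the ordering being
+ * processed.
+ */
+#if 0
+uint32_t changed = then->rsc->cmds->update_ordered_actions(first, then,
+                                                           NULL, flags,
+                                                           filter, type,
+                                                           data_set);
+
+if (changed != pcmk__updated_none) {
+    /* Flag changes may cascade to other orderings involving these actions */
+}
+#endif
+
+/*!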
+ * \internal + * \brief Apply a location constraint to a group's allowed node scores + * + * \param[in,out] rsc Group resource to apply constraint to + * \param[in,out] location Location constraint to apply + */ +void +pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location) +{ + GList *node_list_orig = NULL; + GList *node_list_copy = NULL; + bool reset_scores = true; + + CRM_ASSERT((rsc != NULL) && (location != NULL)); + + node_list_orig = location->node_list_rh; + node_list_copy = pcmk__copy_node_list(node_list_orig, true); + reset_scores = pe__group_flag_is_set(rsc, pe__group_colocated); + + // Apply the constraint for the group itself (updates node scores) + pcmk__apply_location(rsc, location); + + // Apply the constraint for each member + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *member = (pe_resource_t *) iter->data; + + member->cmds->apply_location(member, location); + + if (reset_scores) { + /* The first member of colocated groups needs to use the original + * node scores, but subsequent members should work on a copy, since + * the first member's scores already incorporate theirs. + */ + reset_scores = false; + location->node_list_rh = node_list_copy; + } + } + + location->node_list_rh = node_list_orig; + g_list_free_full(node_list_copy, free); +} + +// Group implementation of resource_alloc_functions_t:colocated_resources() +GList * +pcmk__group_colocated_resources(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, + GList *colocated_rscs) +{ + const pe_resource_t *member = NULL; + + CRM_ASSERT(rsc != NULL); + + if (orig_rsc == NULL) { + orig_rsc = rsc; + } + + if (pe__group_flag_is_set(rsc, pe__group_colocated) + || pe_rsc_is_clone(rsc->parent)) { + /* This group has colocated members and/or is cloned -- either way, + * add every child's colocated resources to the list. The first and last + * members will include the group's own colocations. + */ + colocated_rscs = g_list_prepend(colocated_rscs, (gpointer) rsc); + for (const GList *iter = rsc->children; + iter != NULL; iter = iter->next) { + + member = (const pe_resource_t *) iter->data; + colocated_rscs = member->cmds->colocated_resources(member, orig_rsc, + colocated_rscs); + } + + } else if (rsc->children != NULL) { + /* This group's members are not colocated, and the group is not cloned, + * so just add the group's own colocations to the list. + */ + colocated_rscs = pcmk__colocated_resources(rsc, orig_rsc, colocated_rscs); + } + + return colocated_rscs; +} + +// Group implementation of resource_alloc_functions_t:with_this_colocations() +void +pcmk__with_group_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) + +{ + CRM_CHECK((rsc != NULL) && (rsc->variant == pe_group) + && (orig_rsc != NULL) && (list != NULL), + return); + + // Ignore empty groups + if (rsc->children == NULL) { + return; + } + + /* "With this" colocations are needed only for the group itself and for its + * last member. Add the group's colocations plus any relevant + * parent colocations if cloned. 
+ */ + if ((rsc == orig_rsc) || (orig_rsc == pe__last_group_member(rsc))) { + crm_trace("Adding 'with %s' colocations to list for %s", + rsc->id, orig_rsc->id); + pcmk__add_with_this_list(list, rsc->rsc_cons_lhs); + if (rsc->parent != NULL) { // Cloned group + rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, + list); + } + } +} + +// Group implementation of resource_alloc_functions_t:this_with_colocations() +void +pcmk__group_with_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) +{ + CRM_CHECK((rsc != NULL) && (rsc->variant == pe_group) + && (orig_rsc != NULL) && (list != NULL), + return); + + // Ignore empty groups + if (rsc->children == NULL) { + return; + } + + /* Colocations for the group itself, or for its first member, consist of the + * group's colocations plus any relevant parent colocations if cloned. + */ + if ((rsc == orig_rsc) + || (orig_rsc == (const pe_resource_t *) rsc->children->data)) { + crm_trace("Adding '%s with' colocations to list for %s", + rsc->id, orig_rsc->id); + pcmk__add_this_with_list(list, rsc->rsc_cons); + if (rsc->parent != NULL) { // Cloned group + rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, + list); + } + return; + } + + /* Later group members honor the group's colocations indirectly, due to the + * internal group colocations that chain everything from the first member. + * However, if an earlier group member is unmanaged, this chaining will not + * happen, so the group's mandatory colocations must be explicitly added. + */ + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + const pe_resource_t *member = (const pe_resource_t *) iter->data; + + if (orig_rsc == member) { + break; // We've seen all earlier members, and none are unmanaged + } + + if (!pcmk_is_set(member->flags, pe_rsc_managed)) { + crm_trace("Adding mandatory '%s with' colocations to list for " + "member %s because earlier member %s is unmanaged", + rsc->id, orig_rsc->id, member->id); + for (const GList *cons_iter = rsc->rsc_cons; cons_iter != NULL; + cons_iter = cons_iter->next) { + const pcmk__colocation_t *colocation = NULL; + + colocation = (const pcmk__colocation_t *) cons_iter->data; + if (colocation->score == INFINITY) { + pcmk__add_this_with(list, colocation); + } + } + // @TODO Add mandatory (or all?) clone constraints if cloned + break; + } + } +} + +/*! + * \internal + * \brief Update nodes with scores of colocated resources' nodes + * + * Given a table of nodes and a resource, update the nodes' scores with the + * scores of the best nodes matching the attribute used for each of the + * resource's relevant colocations. + * + * \param[in,out] rsc Resource to check colocations for + * \param[in] log_id Resource ID to use in logs (if NULL, use \p rsc ID) + * \param[in,out] nodes Nodes to update + * \param[in] attr Colocation attribute (NULL to use default) + * \param[in] factor Incorporate scores multiplied by this factor + * \param[in] flags Bitmask of enum pcmk__coloc_select values + * + * \note The caller remains responsible for freeing \p *nodes. 
+ */
+void
+pcmk__group_add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
+                                      GHashTable **nodes, const char *attr,
+                                      float factor, uint32_t flags)
+{
+    pe_resource_t *member = NULL;
+
+    CRM_CHECK((rsc != NULL) && (nodes != NULL), return);
+
+    if (log_id == NULL) {
+        log_id = rsc->id;
+    }
+
+    /* Ignore empty groups (only possible with schema validation disabled).
+     * Check this before setting pe_rsc_merging, so an early return cannot
+     * leave the flag set.
+     */
+    if (rsc->children == NULL) {
+        return;
+    }
+
+    // Avoid infinite recursion
+    if (pcmk_is_set(rsc->flags, pe_rsc_merging)) {
+        pe_rsc_info(rsc, "%s: Breaking dependency loop at %s",
+                    log_id, rsc->id);
+        return;
+    }
+    pe__set_resource_flags(rsc, pe_rsc_merging);
+
+    /* Refer the operation to the first or last member as appropriate.
+     *
+     * cmp_resources() is the only caller that passes a NULL nodes table,
+     * and is also the only caller using pcmk__coloc_select_this_with.
+     * For "this with" colocations, the last member will recursively
+     * incorporate all the other members' "this with" colocations via the
+     * internal group colocations (and via the first member, the group's own
+     * colocations).
+     *
+     * For "with this" colocations, the first member works similarly.
+     */
+    if (*nodes == NULL) {
+        member = pe__last_group_member(rsc);
+    } else {
+        member = rsc->children->data;
+    }
+    pe_rsc_trace(rsc, "%s: Merging scores from group %s using member %s "
+                 "(at %.6f)", log_id, rsc->id, member->id, factor);
+    member->cmds->add_colocated_node_scores(member, log_id, nodes, attr,
+                                            factor, flags);
+    pe__clear_resource_flags(rsc, pe_rsc_merging);
+}
+
+// Group implementation of resource_alloc_functions_t:add_utilization()
+void
+pcmk__group_add_utilization(const pe_resource_t *rsc,
+                            const pe_resource_t *orig_rsc, GList *all_rscs,
+                            GHashTable *utilization)
+{
+    pe_resource_t *member = NULL;
+
+    CRM_ASSERT((rsc != NULL) && (orig_rsc != NULL) && (utilization != NULL));
+
+    if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+        return;
+    }
+
+    pe_rsc_trace(orig_rsc, "%s: Adding group %s as colocated utilization",
+                 orig_rsc->id, rsc->id);
+    if (pe__group_flag_is_set(rsc, pe__group_colocated)
+        || pe_rsc_is_clone(rsc->parent)) {
+        // Every group member will be on same node, so sum all members
+        for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+            member = (pe_resource_t *) iter->data;
+
+            if (pcmk_is_set(member->flags, pe_rsc_provisional)
+                && (g_list_find(all_rscs, member) == NULL)) {
+                member->cmds->add_utilization(member, orig_rsc, all_rscs,
+                                              utilization);
+            }
+        }
+
+    } else if (rsc->children != NULL) {
+        // Just add first member's utilization
+        member = (pe_resource_t *) rsc->children->data;
+        if ((member != NULL)
+            && pcmk_is_set(member->flags, pe_rsc_provisional)
+            && (g_list_find(all_rscs, member) == NULL)) {
+
+            member->cmds->add_utilization(member, orig_rsc, all_rscs,
+                                          utilization);
+        }
+    }
+}
+
+// Group implementation of resource_alloc_functions_t:shutdown_lock()
+void
+pcmk__group_shutdown_lock(pe_resource_t *rsc)
+{
+    CRM_ASSERT(rsc != NULL);
+
+    for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+        pe_resource_t *member = (pe_resource_t *) iter->data;
+
+        member->cmds->shutdown_lock(member);
+    }
+}
diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c
new file mode 100644
index 0000000..c880196
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_instances.c
@@ -0,0 +1,1659 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +/* This file is intended for code usable with both clone instances and bundle + * replica containers. + */ + +#include <crm_internal.h> +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +/*! + * \internal + * \brief Check whether a clone or bundle has instances for all available nodes + * + * \param[in] collective Clone or bundle to check + * + * \return true if \p collective has enough instances for all of its available + * allowed nodes, otherwise false + */ +static bool +can_run_everywhere(const pe_resource_t *collective) +{ + GHashTableIter iter; + pe_node_t *node = NULL; + int available_nodes = 0; + int max_instances = 0; + + switch (collective->variant) { + case pe_clone: + max_instances = pe__clone_max(collective); + break; + case pe_container: + max_instances = pe__bundle_max(collective); + break; + default: + return false; // Not actually possible + } + + g_hash_table_iter_init(&iter, collective->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if (pcmk__node_available(node, false, false) + && (max_instances < ++available_nodes)) { + return false; + } + } + return true; +} + +/*! + * \internal + * \brief Check whether a node is allowed to run an instance + * + * \param[in] instance Clone instance or bundle container to check + * \param[in] node Node to check + * \param[in] max_per_node Maximum number of instances allowed to run on a node + * + * \return true if \p node is allowed to run \p instance, otherwise false + */ +static bool +can_run_instance(const pe_resource_t *instance, const pe_node_t *node, + int max_per_node) +{ + pe_node_t *allowed_node = NULL; + + if (pcmk_is_set(instance->flags, pe_rsc_orphan)) { + pe_rsc_trace(instance, "%s cannot run on %s: orphaned", + instance->id, pe__node_name(node)); + return false; + } + + if (!pcmk__node_available(node, false, false)) { + pe_rsc_trace(instance, + "%s cannot run on %s: node cannot run resources", + instance->id, pe__node_name(node)); + return false; + } + + allowed_node = pcmk__top_allowed_node(instance, node); + if (allowed_node == NULL) { + crm_warn("%s cannot run on %s: node not allowed", + instance->id, pe__node_name(node)); + return false; + } + + if (allowed_node->weight < 0) { + pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there", + instance->id, pe__node_name(node), + pcmk_readable_score(allowed_node->weight)); + return false; + } + + if (allowed_node->count >= max_per_node) { + pe_rsc_trace(instance, + "%s cannot run on %s: node already has %d instance%s", + instance->id, pe__node_name(node), max_per_node, + pcmk__plural_s(max_per_node)); + return false; + } + + pe_rsc_trace(instance, "%s can run on %s (%d already running)", + instance->id, pe__node_name(node), allowed_node->count); + return true; +} + +/*! 
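+ * Example (illustrative sketch): checking every allowed node of a
+ * hypothetical instance `instance` against a per-node limit of 1, the
+ * common case for anonymous clones. `instance` is an assumed local
+ * variable, not part of the surrounding code.
+ */
+#if 0
+GHashTableIter iter;
+pe_node_t *node = NULL;
+
+g_hash_table_iter_init(&iter, instance->allowed_nodes);
+while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+    crm_trace("%s %s run on %s", instance->id,
+              (can_run_instance(instance, node, 1)? "can" : "cannot"),
+              pe__node_name(node));
+}
+#endif
+
+/*!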
+ * \internal
+ * \brief Ban a clone instance or bundle replica from unavailable allowed nodes
+ *
+ * \param[in,out] instance      Clone instance or bundle replica to ban
+ * \param[in]     max_per_node  Maximum instances allowed to run on a node
+ */
+static void
+ban_unavailable_allowed_nodes(pe_resource_t *instance, int max_per_node)
+{
+    if (instance->allowed_nodes != NULL) {
+        GHashTableIter iter;
+        pe_node_t *node = NULL;
+
+        g_hash_table_iter_init(&iter, instance->allowed_nodes);
+        while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+            if (!can_run_instance(instance, node, max_per_node)) {
+                pe_rsc_trace(instance, "Banning %s from unavailable node %s",
+                             instance->id, pe__node_name(node));
+                node->weight = -INFINITY;
+                for (GList *child_iter = instance->children;
+                     child_iter != NULL; child_iter = child_iter->next) {
+                    pe_resource_t *child = (pe_resource_t *) child_iter->data;
+                    pe_node_t *child_node = NULL;
+
+                    child_node = pe_hash_table_lookup(child->allowed_nodes,
+                                                      node->details->id);
+                    if (child_node != NULL) {
+                        pe_rsc_trace(instance,
+                                     "Banning %s child %s "
+                                     "from unavailable node %s",
+                                     instance->id, child->id,
+                                     pe__node_name(node));
+                        child_node->weight = -INFINITY;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Create a hash table with a single node in it
+ *
+ * \param[in] node  Node to copy into new table
+ *
+ * \return Newly created hash table containing a copy of \p node
+ * \note The caller is responsible for freeing the result with
+ *       g_hash_table_destroy().
+ */
+static GHashTable *
+new_node_table(pe_node_t *node)
+{
+    GHashTable *table = pcmk__strkey_table(NULL, free);
+
+    node = pe__copy_node(node);
+    g_hash_table_insert(table, (gpointer) node->details->id, node);
+    return table;
+}
+
+/*!
+ * \internal
+ * \brief Apply a resource's parent's colocation scores to a node table
+ *
+ * \param[in]     rsc    Resource whose colocations should be applied
+ * \param[in,out] nodes  Node table to apply colocations to
+ */
+static void
+apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
+{
+    GList *iter = NULL;
+    pcmk__colocation_t *colocation = NULL;
+    pe_resource_t *other = NULL;
+    float factor = 0.0;
+
+    /* Because the this_with_colocations() and with_this_colocations() methods
+     * boil down to copies of rsc_cons and rsc_cons_lhs for clones and bundles,
+     * we can use those here directly for efficiency.
+     */
+    for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) {
+        colocation = (pcmk__colocation_t *) iter->data;
+        other = colocation->primary;
+        factor = colocation->score / (float) INFINITY;
+        other->cmds->add_colocated_node_scores(other, rsc->id, nodes,
+                                               colocation->node_attribute,
+                                               factor,
+                                               pcmk__coloc_select_default);
+    }
+    for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) {
+        colocation = (pcmk__colocation_t *) iter->data;
+        if (!pcmk__colocation_has_influence(colocation, rsc)) {
+            continue;
+        }
+        other = colocation->dependent;
+        factor = colocation->score / (float) INFINITY;
+        other->cmds->add_colocated_node_scores(other, rsc->id, nodes,
+                                               colocation->node_attribute,
+                                               factor,
+                                               pcmk__coloc_select_nonnegative);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Compare clone or bundle instances based on colocation scores
+ *
+ * Determine the relative order in which two clone or bundle instances should
+ * be assigned to nodes, considering the scores of colocation constraints
+ * directly or indirectly involving them.
+ * + * \param[in] instance1 First instance to compare + * \param[in] instance2 Second instance to compare + * + * \return A negative number if \p instance1 should be assigned first, + * a positive number if \p instance2 should be assigned first, + * or 0 if assignment order doesn't matter + */ +static int +cmp_instance_by_colocation(const pe_resource_t *instance1, + const pe_resource_t *instance2) +{ + int rc = 0; + pe_node_t *node1 = NULL; + pe_node_t *node2 = NULL; + pe_node_t *current_node1 = pe__current_node(instance1); + pe_node_t *current_node2 = pe__current_node(instance2); + GHashTable *colocated_scores1 = NULL; + GHashTable *colocated_scores2 = NULL; + + CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL) + && (instance2 != NULL) && (instance2->parent != NULL) + && (current_node1 != NULL) && (current_node2 != NULL)); + + // Create node tables initialized with each node + colocated_scores1 = new_node_table(current_node1); + colocated_scores2 = new_node_table(current_node2); + + // Apply parental colocations + apply_parent_colocations(instance1, &colocated_scores1); + apply_parent_colocations(instance2, &colocated_scores2); + + // Find original nodes again, with scores updated for colocations + node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id); + node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id); + + // Compare nodes by updated scores + if (node1->weight < node2->weight) { + crm_trace("Assign %s (%d on %s) after %s (%d on %s)", + instance1->id, node1->weight, pe__node_name(node1), + instance2->id, node2->weight, pe__node_name(node2)); + rc = 1; + + } else if (node1->weight > node2->weight) { + crm_trace("Assign %s (%d on %s) before %s (%d on %s)", + instance1->id, node1->weight, pe__node_name(node1), + instance2->id, node2->weight, pe__node_name(node2)); + rc = -1; + } + + g_hash_table_destroy(colocated_scores1); + g_hash_table_destroy(colocated_scores2); + return rc; +} + +/*! + * \internal + * \brief Check whether a resource or any of its children are failed + * + * \param[in] rsc Resource to check + * + * \return true if \p rsc or any of its children are failed, otherwise false + */ +static bool +did_fail(const pe_resource_t *rsc) +{ + if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + return true; + } + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + if (did_fail((const pe_resource_t *) iter->data)) { + return true; + } + } + return false; +} + +/*! + * \internal + * \brief Check whether a node is allowed to run a resource + * + * \param[in] rsc Resource to check + * \param[in,out] node Node to check (will be set NULL if not allowed) + * + * \return true if *node is either NULL or allowed for \p rsc, otherwise false + */ +static bool +node_is_allowed(const pe_resource_t *rsc, pe_node_t **node) +{ + if (*node != NULL) { + pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes, + (*node)->details->id); + if ((allowed == NULL) || (allowed->weight < 0)) { + pe_rsc_trace(rsc, "%s: current location (%s) is unavailable", + rsc->id, pe__node_name(*node)); + *node = NULL; + return false; + } + } + return true; +} + +/*! 
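+ * Example (illustrative sketch): node_is_allowed() both tests and filters.
+ * Given a hypothetical instance `instance`, it resets the node pointer to
+ * NULL when the current location is no longer usable, so later comparisons
+ * treat the instance as inactive.
+ */
+#if 0
+pe_node_t *current = pe__current_node(instance);
+
+if (!node_is_allowed(instance, &current)) {
+    /* current is now NULL; instance counts as not active anywhere usable */
+}
+#endif
+
+/*!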
+ * \internal + * \brief Compare two clone or bundle instances' instance numbers + * + * \param[in] a First instance to compare + * \param[in] b Second instance to compare + * + * \return A negative number if \p a's instance number is lower, + * a positive number if \p b's instance number is lower, + * or 0 if their instance numbers are the same + */ +gint +pcmk__cmp_instance_number(gconstpointer a, gconstpointer b) +{ + const pe_resource_t *instance1 = (const pe_resource_t *) a; + const pe_resource_t *instance2 = (const pe_resource_t *) b; + char *div1 = NULL; + char *div2 = NULL; + + CRM_ASSERT((instance1 != NULL) && (instance2 != NULL)); + + // Clone numbers are after a colon, bundle numbers after a dash + div1 = strrchr(instance1->id, ':'); + if (div1 == NULL) { + div1 = strrchr(instance1->id, '-'); + } + div2 = strrchr(instance2->id, ':'); + if (div2 == NULL) { + div2 = strrchr(instance2->id, '-'); + } + CRM_ASSERT((div1 != NULL) && (div2 != NULL)); + + return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10)); +} + +/*! + * \internal + * \brief Compare clone or bundle instances according to assignment order + * + * Compare two clone or bundle instances according to the order they should be + * assigned to nodes, preferring (in order): + * + * - Active instance that is less multiply active + * - Instance that is not active on a disallowed node + * - Instance with higher configured priority + * - Active instance whose current node can run resources + * - Active instance whose parent is allowed on current node + * - Active instance whose current node has fewer other instances + * - Active instance + * - Instance that isn't failed + * - Instance whose colocations result in higher score on current node + * - Instance with lower ID in lexicographic order + * + * \param[in] a First instance to compare + * \param[in] b Second instance to compare + * + * \return A negative number if \p a should be assigned first, + * a positive number if \p b should be assigned first, + * or 0 if assignment order doesn't matter + */ +gint +pcmk__cmp_instance(gconstpointer a, gconstpointer b) +{ + int rc = 0; + pe_node_t *node1 = NULL; + pe_node_t *node2 = NULL; + unsigned int nnodes1 = 0; + unsigned int nnodes2 = 0; + + bool can1 = true; + bool can2 = true; + + const pe_resource_t *instance1 = (const pe_resource_t *) a; + const pe_resource_t *instance2 = (const pe_resource_t *) b; + + CRM_ASSERT((instance1 != NULL) && (instance2 != NULL)); + + node1 = instance1->fns->active_node(instance1, &nnodes1, NULL); + node2 = instance2->fns->active_node(instance2, &nnodes2, NULL); + + /* If both instances are running and at least one is multiply + * active, prefer instance that's running on fewer nodes. + */ + if ((nnodes1 > 0) && (nnodes2 > 0)) { + if (nnodes1 < nnodes2) { + crm_trace("Assign %s (active on %d) before %s (active on %d): " + "less multiply active", + instance1->id, nnodes1, instance2->id, nnodes2); + return -1; + + } else if (nnodes1 > nnodes2) { + crm_trace("Assign %s (active on %d) after %s (active on %d): " + "more multiply active", + instance1->id, nnodes1, instance2->id, nnodes2); + return 1; + } + } + + /* An instance that is either inactive or active on an allowed node is + * preferred over an instance that is active on a no-longer-allowed node. 
+ */ + can1 = node_is_allowed(instance1, &node1); + can2 = node_is_allowed(instance2, &node2); + if (can1 && !can2) { + crm_trace("Assign %s before %s: not active on a disallowed node", + instance1->id, instance2->id); + return -1; + + } else if (!can1 && can2) { + crm_trace("Assign %s after %s: active on a disallowed node", + instance1->id, instance2->id); + return 1; + } + + // Prefer instance with higher configured priority + if (instance1->priority > instance2->priority) { + crm_trace("Assign %s before %s: priority (%d > %d)", + instance1->id, instance2->id, + instance1->priority, instance2->priority); + return -1; + + } else if (instance1->priority < instance2->priority) { + crm_trace("Assign %s after %s: priority (%d < %d)", + instance1->id, instance2->id, + instance1->priority, instance2->priority); + return 1; + } + + // Prefer active instance + if ((node1 == NULL) && (node2 == NULL)) { + crm_trace("No assignment preference for %s vs. %s: inactive", + instance1->id, instance2->id); + return 0; + + } else if (node1 == NULL) { + crm_trace("Assign %s after %s: active", instance1->id, instance2->id); + return 1; + + } else if (node2 == NULL) { + crm_trace("Assign %s before %s: active", instance1->id, instance2->id); + return -1; + } + + // Prefer instance whose current node can run resources + can1 = pcmk__node_available(node1, false, false); + can2 = pcmk__node_available(node2, false, false); + if (can1 && !can2) { + crm_trace("Assign %s before %s: current node can run resources", + instance1->id, instance2->id); + return -1; + + } else if (!can1 && can2) { + crm_trace("Assign %s after %s: current node can't run resources", + instance1->id, instance2->id); + return 1; + } + + // Prefer instance whose parent is allowed to run on instance's current node + node1 = pcmk__top_allowed_node(instance1, node1); + node2 = pcmk__top_allowed_node(instance2, node2); + if ((node1 == NULL) && (node2 == NULL)) { + crm_trace("No assignment preference for %s vs. 
%s: " + "parent not allowed on either instance's current node", + instance1->id, instance2->id); + return 0; + + } else if (node1 == NULL) { + crm_trace("Assign %s after %s: parent not allowed on current node", + instance1->id, instance2->id); + return 1; + + } else if (node2 == NULL) { + crm_trace("Assign %s before %s: parent allowed on current node", + instance1->id, instance2->id); + return -1; + } + + // Prefer instance whose current node is running fewer other instances + if (node1->count < node2->count) { + crm_trace("Assign %s before %s: fewer active instances on current node", + instance1->id, instance2->id); + return -1; + + } else if (node1->count > node2->count) { + crm_trace("Assign %s after %s: more active instances on current node", + instance1->id, instance2->id); + return 1; + } + + // Prefer instance that isn't failed + can1 = did_fail(instance1); + can2 = did_fail(instance2); + if (!can1 && can2) { + crm_trace("Assign %s before %s: not failed", + instance1->id, instance2->id); + return -1; + } else if (can1 && !can2) { + crm_trace("Assign %s after %s: failed", + instance1->id, instance2->id); + return 1; + } + + // Prefer instance with higher cumulative colocation score on current node + rc = cmp_instance_by_colocation(instance1, instance2); + if (rc != 0) { + return rc; + } + + // Prefer instance with lower instance number + rc = pcmk__cmp_instance_number(instance1, instance2); + if (rc < 0) { + crm_trace("Assign %s before %s: instance number", + instance1->id, instance2->id); + } else if (rc > 0) { + crm_trace("Assign %s after %s: instance number", + instance1->id, instance2->id); + } else { + crm_trace("No assignment preference for %s vs. %s", + instance1->id, instance2->id); + } + return rc; +} + +/*! + * \internal + * \brief Choose a node for an instance + * + * \param[in,out] instance Clone instance or bundle replica container + * \param[in] prefer If not NULL, attempt early assignment to this + * node, if still the best choice; otherwise, + * perform final assignment + * \param[in] max_per_node Assign at most this many instances to one node + * + * \return true if \p instance could be assigned to a node, otherwise false + */ +static bool +assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + int max_per_node) +{ + pe_node_t *chosen = NULL; + pe_node_t *allowed = NULL; + + CRM_ASSERT(instance != NULL); + pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id, + ((prefer == NULL)? 
"no node" : prefer->details->uname)); + + if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { + // Instance is already assigned + return instance->fns->location(instance, NULL, FALSE) != NULL; + } + + if (pcmk_is_set(instance->flags, pe_rsc_allocating)) { + pe_rsc_debug(instance, + "Assignment loop detected involving %s colocations", + instance->id); + return false; + } + + if (prefer != NULL) { // Possible early assignment to preferred node + + // Get preferred node with instance's scores + allowed = g_hash_table_lookup(instance->allowed_nodes, + prefer->details->id); + + if ((allowed == NULL) || (allowed->weight < 0)) { + pe_rsc_trace(instance, + "Not assigning %s to preferred node %s: unavailable", + instance->id, pe__node_name(prefer)); + return false; + } + } + + ban_unavailable_allowed_nodes(instance, max_per_node); + + if (prefer == NULL) { // Final assignment + chosen = instance->cmds->assign(instance, NULL); + + } else { // Possible early assignment to preferred node + GHashTable *backup = pcmk__copy_node_table(instance->allowed_nodes); + + chosen = instance->cmds->assign(instance, prefer); + + // Revert nodes if preferred node won't be assigned + if ((chosen != NULL) && (chosen->details != prefer->details)) { + crm_info("Not assigning %s to preferred node %s: %s is better", + instance->id, pe__node_name(prefer), + pe__node_name(chosen)); + g_hash_table_destroy(instance->allowed_nodes); + instance->allowed_nodes = backup; + pcmk__unassign_resource(instance); + chosen = NULL; + } else if (backup != NULL) { + g_hash_table_destroy(backup); + } + } + + // The parent tracks how many instances have been assigned to each node + if (chosen != NULL) { + allowed = pcmk__top_allowed_node(instance, chosen); + if (allowed == NULL) { + /* The instance is allowed on the node, but its parent isn't. This + * shouldn't be possible if the resource is managed, and we won't be + * able to limit the number of instances assigned to the node. + */ + CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed)); + + } else { + allowed->count++; + } + } + return chosen != NULL; +} + +/*! + * \internal + * \brief Reset the node counts of a resource's allowed nodes to zero + * + * \param[in,out] rsc Resource to reset + * + * \return Number of nodes that are available to run resources + */ +static unsigned int +reset_allowed_node_counts(pe_resource_t *rsc) +{ + unsigned int available_nodes = 0; + pe_node_t *node = NULL; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + node->count = 0; + if (pcmk__node_available(node, false, false)) { + available_nodes++; + } + } + return available_nodes; +} + +/*! 
+ * \internal + * \brief Check whether an instance has a preferred node + * + * \param[in] rsc Clone or bundle being assigned (for logs only) + * \param[in] instance Clone instance or bundle replica container + * \param[in] optimal_per_node Optimal number of instances per node + * + * \return Instance's current node if still available, otherwise NULL + */ +static const pe_node_t * +preferred_node(const pe_resource_t *rsc, const pe_resource_t *instance, + int optimal_per_node) +{ + const pe_node_t *node = NULL; + const pe_node_t *parent_node = NULL; + + // Check whether instance is active, healthy, and not yet assigned + if ((instance->running_on == NULL) + || !pcmk_is_set(instance->flags, pe_rsc_provisional) + || pcmk_is_set(instance->flags, pe_rsc_failed)) { + return NULL; + } + + // Check whether instance's current node can run resources + node = pe__current_node(instance); + if (!pcmk__node_available(node, true, false)) { + pe_rsc_trace(rsc, "Not assigning %s to %s early (unavailable)", + instance->id, pe__node_name(node)); + return NULL; + } + + // Check whether node already has optimal number of instances assigned + parent_node = pcmk__top_allowed_node(instance, node); + if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) { + pe_rsc_trace(rsc, + "Not assigning %s to %s early " + "(optimal instances already assigned)", + instance->id, pe__node_name(node)); + return NULL; + } + + return node; +} + +/*! + * \internal + * \brief Assign collective instances to nodes + * + * \param[in,out] collective Clone or bundle resource being assigned + * \param[in,out] instances List of clone instances or bundle containers + * \param[in] max_total Maximum instances to assign in total + * \param[in] max_per_node Maximum instances to assign to any one node + */ +void +pcmk__assign_instances(pe_resource_t *collective, GList *instances, + int max_total, int max_per_node) +{ + // Reuse node count to track number of assigned instances + unsigned int available_nodes = reset_allowed_node_counts(collective); + + int optimal_per_node = 0; + int assigned = 0; + GList *iter = NULL; + pe_resource_t *instance = NULL; + const pe_node_t *current = NULL; + + if (available_nodes > 0) { + optimal_per_node = max_total / available_nodes; + } + if (optimal_per_node < 1) { + optimal_per_node = 1; + } + + pe_rsc_debug(collective, + "Assigning up to %d %s instance%s to up to %u node%s " + "(at most %d per host, %d optimal)", + max_total, collective->id, pcmk__plural_s(max_total), + available_nodes, pcmk__plural_s(available_nodes), + max_per_node, optimal_per_node); + + // Assign as many instances as possible to their current location + for (iter = instances; (iter != NULL) && (assigned < max_total); + iter = iter->next) { + instance = (pe_resource_t *) iter->data; + + current = preferred_node(collective, instance, optimal_per_node); + if ((current != NULL) + && assign_instance(instance, current, max_per_node)) { + pe_rsc_trace(collective, "Assigned %s to current node %s", + instance->id, pe__node_name(current)); + assigned++; + } + } + + pe_rsc_trace(collective, "Assigned %d of %d instance%s to current node", + assigned, max_total, pcmk__plural_s(max_total)); + + for (iter = instances; iter != NULL; iter = iter->next) { + instance = (pe_resource_t *) iter->data; + + if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { + continue; // Already assigned + } + + if (instance->running_on != NULL) { + current = pe__current_node(instance); + if (pcmk__top_allowed_node(instance, current) == NULL) { + const char 
*unmanaged = ""; + + if (!pcmk_is_set(instance->flags, pe_rsc_managed)) { + unmanaged = "Unmanaged resource "; + } + crm_notice("%s%s is running on %s which is no longer allowed", + unmanaged, instance->id, pe__node_name(current)); + } + } + + if (assigned >= max_total) { + pe_rsc_debug(collective, + "Not assigning %s because maximum %d instances " + "already assigned", + instance->id, max_total); + resource_location(instance, NULL, -INFINITY, + "collective_limit_reached", collective->cluster); + + } else if (assign_instance(instance, NULL, max_per_node)) { + assigned++; + } + } + + pe_rsc_debug(collective, "Assigned %d of %d possible instance%s of %s", + assigned, max_total, pcmk__plural_s(max_total), + collective->id); +} + +enum instance_state { + instance_starting = (1 << 0), + instance_stopping = (1 << 1), + + /* This indicates that some instance is restarting. It's not the same as + * instance_starting|instance_stopping, which would indicate that some + * instance is starting, and some instance (not necessarily the same one) is + * stopping. + */ + instance_restarting = (1 << 2), + + instance_active = (1 << 3), + + instance_all = instance_starting|instance_stopping + |instance_restarting|instance_active, +}; + +/*! + * \internal + * \brief Check whether an instance is active, starting, and/or stopping + * + * \param[in] instance Clone instance or bundle replica container + * \param[in,out] state Whether any instance is starting, stopping, etc. + */ +static void +check_instance_state(const pe_resource_t *instance, uint32_t *state) +{ + const GList *iter = NULL; + uint32_t instance_state = 0; // State of just this instance + + // No need to check further if all conditions have already been detected + if (pcmk_all_flags_set(*state, instance_all)) { + return; + } + + // If instance is a collective (a cloned group), check its children instead + if (instance->variant > pe_native) { + for (iter = instance->children; + (iter != NULL) && !pcmk_all_flags_set(*state, instance_all); + iter = iter->next) { + check_instance_state((const pe_resource_t *) iter->data, state); + } + return; + } + + // If we get here, instance is a primitive + + if (instance->running_on != NULL) { + instance_state |= instance_active; + } + + // Check each of the instance's actions for runnable start or stop + for (iter = instance->actions; + (iter != NULL) && !pcmk_all_flags_set(instance_state, + instance_starting + |instance_stopping); + iter = iter->next) { + + const pe_action_t *action = (const pe_action_t *) iter->data; + const bool optional = pcmk_is_set(action->flags, pe_action_optional); + + if (pcmk__str_eq(RSC_START, action->task, pcmk__str_none)) { + if (!optional && pcmk_is_set(action->flags, pe_action_runnable)) { + pe_rsc_trace(instance, "Instance is starting due to %s", + action->uuid); + instance_state |= instance_starting; + } else { + pe_rsc_trace(instance, "%s doesn't affect %s state (%s)", + action->uuid, instance->id, + (optional? "optional" : "unrunnable")); + } + + } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_none)) { + /* Only stop actions can be pseudo-actions for primitives. That + * indicates that the node they are on is being fenced, so the stop + * is implied rather than actually executed. 
+ */ + if (!optional + && pcmk_any_flags_set(action->flags, + pe_action_pseudo|pe_action_runnable)) { + pe_rsc_trace(instance, "Instance is stopping due to %s", + action->uuid); + instance_state |= instance_stopping; + } else { + pe_rsc_trace(instance, "%s doesn't affect %s state (%s)", + action->uuid, instance->id, + (optional? "optional" : "unrunnable")); + } + } + } + + if (pcmk_all_flags_set(instance_state, + instance_starting|instance_stopping)) { + instance_state |= instance_restarting; + } + *state |= instance_state; +} + +/*! + * \internal + * \brief Create actions for collective resource instances + * + * \param[in,out] collective Clone or bundle resource to create actions for + * \param[in,out] instances List of clone instances or bundle containers + */ +void +pcmk__create_instance_actions(pe_resource_t *collective, GList *instances) +{ + uint32_t state = 0; + + pe_action_t *stop = NULL; + pe_action_t *stopped = NULL; + + pe_action_t *start = NULL; + pe_action_t *started = NULL; + + pe_rsc_trace(collective, "Creating collective instance actions for %s", + collective->id); + + // Create actions for each instance appropriate to its variant + for (GList *iter = instances; iter != NULL; iter = iter->next) { + pe_resource_t *instance = (pe_resource_t *) iter->data; + + instance->cmds->create_actions(instance); + check_instance_state(instance, &state); + } + + // Create pseudo-actions for rsc start and started + start = pe__new_rsc_pseudo_action(collective, RSC_START, + !pcmk_is_set(state, instance_starting), + true); + started = pe__new_rsc_pseudo_action(collective, RSC_STARTED, + !pcmk_is_set(state, instance_starting), + false); + started->priority = INFINITY; + if (pcmk_any_flags_set(state, instance_active|instance_starting)) { + pe__set_action_flags(started, pe_action_runnable); + } + + // Create pseudo-actions for rsc stop and stopped + stop = pe__new_rsc_pseudo_action(collective, RSC_STOP, + !pcmk_is_set(state, instance_stopping), + true); + stopped = pe__new_rsc_pseudo_action(collective, RSC_STOPPED, + !pcmk_is_set(state, instance_stopping), + true); + stopped->priority = INFINITY; + if (!pcmk_is_set(state, instance_restarting)) { + pe__set_action_flags(stop, pe_action_migrate_runnable); + } + + if (collective->variant == pe_clone) { + pe__create_clone_notif_pseudo_ops(collective, start, started, stop, + stopped); + } +} + +/*! + * \internal + * \brief Get a list of clone instances or bundle replica containers + * + * \param[in] rsc Clone or bundle resource + * + * \return Clone instances if \p rsc is a clone, or a newly created list of + * \p rsc's replica containers if \p rsc is a bundle + * \note The caller must call free_instance_list() on the result when the list + * is no longer needed. + */ +static inline GList * +get_instance_list(const pe_resource_t *rsc) +{ + if (rsc->variant == pe_container) { + return pe__bundle_containers(rsc); + } else { + return rsc->children; + } +} + +/*! + * \internal + * \brief Free any memory created by get_instance_list() + * + * \param[in] rsc Clone or bundle resource passed to get_instance_list() + * \param[in,out] list Return value of get_instance_list() for \p rsc + */ +static inline void +free_instance_list(const pe_resource_t *rsc, GList *list) +{ + if (list != rsc->children) { + g_list_free(list); + } +} + +/*! 
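+ * Example (illustrative sketch): get_instance_list() and
+ * free_instance_list() are meant to be used as a pair, so the bundle case
+ * (which allocates a new list) and the clone case (which returns
+ * rsc->children directly) can be handled identically. `rsc` is an assumed
+ * clone or bundle resource.
+ */
+#if 0
+GList *instances = get_instance_list(rsc);
+
+for (GList *iter = instances; iter != NULL; iter = iter->next) {
+    pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+    crm_trace("Instance: %s", instance->id);
+}
+free_instance_list(rsc, instances); // Frees the list only if newly created
+#endif
+
+/*!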
+ * \internal + * \brief Check whether an instance is compatible with a role and node + * + * \param[in] instance Clone instance or bundle replica container + * \param[in] node Instance must match this node + * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role + * \param[in] current If true, compare instance's original node and role, + * otherwise compare assigned next node and role + * + * \return true if \p instance is compatible with \p node and \p role, + * otherwise false + */ +bool +pcmk__instance_matches(const pe_resource_t *instance, const pe_node_t *node, + enum rsc_role_e role, bool current) +{ + pe_node_t *instance_node = NULL; + + CRM_CHECK((instance != NULL) && (node != NULL), return false); + + if ((role != RSC_ROLE_UNKNOWN) + && (role != instance->fns->state(instance, current))) { + pe_rsc_trace(instance, + "%s is not a compatible instance (role is not %s)", + instance->id, role2text(role)); + return false; + } + + if (!is_set_recursive(instance, pe_rsc_block, true)) { + // We only want instances that haven't failed + instance_node = instance->fns->location(instance, NULL, current); + } + + if (instance_node == NULL) { + pe_rsc_trace(instance, + "%s is not a compatible instance (not assigned to a node)", + instance->id); + return false; + } + + if (instance_node->details != node->details) { + pe_rsc_trace(instance, + "%s is not a compatible instance (assigned to %s not %s)", + instance->id, pe__node_name(instance_node), + pe__node_name(node)); + return false; + } + + return true; +} + +/*! + * \internal + * \brief Find an instance that matches a given resource by node and role + * + * \param[in] match_rsc Resource that instance must match (for logging only) + * \param[in] rsc Clone or bundle resource to check for matching instance + * \param[in] node Instance must match this node + * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role + * \param[in] current If true, compare instance's original node and role, + * otherwise compare assigned next node and role + * + * \return \p rsc instance matching \p node and \p role if any, otherwise NULL + */ +static pe_resource_t * +find_compatible_instance_on_node(const pe_resource_t *match_rsc, + const pe_resource_t *rsc, + const pe_node_t *node, enum rsc_role_e role, + bool current) +{ + GList *instances = NULL; + + instances = get_instance_list(rsc); + for (GList *iter = instances; iter != NULL; iter = iter->next) { + pe_resource_t *instance = (pe_resource_t *) iter->data; + + if (pcmk__instance_matches(instance, node, role, current)) { + pe_rsc_trace(match_rsc, "Found %s %s instance %s compatible with %s on %s", + role == RSC_ROLE_UNKNOWN? "matching" : role2text(role), + rsc->id, instance->id, match_rsc->id, + pe__node_name(node)); + free_instance_list(rsc, instances); // Only frees list, not contents + return instance; + } + } + free_instance_list(rsc, instances); + + pe_rsc_trace(match_rsc, "No %s %s instance found compatible with %s on %s", + ((role == RSC_ROLE_UNKNOWN)? "matching" : role2text(role)), + rsc->id, match_rsc->id, pe__node_name(node)); + return NULL; +} + +/*! 
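+ * Example (illustrative sketch): pcmk__instance_matches() checks role and
+ * node together. Assuming hypothetical `instance` and `node` variables,
+ * this asks whether the instance is assigned to the node in the unpromoted
+ * role (current=false compares the planned assignment, not the original
+ * state).
+ */
+#if 0
+if (pcmk__instance_matches(instance, node, RSC_ROLE_UNPROMOTED, false)) {
+    /* instance will be active on node in the unpromoted role */
+}
+#endif
+
+/*!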
+ * \internal + * \brief Find a clone instance or bundle container compatible with a resource + * + * \param[in] match_rsc Resource that instance must match + * \param[in] rsc Clone or bundle resource to check for matching instance + * \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role + * \param[in] current If true, compare instance's original node and role, + * otherwise compare assigned next node and role + * + * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc + * if any, otherwise NULL + */ +pe_resource_t * +pcmk__find_compatible_instance(const pe_resource_t *match_rsc, + const pe_resource_t *rsc, enum rsc_role_e role, + bool current) +{ + pe_resource_t *instance = NULL; + GList *nodes = NULL; + const pe_node_t *node = match_rsc->fns->location(match_rsc, NULL, current); + + // If match_rsc has a node, check only that node + if (node != NULL) { + return find_compatible_instance_on_node(match_rsc, rsc, node, role, + current); + } + + // Otherwise check for an instance matching any of match_rsc's allowed nodes + nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes), + NULL); + for (GList *iter = nodes; (iter != NULL) && (instance == NULL); + iter = iter->next) { + instance = find_compatible_instance_on_node(match_rsc, rsc, + (pe_node_t *) iter->data, + role, current); + } + + if (instance == NULL) { + pe_rsc_debug(rsc, "No %s instance found compatible with %s", + rsc->id, match_rsc->id); + } + g_list_free(nodes); + return instance; +} + +/*! + * \internal + * \brief Unassign an instance if mandatory ordering has no interleave match + * + * \param[in] first 'First' action in an ordering + * \param[in] then 'Then' action in an ordering + * \param[in,out] then_instance 'Then' instance that has no interleave match + * \param[in] type Group of enum pe_ordering flags to apply + * \param[in] current If true, "then" action is stopped or demoted + * + * \return true if \p then_instance was unassigned, otherwise false + */ +static bool +unassign_if_mandatory(const pe_action_t *first, const pe_action_t *then, + pe_resource_t *then_instance, uint32_t type, bool current) +{ + // Allow "then" instance to go down even without an interleave match + if (current) { + pe_rsc_trace(then->rsc, + "%s has no instance to order before stopping " + "or demoting %s", + first->rsc->id, then_instance->id); + + /* If the "first" action must be runnable, but there is no "first" + * instance, the "then" instance must not be allowed to come up. + */ + } else if (pcmk_any_flags_set(type, pe_order_runnable_left + |pe_order_implies_then)) { + pe_rsc_info(then->rsc, + "Inhibiting %s from being active " + "because there is no %s instance to interleave", + then_instance->id, first->rsc->id); + return pcmk__assign_resource(then_instance, NULL, true); + } + return false; +} + +/*! 
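+ * Example (illustrative sketch): interleaving starts from
+ * pcmk__find_compatible_instance(). Given a hypothetical resource `my_rsc`
+ * and a clone or bundle `other`, this finds the instance of `other` that
+ * shares a node with `my_rsc`, in any role, based on planned assignments.
+ */
+#if 0
+pe_resource_t *peer = pcmk__find_compatible_instance(my_rsc, other,
+                                                     RSC_ROLE_UNKNOWN,
+                                                     false);
+
+if (peer == NULL) {
+    /* Nothing to interleave with on my_rsc's node(s) */
+}
+#endif
+
+/*!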
+ * \internal + * \brief Find first matching action for a clone instance or bundle container + * + * \param[in] action Action in an interleaved ordering + * \param[in] instance Clone instance or bundle container being interleaved + * \param[in] action_name Action to look for + * \param[in] node If not NULL, require action to be on this node + * \param[in] for_first If true, \p instance is the 'first' resource in the + * ordering, otherwise it is the 'then' resource + * + * \return First action for \p instance (or in some cases if \p instance is a + * bundle container, its containerized resource) that matches + * \p action_name and \p node if any, otherwise NULL + */ +static pe_action_t * +find_instance_action(const pe_action_t *action, const pe_resource_t *instance, + const char *action_name, const pe_node_t *node, + bool for_first) +{ + const pe_resource_t *rsc = NULL; + pe_action_t *matching_action = NULL; + + /* If instance is a bundle container, sometimes we should interleave the + * action for the container itself, and sometimes for the containerized + * resource. + * + * For example, given "start bundle A then bundle B", B likely requires the + * service inside A's container to be active, rather than just the + * container, so we should interleave the action for A's containerized + * resource. On the other hand, it's possible B's container itself requires + * something from A, so we should interleave the action for B's container. + * + * Essentially, for 'first', we should use the containerized resource for + * everything except stop, and for 'then', we should use the container for + * everything except promote and demote (which can only be performed on the + * containerized resource). + */ + if ((for_first && !pcmk__str_any_of(action->task, CRMD_ACTION_STOP, + CRMD_ACTION_STOPPED, NULL)) + + || (!for_first && pcmk__str_any_of(action->task, CRMD_ACTION_PROMOTE, + CRMD_ACTION_PROMOTED, + CRMD_ACTION_DEMOTE, + CRMD_ACTION_DEMOTED, NULL))) { + + rsc = pcmk__get_rsc_in_container(instance); + } + if (rsc == NULL) { + rsc = instance; // No containerized resource, use instance itself + } else { + node = NULL; // Containerized actions are on bundle-created guest + } + + matching_action = find_first_action(rsc->actions, NULL, action_name, node); + if (matching_action != NULL) { + return matching_action; + } + + if (pcmk_is_set(instance->flags, pe_rsc_orphan) + || pcmk__str_any_of(action_name, RSC_STOP, RSC_DEMOTE, NULL)) { + crm_trace("No %s action found for %s%s", + action_name, + pcmk_is_set(instance->flags, pe_rsc_orphan)? "orphan " : "", + instance->id); + } else { + crm_err("No %s action found for %s to interleave (bug?)", + action_name, instance->id); + } + return NULL; +} + +/*! + * \internal + * \brief Get the original action name of a bundle or clone action + * + * Given an action for a bundle or clone, get the original action name, + * mapping notify to the action being notified, and if the instances are + * primitives, mapping completion actions to the action that was completed + * (for example, stopped to stop). 
+ * + * \param[in] action Clone or bundle action to check + * + * \return Original action name for \p action + */ +static const char * +orig_action_name(const pe_action_t *action) +{ + const pe_resource_t *instance = action->rsc->children->data; // Any instance + char *action_type = NULL; + const char *action_name = action->task; + enum action_tasks orig_task = no_action; + + if (pcmk__strcase_any_of(action->task, CRMD_ACTION_NOTIFY, + CRMD_ACTION_NOTIFIED, NULL)) { + // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL + CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL), + return task2text(no_action)); + action_name = strstr(action_type, "_notify_"); + CRM_CHECK(action_name != NULL, return task2text(no_action)); + action_name += strlen("_notify_"); + } + orig_task = get_complex_task(instance, action_name); + free(action_type); + return task2text(orig_task); +} + +/*! + * \internal + * \brief Update two interleaved actions according to an ordering between them + * + * Given information about an ordering of two interleaved actions, update the + * actions' flags (and runnable_before members if appropriate) as appropriate + * for the ordering. Effects may cascade to other orderings involving the + * actions as well. + * + * \param[in,out] first 'First' action in an ordering + * \param[in,out] then 'Then' action in an ordering + * \param[in] node If not NULL, limit scope of ordering to this node + * \param[in] filter Action flags to limit scope of certain updates (may + * include pe_action_optional to affect only mandatory + * actions, and pe_action_runnable to affect only + * runnable actions) + * \param[in] type Group of enum pe_ordering flags to apply + * + * \return Group of enum pcmk__updated flags indicating what was updated + */ +static uint32_t +update_interleaved_actions(pe_action_t *first, pe_action_t *then, + const pe_node_t *node, uint32_t filter, + uint32_t type) +{ + GList *instances = NULL; + uint32_t changed = pcmk__updated_none; + const char *orig_first_task = orig_action_name(first); + + // Stops and demotes must be interleaved with instance on current node + bool current = pcmk__ends_with(first->uuid, "_" CRMD_ACTION_STOPPED "_0") + || pcmk__ends_with(first->uuid, + "_" CRMD_ACTION_DEMOTED "_0"); + + // Update the specified actions for each "then" instance individually + instances = get_instance_list(then->rsc); + for (GList *iter = instances; iter != NULL; iter = iter->next) { + pe_resource_t *first_instance = NULL; + pe_resource_t *then_instance = iter->data; + + pe_action_t *first_action = NULL; + pe_action_t *then_action = NULL; + + // Find a "first" instance to interleave with this "then" instance + first_instance = pcmk__find_compatible_instance(then_instance, + first->rsc, + RSC_ROLE_UNKNOWN, + current); + + if (first_instance == NULL) { // No instance can be interleaved + if (unassign_if_mandatory(first, then, then_instance, type, + current)) { + pcmk__set_updated_flags(changed, first, pcmk__updated_then); + } + continue; + } + + first_action = find_instance_action(first, first_instance, + orig_first_task, node, true); + if (first_action == NULL) { + continue; + } + + then_action = find_instance_action(then, then_instance, then->task, + node, false); + if (then_action == NULL) { + continue; + } + + if (order_actions(first_action, then_action, type)) { + pcmk__set_updated_flags(changed, first, + pcmk__updated_first|pcmk__updated_then); + } + + changed |= then_instance->cmds->update_ordered_actions( + first_action, then_action, node, + 
first_instance->cmds->action_flags(first_action, node), filter, + type, then->rsc->cluster); + } + free_instance_list(then->rsc, instances); + return changed; +} + +/*! + * \internal + * \brief Check whether two actions in an ordering can be interleaved + * + * \param[in] first 'First' action in the ordering + * \param[in] then 'Then' action in the ordering + * + * \return true if \p first and \p then can be interleaved, otherwise false + */ +static bool +can_interleave_actions(const pe_action_t *first, const pe_action_t *then) +{ + bool interleave = false; + pe_resource_t *rsc = NULL; + + if ((first->rsc == NULL) || (then->rsc == NULL)) { + crm_trace("Not interleaving %s with %s: not resource actions", + first->uuid, then->uuid); + return false; + } + + if (first->rsc == then->rsc) { + crm_trace("Not interleaving %s with %s: same resource", + first->uuid, then->uuid); + return false; + } + + if ((first->rsc->variant < pe_clone) || (then->rsc->variant < pe_clone)) { + crm_trace("Not interleaving %s with %s: not clones or bundles", + first->uuid, then->uuid); + return false; + } + + if (pcmk__ends_with(then->uuid, "_stop_0") + || pcmk__ends_with(then->uuid, "_demote_0")) { + rsc = first->rsc; + } else { + rsc = then->rsc; + } + + interleave = crm_is_true(g_hash_table_lookup(rsc->meta, + XML_RSC_ATTR_INTERLEAVE)); + pe_rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)", + first->uuid, then->uuid, (interleave? "" : "not "), rsc->id); + return interleave; +} + +/*! + * \internal + * \brief Update non-interleaved instance actions according to an ordering + * + * Given information about an ordering of two non-interleaved actions, update + * the actions' flags (and runnable_before members if appropriate) as + * appropriate for the ordering. Effects may cascade to other orderings + * involving the actions as well. 
+ * + * \param[in,out] instance Clone instance or bundle container + * \param[in,out] first "First" action in ordering + * \param[in] then "Then" action in ordering (for \p instance's parent) + * \param[in] node If not NULL, limit scope of ordering to this node + * \param[in] flags Action flags for \p first for ordering purposes + * \param[in] filter Action flags to limit scope of certain updates (may + * include pe_action_optional to affect only mandatory + * actions, and pe_action_runnable to affect only + * runnable actions) + * \param[in] type Group of enum pe_ordering flags to apply + * + * \return Group of enum pcmk__updated flags indicating what was updated + */ +static uint32_t +update_noninterleaved_actions(pe_resource_t *instance, pe_action_t *first, + const pe_action_t *then, const pe_node_t *node, + uint32_t flags, uint32_t filter, uint32_t type) +{ + pe_action_t *instance_action = NULL; + uint32_t instance_flags = 0; + uint32_t changed = pcmk__updated_none; + + // Check whether instance has an equivalent of "then" action + instance_action = find_first_action(instance->actions, NULL, then->task, + node); + if (instance_action == NULL) { + return changed; + } + + // Check whether action is runnable + instance_flags = instance->cmds->action_flags(instance_action, node); + if (!pcmk_is_set(instance_flags, pe_action_runnable)) { + return changed; + } + + // If so, update actions for the instance + changed = instance->cmds->update_ordered_actions(first, instance_action, + node, flags, filter, type, + instance->cluster); + + // Propagate any changes to later actions + if (pcmk_is_set(changed, pcmk__updated_then)) { + for (GList *after_iter = instance_action->actions_after; + after_iter != NULL; after_iter = after_iter->next) { + pe_action_wrapper_t *after = after_iter->data; + + pcmk__update_action_for_orderings(after->action, instance->cluster); + } + } + + return changed; +} + +/*! + * \internal + * \brief Update two actions according to an ordering between them + * + * Given information about an ordering of two clone or bundle actions, update + * the actions' flags (and runnable_before members if appropriate) as + * appropriate for the ordering. Effects may cascade to other orderings + * involving the actions as well. 
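+ *
+ * This is the clone and bundle implementation of this method: orderings
+ * between interleavable resources are delegated to
+ * update_interleaved_actions(), while any other ordering updates the
+ * collective resource itself and then each of its instances individually.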
+ * + * \param[in,out] first 'First' action in an ordering + * \param[in,out] then 'Then' action in an ordering + * \param[in] node If not NULL, limit scope of ordering to this node + * (only used when interleaving instances) + * \param[in] flags Action flags for \p first for ordering purposes + * \param[in] filter Action flags to limit scope of certain updates (may + * include pe_action_optional to affect only mandatory + * actions, and pe_action_runnable to affect only + * runnable actions) + * \param[in] type Group of enum pe_ordering flags to apply + * \param[in,out] data_set Cluster working set + * + * \return Group of enum pcmk__updated flags indicating what was updated + */ +uint32_t +pcmk__instance_update_ordered_actions(pe_action_t *first, pe_action_t *then, + const pe_node_t *node, uint32_t flags, + uint32_t filter, uint32_t type, + pe_working_set_t *data_set) +{ + if (then->rsc == NULL) { + return pcmk__updated_none; + + } else if (can_interleave_actions(first, then)) { + return update_interleaved_actions(first, then, node, filter, type); + + } else { + uint32_t changed = pcmk__updated_none; + GList *instances = get_instance_list(then->rsc); + + // Update actions for the clone or bundle resource itself + changed |= pcmk__update_ordered_actions(first, then, node, flags, + filter, type, data_set); + + // Update the 'then' clone instances or bundle containers individually + for (GList *iter = instances; iter != NULL; iter = iter->next) { + pe_resource_t *instance = iter->data; + + changed |= update_noninterleaved_actions(instance, first, then, + node, flags, filter, type); + } + free_instance_list(then->rsc, instances); + return changed; + } +} + +#define pe__clear_action_summary_flags(flags, action, flag) do { \ + flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ + "Action summary", action->rsc->id, \ + flags, flag, #flag); \ + } while (0) + +/*! 
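+ * A collective (clone or bundle) action summarizes its instances' actions:
+ * it remains optional only while every instance action is optional, and it
+ * is runnable as long as at least one instance action is runnable.
+ *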
+ * \internal + * \brief Return action flags for a given clone or bundle action + * + * \param[in,out] action Action for a clone or bundle + * \param[in] instances Clone instances or bundle containers + * \param[in] node If not NULL, limit effects to this node + * + * \return Flags appropriate to \p action on \p node + */ +enum pe_action_flags +pcmk__collective_action_flags(pe_action_t *action, const GList *instances, + const pe_node_t *node) +{ + bool any_runnable = false; + enum pe_action_flags flags; + const char *action_name = orig_action_name(action); + + // Set original assumptions (optional and runnable may be cleared below) + flags = pe_action_optional|pe_action_runnable|pe_action_pseudo; + + for (const GList *iter = instances; iter != NULL; iter = iter->next) { + const pe_resource_t *instance = iter->data; + const pe_node_t *instance_node = NULL; + pe_action_t *instance_action = NULL; + enum pe_action_flags instance_flags; + + // Node is relevant only to primitive instances + if (instance->variant == pe_native) { + instance_node = node; + } + + instance_action = find_first_action(instance->actions, NULL, + action_name, instance_node); + if (instance_action == NULL) { + pe_rsc_trace(action->rsc, "%s has no %s action on %s", + instance->id, action_name, pe__node_name(node)); + continue; + } + + pe_rsc_trace(action->rsc, "%s has %s for %s on %s", + instance->id, instance_action->uuid, action_name, + pe__node_name(node)); + + instance_flags = instance->cmds->action_flags(instance_action, node); + + // If any instance action is mandatory, so is the collective action + if (pcmk_is_set(flags, pe_action_optional) + && !pcmk_is_set(instance_flags, pe_action_optional)) { + pe_rsc_trace(instance, "%s is mandatory because %s is", + action->uuid, instance_action->uuid); + pe__clear_action_summary_flags(flags, action, pe_action_optional); + pe__clear_action_flags(action, pe_action_optional); + } + + // If any instance action is runnable, so is the collective action + if (pcmk_is_set(instance_flags, pe_action_runnable)) { + any_runnable = true; + } + } + + if (!any_runnable) { + pe_rsc_trace(action->rsc, + "%s is not runnable because no instance can run %s", + action->uuid, action_name); + pe__clear_action_summary_flags(flags, action, pe_action_runnable); + if (node == NULL) { + pe__clear_action_flags(action, pe_action_runnable); + } + } + + return flags; +} + +/*! 
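+ * Roughly: when the collective resource can run on every node, most of its
+ * colocations cannot affect an instance's placement, so only those that
+ * still matter (negative scores, and mandatory "with this" colocations)
+ * are propagated.
+ *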
+ * \internal + * \brief Add a collective resource's colocations to a list for an instance + * + * \param[in,out] list Colocation list to add to + * \param[in] instance Clone or bundle instance or instance group member + * \param[in] collective Clone or bundle resource with colocations to add + * \param[in] with_this If true, add collective's "with this" colocations, + * otherwise add its "this with" colocations + */ +void +pcmk__add_collective_constraints(GList **list, const pe_resource_t *instance, + const pe_resource_t *collective, + bool with_this) +{ + const GList *colocations = NULL; + bool everywhere = false; + + CRM_CHECK((list != NULL) && (instance != NULL), return); + + if (collective == NULL) { + return; + } + switch (collective->variant) { + case pe_clone: + case pe_container: + break; + default: + return; + } + + everywhere = can_run_everywhere(collective); + + if (with_this) { + colocations = collective->rsc_cons_lhs; + } else { + colocations = collective->rsc_cons; + } + + for (const GList *iter = colocations; iter != NULL; iter = iter->next) { + const pcmk__colocation_t *colocation = iter->data; + + if (with_this + && !pcmk__colocation_has_influence(colocation, instance)) { + continue; + } + if (!everywhere || (colocation->score < 0) + || (!with_this && (colocation->score == INFINITY))) { + + if (with_this) { + pcmk__add_with_this(list, colocation); + } else { + pcmk__add_this_with(list, colocation); + } + } + } +} diff --git a/lib/pacemaker/pcmk_sched_location.c b/lib/pacemaker/pcmk_sched_location.c new file mode 100644 index 0000000..b4ce4ff --- /dev/null +++ b/lib/pacemaker/pcmk_sched_location.c @@ -0,0 +1,678 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdbool.h> +#include <glib.h> + +#include <crm/crm.h> +#include <crm/pengine/status.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +static int +get_node_score(const char *rule, const char *score, bool raw, + pe_node_t *node, pe_resource_t *rsc) +{ + int score_f = 0; + + if (score == NULL) { + pe_err("Rule %s: no score specified. 
Assuming 0.", rule); + + } else if (raw) { + score_f = char2score(score); + + } else { + const char *attr_score = pe_node_attribute_calculated(node, score, rsc); + + if (attr_score == NULL) { + crm_debug("Rule %s: %s did not have a value for %s", + rule, pe__node_name(node), score); + score_f = -INFINITY; + + } else { + crm_debug("Rule %s: %s had value %s for %s", + rule, pe__node_name(node), attr_score, score); + score_f = char2score(attr_score); + } + } + return score_f; +} + +static pe__location_t * +generate_location_rule(pe_resource_t *rsc, xmlNode *rule_xml, + const char *discovery, crm_time_t *next_change, + pe_working_set_t *data_set, + pe_re_match_data_t *re_match_data) +{ + const char *rule_id = NULL; + const char *score = NULL; + const char *boolean = NULL; + const char *role = NULL; + + GList *gIter = NULL; + GList *match_L = NULL; + + bool do_and = true; + bool accept = true; + bool raw_score = true; + bool score_allocated = false; + + pe__location_t *location_rule = NULL; + + rule_xml = expand_idref(rule_xml, data_set->input); + if (rule_xml == NULL) { + return NULL; + } + + rule_id = crm_element_value(rule_xml, XML_ATTR_ID); + boolean = crm_element_value(rule_xml, XML_RULE_ATTR_BOOLEAN_OP); + role = crm_element_value(rule_xml, XML_RULE_ATTR_ROLE); + + crm_trace("Processing rule: %s", rule_id); + + if ((role != NULL) && (text2role(role) == RSC_ROLE_UNKNOWN)) { + pe_err("Bad role specified for %s: %s", rule_id, role); + return NULL; + } + + score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE); + if (score == NULL) { + score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE_ATTRIBUTE); + if (score != NULL) { + raw_score = false; + } + } + if (pcmk__str_eq(boolean, "or", pcmk__str_casei)) { + do_and = false; + } + + location_rule = pcmk__new_location(rule_id, rsc, 0, discovery, NULL, + data_set); + + if (location_rule == NULL) { + return NULL; + } + + if ((re_match_data != NULL) && (re_match_data->nregs > 0) + && (re_match_data->pmatch[0].rm_so != -1) && !raw_score) { + + char *result = pe_expand_re_matches(score, re_match_data); + + if (result != NULL) { + score = result; + score_allocated = true; + } + } + + if (role != NULL) { + crm_trace("Setting role filter: %s", role); + location_rule->role_filter = text2role(role); + if (location_rule->role_filter == RSC_ROLE_UNPROMOTED) { + /* Any promotable clone cannot be promoted without being in the + * unpromoted role first. Ergo, any constraint for the unpromoted + * role applies to every role. + */ + location_rule->role_filter = RSC_ROLE_UNKNOWN; + } + } + if (do_and) { + GList *gIter = NULL; + + match_L = pcmk__copy_node_list(data_set->nodes, true); + for (gIter = match_L; gIter != NULL; gIter = gIter->next) { + pe_node_t *node = (pe_node_t *) gIter->data; + + node->weight = get_node_score(rule_id, score, raw_score, node, rsc); + } + } + + for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { + int score_f = 0; + pe_node_t *node = (pe_node_t *) gIter->data; + pe_match_data_t match_data = { + .re = re_match_data, + .params = pe_rsc_params(rsc, node, data_set), + .meta = rsc->meta, + }; + + accept = pe_test_rule(rule_xml, node->details->attrs, RSC_ROLE_UNKNOWN, + data_set->now, next_change, &match_data); + + crm_trace("Rule %s %s on %s", ID(rule_xml), accept? 
"passed" : "failed", + pe__node_name(node)); + + score_f = get_node_score(rule_id, score, raw_score, node, rsc); + + if (accept) { + pe_node_t *local = pe_find_node_id(match_L, node->details->id); + + if ((local == NULL) && do_and) { + continue; + + } else if (local == NULL) { + local = pe__copy_node(node); + match_L = g_list_append(match_L, local); + } + + if (!do_and) { + local->weight = pcmk__add_scores(local->weight, score_f); + } + crm_trace("%s has score %s after %s", pe__node_name(node), + pcmk_readable_score(local->weight), rule_id); + + } else if (do_and && !accept) { + // Remove it + pe_node_t *delete = pe_find_node_id(match_L, node->details->id); + + if (delete != NULL) { + match_L = g_list_remove(match_L, delete); + crm_trace("%s did not match", pe__node_name(node)); + } + free(delete); + } + } + + if (score_allocated) { + free((char *)score); + } + + location_rule->node_list_rh = match_L; + if (location_rule->node_list_rh == NULL) { + crm_trace("No matching nodes for rule %s", rule_id); + return NULL; + } + + crm_trace("%s: %d nodes matched", + rule_id, g_list_length(location_rule->node_list_rh)); + return location_rule; +} + +static void +unpack_rsc_location(xmlNode *xml_obj, pe_resource_t *rsc, const char *role, + const char *score, pe_working_set_t *data_set, + pe_re_match_data_t *re_match_data) +{ + pe__location_t *location = NULL; + const char *rsc_id = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE); + const char *id = crm_element_value(xml_obj, XML_ATTR_ID); + const char *node = crm_element_value(xml_obj, XML_CIB_TAG_NODE); + const char *discovery = crm_element_value(xml_obj, XML_LOCATION_ATTR_DISCOVERY); + + if (rsc == NULL) { + pcmk__config_warn("Ignoring constraint '%s' because resource '%s' " + "does not exist", id, rsc_id); + return; + } + + if (score == NULL) { + score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); + } + + if ((node != NULL) && (score != NULL)) { + int score_i = char2score(score); + pe_node_t *match = pe_find_node(data_set->nodes, node); + + if (!match) { + return; + } + location = pcmk__new_location(id, rsc, score_i, discovery, match, + data_set); + + } else { + bool empty = true; + crm_time_t *next_change = crm_time_new_undefined(); + + /* This loop is logically parallel to pe_evaluate_rules(), except + * instead of checking whether any rule is active, we set up location + * constraints for each active rule. + */ + for (xmlNode *rule_xml = first_named_child(xml_obj, XML_TAG_RULE); + rule_xml != NULL; rule_xml = crm_next_same_xml(rule_xml)) { + empty = false; + crm_trace("Unpacking %s/%s", id, ID(rule_xml)); + generate_location_rule(rsc, rule_xml, discovery, next_change, + data_set, re_match_data); + } + + if (empty) { + pcmk__config_err("Ignoring constraint '%s' because it contains " + "no rules", id); + } + + /* If there is a point in the future when the evaluation of a rule will + * change, make sure the scheduler is re-run by that time. 
+ */ + if (crm_time_is_defined(next_change)) { + time_t t = (time_t) crm_time_get_seconds_since_epoch(next_change); + + pe__update_recheck_time(t, data_set); + } + crm_time_free(next_change); + return; + } + + if (role == NULL) { + role = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); + } + + if ((location != NULL) && (role != NULL)) { + if (text2role(role) == RSC_ROLE_UNKNOWN) { + pe_err("Invalid constraint %s: Bad role %s", id, role); + return; + + } else { + enum rsc_role_e r = text2role(role); + switch(r) { + case RSC_ROLE_UNKNOWN: + case RSC_ROLE_STARTED: + case RSC_ROLE_UNPROMOTED: + /* Applies to all */ + location->role_filter = RSC_ROLE_UNKNOWN; + break; + default: + location->role_filter = r; + break; + } + } + } +} + +static void +unpack_simple_location(xmlNode *xml_obj, pe_working_set_t *data_set) +{ + const char *id = crm_element_value(xml_obj, XML_ATTR_ID); + const char *value = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE); + + if (value) { + pe_resource_t *rsc; + + rsc = pcmk__find_constraint_resource(data_set->resources, value); + unpack_rsc_location(xml_obj, rsc, NULL, NULL, data_set, NULL); + } + + value = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE_PATTERN); + if (value) { + regex_t *r_patt = calloc(1, sizeof(regex_t)); + bool invert = false; + GList *rIter = NULL; + + if (value[0] == '!') { + value++; + invert = true; + } + + if (regcomp(r_patt, value, REG_EXTENDED) != 0) { + pcmk__config_err("Ignoring constraint '%s' because " + XML_LOC_ATTR_SOURCE_PATTERN + " has invalid value '%s'", id, value); + free(r_patt); + return; + } + + for (rIter = data_set->resources; rIter; rIter = rIter->next) { + pe_resource_t *r = rIter->data; + int nregs = 0; + regmatch_t *pmatch = NULL; + int status; + + if(r_patt->re_nsub > 0) { + nregs = r_patt->re_nsub + 1; + } else { + nregs = 1; + } + pmatch = calloc(nregs, sizeof(regmatch_t)); + + status = regexec(r_patt, r->id, nregs, pmatch, 0); + + if (!invert && (status == 0)) { + pe_re_match_data_t re_match_data = { + .string = r->id, + .nregs = nregs, + .pmatch = pmatch + }; + + crm_debug("'%s' matched '%s' for %s", r->id, value, id); + unpack_rsc_location(xml_obj, r, NULL, NULL, data_set, + &re_match_data); + + } else if (invert && (status != 0)) { + crm_debug("'%s' is an inverted match of '%s' for %s", + r->id, value, id); + unpack_rsc_location(xml_obj, r, NULL, NULL, data_set, NULL); + + } else { + crm_trace("'%s' does not match '%s' for %s", r->id, value, id); + } + + free(pmatch); + } + + regfree(r_patt); + free(r_patt); + } +} + +// \return Standard Pacemaker return code +static int +unpack_location_tags(xmlNode *xml_obj, xmlNode **expanded_xml, + pe_working_set_t *data_set) +{ + const char *id = NULL; + const char *rsc_id = NULL; + const char *state = NULL; + pe_resource_t *rsc = NULL; + pe_tag_t *tag = NULL; + xmlNode *rsc_set = NULL; + + *expanded_xml = NULL; + + CRM_CHECK(xml_obj != NULL, return EINVAL); + + id = ID(xml_obj); + if (id == NULL) { + pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, + crm_element_name(xml_obj)); + return pcmk_rc_unpack_error; + } + + // Check whether there are any resource sets with template or tag references + *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set); + if (*expanded_xml != NULL) { + crm_log_xml_trace(*expanded_xml, "Expanded rsc_location"); + return pcmk_rc_ok; + } + + rsc_id = crm_element_value(xml_obj, XML_LOC_ATTR_SOURCE); + if (rsc_id == NULL) { + return pcmk_rc_ok; + } + + if (!pcmk__valid_resource_or_tag(data_set, rsc_id, &rsc, &tag)) { + 
pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " + "valid resource or tag", id, rsc_id); + return pcmk_rc_unpack_error; + + } else if (rsc != NULL) { + // No template is referenced + return pcmk_rc_ok; + } + + state = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); + + *expanded_xml = copy_xml(xml_obj); + + // Convert template/tag reference in "rsc" into resource_set under constraint + if (!pcmk__tag_to_set(*expanded_xml, &rsc_set, XML_LOC_ATTR_SOURCE, + false, data_set)) { + free_xml(*expanded_xml); + *expanded_xml = NULL; + return pcmk_rc_unpack_error; + } + + if (rsc_set != NULL) { + if (state != NULL) { + // Move "rsc-role" into converted resource_set as "role" attribute + crm_xml_add(rsc_set, "role", state); + xml_remove_prop(*expanded_xml, XML_RULE_ATTR_ROLE); + } + crm_log_xml_trace(*expanded_xml, "Expanded rsc_location"); + + } else { + // No sets + free_xml(*expanded_xml); + *expanded_xml = NULL; + } + + return pcmk_rc_ok; +} + +// \return Standard Pacemaker return code +static int +unpack_location_set(xmlNode *location, xmlNode *set, pe_working_set_t *data_set) +{ + xmlNode *xml_rsc = NULL; + pe_resource_t *resource = NULL; + const char *set_id; + const char *role; + const char *local_score; + + CRM_CHECK(set != NULL, return EINVAL); + + set_id = ID(set); + if (set_id == NULL) { + pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_SET " without " + XML_ATTR_ID " in constraint '%s'", + pcmk__s(ID(location), "(missing ID)")); + return pcmk_rc_unpack_error; + } + + role = crm_element_value(set, "role"); + local_score = crm_element_value(set, XML_RULE_ATTR_SCORE); + + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + resource = pcmk__find_constraint_resource(data_set->resources, + ID(xml_rsc)); + if (resource == NULL) { + pcmk__config_err("%s: No resource found for %s", + set_id, ID(xml_rsc)); + return pcmk_rc_unpack_error; + } + + unpack_rsc_location(location, resource, role, local_score, data_set, + NULL); + } + + return pcmk_rc_ok; +} + +void +pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set) +{ + xmlNode *set = NULL; + bool any_sets = false; + + xmlNode *orig_xml = NULL; + xmlNode *expanded_xml = NULL; + + if (unpack_location_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) { + return; + } + + if (expanded_xml) { + orig_xml = xml_obj; + xml_obj = expanded_xml; + } + + for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; + set = crm_next_same_xml(set)) { + + any_sets = true; + set = expand_idref(set, data_set->input); + if ((set == NULL) // Configuration error, message already logged + || (unpack_location_set(xml_obj, set, data_set) != pcmk_rc_ok)) { + + if (expanded_xml) { + free_xml(expanded_xml); + } + return; + } + } + + if (expanded_xml) { + free_xml(expanded_xml); + xml_obj = orig_xml; + } + + if (!any_sets) { + unpack_simple_location(xml_obj, data_set); + } +} + +/*! + * \internal + * \brief Add a new location constraint to a cluster working set + * + * \param[in] id XML ID of location constraint + * \param[in,out] rsc Resource in location constraint + * \param[in] node_weight Constraint score + * \param[in] discover_mode Resource discovery option for constraint + * \param[in] node Node in constraint (or NULL if rule-based) + * \param[in,out] data_set Cluster working set to add constraint to + * + * \return Newly allocated location constraint + * \note The result will be added to \p data_set and should not be freed + * separately. 
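+ *
+ * A minimal usage sketch (assuming \p rsc and \p node were looked up
+ * elsewhere; the ID "ban-rsc1-node1" is hypothetical, and the -INFINITY
+ * score makes the constraint a ban):
+ *
+ *     pcmk__new_location("ban-rsc1-node1", rsc, -INFINITY, NULL, node,
+ *                        data_set);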
+ */ +pe__location_t * +pcmk__new_location(const char *id, pe_resource_t *rsc, + int node_weight, const char *discover_mode, + pe_node_t *node, pe_working_set_t *data_set) +{ + pe__location_t *new_con = NULL; + + if (id == NULL) { + pe_err("Invalid constraint: no ID specified"); + return NULL; + + } else if (rsc == NULL) { + pe_err("Invalid constraint %s: no resource specified", id); + return NULL; + + } else if (node == NULL) { + CRM_CHECK(node_weight == 0, return NULL); + } + + new_con = calloc(1, sizeof(pe__location_t)); + if (new_con != NULL) { + new_con->id = strdup(id); + new_con->rsc_lh = rsc; + new_con->node_list_rh = NULL; + new_con->role_filter = RSC_ROLE_UNKNOWN; + + if (pcmk__str_eq(discover_mode, "always", + pcmk__str_null_matches|pcmk__str_casei)) { + new_con->discover_mode = pe_discover_always; + + } else if (pcmk__str_eq(discover_mode, "never", pcmk__str_casei)) { + new_con->discover_mode = pe_discover_never; + + } else if (pcmk__str_eq(discover_mode, "exclusive", pcmk__str_casei)) { + new_con->discover_mode = pe_discover_exclusive; + rsc->exclusive_discover = TRUE; + + } else { + pe_err("Invalid " XML_LOCATION_ATTR_DISCOVERY " value %s " + "in location constraint", discover_mode); + } + + if (node != NULL) { + pe_node_t *copy = pe__copy_node(node); + + copy->weight = node_weight; + new_con->node_list_rh = g_list_prepend(NULL, copy); + } + + data_set->placement_constraints = g_list_prepend(data_set->placement_constraints, + new_con); + rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con); + } + + return new_con; +} + +/*! + * \internal + * \brief Apply all location constraints + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__apply_locations(pe_working_set_t *data_set) +{ + for (GList *iter = data_set->placement_constraints; + iter != NULL; iter = iter->next) { + pe__location_t *location = iter->data; + + location->rsc_lh->cmds->apply_location(location->rsc_lh, location); + } +} + +/*! + * \internal + * \brief Apply a location constraint to a resource's allowed node scores + * + * \param[in,out] rsc Resource to apply constraint to + * \param[in,out] location Location constraint to apply + * + * \note This does not consider the resource's children, so the resource's + * apply_location() method should be used instead in most cases. + */ +void +pcmk__apply_location(pe_resource_t *rsc, pe__location_t *location) +{ + bool need_role = false; + + CRM_CHECK((rsc != NULL) && (location != NULL), return); + + // If a role was specified, ensure constraint is applicable + need_role = (location->role_filter > RSC_ROLE_UNKNOWN); + if (need_role && (location->role_filter != rsc->next_role)) { + pe_rsc_trace(rsc, + "Not applying %s to %s because role will be %s not %s", + location->id, rsc->id, role2text(rsc->next_role), + role2text(location->role_filter)); + return; + } + + if (location->node_list_rh == NULL) { + pe_rsc_trace(rsc, "Not applying %s to %s because no nodes match", + location->id, rsc->id); + return; + } + + pe_rsc_trace(rsc, "Applying %s%s%s to %s", location->id, + (need_role? " for role " : ""), + (need_role? 
role2text(location->role_filter) : ""), rsc->id); + + for (GList *gIter = location->node_list_rh; gIter != NULL; + gIter = gIter->next) { + + pe_node_t *node = (pe_node_t *) gIter->data; + pe_node_t *weighted_node = NULL; + + weighted_node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, + node->details->id); + if (weighted_node == NULL) { + pe_rsc_trace(rsc, "* = %d on %s", + node->weight, pe__node_name(node)); + weighted_node = pe__copy_node(node); + g_hash_table_insert(rsc->allowed_nodes, + (gpointer) weighted_node->details->id, + weighted_node); + } else { + pe_rsc_trace(rsc, "* + %d on %s", + node->weight, pe__node_name(node)); + weighted_node->weight = pcmk__add_scores(weighted_node->weight, + node->weight); + } + + if (weighted_node->rsc_discover_mode < location->discover_mode) { + if (location->discover_mode == pe_discover_exclusive) { + rsc->exclusive_discover = TRUE; + } + /* exclusive > never > always... always is default */ + weighted_node->rsc_discover_mode = location->discover_mode; + } + } +} diff --git a/lib/pacemaker/pcmk_sched_migration.c b/lib/pacemaker/pcmk_sched_migration.c new file mode 100644 index 0000000..7e6ba8e --- /dev/null +++ b/lib/pacemaker/pcmk_sched_migration.c @@ -0,0 +1,386 @@ +/* + * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdbool.h> + +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +/*! + * \internal + * \brief Add migration source and target meta-attributes to an action + * + * \param[in,out] action Action to add meta-attributes to + * \param[in] source Node to add as migration source + * \param[in] target Node to add as migration target + */ +static void +add_migration_meta(pe_action_t *action, const pe_node_t *source, + const pe_node_t *target) +{ + add_hash_param(action->meta, XML_LRM_ATTR_MIGRATE_SOURCE, + source->details->uname); + + add_hash_param(action->meta, XML_LRM_ATTR_MIGRATE_TARGET, + target->details->uname); +} + +/*! + * \internal + * \brief Create internal migration actions for a migrateable resource + * + * \param[in,out] rsc Resource to create migration actions for + * \param[in] current Node that resource is originally active on + */ +void +pcmk__create_migration_actions(pe_resource_t *rsc, const pe_node_t *current) +{ + pe_action_t *migrate_to = NULL; + pe_action_t *migrate_from = NULL; + pe_action_t *start = NULL; + pe_action_t *stop = NULL; + + pe_rsc_trace(rsc, "Creating actions to %smigrate %s from %s to %s", + ((rsc->partial_migration_target == NULL)? 
"" : "partially "), + rsc->id, pe__node_name(current), + pe__node_name(rsc->allocated_to)); + start = start_action(rsc, rsc->allocated_to, TRUE); + stop = stop_action(rsc, current, TRUE); + + if (rsc->partial_migration_target == NULL) { + migrate_to = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), + RSC_MIGRATE, current, TRUE, TRUE, + rsc->cluster); + } + migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), + RSC_MIGRATED, rsc->allocated_to, TRUE, TRUE, + rsc->cluster); + + if ((migrate_from != NULL) + && ((migrate_to != NULL) || (rsc->partial_migration_target != NULL))) { + + pe__set_action_flags(start, pe_action_migrate_runnable); + pe__set_action_flags(stop, pe_action_migrate_runnable); + + // This is easier than trying to delete it from the graph + pe__set_action_flags(start, pe_action_pseudo); + + if (rsc->partial_migration_target == NULL) { + pe__set_action_flags(migrate_from, pe_action_migrate_runnable); + + if (migrate_to != NULL) { + pe__set_action_flags(migrate_to, pe_action_migrate_runnable); + migrate_to->needs = start->needs; + } + + // Probe -> migrate_to -> migrate_from + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), + NULL, pe_order_optional, rsc->cluster); + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), + NULL, + pe_order_optional|pe_order_implies_first_migratable, + rsc->cluster); + } else { + pe__set_action_flags(migrate_from, pe_action_migrate_runnable); + migrate_from->needs = start->needs; + + // Probe -> migrate_from (migrate_to already completed) + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), + NULL, pe_order_optional, rsc->cluster); + } + + // migrate_from before stop or start + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, + pe_order_optional|pe_order_implies_first_migratable, + rsc->cluster); + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, + pe_order_optional|pe_order_implies_first_migratable|pe_order_pseudo_left, + rsc->cluster); + } + + if (migrate_to != NULL) { + add_migration_meta(migrate_to, current, rsc->allocated_to); + + if (!rsc->is_remote_node) { + /* migrate_to takes place on the source node, but can affect the + * target node depending on how the agent is written. Because of + * this, pending migrate_to actions must be recorded in the CIB, + * in case the source node loses membership while the migrate_to + * action is still in flight. + * + * However we know Pacemaker Remote connection resources don't + * require this, so we skip this for them. (Although it wouldn't + * hurt, and now that record-pending defaults to true, skipping it + * matters even less.) + */ + add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true"); + } + } + + if (migrate_from != NULL) { + add_migration_meta(migrate_from, current, rsc->allocated_to); + } +} + +/*! 
+ * \internal + * \brief Abort a dangling migration by scheduling a stop (and possibly cleanup) + * + * \param[in] data Source node of dangling migration + * \param[in,out] user_data Resource involved in dangling migration + */ +void +pcmk__abort_dangling_migration(void *data, void *user_data) +{ + const pe_node_t *dangling_source = (const pe_node_t *) data; + pe_resource_t *rsc = (pe_resource_t *) user_data; + + pe_action_t *stop = NULL; + bool cleanup = pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop); + + pe_rsc_trace(rsc, + "Scheduling stop%s for %s on %s due to dangling migration", + (cleanup? " and cleanup" : ""), rsc->id, + pe__node_name(dangling_source)); + stop = stop_action(rsc, dangling_source, FALSE); + pe__set_action_flags(stop, pe_action_dangle); + if (cleanup) { + pcmk__schedule_cleanup(rsc, dangling_source, false); + } +} + +/*! + * \internal + * \brief Check whether a resource can migrate + * + * \param[in] rsc Resource to check + * \param[in] node Resource's current node + * + * \return true if \p rsc can migrate, otherwise false + */ +bool +pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current) +{ + CRM_CHECK(rsc != NULL, return false); + + if (!pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)) { + pe_rsc_trace(rsc, "%s cannot migrate because " + "the configuration does not allow it", + rsc->id); + return false; + } + + if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { + pe_rsc_trace(rsc, "%s cannot migrate because it is not managed", + rsc->id); + return false; + } + + if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + pe_rsc_trace(rsc, "%s cannot migrate because it is failed", + rsc->id); + return false; + } + + if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { + pe_rsc_trace(rsc, "%s cannot migrate because it has a start pending", + rsc->id); + return false; + } + + if ((current == NULL) || current->details->unclean) { + pe_rsc_trace(rsc, "%s cannot migrate because " + "its current node (%s) is unclean", + rsc->id, pe__node_name(current)); + return false; + } + + if ((rsc->allocated_to == NULL) || rsc->allocated_to->details->unclean) { + pe_rsc_trace(rsc, "%s cannot migrate because " + "its next node (%s) is unclean", + rsc->id, pe__node_name(rsc->allocated_to)); + return false; + } + + return true; +} + +/*! + * \internal + * \brief Get an action name from an action or operation key + * + * \param[in] action If not NULL, get action name from here + * \param[in] key If not NULL, get action name from here + * + * \return Newly allocated copy of action name (or NULL if none available) + */ +static char * +task_from_action_or_key(const pe_action_t *action, const char *key) +{ + char *res = NULL; + + if (action != NULL) { + res = strdup(action->task); + CRM_ASSERT(res != NULL); + } else if (key != NULL) { + parse_op_key(key, NULL, &res, NULL); + } + return res; +} + +/*! + * \internal + * \brief Order migration actions equivalent to a given ordering + * + * Orderings involving start, stop, demote, and promote actions must be honored + * during a migration as well, so duplicate any such ordering for the + * corresponding migration actions. 
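+ *
+ * For example, "start A then start B" implies "A migrate_from then
+ * B migrate_to" when both resources are migratable, because a live
+ * migration stands in for the usual stop/start sequence.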
+ * + * \param[in,out] order Ordering constraint to check + */ +void +pcmk__order_migration_equivalents(pe__ordering_t *order) +{ + char *first_task = NULL; + char *then_task = NULL; + bool then_migratable; + bool first_migratable; + + // Only orderings between unrelated resources are relevant + if ((order->lh_rsc == NULL) || (order->rh_rsc == NULL) + || (order->lh_rsc == order->rh_rsc) + || is_parent(order->lh_rsc, order->rh_rsc) + || is_parent(order->rh_rsc, order->lh_rsc)) { + return; + } + + // Only orderings involving at least one migratable resource are relevant + first_migratable = pcmk_is_set(order->lh_rsc->flags, pe_rsc_allow_migrate); + then_migratable = pcmk_is_set(order->rh_rsc->flags, pe_rsc_allow_migrate); + if (!first_migratable && !then_migratable) { + return; + } + + // Check which actions are involved + first_task = task_from_action_or_key(order->lh_action, + order->lh_action_task); + then_task = task_from_action_or_key(order->rh_action, + order->rh_action_task); + + if (pcmk__str_eq(first_task, RSC_START, pcmk__str_none) + && pcmk__str_eq(then_task, RSC_START, pcmk__str_none)) { + + uint32_t flags = pe_order_optional; + + if (first_migratable && then_migratable) { + /* A start then B start + * -> A migrate_from then B migrate_to */ + pcmk__new_ordering(order->lh_rsc, + pcmk__op_key(order->lh_rsc->id, RSC_MIGRATED, 0), + NULL, order->rh_rsc, + pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), + NULL, flags, order->lh_rsc->cluster); + } + + if (then_migratable) { + if (first_migratable) { + pe__set_order_flags(flags, pe_order_apply_first_non_migratable); + } + + /* A start then B start + * -> A start then B migrate_to (if start is not part of a + * migration) + */ + pcmk__new_ordering(order->lh_rsc, + pcmk__op_key(order->lh_rsc->id, RSC_START, 0), + NULL, order->rh_rsc, + pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), + NULL, flags, order->lh_rsc->cluster); + } + + } else if (then_migratable + && pcmk__str_eq(first_task, RSC_STOP, pcmk__str_none) + && pcmk__str_eq(then_task, RSC_STOP, pcmk__str_none)) { + + uint32_t flags = pe_order_optional; + + if (first_migratable) { + pe__set_order_flags(flags, pe_order_apply_first_non_migratable); + } + + /* For an ordering "stop A then stop B", if A is moving via restart, and + * B is migrating, enforce that B's migrate_to occurs after A's stop. 
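+         * (The migration effectively replaces B's stop, so it must still
+         * honor any ordering that B's stop would have had.)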
+ */ + pcmk__new_ordering(order->lh_rsc, + pcmk__op_key(order->lh_rsc->id, RSC_STOP, 0), NULL, + order->rh_rsc, + pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), + NULL, flags, order->lh_rsc->cluster); + + // Also order B's migrate_from after A's stop during partial migrations + if (order->rh_rsc->partial_migration_target) { + pcmk__new_ordering(order->lh_rsc, + pcmk__op_key(order->lh_rsc->id, RSC_STOP, 0), + NULL, order->rh_rsc, + pcmk__op_key(order->rh_rsc->id, RSC_MIGRATED, 0), + NULL, flags, order->lh_rsc->cluster); + } + + } else if (pcmk__str_eq(first_task, RSC_PROMOTE, pcmk__str_none) + && pcmk__str_eq(then_task, RSC_START, pcmk__str_none)) { + + uint32_t flags = pe_order_optional; + + if (then_migratable) { + /* A promote then B start + * -> A promote then B migrate_to */ + pcmk__new_ordering(order->lh_rsc, + pcmk__op_key(order->lh_rsc->id, RSC_PROMOTE, 0), + NULL, order->rh_rsc, + pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), + NULL, flags, order->lh_rsc->cluster); + } + + } else if (pcmk__str_eq(first_task, RSC_DEMOTE, pcmk__str_none) + && pcmk__str_eq(then_task, RSC_STOP, pcmk__str_none)) { + + uint32_t flags = pe_order_optional; + + if (then_migratable) { + /* A demote then B stop + * -> A demote then B migrate_to */ + pcmk__new_ordering(order->lh_rsc, + pcmk__op_key(order->lh_rsc->id, RSC_DEMOTE, 0), + NULL, order->rh_rsc, + pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), + NULL, flags, order->lh_rsc->cluster); + + // Also order B migrate_from after A demote during partial migrations + if (order->rh_rsc->partial_migration_target) { + pcmk__new_ordering(order->lh_rsc, + pcmk__op_key(order->lh_rsc->id, RSC_DEMOTE, 0), + NULL, order->rh_rsc, + pcmk__op_key(order->rh_rsc->id, RSC_MIGRATED, 0), + NULL, flags, order->lh_rsc->cluster); + } + } + } + + free(first_task); + free(then_task); +} diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c new file mode 100644 index 0000000..d7d5ba4 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_nodes.c @@ -0,0 +1,351 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include <crm/msg_xml.h> +#include <crm/lrmd.h> // lrmd_event_data_t +#include <crm/common/xml_internal.h> +#include <pacemaker-internal.h> +#include <pacemaker.h> +#include "libpacemaker_private.h" + +/*! 
+ * \internal + * \brief Check whether a node is available to run resources + * + * \param[in] node Node to check + * \param[in] consider_score If true, consider a negative score unavailable + * \param[in] consider_guest If true, consider a guest node unavailable whose + * resource will not be active + * + * \return true if node is online and not shutting down, unclean, or in standby + * or maintenance mode, otherwise false + */ +bool +pcmk__node_available(const pe_node_t *node, bool consider_score, + bool consider_guest) +{ + if ((node == NULL) || (node->details == NULL) || !node->details->online + || node->details->shutdown || node->details->unclean + || node->details->standby || node->details->maintenance) { + return false; + } + + if (consider_score && (node->weight < 0)) { + return false; + } + + // @TODO Go through all callers to see which should set consider_guest + if (consider_guest && pe__is_guest_node(node)) { + pe_resource_t *guest = node->details->remote_rsc->container; + + if (guest->fns->location(guest, NULL, FALSE) == NULL) { + return false; + } + } + + return true; +} + +/*! + * \internal + * \brief Copy a hash table of node objects + * + * \param[in] nodes Hash table to copy + * + * \return New copy of nodes (or NULL if nodes is NULL) + */ +GHashTable * +pcmk__copy_node_table(GHashTable *nodes) +{ + GHashTable *new_table = NULL; + GHashTableIter iter; + pe_node_t *node = NULL; + + if (nodes == NULL) { + return NULL; + } + new_table = pcmk__strkey_table(NULL, free); + g_hash_table_iter_init(&iter, nodes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + pe_node_t *new_node = pe__copy_node(node); + + g_hash_table_insert(new_table, (gpointer) new_node->details->id, + new_node); + } + return new_table; +} + +/*! + * \internal + * \brief Copy a list of node objects + * + * \param[in] list List to copy + * \param[in] reset Set copies' scores to 0 + * + * \return New list of shallow copies of nodes in original list + */ +GList * +pcmk__copy_node_list(const GList *list, bool reset) +{ + GList *result = NULL; + + for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { + pe_node_t *new_node = NULL; + pe_node_t *this_node = (pe_node_t *) gIter->data; + + new_node = pe__copy_node(this_node); + if (reset) { + new_node->weight = 0; + } + result = g_list_prepend(result, new_node); + } + return result; +} + +/*! + * \internal + * \brief Compare two nodes for allocation desirability + * + * Given two nodes, check which one is more preferred by allocation criteria + * such as node weight and utilization. + * + * \param[in] a First node to compare + * \param[in] b Second node to compare + * \param[in] data Node that resource being assigned is active on, if any + * + * \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are + * equally preferred + */ +static gint +compare_nodes(gconstpointer a, gconstpointer b, gpointer data) +{ + const pe_node_t *node1 = (const pe_node_t *) a; + const pe_node_t *node2 = (const pe_node_t *) b; + const pe_node_t *active = (const pe_node_t *) data; + + int node1_weight = 0; + int node2_weight = 0; + + int result = 0; + + if (a == NULL) { + return 1; + } + if (b == NULL) { + return -1; + } + + // Compare node weights + + node1_weight = pcmk__node_available(node1, false, false)? node1->weight : -INFINITY; + node2_weight = pcmk__node_available(node2, false, false)? 
node2->weight : -INFINITY; + + if (node1_weight > node2_weight) { + crm_trace("%s (%d) > %s (%d) : weight", + pe__node_name(node1), node1_weight, pe__node_name(node2), + node2_weight); + return -1; + } + + if (node1_weight < node2_weight) { + crm_trace("%s (%d) < %s (%d) : weight", + pe__node_name(node1), node1_weight, pe__node_name(node2), + node2_weight); + return 1; + } + + crm_trace("%s (%d) == %s (%d) : weight", + pe__node_name(node1), node1_weight, pe__node_name(node2), + node2_weight); + + // If appropriate, compare node utilization + + if (pcmk__str_eq(node1->details->data_set->placement_strategy, "minimal", + pcmk__str_casei)) { + goto equal; + } + + if (pcmk__str_eq(node1->details->data_set->placement_strategy, "balanced", + pcmk__str_casei)) { + result = pcmk__compare_node_capacities(node1, node2); + if (result < 0) { + crm_trace("%s > %s : capacity (%d)", + pe__node_name(node1), pe__node_name(node2), result); + return -1; + } else if (result > 0) { + crm_trace("%s < %s : capacity (%d)", + pe__node_name(node1), pe__node_name(node2), result); + return 1; + } + } + + // Compare number of allocated resources + + if (node1->details->num_resources < node2->details->num_resources) { + crm_trace("%s (%d) > %s (%d) : resources", + pe__node_name(node1), node1->details->num_resources, + pe__node_name(node2), node2->details->num_resources); + return -1; + + } else if (node1->details->num_resources > node2->details->num_resources) { + crm_trace("%s (%d) < %s (%d) : resources", + pe__node_name(node1), node1->details->num_resources, + pe__node_name(node2), node2->details->num_resources); + return 1; + } + + // Check whether one node is already running desired resource + + if (active != NULL) { + if (active->details == node1->details) { + crm_trace("%s (%d) > %s (%d) : active", + pe__node_name(node1), node1->details->num_resources, + pe__node_name(node2), node2->details->num_resources); + return -1; + } else if (active->details == node2->details) { + crm_trace("%s (%d) < %s (%d) : active", + pe__node_name(node1), node1->details->num_resources, + pe__node_name(node2), node2->details->num_resources); + return 1; + } + } + + // If all else is equal, prefer node with lowest-sorting name +equal: + crm_trace("%s = %s", pe__node_name(node1), pe__node_name(node2)); + return strcmp(node1->details->uname, node2->details->uname); +} + +/*! + * \internal + * \brief Sort a list of nodes by allocation desirability + * + * \param[in,out] nodes Node list to sort + * \param[in] active_node Node where resource being assigned is active + * + * \return New head of sorted list + */ +GList * +pcmk__sort_nodes(GList *nodes, pe_node_t *active_node) +{ + return g_list_sort_with_data(nodes, compare_nodes, active_node); +} + +/*! + * \internal + * \brief Check whether any node is available to run resources + * + * \param[in] nodes Nodes to check + * + * \return true if any node in \p nodes is available to run resources, + * otherwise false + */ +bool +pcmk__any_node_available(GHashTable *nodes) +{ + GHashTableIter iter; + const pe_node_t *node = NULL; + + if (nodes == NULL) { + return false; + } + g_hash_table_iter_init(&iter, nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (pcmk__node_available(node, true, false)) { + return true; + } + } + return false; +} + +/*! 
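+ * Each node's health attributes are summed into an overall score (plus
+ * node-health-base under the "progressive" strategy), and any nonzero
+ * result is applied as a location score for every resource on that node,
+ * except that negative scores are skipped for resources with
+ * allow-unhealthy-nodes=true.
+ *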
+ * \internal + * \brief Apply node health values for all nodes in cluster + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__apply_node_health(pe_working_set_t *data_set) +{ + int base_health = 0; + enum pcmk__health_strategy strategy; + const char *strategy_str = pe_pref(data_set->config_hash, + PCMK__OPT_NODE_HEALTH_STRATEGY); + + strategy = pcmk__parse_health_strategy(strategy_str); + if (strategy == pcmk__health_strategy_none) { + return; + } + crm_info("Applying node health strategy '%s'", strategy_str); + + // The progressive strategy can use a base health score + if (strategy == pcmk__health_strategy_progressive) { + base_health = pe__health_score(PCMK__OPT_NODE_HEALTH_BASE, data_set); + } + + for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { + pe_node_t *node = (pe_node_t *) iter->data; + int health = pe__sum_node_health_scores(node, base_health); + + // An overall health score of 0 has no effect + if (health == 0) { + continue; + } + crm_info("Overall system health of %s is %d", + pe__node_name(node), health); + + // Use node health as a location score for each resource on the node + for (GList *r = data_set->resources; r != NULL; r = r->next) { + pe_resource_t *rsc = (pe_resource_t *) r->data; + + bool constrain = true; + + if (health < 0) { + /* Negative health scores do not apply to resources with + * allow-unhealthy-nodes=true. + */ + constrain = !crm_is_true(g_hash_table_lookup(rsc->meta, + PCMK__META_ALLOW_UNHEALTHY_NODES)); + } + if (constrain) { + pcmk__new_location(strategy_str, rsc, health, NULL, node, + data_set); + } else { + pe_rsc_trace(rsc, "%s is immune from health ban on %s", + rsc->id, pe__node_name(node)); + } + } + } +} + +/*! + * \internal + * \brief Check for a node in a resource's parent's allowed nodes + * + * \param[in] rsc Resource whose parent should be checked + * \param[in] node Node to check for + * + * \return Equivalent of \p node from \p rsc's parent's allowed nodes if any, + * otherwise NULL + */ +pe_node_t * +pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node) +{ + GHashTable *allowed_nodes = NULL; + + if ((rsc == NULL) || (node == NULL)) { + return NULL; + } else if (rsc->parent == NULL) { + allowed_nodes = rsc->allowed_nodes; + } else { + allowed_nodes = rsc->parent->allowed_nodes; + } + return pe_hash_table_lookup(allowed_nodes, node->details->id); +} diff --git a/lib/pacemaker/pcmk_sched_ordering.c b/lib/pacemaker/pcmk_sched_ordering.c new file mode 100644 index 0000000..6629999 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_ordering.c @@ -0,0 +1,1463 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <inttypes.h> // PRIx32 +#include <stdbool.h> +#include <glib.h> + +#include <crm/crm.h> +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +enum pe_order_kind { + pe_order_kind_optional, + pe_order_kind_mandatory, + pe_order_kind_serialize, +}; + +enum ordering_symmetry { + ordering_asymmetric, // the only relation in an asymmetric ordering + ordering_symmetric, // the normal relation in a symmetric ordering + ordering_symmetric_inverse, // the inverse relation in a symmetric ordering +}; + +#define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ + __rsc = pcmk__find_constraint_resource(data_set->resources, __name); \ + if (__rsc == NULL) { \ + pcmk__config_err("%s: No resource found for %s", __set, __name); \ + return pcmk_rc_unpack_error; \ + } \ + } while (0) + +static const char * +invert_action(const char *action) +{ + if (pcmk__str_eq(action, RSC_START, pcmk__str_casei)) { + return RSC_STOP; + + } else if (pcmk__str_eq(action, RSC_STOP, pcmk__str_casei)) { + return RSC_START; + + } else if (pcmk__str_eq(action, RSC_PROMOTE, pcmk__str_casei)) { + return RSC_DEMOTE; + + } else if (pcmk__str_eq(action, RSC_DEMOTE, pcmk__str_casei)) { + return RSC_PROMOTE; + + } else if (pcmk__str_eq(action, RSC_PROMOTED, pcmk__str_casei)) { + return RSC_DEMOTED; + + } else if (pcmk__str_eq(action, RSC_DEMOTED, pcmk__str_casei)) { + return RSC_PROMOTED; + + } else if (pcmk__str_eq(action, RSC_STARTED, pcmk__str_casei)) { + return RSC_STOPPED; + + } else if (pcmk__str_eq(action, RSC_STOPPED, pcmk__str_casei)) { + return RSC_STARTED; + } + crm_warn("Unknown action '%s' specified in order constraint", action); + return NULL; +} + +static enum pe_order_kind +get_ordering_type(const xmlNode *xml_obj) +{ + enum pe_order_kind kind_e = pe_order_kind_mandatory; + const char *kind = crm_element_value(xml_obj, XML_ORDER_ATTR_KIND); + + if (kind == NULL) { + const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); + + kind_e = pe_order_kind_mandatory; + + if (score) { + // @COMPAT deprecated informally since 1.0.7, formally since 2.0.1 + int score_i = char2score(score); + + if (score_i == 0) { + kind_e = pe_order_kind_optional; + } + pe_warn_once(pe_wo_order_score, + "Support for 'score' in rsc_order is deprecated " + "and will be removed in a future release " + "(use 'kind' instead)"); + } + + } else if (pcmk__str_eq(kind, "Mandatory", pcmk__str_casei)) { + kind_e = pe_order_kind_mandatory; + + } else if (pcmk__str_eq(kind, "Optional", pcmk__str_casei)) { + kind_e = pe_order_kind_optional; + + } else if (pcmk__str_eq(kind, "Serialize", pcmk__str_casei)) { + kind_e = pe_order_kind_serialize; + + } else { + pcmk__config_err("Resetting '" XML_ORDER_ATTR_KIND "' for constraint " + "%s to 'Mandatory' because '%s' is not valid", + pcmk__s(ID(xml_obj), "missing ID"), kind); + } + return kind_e; +} + +/*! 
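+ * For example, an ordering with symmetrical="false", or one whose kind is
+ * "Serialize" (where symmetry is not meaningful), is treated as
+ * asymmetric; other orderings default to symmetric.
+ *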
+ * \internal + * \brief Get ordering symmetry from XML + * + * \param[in] xml_obj Ordering XML + * \param[in] parent_kind Default ordering kind + * \param[in] parent_symmetrical_s Parent element's symmetrical setting, if any + * + * \retval ordering_symmetric Ordering is symmetric + * \retval ordering_asymmetric Ordering is asymmetric + */ +static enum ordering_symmetry +get_ordering_symmetry(const xmlNode *xml_obj, enum pe_order_kind parent_kind, + const char *parent_symmetrical_s) +{ + int rc = pcmk_rc_ok; + bool symmetric = false; + enum pe_order_kind kind = parent_kind; // Default to parent's kind + + // Check ordering XML for explicit kind + if ((crm_element_value(xml_obj, XML_ORDER_ATTR_KIND) != NULL) + || (crm_element_value(xml_obj, XML_RULE_ATTR_SCORE) != NULL)) { + kind = get_ordering_type(xml_obj); + } + + // Check ordering XML (and parent) for explicit symmetrical setting + rc = pcmk__xe_get_bool_attr(xml_obj, XML_CONS_ATTR_SYMMETRICAL, &symmetric); + + if (rc != pcmk_rc_ok && parent_symmetrical_s != NULL) { + symmetric = crm_is_true(parent_symmetrical_s); + rc = pcmk_rc_ok; + } + + if (rc == pcmk_rc_ok) { + if (symmetric) { + if (kind == pe_order_kind_serialize) { + pcmk__config_warn("Ignoring " XML_CONS_ATTR_SYMMETRICAL + " for '%s' because not valid with " + XML_ORDER_ATTR_KIND " of 'Serialize'", + ID(xml_obj)); + } else { + return ordering_symmetric; + } + } + return ordering_asymmetric; + } + + // Use default symmetry + if (kind == pe_order_kind_serialize) { + return ordering_asymmetric; + } + return ordering_symmetric; +} + +/*! + * \internal + * \brief Get ordering flags appropriate to ordering kind + * + * \param[in] kind Ordering kind + * \param[in] first Action name for 'first' action + * \param[in] symmetry This ordering's symmetry role + * + * \return Minimal ordering flags appropriate to \p kind + */ +static uint32_t +ordering_flags_for_kind(enum pe_order_kind kind, const char *first, + enum ordering_symmetry symmetry) +{ + uint32_t flags = pe_order_none; // so we trace-log all flags set + + pe__set_order_flags(flags, pe_order_optional); + + switch (kind) { + case pe_order_kind_optional: + break; + + case pe_order_kind_serialize: + pe__set_order_flags(flags, pe_order_serialize_only); + break; + + case pe_order_kind_mandatory: + switch (symmetry) { + case ordering_asymmetric: + pe__set_order_flags(flags, pe_order_asymmetrical); + break; + + case ordering_symmetric: + pe__set_order_flags(flags, pe_order_implies_then); + if (pcmk__strcase_any_of(first, RSC_START, RSC_PROMOTE, + NULL)) { + pe__set_order_flags(flags, pe_order_runnable_left); + } + break; + + case ordering_symmetric_inverse: + pe__set_order_flags(flags, pe_order_implies_first); + break; + } + break; + } + return flags; +} + +/*! + * \internal + * \brief Find resource corresponding to ID specified in ordering + * + * \param[in] xml Ordering XML + * \param[in] resource_attr XML attribute name for resource ID + * \param[in] instance_attr XML attribute name for instance number. + * This option is deprecated and will be removed in a + * future release. 
+ * \param[in] data_set       Cluster working set
+ *
+ * \return Resource corresponding to \p id, or NULL if none
+ */
+static pe_resource_t *
+get_ordering_resource(const xmlNode *xml, const char *resource_attr,
+                      const char *instance_attr,
+                      const pe_working_set_t *data_set)
+{
+    // @COMPAT: instance_attr and instance_id variables deprecated since 2.1.5
+    pe_resource_t *rsc = NULL;
+    const char *rsc_id = crm_element_value(xml, resource_attr);
+    const char *instance_id = crm_element_value(xml, instance_attr);
+
+    if (rsc_id == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' without %s",
+                         ID(xml), resource_attr);
+        return NULL;
+    }
+
+    rsc = pcmk__find_constraint_resource(data_set->resources, rsc_id);
+    if (rsc == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                         "does not exist", ID(xml), rsc_id);
+        return NULL;
+    }
+
+    if (instance_id != NULL) {
+        pe_warn_once(pe_wo_order_inst,
+                     "Support for " XML_ORDER_ATTR_FIRST_INSTANCE " and "
+                     XML_ORDER_ATTR_THEN_INSTANCE " is deprecated and will be "
+                     "removed in a future release.");
+
+        if (!pe_rsc_is_clone(rsc)) {
+            pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                             "is not a clone but instance '%s' was requested",
+                             ID(xml), rsc_id, instance_id);
+            return NULL;
+        }
+        rsc = find_clone_instance(rsc, instance_id);
+        if (rsc == NULL) {
+            pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                             "does not have an instance '%s'",
+                             ID(xml), rsc_id, instance_id);
+            return NULL;
+        }
+    }
+    return rsc;
+}
+
+/*!
+ * \internal
+ * \brief Determine minimum number of 'first' instances required in ordering
+ *
+ * \param[in] rsc  'First' resource in ordering
+ * \param[in] xml  Ordering XML
+ *
+ * \return Minimum 'first' instances required (or 0 if not applicable)
+ */
+static int
+get_minimum_first_instances(const pe_resource_t *rsc, const xmlNode *xml)
+{
+    const char *clone_min = NULL;
+    bool require_all = false;
+
+    if (!pe_rsc_is_clone(rsc)) {
+        return 0;
+    }
+
+    clone_min = g_hash_table_lookup(rsc->meta,
+                                    XML_RSC_ATTR_INCARNATION_MIN);
+    if (clone_min != NULL) {
+        int clone_min_int = 0;
+
+        pcmk__scan_min_int(clone_min, &clone_min_int, 0);
+        return clone_min_int;
+    }
+
+    /* @COMPAT 1.1.13:
+     * require-all=false is deprecated equivalent of clone-min=1
+     */
+    if (pcmk__xe_get_bool_attr(xml, "require-all", &require_all) != ENODATA) {
+        pe_warn_once(pe_wo_require_all,
+                     "Support for require-all in ordering constraints "
+                     "is deprecated and will be removed in a future release"
+                     " (use clone-min clone meta-attribute instead)");
+        if (!require_all) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*!
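+ * For example, with clone-min=2 in an ordering "clone-A then B", B's
+ * action needs only two runnable instance actions of clone-A rather than
+ * all of them; this is implemented with an intermediate pseudo-action that
+ * requires at least clone-min runnable inputs.
+ *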
+ * \internal
+ * \brief Create orderings for a constraint with clone-min > 0
+ *
+ * \param[in]     id            Ordering ID
+ * \param[in,out] rsc_first     'First' resource in ordering (a clone)
+ * \param[in]     action_first  'First' action in ordering
+ * \param[in]     rsc_then      'Then' resource in ordering
+ * \param[in]     action_then   'Then' action in ordering
+ * \param[in]     flags         Ordering flags
+ * \param[in]     clone_min     Minimum required instances of 'first'
+ * \param[in,out] data_set      Cluster working set
+ */
+static void
+clone_min_ordering(const char *id,
+                   pe_resource_t *rsc_first, const char *action_first,
+                   pe_resource_t *rsc_then, const char *action_then,
+                   uint32_t flags, int clone_min, pe_working_set_t *data_set)
+{
+    // Create a pseudo-action for when the minimum instances are active
+    char *task = crm_strdup_printf(CRM_OP_RELAXED_CLONE ":%s", id);
+    pe_action_t *clone_min_met = get_pseudo_op(task, data_set);
+
+    free(task);
+
+    /* The pseudo-action becomes runnable only after at least clone_min of
+     * the instance actions ordered before it are runnable.
+     */
+    clone_min_met->required_runnable_before = clone_min;
+    pe__set_action_flags(clone_min_met, pe_action_requires_any);
+
+    // Order the actions for each clone instance before the pseudo-action
+    for (GList *rIter = rsc_first->children; rIter != NULL;
+         rIter = rIter->next) {
+
+        pe_resource_t *child = rIter->data;
+
+        pcmk__new_ordering(child, pcmk__op_key(child->id, action_first, 0),
+                           NULL, NULL, NULL, clone_min_met,
+                           pe_order_one_or_more|pe_order_implies_then_printed,
+                           data_set);
+    }
+
+    // Order "then" action after the pseudo-action (if runnable)
+    pcmk__new_ordering(NULL, NULL, clone_min_met, rsc_then,
+                       pcmk__op_key(rsc_then->id, action_then, 0),
+                       NULL, flags|pe_order_runnable_left, data_set);
+}
+
+/*!
+ * \internal
+ * \brief Update ordering flags for restart-type=restart
+ *
+ * \param[in]     rsc    'Then' resource in ordering
+ * \param[in]     kind   Ordering kind
+ * \param[in]     flag   Ordering flag to set (when applicable)
+ * \param[in,out] flags  Ordering flag set to update
+ *
+ * \compat The restart-type resource meta-attribute is deprecated. Eventually,
+ *         it will be removed, and pe_restart_ignore will be the only behavior,
+ *         at which time this can just be removed entirely.
+ */
+#define handle_restart_type(rsc, kind, flag, flags) do {        \
+        if (((kind) == pe_order_kind_optional)                  \
+            && ((rsc)->restart_type == pe_restart_restart)) {   \
+            pe__set_order_flags((flags), (flag));               \
+        }                                                       \
+    } while (0)
+
+/*!
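The deprecated restart-type meta-attribute that handle_restart_type() checks would be configured roughly as follows (illustrative IDs). When it is set to "restart" on the 'then' resource of an otherwise optional ordering, the macro adds the given implies flag, so that restarting the 'first' resource also restarts the dependent:

    <primitive id="web" class="ocf" provider="heartbeat" type="apache">
      <meta_attributes id="web-meta">
        <nvpair id="web-restart-type" name="restart-type" value="restart"/>
      </meta_attributes>
    </primitive>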
+ * \internal + * \brief Create new ordering for inverse of symmetric constraint + * + * \param[in] id Ordering ID (for logging only) + * \param[in] kind Ordering kind + * \param[in] rsc_first 'First' resource in ordering (a clone) + * \param[in] action_first 'First' action in ordering + * \param[in,out] rsc_then 'Then' resource in ordering + * \param[in] action_then 'Then' action in ordering + */ +static void +inverse_ordering(const char *id, enum pe_order_kind kind, + pe_resource_t *rsc_first, const char *action_first, + pe_resource_t *rsc_then, const char *action_then) +{ + action_then = invert_action(action_then); + action_first = invert_action(action_first); + if ((action_then == NULL) || (action_first == NULL)) { + pcmk__config_warn("Cannot invert constraint '%s' " + "(please specify inverse manually)", id); + } else { + uint32_t flags = ordering_flags_for_kind(kind, action_first, + ordering_symmetric_inverse); + + handle_restart_type(rsc_then, kind, pe_order_implies_first, flags); + pcmk__order_resource_actions(rsc_then, action_then, rsc_first, + action_first, flags); + } +} + +static void +unpack_simple_rsc_order(xmlNode *xml_obj, pe_working_set_t *data_set) +{ + pe_resource_t *rsc_then = NULL; + pe_resource_t *rsc_first = NULL; + int min_required_before = 0; + enum pe_order_kind kind = pe_order_kind_mandatory; + uint32_t cons_weight = pe_order_none; + enum ordering_symmetry symmetry; + + const char *action_then = NULL; + const char *action_first = NULL; + const char *id = NULL; + + CRM_CHECK(xml_obj != NULL, return); + + id = crm_element_value(xml_obj, XML_ATTR_ID); + if (id == NULL) { + pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, + crm_element_name(xml_obj)); + return; + } + + rsc_first = get_ordering_resource(xml_obj, XML_ORDER_ATTR_FIRST, + XML_ORDER_ATTR_FIRST_INSTANCE, + data_set); + if (rsc_first == NULL) { + return; + } + + rsc_then = get_ordering_resource(xml_obj, XML_ORDER_ATTR_THEN, + XML_ORDER_ATTR_THEN_INSTANCE, + data_set); + if (rsc_then == NULL) { + return; + } + + action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION); + if (action_first == NULL) { + action_first = RSC_START; + } + + action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION); + if (action_then == NULL) { + action_then = action_first; + } + + kind = get_ordering_type(xml_obj); + + symmetry = get_ordering_symmetry(xml_obj, kind, NULL); + cons_weight = ordering_flags_for_kind(kind, action_first, symmetry); + + handle_restart_type(rsc_then, kind, pe_order_implies_then, cons_weight); + + /* If there is a minimum number of instances that must be runnable before + * the 'then' action is runnable, we use a pseudo-action for convenience: + * minimum number of clone instances have runnable actions -> + * pseudo-action is runnable -> dependency is runnable. + */ + min_required_before = get_minimum_first_instances(rsc_first, xml_obj); + if (min_required_before > 0) { + clone_min_ordering(id, rsc_first, action_first, rsc_then, action_then, + cons_weight, min_required_before, data_set); + } else { + pcmk__order_resource_actions(rsc_first, action_first, rsc_then, + action_then, cons_weight); + } + + if (symmetry == ordering_symmetric) { + inverse_ordering(id, kind, rsc_first, action_first, + rsc_then, action_then); + } +} + +/*! 
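To illustrate the symmetric case handled by inverse_ordering(): one symmetrical constraint produces the configured ordering plus its inverse, with each action mapped through invert_action(). A sketch with assumed resource names:

    <rsc_order id="order-db-web" first="db" first-action="start"
               then="web" then-action="start" symmetrical="true"/>

    db start  before  web start    (configured direction)
    web stop  before  db stop      (implicit inverse)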
+ * \internal + * \brief Create a new ordering between two actions + * + * \param[in,out] first_rsc Resource for 'first' action (if NULL and + * \p first_action is a resource action, that + * resource will be used) + * \param[in,out] first_action_task Action key for 'first' action (if NULL and + * \p first_action is not NULL, its UUID will + * be used) + * \param[in,out] first_action 'first' action (if NULL, \p first_rsc and + * \p first_action_task must be set) + * + * \param[in] then_rsc Resource for 'then' action (if NULL and + * \p then_action is a resource action, that + * resource will be used) + * \param[in,out] then_action_task Action key for 'then' action (if NULL and + * \p then_action is not NULL, its UUID will + * be used) + * \param[in] then_action 'then' action (if NULL, \p then_rsc and + * \p then_action_task must be set) + * + * \param[in] flags Flag set of enum pe_ordering + * \param[in,out] data_set Cluster working set to add ordering to + * + * \note This function takes ownership of first_action_task and + * then_action_task, which do not need to be freed by the caller. + */ +void +pcmk__new_ordering(pe_resource_t *first_rsc, char *first_action_task, + pe_action_t *first_action, pe_resource_t *then_rsc, + char *then_action_task, pe_action_t *then_action, + uint32_t flags, pe_working_set_t *data_set) +{ + pe__ordering_t *order = NULL; + + // One of action or resource must be specified for each side + CRM_CHECK(((first_action != NULL) || (first_rsc != NULL)) + && ((then_action != NULL) || (then_rsc != NULL)), + free(first_action_task); free(then_action_task); return); + + if ((first_rsc == NULL) && (first_action != NULL)) { + first_rsc = first_action->rsc; + } + if ((then_rsc == NULL) && (then_action != NULL)) { + then_rsc = then_action->rsc; + } + + order = calloc(1, sizeof(pe__ordering_t)); + CRM_ASSERT(order != NULL); + + order->id = data_set->order_id++; + order->flags = flags; + order->lh_rsc = first_rsc; + order->rh_rsc = then_rsc; + order->lh_action = first_action; + order->rh_action = then_action; + order->lh_action_task = first_action_task; + order->rh_action_task = then_action_task; + + if ((order->lh_action_task == NULL) && (first_action != NULL)) { + order->lh_action_task = strdup(first_action->uuid); + } + + if ((order->rh_action_task == NULL) && (then_action != NULL)) { + order->rh_action_task = strdup(then_action->uuid); + } + + if ((order->lh_rsc == NULL) && (first_action != NULL)) { + order->lh_rsc = first_action->rsc; + } + + if ((order->rh_rsc == NULL) && (then_action != NULL)) { + order->rh_rsc = then_action->rsc; + } + + pe_rsc_trace(first_rsc, "Created ordering %d for %s then %s", + (data_set->order_id - 1), + pcmk__s(order->lh_action_task, "an underspecified action"), + pcmk__s(order->rh_action_task, "an underspecified action")); + + data_set->ordering_constraints = g_list_prepend(data_set->ordering_constraints, + order); + pcmk__order_migration_equivalents(order); +} + +/*! 
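A minimal sketch of how internal callers use pcmk__new_ordering(), matching the call sites elsewhere in this patch (the rsc_a, rsc_b, and data_set variables are assumed to be in scope). Because the function takes ownership of the key strings, the caller does not free them:

    // Order rsc_a's stop before rsc_b's start
    pcmk__new_ordering(rsc_a, pcmk__op_key(rsc_a->id, RSC_STOP, 0), NULL,
                       rsc_b, pcmk__op_key(rsc_b->id, RSC_START, 0), NULL,
                       pe_order_optional, data_set);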
+ * \brief Unpack a set in an ordering constraint + * + * \param[in] set Set XML to unpack + * \param[in] parent_kind rsc_order XML "kind" attribute + * \param[in] parent_symmetrical_s rsc_order XML "symmetrical" attribute + * \param[in,out] data_set Cluster working set + * + * \return Standard Pacemaker return code + */ +static int +unpack_order_set(const xmlNode *set, enum pe_order_kind parent_kind, + const char *parent_symmetrical_s, pe_working_set_t *data_set) +{ + GList *set_iter = NULL; + GList *resources = NULL; + + pe_resource_t *last = NULL; + pe_resource_t *resource = NULL; + + int local_kind = parent_kind; + bool sequential = false; + uint32_t flags = pe_order_optional; + enum ordering_symmetry symmetry; + + char *key = NULL; + const char *id = ID(set); + const char *action = crm_element_value(set, "action"); + const char *sequential_s = crm_element_value(set, "sequential"); + const char *kind_s = crm_element_value(set, XML_ORDER_ATTR_KIND); + + if (action == NULL) { + action = RSC_START; + } + + if (kind_s) { + local_kind = get_ordering_type(set); + } + if (sequential_s == NULL) { + sequential_s = "1"; + } + + sequential = crm_is_true(sequential_s); + + symmetry = get_ordering_symmetry(set, parent_kind, parent_symmetrical_s); + flags = ordering_flags_for_kind(local_kind, action, symmetry); + + for (const xmlNode *xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, resource, ID(xml_rsc)); + resources = g_list_append(resources, resource); + } + + if (pcmk__list_of_1(resources)) { + crm_trace("Single set: %s", id); + goto done; + } + + set_iter = resources; + while (set_iter != NULL) { + resource = (pe_resource_t *) set_iter->data; + set_iter = set_iter->next; + + key = pcmk__op_key(resource->id, action, 0); + + if (local_kind == pe_order_kind_serialize) { + /* Serialize before everything that comes after */ + + for (GList *gIter = set_iter; gIter != NULL; gIter = gIter->next) { + pe_resource_t *then_rsc = (pe_resource_t *) gIter->data; + char *then_key = pcmk__op_key(then_rsc->id, action, 0); + + pcmk__new_ordering(resource, strdup(key), NULL, then_rsc, + then_key, NULL, flags, data_set); + } + + } else if (sequential) { + if (last != NULL) { + pcmk__order_resource_actions(last, action, resource, action, + flags); + } + last = resource; + } + free(key); + } + + if (symmetry == ordering_asymmetric) { + goto done; + } + + last = NULL; + action = invert_action(action); + + flags = ordering_flags_for_kind(local_kind, action, + ordering_symmetric_inverse); + + set_iter = resources; + while (set_iter != NULL) { + resource = (pe_resource_t *) set_iter->data; + set_iter = set_iter->next; + + if (sequential) { + if (last != NULL) { + pcmk__order_resource_actions(resource, action, last, action, + flags); + } + last = resource; + } + } + + done: + g_list_free(resources); + return pcmk_rc_ok; +} + +/*! 
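The set form consumed by unpack_order_set() looks like the following sketch (illustrative IDs). With sequential="true", this is equivalent to ordering A before B and B before C; with sequential="false", the members are ordered only relative to other sets:

    <rsc_order id="order-tier" kind="Mandatory">
      <resource_set id="order-tier-set" sequential="true" action="start">
        <resource_ref id="A"/>
        <resource_ref id="B"/>
        <resource_ref id="C"/>
      </resource_set>
    </rsc_order>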
+ * \brief Order two resource sets relative to each other
+ *
+ * \param[in]     id        Ordering ID (for logging)
+ * \param[in]     set1      First listed set
+ * \param[in]     set2      Second listed set
+ * \param[in]     kind      Ordering kind
+ * \param[in,out] data_set  Cluster working set
+ * \param[in]     symmetry  Which ordering symmetry applies to this relation
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+order_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2,
+               enum pe_order_kind kind, pe_working_set_t *data_set,
+               enum ordering_symmetry symmetry)
+{
+
+    const xmlNode *xml_rsc = NULL;
+    const xmlNode *xml_rsc_2 = NULL;
+
+    pe_resource_t *rsc_1 = NULL;
+    pe_resource_t *rsc_2 = NULL;
+
+    const char *action_1 = crm_element_value(set1, "action");
+    const char *action_2 = crm_element_value(set2, "action");
+
+    uint32_t flags = pe_order_none;
+
+    bool require_all = true;
+
+    (void) pcmk__xe_get_bool_attr(set1, "require-all", &require_all);
+
+    if (action_1 == NULL) {
+        action_1 = RSC_START;
+    }
+
+    if (action_2 == NULL) {
+        action_2 = RSC_START;
+    }
+
+    if (symmetry == ordering_symmetric_inverse) {
+        action_1 = invert_action(action_1);
+        action_2 = invert_action(action_2);
+    }
+
+    if (pcmk__str_eq(RSC_STOP, action_1, pcmk__str_casei)
+        || pcmk__str_eq(RSC_DEMOTE, action_1, pcmk__str_casei)) {
+        /* Assuming: A -> ( B || C ) -> D
+         * The one-or-more logic only applies during the start/promote phase.
+         * During shutdown, neither B nor C can stop until D is down, so simply
+         * turn require_all back on.
+         */
+        require_all = true;
+    }
+
+    flags = ordering_flags_for_kind(kind, action_1, symmetry);
+
+    /* If we have an unordered set1, whether it is sequential or not is
+     * irrelevant with regard to set2.
+     */
+    if (!require_all) {
+        char *task = crm_strdup_printf(CRM_OP_RELAXED_SET ":%s", ID(set1));
+        pe_action_t *unordered_action = get_pseudo_op(task, data_set);
+
+        free(task);
+        pe__set_action_flags(unordered_action, pe_action_requires_any);
+
+        for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
+             xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
+
+            EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc));
+
+            /* Add an ordering constraint between every element in set1 and
+             * the pseudo-action. If any action in set1 is runnable, the
+             * pseudo-action will be runnable.
+             */
+            pcmk__new_ordering(rsc_1, pcmk__op_key(rsc_1->id, action_1, 0),
+                               NULL, NULL, NULL, unordered_action,
+                               pe_order_one_or_more|pe_order_implies_then_printed,
+                               data_set);
+        }
+        for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF);
+             xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) {
+
+            EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2));
+
+            /* Add an ordering constraint between the pseudo-action and every
+             * element in set2. If the pseudo-action is runnable, every action
+             * in set2 will be runnable.
+ */ + pcmk__new_ordering(NULL, NULL, unordered_action, + rsc_2, pcmk__op_key(rsc_2->id, action_2, 0), + NULL, flags|pe_order_runnable_left, data_set); + } + + return pcmk_rc_ok; + } + + if (pcmk__xe_attr_is_true(set1, "sequential")) { + if (symmetry == ordering_symmetric_inverse) { + // Get the first one + xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + if (xml_rsc != NULL) { + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); + } + + } else { + // Get the last one + const char *rid = NULL; + + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + rid = ID(xml_rsc); + } + EXPAND_CONSTRAINT_IDREF(id, rsc_1, rid); + } + } + + if (pcmk__xe_attr_is_true(set2, "sequential")) { + if (symmetry == ordering_symmetric_inverse) { + // Get the last one + const char *rid = NULL; + + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + rid = ID(xml_rsc); + } + EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); + + } else { + // Get the first one + xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + if (xml_rsc != NULL) { + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); + } + } + } + + if ((rsc_1 != NULL) && (rsc_2 != NULL)) { + pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); + + } else if (rsc_1 != NULL) { + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); + pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, + flags); + } + + } else if (rsc_2 != NULL) { + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); + pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, + flags); + } + + } else { + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); + + for (xmlNode *xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); + pcmk__order_resource_actions(rsc_1, action_1, rsc_2, + action_2, flags); + } + } + } + + return pcmk_rc_ok; +} + +/*! 
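The require-all=false branch above corresponds to configurations like this sketch (illustrative IDs), where starting any one member of the first set is enough to allow the second set to start:

    <rsc_order id="order-any-db-then-web" kind="Mandatory">
      <resource_set id="dbs" sequential="false" require-all="false">
        <resource_ref id="db1"/>
        <resource_ref id="db2"/>
      </resource_set>
      <resource_set id="webs">
        <resource_ref id="web"/>
      </resource_set>
    </rsc_order>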
+ * \internal + * \brief If an ordering constraint uses resource tags, expand them + * + * \param[in,out] xml_obj Ordering constraint XML + * \param[out] expanded_xml Equivalent XML with tags expanded + * \param[in] data_set Cluster working set + * + * \return Standard Pacemaker return code (specifically, pcmk_rc_ok on success, + * and pcmk_rc_unpack_error on invalid configuration) + */ +static int +unpack_order_tags(xmlNode *xml_obj, xmlNode **expanded_xml, + const pe_working_set_t *data_set) +{ + const char *id_first = NULL; + const char *id_then = NULL; + const char *action_first = NULL; + const char *action_then = NULL; + + pe_resource_t *rsc_first = NULL; + pe_resource_t *rsc_then = NULL; + pe_tag_t *tag_first = NULL; + pe_tag_t *tag_then = NULL; + + xmlNode *rsc_set_first = NULL; + xmlNode *rsc_set_then = NULL; + bool any_sets = false; + + // Check whether there are any resource sets with template or tag references + *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set); + if (*expanded_xml != NULL) { + crm_log_xml_trace(*expanded_xml, "Expanded rsc_order"); + return pcmk_rc_ok; + } + + id_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST); + id_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN); + if ((id_first == NULL) || (id_then == NULL)) { + return pcmk_rc_ok; + } + + if (!pcmk__valid_resource_or_tag(data_set, id_first, &rsc_first, + &tag_first)) { + pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " + "valid resource or tag", ID(xml_obj), id_first); + return pcmk_rc_unpack_error; + } + + if (!pcmk__valid_resource_or_tag(data_set, id_then, &rsc_then, &tag_then)) { + pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " + "valid resource or tag", ID(xml_obj), id_then); + return pcmk_rc_unpack_error; + } + + if ((rsc_first != NULL) && (rsc_then != NULL)) { + // Neither side references a template or tag + return pcmk_rc_ok; + } + + action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION); + action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION); + + *expanded_xml = copy_xml(xml_obj); + + // Convert template/tag reference in "first" into resource_set under constraint + if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_first, XML_ORDER_ATTR_FIRST, + true, data_set)) { + free_xml(*expanded_xml); + *expanded_xml = NULL; + return pcmk_rc_unpack_error; + } + + if (rsc_set_first != NULL) { + if (action_first != NULL) { + // Move "first-action" into converted resource_set as "action" + crm_xml_add(rsc_set_first, "action", action_first); + xml_remove_prop(*expanded_xml, XML_ORDER_ATTR_FIRST_ACTION); + } + any_sets = true; + } + + // Convert template/tag reference in "then" into resource_set under constraint + if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_then, XML_ORDER_ATTR_THEN, + true, data_set)) { + free_xml(*expanded_xml); + *expanded_xml = NULL; + return pcmk_rc_unpack_error; + } + + if (rsc_set_then != NULL) { + if (action_then != NULL) { + // Move "then-action" into converted resource_set as "action" + crm_xml_add(rsc_set_then, "action", action_then); + xml_remove_prop(*expanded_xml, XML_ORDER_ATTR_THEN_ACTION); + } + any_sets = true; + } + + if (any_sets) { + crm_log_xml_trace(*expanded_xml, "Expanded rsc_order"); + } else { + free_xml(*expanded_xml); + *expanded_xml = NULL; + } + + return pcmk_rc_ok; +} + +/*! 
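unpack_order_tags() allows a constraint to name a configured tag instead of a resource; conceptually, the constraint in the sketch below (illustrative IDs) is rewritten so that the db-tier reference becomes a resource_set containing db1 and db2 before normal unpacking proceeds:

    <tags>
      <tag id="db-tier">
        <obj_ref id="db1"/>
        <obj_ref id="db2"/>
      </tag>
    </tags>

    <rsc_order id="order-dbs-web" first="db-tier" then="web" kind="Mandatory"/>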
+ * \internal + * \brief Unpack ordering constraint XML + * + * \param[in,out] xml_obj Ordering constraint XML to unpack + * \param[in,out] data_set Cluster working set + */ +void +pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set) +{ + xmlNode *set = NULL; + xmlNode *last = NULL; + + xmlNode *orig_xml = NULL; + xmlNode *expanded_xml = NULL; + + const char *id = crm_element_value(xml_obj, XML_ATTR_ID); + const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); + enum pe_order_kind kind = get_ordering_type(xml_obj); + + enum ordering_symmetry symmetry = get_ordering_symmetry(xml_obj, kind, + NULL); + + // Expand any resource tags in the constraint XML + if (unpack_order_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) { + return; + } + if (expanded_xml != NULL) { + orig_xml = xml_obj; + xml_obj = expanded_xml; + } + + // If the constraint has resource sets, unpack them + for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); + set != NULL; set = crm_next_same_xml(set)) { + + set = expand_idref(set, data_set->input); + if ((set == NULL) // Configuration error, message already logged + || (unpack_order_set(set, kind, invert, data_set) != pcmk_rc_ok)) { + + if (expanded_xml != NULL) { + free_xml(expanded_xml); + } + return; + } + + if (last != NULL) { + + if (order_rsc_sets(id, last, set, kind, data_set, + symmetry) != pcmk_rc_ok) { + if (expanded_xml != NULL) { + free_xml(expanded_xml); + } + return; + } + + if ((symmetry == ordering_symmetric) + && (order_rsc_sets(id, set, last, kind, data_set, + ordering_symmetric_inverse) != pcmk_rc_ok)) { + if (expanded_xml != NULL) { + free_xml(expanded_xml); + } + return; + } + + } + last = set; + } + + if (expanded_xml) { + free_xml(expanded_xml); + xml_obj = orig_xml; + } + + // If the constraint has no resource sets, unpack it as a simple ordering + if (last == NULL) { + return unpack_simple_rsc_order(xml_obj, data_set); + } +} + +static bool +ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input) +{ + /* Prevent user-defined ordering constraints between resources + * running in a guest node and the resource that defines that node. + */ + if (!pcmk_is_set(input->type, pe_order_preserve) + && (input->action->rsc != NULL) + && pcmk__rsc_corresponds_to_guest(action->rsc, input->action->node)) { + + crm_warn("Invalid ordering constraint between %s and %s", + input->action->rsc->id, action->rsc->id); + return true; + } + + /* If there's an order like + * "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1" + * + * then rscA is being migrated from node1 to node2, while rscB is being + * migrated from node2 to node1. If there would be a graph loop, + * break the order "load_stopped_node2" -> "rscA_migrate_to node1". + */ + if ((input->type == pe_order_load) && action->rsc + && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei) + && pcmk__graph_has_loop(action, action, input)) { + return true; + } + + return false; +} + +void +pcmk__disable_invalid_orderings(pe_working_set_t *data_set) +{ + for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) { + pe_action_t *action = (pe_action_t *) iter->data; + pe_action_wrapper_t *input = NULL; + + for (GList *input_iter = action->actions_before; + input_iter != NULL; input_iter = input_iter->next) { + + input = (pe_action_wrapper_t *) input_iter->data; + if (ordering_is_invalid(action, input)) { + input->type = pe_order_none; + } + } + } +} + +/*! 
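The guest-node check in ordering_is_invalid() guards against configurations like this sketch (illustrative IDs), where a user orders a resource that runs inside a guest node against the VM resource that defines that guest node. Such an ordering is disabled (its type is reset to pe_order_none) rather than honored, since honoring it could deadlock the guest's own startup or shutdown:

    <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
      <meta_attributes id="vm1-meta">
        <nvpair id="vm1-remote" name="remote-node" value="guest1"/>
      </meta_attributes>
    </primitive>

    <rsc_order id="bad-order" first="rsc-on-guest1" then="vm1"
               kind="Mandatory"/>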
+ * \internal
+ * \brief Order stops on a node before the node's shutdown
+ *
+ * \param[in,out] node         Node being shut down
+ * \param[in]     shutdown_op  Shutdown action for node
+ */
+void
+pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op)
+{
+    for (GList *iter = node->details->data_set->actions;
+         iter != NULL; iter = iter->next) {
+
+        pe_action_t *action = (pe_action_t *) iter->data;
+
+        // Only stops on the node shutting down are relevant
+        if ((action->rsc == NULL) || (action->node == NULL)
+            || (action->node->details != node->details)
+            || !pcmk__str_eq(action->task, RSC_STOP, pcmk__str_casei)) {
+            continue;
+        }
+
+        // Resources and nodes in maintenance mode won't be touched
+
+        if (pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)) {
+            pe_rsc_trace(action->rsc,
+                         "Not ordering %s before shutdown of %s because "
+                         "resource in maintenance mode",
+                         action->uuid, pe__node_name(node));
+            continue;
+
+        } else if (node->details->maintenance) {
+            pe_rsc_trace(action->rsc,
+                         "Not ordering %s before shutdown of %s because "
+                         "node in maintenance mode",
+                         action->uuid, pe__node_name(node));
+            continue;
+        }
+
+        /* Don't touch a resource that is unmanaged or blocked, to avoid
+         * blocking the shutdown (though if another action depends on this one,
+         * we may still end up blocking)
+         */
+        if (!pcmk_any_flags_set(action->rsc->flags,
+                                pe_rsc_managed|pe_rsc_block)) {
+            pe_rsc_trace(action->rsc,
+                         "Not ordering %s before shutdown of %s because "
+                         "resource is unmanaged or blocked",
+                         action->uuid, pe__node_name(node));
+            continue;
+        }
+
+        pe_rsc_trace(action->rsc, "Ordering %s before shutdown of %s",
+                     action->uuid, pe__node_name(node));
+        pe__clear_action_flags(action, pe_action_optional);
+        pcmk__new_ordering(action->rsc, NULL, action, NULL,
+                           strdup(CRM_OP_SHUTDOWN), shutdown_op,
+                           pe_order_optional|pe_order_runnable_left,
+                           node->details->data_set);
+    }
+}
+
+/*!
+ * \brief Find resource actions matching directly or as child
+ *
+ * \param[in] rsc           Resource to check
+ * \param[in] original_key  Action key to search for (possibly referencing
+ *                          parent of \p rsc)
+ *
+ * \return Newly allocated list of matching actions
+ * \note It is the caller's responsibility to free the result with g_list_free()
+ */
+static GList *
+find_actions_by_task(const pe_resource_t *rsc, const char *original_key)
+{
+    // Search under given task key directly
+    GList *list = find_actions(rsc->actions, original_key, NULL);
+
+    if (list == NULL) {
+        // Search again using this resource's ID
+        char *key = NULL;
+        char *task = NULL;
+        guint interval_ms = 0;
+
+        if (parse_op_key(original_key, NULL, &task, &interval_ms)) {
+            key = pcmk__op_key(rsc->id, task, interval_ms);
+            list = find_actions(rsc->actions, key, NULL);
+            free(key);
+            free(task);
+        } else {
+            crm_err("Invalid operation key (bug?): %s", original_key);
+        }
+    }
+    return list;
+}
+
+/*!
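find_actions_by_task() leans on the operation key format used throughout the scheduler, which is assumed here to be <resource>_<task>_<interval_ms>. A sketch of the round trip it performs when the given key references the parent's ID:

    char *task = NULL;
    guint interval_ms = 0;

    // Original key might be "db-clone_stop_0" while this resource is "db:0"
    if (parse_op_key("db-clone_stop_0", NULL, &task, &interval_ms)) {
        char *key = pcmk__op_key("db:0", task, interval_ms); // "db:0_stop_0"

        // ... search rsc->actions for key ...
        free(key);
        free(task);
    }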
+ * \internal + * \brief Order relevant resource actions after a given action + * + * \param[in,out] first_action Action to order after (or NULL if none runnable) + * \param[in] rsc Resource whose actions should be ordered + * \param[in,out] order Ordering constraint being applied + */ +static void +order_resource_actions_after(pe_action_t *first_action, + const pe_resource_t *rsc, pe__ordering_t *order) +{ + GList *then_actions = NULL; + uint32_t flags = pe_order_none; + + CRM_CHECK((rsc != NULL) && (order != NULL), return); + + flags = order->flags; + pe_rsc_trace(rsc, "Applying ordering %d for 'then' resource %s", + order->id, rsc->id); + + if (order->rh_action != NULL) { + then_actions = g_list_prepend(NULL, order->rh_action); + + } else { + then_actions = find_actions_by_task(rsc, order->rh_action_task); + } + + if (then_actions == NULL) { + pe_rsc_trace(rsc, "Ignoring ordering %d: no %s actions found for %s", + order->id, order->rh_action_task, rsc->id); + return; + } + + if ((first_action != NULL) && (first_action->rsc == rsc) + && pcmk_is_set(first_action->flags, pe_action_dangle)) { + + pe_rsc_trace(rsc, + "Detected dangling migration ordering (%s then %s %s)", + first_action->uuid, order->rh_action_task, rsc->id); + pe__clear_order_flags(flags, pe_order_implies_then); + } + + if ((first_action == NULL) && !pcmk_is_set(flags, pe_order_implies_then)) { + pe_rsc_debug(rsc, + "Ignoring ordering %d for %s: No first action found", + order->id, rsc->id); + g_list_free(then_actions); + return; + } + + for (GList *iter = then_actions; iter != NULL; iter = iter->next) { + pe_action_t *then_action_iter = (pe_action_t *) iter->data; + + if (first_action != NULL) { + order_actions(first_action, then_action_iter, flags); + } else { + pe__clear_action_flags(then_action_iter, pe_action_runnable); + crm_warn("%s of %s is unrunnable because there is no %s of %s " + "to order it after", then_action_iter->task, rsc->id, + order->lh_action_task, order->lh_rsc->id); + } + } + + g_list_free(then_actions); +} + +static void +rsc_order_first(pe_resource_t *first_rsc, pe__ordering_t *order, + pe_working_set_t *data_set) +{ + GList *first_actions = NULL; + pe_action_t *first_action = order->lh_action; + pe_resource_t *then_rsc = order->rh_rsc; + + CRM_ASSERT(first_rsc != NULL); + pe_rsc_trace(first_rsc, "Applying ordering constraint %d (first: %s)", + order->id, first_rsc->id); + + if (first_action != NULL) { + first_actions = g_list_prepend(NULL, first_action); + + } else { + first_actions = find_actions_by_task(first_rsc, order->lh_action_task); + } + + if ((first_actions == NULL) && (first_rsc == then_rsc)) { + pe_rsc_trace(first_rsc, + "Ignoring constraint %d: first (%s for %s) not found", + order->id, order->lh_action_task, first_rsc->id); + + } else if (first_actions == NULL) { + char *key = NULL; + char *op_type = NULL; + guint interval_ms = 0; + + parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms); + key = pcmk__op_key(first_rsc->id, op_type, interval_ms); + + if ((first_rsc->fns->state(first_rsc, TRUE) == RSC_ROLE_STOPPED) + && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) { + free(key); + pe_rsc_trace(first_rsc, + "Ignoring constraint %d: first (%s for %s) not found", + order->id, order->lh_action_task, first_rsc->id); + + } else if ((first_rsc->fns->state(first_rsc, TRUE) == RSC_ROLE_UNPROMOTED) + && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) { + free(key); + pe_rsc_trace(first_rsc, + "Ignoring constraint %d: first (%s for %s) not found", + order->id, 
order->lh_action_task, first_rsc->id); + + } else { + pe_rsc_trace(first_rsc, + "Creating first (%s for %s) for constraint %d ", + order->lh_action_task, first_rsc->id, order->id); + first_action = custom_action(first_rsc, key, op_type, NULL, TRUE, + TRUE, data_set); + first_actions = g_list_prepend(NULL, first_action); + } + + free(op_type); + } + + if (then_rsc == NULL) { + if (order->rh_action == NULL) { + pe_rsc_trace(first_rsc, "Ignoring constraint %d: then not found", + order->id); + return; + } + then_rsc = order->rh_action->rsc; + } + for (GList *gIter = first_actions; gIter != NULL; gIter = gIter->next) { + first_action = (pe_action_t *) gIter->data; + + if (then_rsc == NULL) { + order_actions(first_action, order->rh_action, order->flags); + + } else { + order_resource_actions_after(first_action, then_rsc, order); + } + } + + g_list_free(first_actions); +} + +void +pcmk__apply_orderings(pe_working_set_t *data_set) +{ + crm_trace("Applying ordering constraints"); + + /* Ordering constraints need to be processed in the order they were created. + * rsc_order_first() and order_resource_actions_after() require the relevant + * actions to already exist in some cases, but rsc_order_first() will create + * the 'first' action in certain cases. Thus calling rsc_order_first() can + * change the behavior of later-created orderings. + * + * Also, g_list_append() should be avoided for performance reasons, so we + * prepend orderings when creating them and reverse the list here. + * + * @TODO This is brittle and should be carefully redesigned so that the + * order of creation doesn't matter, and the reverse becomes unneeded. + */ + data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); + + for (GList *gIter = data_set->ordering_constraints; + gIter != NULL; gIter = gIter->next) { + + pe__ordering_t *order = gIter->data; + pe_resource_t *rsc = order->lh_rsc; + + if (rsc != NULL) { + rsc_order_first(rsc, order, data_set); + continue; + } + + rsc = order->rh_rsc; + if (rsc != NULL) { + order_resource_actions_after(order->lh_action, rsc, order); + + } else { + crm_trace("Applying ordering constraint %d (non-resource actions)", + order->id); + order_actions(order->lh_action, order->rh_action, order->flags); + } + } + + g_list_foreach(data_set->actions, (GFunc) pcmk__block_colocation_dependents, + data_set); + + crm_trace("Ordering probes"); + pcmk__order_probes(data_set); + + crm_trace("Updating %d actions", g_list_length(data_set->actions)); + g_list_foreach(data_set->actions, + (GFunc) pcmk__update_action_for_orderings, data_set); + + pcmk__disable_invalid_orderings(data_set); +} + +/*! + * \internal + * \brief Order a given action after each action in a given list + * + * \param[in,out] after "After" action + * \param[in,out] list List of "before" actions + */ +void +pcmk__order_after_each(pe_action_t *after, GList *list) +{ + const char *after_desc = (after->task == NULL)? after->uuid : after->task; + + for (GList *iter = list; iter != NULL; iter = iter->next) { + pe_action_t *before = (pe_action_t *) iter->data; + const char *before_desc = before->task? before->task : before->uuid; + + crm_debug("Ordering %s on %s before %s on %s", + before_desc, pe__node_name(before->node), + after_desc, pe__node_name(after->node)); + order_actions(before, after, pe_order_optional); + } +} + +/*! 
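The function below chains a promotable clone's pseudo-actions so that a full restart demotes first and promotes last. A simplified linear view of the individual pairwise orderings it creates:

    demote (all) -> demoted -> stop (all) -> stopped
                 -> start (all) -> started -> promote

Each arrow is an optional ordering, so only the actions actually scheduled are sequenced.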
+ * \internal + * \brief Order promotions and demotions for restarts of a clone or bundle + * + * \param[in,out] rsc Clone or bundle to order + */ +void +pcmk__promotable_restart_ordering(pe_resource_t *rsc) +{ + // Order start and promote after all instances are stopped + pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, + pe_order_optional); + pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_PROMOTE, + pe_order_optional); + + // Order stop, start, and promote after all instances are demoted + pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP, + pe_order_optional); + pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_START, + pe_order_optional); + pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_PROMOTE, + pe_order_optional); + + // Order promote after all instances are started + pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE, + pe_order_optional); + + // Order demote after all instances are demoted + pcmk__order_resource_actions(rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, + pe_order_optional); +} diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c new file mode 100644 index 0000000..aefbf9a --- /dev/null +++ b/lib/pacemaker/pcmk_sched_primitive.c @@ -0,0 +1,1573 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdbool.h> + +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); +static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); +static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); +static void promote_resource(pe_resource_t *rsc, pe_node_t *node, + bool optional); +static void assert_role_error(pe_resource_t *rsc, pe_node_t *node, + bool optional); + +static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { + /* This array lists the immediate next role when transitioning from one role + * to a target role. For example, when going from Stopped to Promoted, the + * next role is Unpromoted, because the resource must be started before it + * can be promoted. The current state then becomes Started, which is fed + * into this array again, giving a next role of Promoted. 
+ * + * Current role Immediate next role Final target role + * ------------ ------------------- ----------------- + */ + /* Unknown */ { RSC_ROLE_UNKNOWN, /* Unknown */ + RSC_ROLE_STOPPED, /* Stopped */ + RSC_ROLE_STOPPED, /* Started */ + RSC_ROLE_STOPPED, /* Unpromoted */ + RSC_ROLE_STOPPED, /* Promoted */ + }, + /* Stopped */ { RSC_ROLE_STOPPED, /* Unknown */ + RSC_ROLE_STOPPED, /* Stopped */ + RSC_ROLE_STARTED, /* Started */ + RSC_ROLE_UNPROMOTED, /* Unpromoted */ + RSC_ROLE_UNPROMOTED, /* Promoted */ + }, + /* Started */ { RSC_ROLE_STOPPED, /* Unknown */ + RSC_ROLE_STOPPED, /* Stopped */ + RSC_ROLE_STARTED, /* Started */ + RSC_ROLE_UNPROMOTED, /* Unpromoted */ + RSC_ROLE_PROMOTED, /* Promoted */ + }, + /* Unpromoted */ { RSC_ROLE_STOPPED, /* Unknown */ + RSC_ROLE_STOPPED, /* Stopped */ + RSC_ROLE_STOPPED, /* Started */ + RSC_ROLE_UNPROMOTED, /* Unpromoted */ + RSC_ROLE_PROMOTED, /* Promoted */ + }, + /* Promoted */ { RSC_ROLE_STOPPED, /* Unknown */ + RSC_ROLE_UNPROMOTED, /* Stopped */ + RSC_ROLE_UNPROMOTED, /* Started */ + RSC_ROLE_UNPROMOTED, /* Unpromoted */ + RSC_ROLE_PROMOTED, /* Promoted */ + }, +}; + +/*! + * \internal + * \brief Function to schedule actions needed for a role change + * + * \param[in,out] rsc Resource whose role is changing + * \param[in,out] node Node where resource will be in its next role + * \param[in] optional Whether scheduled actions should be optional + */ +typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node, + bool optional); + +static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { + /* This array lists the function needed to transition directly from one role + * to another. NULL indicates that nothing is needed. + * + * Current role Transition function Next role + * ------------ ------------------- ---------- + */ + /* Unknown */ { assert_role_error, /* Unknown */ + stop_resource, /* Stopped */ + assert_role_error, /* Started */ + assert_role_error, /* Unpromoted */ + assert_role_error, /* Promoted */ + }, + /* Stopped */ { assert_role_error, /* Unknown */ + NULL, /* Stopped */ + start_resource, /* Started */ + start_resource, /* Unpromoted */ + assert_role_error, /* Promoted */ + }, + /* Started */ { assert_role_error, /* Unknown */ + stop_resource, /* Stopped */ + NULL, /* Started */ + NULL, /* Unpromoted */ + promote_resource, /* Promoted */ + }, + /* Unpromoted */ { assert_role_error, /* Unknown */ + stop_resource, /* Stopped */ + stop_resource, /* Started */ + NULL, /* Unpromoted */ + promote_resource, /* Promoted */ + }, + /* Promoted */ { assert_role_error, /* Unknown */ + demote_resource, /* Stopped */ + demote_resource, /* Started */ + demote_resource, /* Unpromoted */ + NULL, /* Promoted */ + }, +}; + +/*! + * \internal + * \brief Get a list of a resource's allowed nodes sorted by node weight + * + * \param[in] rsc Resource to check + * + * \return List of allowed nodes sorted by node weight + */ +static GList * +sorted_allowed_nodes(const pe_resource_t *rsc) +{ + if (rsc->allowed_nodes != NULL) { + GList *nodes = g_hash_table_get_values(rsc->allowed_nodes); + + if (nodes != NULL) { + return pcmk__sort_nodes(nodes, pe__current_node(rsc)); + } + } + return NULL; +} + +/*! 
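To make the two matrices concrete, consider taking a resource from Stopped to Promoted. Each step looks up the next role in rsc_state_matrix and the action in rsc_action_matrix (values copied from the tables above):

    rsc_state_matrix[RSC_ROLE_STOPPED][RSC_ROLE_PROMOTED]      // RSC_ROLE_UNPROMOTED
    rsc_action_matrix[RSC_ROLE_STOPPED][RSC_ROLE_UNPROMOTED]   // start_resource
    rsc_state_matrix[RSC_ROLE_UNPROMOTED][RSC_ROLE_PROMOTED]   // RSC_ROLE_PROMOTED
    rsc_action_matrix[RSC_ROLE_UNPROMOTED][RSC_ROLE_PROMOTED]  // promote_resource

So the scheduler starts the resource first and promotes it second.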
+ * \internal + * \brief Assign a resource to its best allowed node, if possible + * + * \param[in,out] rsc Resource to choose a node for + * \param[in] prefer If not NULL, prefer this node when all else equal + * + * \return true if \p rsc could be assigned to a node, otherwise false + */ +static bool +assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer) +{ + GList *nodes = NULL; + pe_node_t *chosen = NULL; + pe_node_t *best = NULL; + bool result = false; + const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc); + + if (prefer == NULL) { + prefer = most_free_node; + } + + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { + // We've already finished assignment of resources to nodes + return rsc->allocated_to != NULL; + } + + // Sort allowed nodes by weight + nodes = sorted_allowed_nodes(rsc); + if (nodes != NULL) { + best = (pe_node_t *) nodes->data; // First node has best score + } + + if ((prefer != NULL) && (nodes != NULL)) { + // Get the allowed node version of prefer + chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); + + if (chosen == NULL) { + pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", + pe__node_name(prefer), rsc->id); + + /* Favor the preferred node as long as its weight is at least as good as + * the best allowed node's. + * + * An alternative would be to favor the preferred node even if the best + * node is better, when the best node's weight is less than INFINITY. + */ + } else if (chosen->weight < best->weight) { + pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", + pe__node_name(chosen), rsc->id); + chosen = NULL; + + } else if (!pcmk__node_available(chosen, true, false)) { + pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", + pe__node_name(chosen), rsc->id); + chosen = NULL; + + } else { + pe_rsc_trace(rsc, + "Chose preferred node %s for %s (ignoring %d candidates)", + pe__node_name(chosen), rsc->id, g_list_length(nodes)); + } + } + + if ((chosen == NULL) && (best != NULL)) { + /* Either there is no preferred node, or the preferred node is not + * suitable, but another node is allowed to run the resource. + */ + + chosen = best; + + if (!pe_rsc_is_unique_clone(rsc->parent) + && (chosen->weight > 0) // Zero not acceptable + && pcmk__node_available(chosen, false, false)) { + /* If the resource is already running on a node, prefer that node if + * it is just as good as the chosen node. + * + * We don't do this for unique clone instances, because + * pcmk__assign_instances() has already assigned instances to their + * running nodes when appropriate, and if we get here, we don't want + * remaining unassigned instances to prefer a node that's already + * running another instance. + */ + pe_node_t *running = pe__current_node(rsc); + + if (running == NULL) { + // Nothing to do + + } else if (!pcmk__node_available(running, true, false)) { + pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", + rsc->id, pe__node_name(running)); + + } else { + int nodes_with_best_score = 1; + + for (GList *iter = nodes->next; iter; iter = iter->next) { + pe_node_t *allowed = (pe_node_t *) iter->data; + + if (allowed->weight != chosen->weight) { + // The nodes are sorted by weight, so no more are equal + break; + } + if (pe__same_node(allowed, running)) { + // Scores are equal, so prefer the current node + chosen = allowed; + } + nodes_with_best_score++; + } + + if (nodes_with_best_score > 1) { + do_crm_log(((chosen->weight >= INFINITY)? 
+                               LOG_WARNING : LOG_INFO),
+                               "Chose %s for %s from %d nodes with score %s",
+                               pe__node_name(chosen), rsc->id,
+                               nodes_with_best_score,
+                               pcmk_readable_score(chosen->weight));
+                }
+            }
+        }
+
+        pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
+                     pe__node_name(chosen), rsc->id, g_list_length(nodes));
+    }
+
+    result = pcmk__finalize_assignment(rsc, chosen, false);
+    g_list_free(nodes);
+    return result;
+}
+
+/*!
+ * \internal
+ * \brief Apply a "this with" colocation to a resource's allowed node scores
+ *
+ * \param[in,out] data       Colocation to apply
+ * \param[in,out] user_data  Resource being assigned
+ */
+static void
+apply_this_with(gpointer data, gpointer user_data)
+{
+    pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
+    pe_resource_t *rsc = (pe_resource_t *) user_data;
+
+    GHashTable *archive = NULL;
+    pe_resource_t *other = colocation->primary;
+
+    // In certain cases, we will need to revert the node scores
+    if ((colocation->dependent_role >= RSC_ROLE_PROMOTED)
+        || ((colocation->score < 0) && (colocation->score > -INFINITY))) {
+        archive = pcmk__copy_node_table(rsc->allowed_nodes);
+    }
+
+    if (pcmk_is_set(other->flags, pe_rsc_provisional)) {
+        pe_rsc_trace(rsc,
+                     "%s: Assigning colocation %s primary %s first "
+                     "(score=%d role=%s)",
+                     rsc->id, colocation->id, other->id,
+                     colocation->score, role2text(colocation->dependent_role));
+        other->cmds->assign(other, NULL);
+    }
+
+    // Apply the colocation score to this resource's allowed node scores
+    rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
+    if ((archive != NULL)
+        && !pcmk__any_node_available(rsc->allowed_nodes)) {
+        pe_rsc_info(rsc,
+                    "%s: Reverting scores from colocation with %s "
+                    "because no nodes allowed",
+                    rsc->id, other->id);
+        g_hash_table_destroy(rsc->allowed_nodes);
+        rsc->allowed_nodes = archive;
+        archive = NULL;
+    }
+    if (archive != NULL) {
+        g_hash_table_destroy(archive);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Update a Pacemaker Remote node once its connection has been assigned
+ *
+ * \param[in] connection  Connection resource that has been assigned
+ */
+static void
+remote_connection_assigned(const pe_resource_t *connection)
+{
+    pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
+                                          connection->id);
+
+    CRM_CHECK(remote_node != NULL, return);
+
+    if ((connection->allocated_to != NULL)
+        && (connection->next_role != RSC_ROLE_STOPPED)) {
+
+        crm_trace("Pacemaker Remote node %s will be online",
+                  remote_node->details->id);
+        remote_node->details->online = TRUE;
+        if (remote_node->details->unseen) {
+            // Avoid unnecessary fence, since we will attempt connection
+            remote_node->details->unclean = FALSE;
+        }
+
+    } else {
+        crm_trace("Pacemaker Remote node %s will be shut down "
+                  "(%sassigned connection's next role is %s)",
+                  remote_node->details->id,
+                  ((connection->allocated_to == NULL)? "un" : ""),
+                  role2text(connection->next_role));
+        remote_node->details->shutdown = TRUE;
+    }
+}
+
+/*!
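apply_this_with() handles the dependent side of colocations such as this sketch (illustrative IDs), where web is the dependent and db the primary:

    <rsc_colocation id="web-with-db" rsc="web" with-rsc="db" score="INFINITY"/>

A mandatory score folds the primary's assignment into the dependent's allowed node scores outright, while a finite negative score is applied tentatively and, as shown above, reverted from the archived table if it would leave the dependent with no allowed node.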
+ * \internal + * \brief Assign a primitive resource to a node + * + * \param[in,out] rsc Resource to assign to a node + * \param[in] prefer Node to prefer, if all else is equal + * + * \return Node that \p rsc is assigned to, if assigned entirely to one node + */ +pe_node_t * +pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer) +{ + GList *this_with_colocations = NULL; + GList *with_this_colocations = NULL; + GList *iter = NULL; + pcmk__colocation_t *colocation = NULL; + + CRM_ASSERT(rsc != NULL); + + // Never assign a child without parent being assigned first + if ((rsc->parent != NULL) + && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) { + pe_rsc_debug(rsc, "%s: Assigning parent %s first", + rsc->id, rsc->parent->id); + rsc->parent->cmds->assign(rsc->parent, prefer); + } + + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { + return rsc->allocated_to; // Assignment has already been done + } + + // Ensure we detect assignment loops + if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { + pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id); + return NULL; + } + pe__set_resource_flags(rsc, pe_rsc_allocating); + + pe__show_node_weights(true, rsc, "Pre-assignment", rsc->allowed_nodes, + rsc->cluster); + + this_with_colocations = pcmk__this_with_colocations(rsc); + with_this_colocations = pcmk__with_this_colocations(rsc); + + // Apply mandatory colocations first, to satisfy as many as possible + for (iter = this_with_colocations; iter != NULL; iter = iter->next) { + colocation = iter->data; + if ((colocation->score <= -CRM_SCORE_INFINITY) + || (colocation->score >= CRM_SCORE_INFINITY)) { + apply_this_with(iter->data, rsc); + } + } + for (iter = with_this_colocations; iter != NULL; iter = iter->next) { + colocation = iter->data; + if ((colocation->score <= -CRM_SCORE_INFINITY) + || (colocation->score >= CRM_SCORE_INFINITY)) { + pcmk__add_dependent_scores(iter->data, rsc); + } + } + + pe__show_node_weights(true, rsc, "Mandatory-colocations", + rsc->allowed_nodes, rsc->cluster); + + // Then apply optional colocations + for (iter = this_with_colocations; iter != NULL; iter = iter->next) { + colocation = iter->data; + + if ((colocation->score > -CRM_SCORE_INFINITY) + && (colocation->score < CRM_SCORE_INFINITY)) { + apply_this_with(iter->data, rsc); + } + } + for (iter = with_this_colocations; iter != NULL; iter = iter->next) { + colocation = iter->data; + + if ((colocation->score > -CRM_SCORE_INFINITY) + && (colocation->score < CRM_SCORE_INFINITY)) { + pcmk__add_dependent_scores(iter->data, rsc); + } + } + + g_list_free(this_with_colocations); + g_list_free(with_this_colocations); + + if (rsc->next_role == RSC_ROLE_STOPPED) { + pe_rsc_trace(rsc, + "Banning %s from all nodes because it will be stopped", + rsc->id); + resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, + rsc->cluster); + + } else if ((rsc->next_role > rsc->role) + && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum) + && (rsc->cluster->no_quorum_policy == no_quorum_freeze)) { + crm_notice("Resource %s cannot be elevated from %s to %s due to " + "no-quorum-policy=freeze", + rsc->id, role2text(rsc->role), role2text(rsc->next_role)); + pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze"); + } + + pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), + rsc, __func__, rsc->allowed_nodes, rsc->cluster); + + // Unmanage resource if fencing is enabled but no device is configured + if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled) + && 
!pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) { + pe__clear_resource_flags(rsc, pe_rsc_managed); + } + + if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { + // Unmanaged resources stay on their current node + const char *reason = NULL; + pe_node_t *assign_to = NULL; + + pe__set_next_role(rsc, rsc->role, "unmanaged"); + assign_to = pe__current_node(rsc); + if (assign_to == NULL) { + reason = "inactive"; + } else if (rsc->role == RSC_ROLE_PROMOTED) { + reason = "promoted"; + } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + reason = "failed"; + } else { + reason = "active"; + } + pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id, + (assign_to? assign_to->details->uname : "no node"), reason); + pcmk__finalize_assignment(rsc, assign_to, true); + + } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) { + pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id); + pcmk__finalize_assignment(rsc, NULL, true); + + } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional) + && assign_best_node(rsc, prefer)) { + // Assignment successful + + } else if (rsc->allocated_to == NULL) { + if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { + pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); + } else if (rsc->running_on != NULL) { + pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); + } + + } else { + pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, + pe__node_name(rsc->allocated_to)); + } + + pe__clear_resource_flags(rsc, pe_rsc_allocating); + + if (rsc->is_remote_node) { + remote_connection_assigned(rsc); + } + + return rsc->allocated_to; +} + +/*! + * \internal + * \brief Schedule actions to bring resource down and back to current role + * + * \param[in,out] rsc Resource to restart + * \param[in,out] current Node that resource should be brought down on + * \param[in] need_stop Whether the resource must be stopped + * \param[in] need_promote Whether the resource must be promoted + * + * \return Role that resource would have after scheduled actions are taken + */ +static void +schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current, + bool need_stop, bool need_promote) +{ + enum rsc_role_e role = rsc->role; + enum rsc_role_e next_role; + rsc_transition_fn fn = NULL; + + pe__set_resource_flags(rsc, pe_rsc_restarting); + + // Bring resource down to a stop on its current node + while (role != RSC_ROLE_STOPPED) { + next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; + pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s", + (need_stop? "required" : "optional"), rsc->id, + role2text(role), role2text(next_role)); + fn = rsc_action_matrix[role][next_role]; + if (fn == NULL) { + break; + } + fn(rsc, current, !need_stop); + role = next_role; + } + + // Bring resource up to its next role on its next node + while ((rsc->role <= rsc->next_role) && (role != rsc->role) + && !pcmk_is_set(rsc->flags, pe_rsc_block)) { + bool required = need_stop; + + next_role = rsc_state_matrix[role][rsc->role]; + if ((next_role == RSC_ROLE_PROMOTED) && need_promote) { + required = true; + } + pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s", + (required? "required" : "optional"), rsc->id, + role2text(role), role2text(next_role)); + fn = rsc_action_matrix[role][next_role]; + if (fn == NULL) { + break; + } + fn(rsc, rsc->allocated_to, !required); + role = next_role; + } + + pe__clear_resource_flags(rsc, pe_rsc_restarting); +} + +/*! 
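Several of the assignment overrides above are driven by cluster-wide options, which would appear in the CIB roughly as follows (illustrative IDs). With no-quorum-policy=freeze and quorum lost, a pending promotion is held back; with stop-all-resources=true, every resource is assigned to no node:

    <cluster_property_set id="cib-bootstrap-options">
      <nvpair id="opt-quorum" name="no-quorum-policy" value="freeze"/>
      <nvpair id="opt-stop-all" name="stop-all-resources" value="true"/>
    </cluster_property_set>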
+ * \internal + * \brief If a resource's next role is not explicitly specified, set a default + * + * \param[in,out] rsc Resource to set next role for + * + * \return "explicit" if next role was explicitly set, otherwise "implicit" + */ +static const char * +set_default_next_role(pe_resource_t *rsc) +{ + if (rsc->next_role != RSC_ROLE_UNKNOWN) { + return "explicit"; + } + + if (rsc->allocated_to == NULL) { + pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment"); + } else { + pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment"); + } + return "implicit"; +} + +/*! + * \internal + * \brief Create an action to represent an already pending start + * + * \param[in,out] rsc Resource to create start action for + */ +static void +create_pending_start(pe_resource_t *rsc) +{ + pe_action_t *start = NULL; + + pe_rsc_trace(rsc, + "Creating action for %s to represent already pending start", + rsc->id); + start = start_action(rsc, rsc->allocated_to, TRUE); + pe__set_action_flags(start, pe_action_print_always); +} + +/*! + * \internal + * \brief Schedule actions needed to take a resource to its next role + * + * \param[in,out] rsc Resource to schedule actions for + */ +static void +schedule_role_transition_actions(pe_resource_t *rsc) +{ + enum rsc_role_e role = rsc->role; + + while (role != rsc->next_role) { + enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role]; + rsc_transition_fn fn = NULL; + + pe_rsc_trace(rsc, + "Creating action to take %s from %s to %s (ending at %s)", + rsc->id, role2text(role), role2text(next_role), + role2text(rsc->next_role)); + fn = rsc_action_matrix[role][next_role]; + if (fn == NULL) { + break; + } + fn(rsc, rsc->allocated_to, false); + role = next_role; + } +} + +/*! + * \internal + * \brief Create all actions needed for a given primitive resource + * + * \param[in,out] rsc Primitive resource to create actions for + */ +void +pcmk__primitive_create_actions(pe_resource_t *rsc) +{ + bool need_stop = false; + bool need_promote = false; + bool is_moving = false; + bool allow_migrate = false; + bool multiply_active = false; + + pe_node_t *current = NULL; + unsigned int num_all_active = 0; + unsigned int num_clean_active = 0; + const char *next_role_source = NULL; + + CRM_ASSERT(rsc != NULL); + + next_role_source = set_default_next_role(rsc); + pe_rsc_trace(rsc, + "Creating all actions for %s transition from %s to %s " + "(%s) on %s", + rsc->id, role2text(rsc->role), role2text(rsc->next_role), + next_role_source, pe__node_name(rsc->allocated_to)); + + current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active); + + g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration, + rsc); + + if ((current != NULL) && (rsc->allocated_to != NULL) + && (current->details != rsc->allocated_to->details) + && (rsc->next_role >= RSC_ROLE_STARTED)) { + + pe_rsc_trace(rsc, "Moving %s from %s to %s", + rsc->id, pe__node_name(current), + pe__node_name(rsc->allocated_to)); + is_moving = true; + allow_migrate = pcmk__rsc_can_migrate(rsc, current); + + // This is needed even if migrating (though I'm not sure why ...) 
+ need_stop = true; + } + + // Check whether resource is partially migrated and/or multiply active + if ((rsc->partial_migration_source != NULL) + && (rsc->partial_migration_target != NULL) + && allow_migrate && (num_all_active == 2) + && pe__same_node(current, rsc->partial_migration_source) + && pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) { + /* A partial migration is in progress, and the migration target remains + * the same as when the migration began. + */ + pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue", + rsc->id, pe__node_name(rsc->partial_migration_source), + pe__node_name(rsc->partial_migration_target)); + + } else if ((rsc->partial_migration_source != NULL) + || (rsc->partial_migration_target != NULL)) { + // A partial migration is in progress but can't be continued + + if (num_all_active > 2) { + // The resource is migrating *and* multiply active! + crm_notice("Forcing recovery of %s because it is migrating " + "from %s to %s and possibly active elsewhere", + rsc->id, pe__node_name(rsc->partial_migration_source), + pe__node_name(rsc->partial_migration_target)); + } else { + // The migration source or target isn't available + crm_notice("Forcing recovery of %s because it can no longer " + "migrate from %s to %s", + rsc->id, pe__node_name(rsc->partial_migration_source), + pe__node_name(rsc->partial_migration_target)); + } + need_stop = true; + rsc->partial_migration_source = rsc->partial_migration_target = NULL; + allow_migrate = false; + + } else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) { + multiply_active = (num_all_active > 1); + } else { + /* If a resource has "requires" set to nothing or quorum, don't consider + * it active on unclean nodes (similar to how all resources behave when + * stonith-enabled is false). We can start such resources elsewhere + * before fencing completes, and if we considered the resource active on + * the failed node, we would attempt recovery for being active on + * multiple nodes. 
+ */ + multiply_active = (num_clean_active > 1); + } + + if (multiply_active) { + const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + + // Resource was (possibly) incorrectly multiply active + pe_proc_err("%s resource %s might be active on %u nodes (%s)", + pcmk__s(class, "Untyped"), rsc->id, num_all_active, + recovery2text(rsc->recovery_type)); + crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ" + "#Resource_is_Too_Active for more information"); + + switch (rsc->recovery_type) { + case recovery_stop_start: + need_stop = true; + break; + case recovery_stop_unexpected: + need_stop = true; // stop_resource() will skip expected node + pe__set_resource_flags(rsc, pe_rsc_stop_unexpected); + break; + default: + break; + } + + } else { + pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected); + } + + if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { + create_pending_start(rsc); + } + + if (is_moving) { + // Remaining tests are only for resources staying where they are + + } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + if (pcmk_is_set(rsc->flags, pe_rsc_stop)) { + need_stop = true; + pe_rsc_trace(rsc, "Recovering %s", rsc->id); + } else { + pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); + if (rsc->next_role == RSC_ROLE_PROMOTED) { + need_promote = true; + } + } + + } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) { + pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id); + need_stop = true; + + } else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL) + && (rsc->allocated_to != NULL)) { + pe_action_t *start = NULL; + + pe_rsc_trace(rsc, "Creating start action for promoted resource %s", + rsc->id); + start = start_action(rsc, rsc->allocated_to, TRUE); + if (!pcmk_is_set(start->flags, pe_action_optional)) { + // Recovery of a promoted resource + pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id); + need_stop = true; + } + } + + // Create any actions needed to bring resource down and back up to same role + schedule_restart_actions(rsc, current, need_stop, need_promote); + + // Create any actions needed to take resource from this role to the next + schedule_role_transition_actions(rsc); + + pcmk__create_recurring_actions(rsc); + + if (allow_migrate) { + pcmk__create_migration_actions(rsc, current); + } +} + +/*! + * \internal + * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes + * + * \param[in] rsc Resource to check + */ +static void +rsc_avoids_remote_nodes(const pe_resource_t *rsc) +{ + GHashTableIter iter; + pe_node_t *node = NULL; + + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (node->details->remote_rsc != NULL) { + node->weight = -INFINITY; + } + } +} + +/*! + * \internal + * \brief Return allowed nodes as (possibly sorted) list + * + * Convert a resource's hash table of allowed nodes to a list. If printing to + * stdout, sort the list, to keep action ID numbers consistent for regression + * test output (while avoiding the performance hit on a live cluster). + * + * \param[in] rsc Resource to check for allowed nodes + * + * \return List of resource's allowed nodes + * \note Callers should take care not to rely on the list being sorted. 
+ */ +static GList * +allowed_nodes_as_list(const pe_resource_t *rsc) +{ + GList *allowed_nodes = NULL; + + if (rsc->allowed_nodes) { + allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes); + } + + if (!pcmk__is_daemon) { + allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name); + } + + return allowed_nodes; +} + +/*! + * \internal + * \brief Create implicit constraints needed for a primitive resource + * + * \param[in,out] rsc Primitive resource to create implicit constraints for + */ +void +pcmk__primitive_internal_constraints(pe_resource_t *rsc) +{ + GList *allowed_nodes = NULL; + bool check_unfencing = false; + bool check_utilization = false; + + CRM_ASSERT(rsc != NULL); + + if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { + pe_rsc_trace(rsc, + "Skipping implicit constraints for unmanaged resource %s", + rsc->id); + return; + } + + // Whether resource requires unfencing + check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device) + && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing) + && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing); + + // Whether a non-default placement strategy is used + check_utilization = (g_hash_table_size(rsc->utilization) > 0) + && !pcmk__str_eq(rsc->cluster->placement_strategy, + "default", pcmk__str_casei); + + // Order stops before starts (i.e. restart) + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, + pe_order_optional|pe_order_implies_then|pe_order_restart, + rsc->cluster); + + // Promotable ordering: demote before stop, start before promote + if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags, + pe_rsc_promotable) + || (rsc->role > RSC_ROLE_UNPROMOTED)) { + + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, + pe_order_promoted_implies_first, rsc->cluster); + + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, + rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL, + pe_order_runnable_left, rsc->cluster); + } + + // Don't clear resource history if probing on same node + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0), + NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), + NULL, pe_order_same_node|pe_order_then_cancels_first, + rsc->cluster); + + // Certain checks need allowed nodes + if (check_unfencing || check_utilization || (rsc->container != NULL)) { + allowed_nodes = allowed_nodes_as_list(rsc); + } + + if (check_unfencing) { + g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc); + } + + if (check_utilization) { + pcmk__create_utilization_constraints(rsc, allowed_nodes); + } + + if (rsc->container != NULL) { + pe_resource_t *remote_rsc = NULL; + + if (rsc->is_remote_node) { + // rsc is the implicit remote connection for a guest or bundle node + + /* Guest resources are not allowed to run on Pacemaker Remote nodes, + * to avoid nesting remotes. However, bundles are allowed. + */ + if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { + rsc_avoids_remote_nodes(rsc->container); + } + + /* If someone cleans up a guest or bundle node's container, we will + * likely schedule a (re-)probe of the container and recovery of the + * connection. Order the connection stop after the container probe, + * so that if we detect the container running, we will trigger a new + * transition and avoid the unnecessary recovery. 
+ */ + pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc, + RSC_STOP, pe_order_optional); + + /* A user can specify that a resource must start on a Pacemaker Remote + * node by explicitly configuring it with the container=NODENAME + * meta-attribute. This is of questionable merit, since location + * constraints can accomplish the same thing. But we support it, so here + * we check whether a resource (that is not itself a remote connection) + * has container set to a remote node or guest node resource. + */ + } else if (rsc->container->is_remote_node) { + remote_rsc = rsc->container; + } else { + remote_rsc = pe__resource_contains_guest_node(rsc->cluster, + rsc->container); + } + + if (remote_rsc != NULL) { + /* Force the resource on the Pacemaker Remote node instead of + * colocating the resource with the container resource. + */ + for (GList *item = allowed_nodes; item; item = item->next) { + pe_node_t *node = item->data; + + if (node->details->remote_rsc != remote_rsc) { + node->weight = -INFINITY; + } + } + + } else { + /* This resource is either a filler for a container that does NOT + * represent a Pacemaker Remote node, or a Pacemaker Remote + * connection resource for a guest node or bundle. + */ + int score; + + crm_trace("Order and colocate %s relative to its container %s", + rsc->id, rsc->container->id); + + pcmk__new_ordering(rsc->container, + pcmk__op_key(rsc->container->id, RSC_START, 0), + NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), + NULL, + pe_order_implies_then|pe_order_runnable_left, + rsc->cluster); + + pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, + rsc->container, + pcmk__op_key(rsc->container->id, RSC_STOP, 0), + NULL, pe_order_implies_first, rsc->cluster); + + if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { + score = 10000; /* Highly preferred but not essential */ + } else { + score = INFINITY; /* Force them to run on the same host */ + } + pcmk__new_colocation("resource-with-container", NULL, score, rsc, + rsc->container, NULL, NULL, true, + rsc->cluster); + } + } + + if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { + /* Remote connections and fencing devices are not allowed to run on + * Pacemaker Remote nodes + */ + rsc_avoids_remote_nodes(rsc); + } + g_list_free(allowed_nodes); +} + +/*! + * \internal + * \brief Apply a colocation's score to node weights or resource priority + * + * Given a colocation constraint, apply its score to the dependent's + * allowed node weights (if we are still placing resources) or priority (if + * we are choosing promotable clone instance roles). + * + * \param[in,out] dependent Dependent resource in colocation + * \param[in] primary Primary resource in colocation + * \param[in] colocation Colocation constraint to apply + * \param[in] for_dependent true if called on behalf of dependent + */ +void +pcmk__primitive_apply_coloc_score(pe_resource_t *dependent, + const pe_resource_t *primary, + const pcmk__colocation_t *colocation, + bool for_dependent) +{ + enum pcmk__coloc_affects filter_results; + + CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), + return); + + if (for_dependent) { + // Always process on behalf of primary resource + primary->cmds->apply_coloc_score(dependent, primary, colocation, false); + return; + } + + filter_results = pcmk__colocation_affects(dependent, primary, colocation, + false); + pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)", + ((colocation->score > 0)? 
"Colocating" : "Anti-colocating"), + dependent->id, primary->id, colocation->id, colocation->score, + filter_results); + + switch (filter_results) { + case pcmk__coloc_affects_role: + pcmk__apply_coloc_to_priority(dependent, primary, colocation); + break; + case pcmk__coloc_affects_location: + pcmk__apply_coloc_to_weights(dependent, primary, colocation); + break; + default: // pcmk__coloc_affects_nothing + return; + } +} + +/* Primitive implementation of + * resource_alloc_functions_t:with_this_colocations() + */ +void +pcmk__with_primitive_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) +{ + // Primitives don't have children, so rsc should also be orig_rsc + CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native) + && (rsc == orig_rsc) && (list != NULL), + return); + + // Add primitive's own colocations plus any relevant ones from parent + pcmk__add_with_this_list(list, rsc->rsc_cons_lhs); + if (rsc->parent != NULL) { + rsc->parent->cmds->with_this_colocations(rsc->parent, rsc, list); + } +} + +/* Primitive implementation of + * resource_alloc_functions_t:this_with_colocations() + */ +void +pcmk__primitive_with_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) +{ + // Primitives don't have children, so rsc should also be orig_rsc + CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native) + && (rsc == orig_rsc) && (list != NULL), + return); + + // Add primitive's own colocations plus any relevant ones from parent + pcmk__add_this_with_list(list, rsc->rsc_cons); + if (rsc->parent != NULL) { + rsc->parent->cmds->this_with_colocations(rsc->parent, rsc, list); + } +} + +/*! + * \internal + * \brief Return action flags for a given primitive resource action + * + * \param[in,out] action Action to get flags for + * \param[in] node If not NULL, limit effects to this node (ignored) + * + * \return Flags appropriate to \p action on \p node + */ +enum pe_action_flags +pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node) +{ + CRM_ASSERT(action != NULL); + return action->flags; +} + +/*! + * \internal + * \brief Check whether a node is a multiply active resource's expected node + * + * \param[in] rsc Resource to check + * \param[in] node Node to check + * + * \return true if \p rsc is multiply active with multiple-active set to + * stop_unexpected, and \p node is the node where it will remain active + * \note This assumes that the resource's next role cannot be changed to stopped + * after this is called, which should be reasonable if status has already + * been unpacked and resources have been assigned to nodes. + */ +static bool +is_expected_node(const pe_resource_t *rsc, const pe_node_t *node) +{ + return pcmk_all_flags_set(rsc->flags, + pe_rsc_stop_unexpected|pe_rsc_restarting) + && (rsc->next_role > RSC_ROLE_STOPPED) + && pe__same_node(rsc->allocated_to, node); +} + +/*! 
+ * \internal + * \brief Schedule actions needed to stop a resource wherever it is active + * + * \param[in,out] rsc Resource being stopped + * \param[in] node Node where resource is being stopped (ignored) + * \param[in] optional Whether actions should be optional + */ +static void +stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) +{ + for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { + pe_node_t *current = (pe_node_t *) iter->data; + pe_action_t *stop = NULL; + + if (is_expected_node(rsc, current)) { + /* We are scheduling restart actions for a multiply active resource + * with multiple-active=stop_unexpected, and this is where it should + * not be stopped. + */ + pe_rsc_trace(rsc, + "Skipping stop of multiply active resource %s " + "on expected node %s", + rsc->id, pe__node_name(current)); + continue; + } + + if (rsc->partial_migration_target != NULL) { + // Continue migration if node originally was and remains target + if (pe__same_node(current, rsc->partial_migration_target) + && pe__same_node(current, rsc->allocated_to)) { + pe_rsc_trace(rsc, + "Skipping stop of %s on %s " + "because partial migration there will continue", + rsc->id, pe__node_name(current)); + continue; + } else { + pe_rsc_trace(rsc, + "Forcing stop of %s on %s " + "because migration target changed", + rsc->id, pe__node_name(current)); + optional = false; + } + } + + pe_rsc_trace(rsc, "Scheduling stop of %s on %s", + rsc->id, pe__node_name(current)); + stop = stop_action(rsc, current, optional); + + if (rsc->allocated_to == NULL) { + pe_action_set_reason(stop, "node availability", true); + } else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting + |pe_rsc_stop_unexpected)) { + /* We are stopping a multiply active resource on a node that is + * not its expected node, and we are still scheduling restart + * actions, so the stop is for being multiply active. + */ + pe_action_set_reason(stop, "being multiply active", true); + } + + if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { + pe__clear_action_flags(stop, pe_action_runnable); + } + + if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) { + pcmk__schedule_cleanup(rsc, current, optional); + } + + if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { + pe_action_t *unfence = pe_fence_op(current, "on", true, NULL, false, + rsc->cluster); + + order_actions(stop, unfence, pe_order_implies_first); + if (!pcmk__node_unfenced(current)) { + pe_proc_err("Stopping %s until %s can be unfenced", + rsc->id, pe__node_name(current)); + } + } + } +} + +/*! + * \internal + * \brief Schedule actions needed to start a resource on a node + * + * \param[in,out] rsc Resource being started + * \param[in,out] node Node where resource should be started + * \param[in] optional Whether actions should be optional + */ +static void +start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) +{ + pe_action_t *start = NULL; + + CRM_ASSERT(node != NULL); + + pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)", + (optional? "optional" : "required"), rsc->id, + pe__node_name(node), node->weight); + start = start_action(rsc, node, TRUE); + + pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then); + + if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) { + pe__clear_action_flags(start, pe_action_optional); + } + + if (is_expected_node(rsc, node)) { + /* This could be a problem if the start becomes necessary for other + * reasons later. 
+         */
+        pe_rsc_trace(rsc,
+                     "Start of multiply active resource %s "
+                     "on expected node %s will be a pseudo-action",
+                     rsc->id, pe__node_name(node));
+        pe__set_action_flags(start, pe_action_pseudo);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions needed to promote a resource on a node
+ *
+ * \param[in,out] rsc       Resource being promoted
+ * \param[in]     node      Node where resource should be promoted
+ * \param[in]     optional  Whether actions should be optional
+ */
+static void
+promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+    GList *iter = NULL;
+    GList *action_list = NULL;
+    bool runnable = true;
+
+    CRM_ASSERT(node != NULL);
+
+    // Any start must be runnable for promotion to be runnable
+    action_list = pe__resource_actions(rsc, node, RSC_START, true);
+    for (iter = action_list; iter != NULL; iter = iter->next) {
+        pe_action_t *start = (pe_action_t *) iter->data;
+
+        if (!pcmk_is_set(start->flags, pe_action_runnable)) {
+            runnable = false;
+        }
+    }
+    g_list_free(action_list);
+
+    if (runnable) {
+        pe_action_t *promote = promote_action(rsc, node, optional);
+
+        pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
+                     (optional? "optional" : "required"), rsc->id,
+                     pe__node_name(node));
+
+        if (is_expected_node(rsc, node)) {
+            /* This could be a problem if the promote becomes necessary for
+             * other reasons later.
+             */
+            pe_rsc_trace(rsc,
+                         "Promotion of multiply active resource %s "
+                         "on expected node %s will be a pseudo-action",
+                         rsc->id, pe__node_name(node));
+            pe__set_action_flags(promote, pe_action_pseudo);
+        }
+    } else {
+        pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
+                     rsc->id, pe__node_name(node));
+        action_list = pe__resource_actions(rsc, node, RSC_PROMOTE, true);
+        for (iter = action_list; iter != NULL; iter = iter->next) {
+            pe_action_t *promote = (pe_action_t *) iter->data;
+
+            pe__clear_action_flags(promote, pe_action_runnable);
+        }
+        g_list_free(action_list);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Schedule actions needed to demote a resource wherever it is active
+ *
+ * \param[in,out] rsc       Resource being demoted
+ * \param[in]     node      Node where resource should be demoted (ignored)
+ * \param[in]     optional  Whether actions should be optional
+ */
+static void
+demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+    /* Since this will only be called for a primitive (possibly as an instance
+     * of a collective resource), the resource is multiply active if it is
+     * running on more than one node, so we want to demote on all of them as
+     * part of recovery, regardless of which one is the desired node.
+     */
+    for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
+        pe_node_t *current = (pe_node_t *) iter->data;
+
+        if (is_expected_node(rsc, current)) {
+            pe_rsc_trace(rsc,
+                         "Skipping demote of multiply active resource %s "
+                         "on expected node %s",
+                         rsc->id, pe__node_name(current));
+        } else {
+            pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
+                         (optional? "optional" : "required"), rsc->id,
+                         pe__node_name(current));
+            demote_action(rsc, current, optional);
+        }
+    }
+}
+
+static void
+assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional)
+{
+    CRM_ASSERT(false);
+}
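
A minimal stand-alone sketch of the table-driven walk performed by schedule_role_transition_actions() earlier in this file: the stop/start/promote/demote helpers above are the entries such a table dispatches to, with assert_role_error() guarding the impossible cells. The enum, table contents, and names below are simplified and hypothetical, not Pacemaker's real rsc_state_matrix/rsc_action_matrix:

    #include <stdio.h>

    /* Hypothetical stand-ins for Pacemaker's role enum and transition
     * tables; the real matrices cover more roles and route invalid
     * transitions to a guard like assert_role_error() above.
     */
    enum role { STOPPED, STARTED, UNPROMOTED, PROMOTED, N_ROLES };

    static const char *role_name[N_ROLES] = {
        "Stopped", "Started", "Unpromoted", "Promoted"
    };

    /* next_step[current][target]: next intermediate role on the way */
    static const enum role next_step[N_ROLES][N_ROLES] = {
        /* from Stopped    */ { STOPPED,    STARTED,    UNPROMOTED, UNPROMOTED },
        /* from Started    */ { STOPPED,    STARTED,    UNPROMOTED, UNPROMOTED },
        /* from Unpromoted */ { STOPPED,    STOPPED,    UNPROMOTED, PROMOTED   },
        /* from Promoted   */ { UNPROMOTED, UNPROMOTED, UNPROMOTED, PROMOTED   },
    };

    int main(void)
    {
        enum role role = PROMOTED;          // current role
        const enum role target = STOPPED;   // desired next role

        /* Walk one step at a time, as schedule_role_transition_actions()
         * does, scheduling one action (demote, stop, ...) per step.
         */
        while (role != target) {
            enum role next = next_step[role][target];

            printf("schedule action: %s -> %s\n",
                   role_name[role], role_name[next]);
            role = next;
        }
        return 0;
    }

Compiled as-is, the Promoted-to-Stopped walk prints a demote step followed by a stop step, matching the demote-before-stop ordering enforced by this scheduler.

+/*!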
+ * \internal + * \brief Schedule cleanup of a resource + * + * \param[in,out] rsc Resource to clean up + * \param[in] node Node to clean up on + * \param[in] optional Whether clean-up should be optional + */ +void +pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional) +{ + /* If the cleanup is required, its orderings are optional, because they're + * relevant only if both actions are required. Conversely, if the cleanup is + * optional, the orderings make the then action required if the first action + * becomes required. + */ + uint32_t flag = optional? pe_order_implies_then : pe_order_optional; + + CRM_CHECK((rsc != NULL) && (node != NULL), return); + + if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed", + rsc->id, pe__node_name(node)); + return; + } + + if (node->details->unclean || !node->details->online) { + pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable", + rsc->id, pe__node_name(node)); + return; + } + + crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node)); + delete_action(rsc, node, optional); + + // stop -> clean-up -> start + pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, flag); + pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, flag); +} + +/*! + * \internal + * \brief Add primitive meta-attributes relevant to graph actions to XML + * + * \param[in] rsc Primitive resource whose meta-attributes should be added + * \param[in,out] xml Transition graph action attributes XML to add to + */ +void +pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml) +{ + char *name = NULL; + char *value = NULL; + const pe_resource_t *parent = NULL; + + CRM_ASSERT((rsc != NULL) && (xml != NULL)); + + /* Clone instance numbers get set internally as meta-attributes, and are + * needed in the transition graph (for example, to tell unique clone + * instances apart). + */ + value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); + if (value != NULL) { + name = crm_meta_name(XML_RSC_ATTR_INCARNATION); + crm_xml_add(xml, name, value); + free(name); + } + + // Not sure if this one is really needed ... + value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); + if (value != NULL) { + name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); + crm_xml_add(xml, name, value); + free(name); + } + + /* The container meta-attribute can be set on the primitive itself or one of + * its parents (for example, a group inside a container resource), so check + * them all, and keep the highest one found. + */ + for (parent = rsc; parent != NULL; parent = parent->parent) { + if (parent->container != NULL) { + crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER, + parent->container->id); + } + } + + /* Bundle replica children will get their external-ip set internally as a + * meta-attribute. The graph action needs it, but under a different naming + * convention than other meta-attributes. 
+ */ + value = g_hash_table_lookup(rsc->meta, "external-ip"); + if (value != NULL) { + crm_xml_add(xml, "pcmk_external_ip", value); + } +} + +// Primitive implementation of resource_alloc_functions_t:add_utilization() +void +pcmk__primitive_add_utilization(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList *all_rscs, + GHashTable *utilization) +{ + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { + return; + } + + pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization", + orig_rsc->id, rsc->id); + pcmk__release_node_capacity(utilization, rsc); +} + +/*! + * \internal + * \brief Get epoch time of node's shutdown attribute (or now if none) + * + * \param[in,out] node Node to check + * + * \return Epoch time corresponding to shutdown attribute if set or now if not + */ +static time_t +shutdown_time(pe_node_t *node) +{ + const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); + time_t result = 0; + + if (shutdown != NULL) { + long long result_ll; + + if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) { + result = (time_t) result_ll; + } + } + return (result == 0)? get_effective_time(node->details->data_set) : result; +} + +/*! + * \internal + * \brief Ban a resource from a node if it's not locked to the node + * + * \param[in] data Node to check + * \param[in,out] user_data Resource to check + */ +static void +ban_if_not_locked(gpointer data, gpointer user_data) +{ + const pe_node_t *node = (const pe_node_t *) data; + pe_resource_t *rsc = (pe_resource_t *) user_data; + + if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) { + resource_location(rsc, node, -CRM_SCORE_INFINITY, + XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster); + } +} + +// Primitive implementation of resource_alloc_functions_t:shutdown_lock() +void +pcmk__primitive_shutdown_lock(pe_resource_t *rsc) +{ + const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + + // Fence devices and remote connections can't be locked + if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) + || pe__resource_is_remote_conn(rsc, rsc->cluster)) { + return; + } + + if (rsc->lock_node != NULL) { + // The lock was obtained from resource history + + if (rsc->running_on != NULL) { + /* The resource was started elsewhere even though it is now + * considered locked. This shouldn't be possible, but as a + * failsafe, we don't want to disturb the resource now. 
+             */
+            pe_rsc_info(rsc,
+                        "Cancelling shutdown lock because %s is already active",
+                        rsc->id);
+            pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
+            rsc->lock_node = NULL;
+            rsc->lock_time = 0;
+        }
+
+    // Only a resource active on exactly one node can be locked
+    } else if (pcmk__list_of_1(rsc->running_on)) {
+        pe_node_t *node = rsc->running_on->data;
+
+        if (node->details->shutdown) {
+            if (node->details->unclean) {
+                pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
+                             rsc->id, pe__node_name(node));
+            } else {
+                rsc->lock_node = node;
+                rsc->lock_time = shutdown_time(node);
+            }
+        }
+    }
+
+    if (rsc->lock_node == NULL) {
+        // No lock needed
+        return;
+    }
+
+    if (rsc->cluster->shutdown_lock > 0) {
+        time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
+
+        pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
+                    rsc->id, pe__node_name(rsc->lock_node),
+                    (long long) lock_expiration);
+        pe__update_recheck_time(++lock_expiration, rsc->cluster);
+    } else {
+        pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
+                    rsc->id, pe__node_name(rsc->lock_node));
+    }
+
+    // If resource is locked to one node, ban it from all other nodes
+    g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
+}
diff --git a/lib/pacemaker/pcmk_sched_probes.c b/lib/pacemaker/pcmk_sched_probes.c
new file mode 100644
index 0000000..919e523
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_probes.c
@@ -0,0 +1,896 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+/*!
+ * \internal
+ * \brief Add the expected result to a newly created probe
+ *
+ * \param[in,out] probe  Probe action to add expected result to
+ * \param[in]     rsc    Resource that probe is for
+ * \param[in]     node   Node that probe will run on
+ */
+static void
+add_expected_result(pe_action_t *probe, const pe_resource_t *rsc,
+                    const pe_node_t *node)
+{
+    // Check whether resource is currently active on node
+    pe_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
+
+    // The expected result is what we think the resource's current state is
+    if (running == NULL) {
+        pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
+
+    } else if (rsc->role == RSC_ROLE_PROMOTED) {
+        pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
+    }
+}
+
+/*!
+ * \internal
+ * \brief Create any needed probes on a node for a list of resources
+ *
+ * \param[in,out] rscs  List of resources to create probes for
+ * \param[in,out] node  Node to create probes on
+ *
+ * \return true if any probe was created, otherwise false
+ */
+bool
+pcmk__probe_resource_list(GList *rscs, pe_node_t *node)
+{
+    bool any_created = false;

+    for (GList *iter = rscs; iter != NULL; iter = iter->next) {
+        pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+        if (rsc->cmds->create_probe(rsc, node)) {
+            any_created = true;
+        }
+    }
+    return any_created;
+}
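
pcmk__probe_resource_list() illustrates the dispatch pattern used throughout this library: generic walkers call through a per-variant method table (rsc->cmds, a resource_alloc_functions_t) so that primitives, groups, clones, and bundles each supply their own behavior. Below is a stand-alone sketch of that pattern with simplified, hypothetical types; resource_methods and the other names are illustrative only, not Pacemaker APIs:

    #include <stdbool.h>
    #include <stdio.h>

    struct resource;

    /* Hypothetical, simplified per-variant method table; the real table
     * behind rsc->cmds carries many more callbacks than create_probe.
     */
    struct resource_methods {
        bool (*create_probe)(struct resource *rsc, const char *node);
    };

    struct resource {
        const char *id;
        const struct resource_methods *cmds;
    };

    // A primitive's implementation of the create_probe method
    static bool
    primitive_create_probe(struct resource *rsc, const char *node)
    {
        printf("scheduling probe of %s on %s\n", rsc->id, node);
        return true;
    }

    static const struct resource_methods primitive_methods = {
        .create_probe = primitive_create_probe,
    };

    // Same shape as pcmk__probe_resource_list(): OR together the results
    static bool
    probe_resource_list(struct resource *rscs, int n, const char *node)
    {
        bool any_created = false;

        for (int i = 0; i < n; i++) {
            if (rscs[i].cmds->create_probe(&rscs[i], node)) {
                any_created = true;
            }
        }
        return any_created;
    }

    int main(void)
    {
        struct resource rscs[] = {
            { "rsc1", &primitive_methods },
            { "rsc2", &primitive_methods },
        };

        if (probe_resource_list(rscs, 2, "node1")) {
            printf("at least one probe was created\n");
        }
        return 0;
    }

The OR-fold mirrors the function above: creating a probe for any one resource is enough to report that probes were scheduled on the node.

+/*!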
+ * \internal + * \brief Order one resource's start after another's start-up probe + * + * \param[in,out] rsc1 Resource that might get start-up probe + * \param[in] rsc2 Resource that might be started + */ +static void +probe_then_start(pe_resource_t *rsc1, pe_resource_t *rsc2) +{ + if ((rsc1->allocated_to != NULL) + && (g_hash_table_lookup(rsc1->known_on, + rsc1->allocated_to->details->id) == NULL)) { + + pcmk__new_ordering(rsc1, pcmk__op_key(rsc1->id, RSC_STATUS, 0), NULL, + rsc2, pcmk__op_key(rsc2->id, RSC_START, 0), NULL, + pe_order_optional, rsc1->cluster); + } +} + +/*! + * \internal + * \brief Check whether a guest resource will stop + * + * \param[in] node Guest node to check + * + * \return true if guest resource will likely stop, otherwise false + */ +static bool +guest_resource_will_stop(const pe_node_t *node) +{ + const pe_resource_t *guest_rsc = node->details->remote_rsc->container; + + /* Ideally, we'd check whether the guest has a required stop, but that + * information doesn't exist yet, so approximate it ... + */ + return node->details->remote_requires_reset + || node->details->unclean + || pcmk_is_set(guest_rsc->flags, pe_rsc_failed) + || (guest_rsc->next_role == RSC_ROLE_STOPPED) + + // Guest is moving + || ((guest_rsc->role > RSC_ROLE_STOPPED) + && (guest_rsc->allocated_to != NULL) + && (pe_find_node(guest_rsc->running_on, + guest_rsc->allocated_to->details->uname) == NULL)); +} + +/*! + * \internal + * \brief Create a probe action for a resource on a node + * + * \param[in,out] rsc Resource to create probe for + * \param[in,out] node Node to create probe on + * + * \return Newly created probe action + */ +static pe_action_t * +probe_action(pe_resource_t *rsc, pe_node_t *node) +{ + pe_action_t *probe = NULL; + char *key = pcmk__op_key(rsc->id, RSC_STATUS, 0); + + crm_debug("Scheduling probe of %s %s on %s", + role2text(rsc->role), rsc->id, pe__node_name(node)); + + probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, + rsc->cluster); + pe__clear_action_flags(probe, pe_action_optional); + + pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional); + add_expected_result(probe, rsc, node); + return probe; +} + +/*! 
+ * \internal
+ * \brief Schedule any probes needed for a resource on a node
+ *
+ * \param[in,out] rsc   Resource to create probe for
+ * \param[in,out] node  Node to create probe on
+ *
+ * \return true if any probe was created, otherwise false
+ */
+bool
+pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node)
+{
+    uint32_t flags = pe_order_optional;
+    pe_action_t *probe = NULL;
+    pe_node_t *allowed = NULL;
+    pe_resource_t *top = uber_parent(rsc);
+    const char *reason = NULL;
+
+    CRM_CHECK((rsc != NULL) && (node != NULL), return false);
+
+    if (!pcmk_is_set(rsc->cluster->flags, pe_flag_startup_probes)) {
+        reason = "start-up probes are disabled";
+        goto no_probe;
+    }
+
+    if (pe__is_guest_or_remote_node(node)) {
+        const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+
+        if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
+            reason = "Pacemaker Remote nodes cannot run stonith agents";
+            goto no_probe;
+
+        } else if (pe__is_guest_node(node)
+                   && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
+            reason = "guest nodes cannot run resources containing guest nodes";
+            goto no_probe;
+
+        } else if (rsc->is_remote_node) {
+            reason = "Pacemaker Remote nodes cannot host remote connections";
+            goto no_probe;
+        }
+    }
+
+    // If this is a collective resource, probes are created for its children
+    if (rsc->children != NULL) {
+        return pcmk__probe_resource_list(rsc->children, node);
+    }
+
+    if ((rsc->container != NULL) && !rsc->is_remote_node) {
+        reason = "resource is inside a container";
+        goto no_probe;
+
+    } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
+        reason = "resource is orphaned";
+        goto no_probe;
+
+    } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
+        reason = "resource state is already known";
+        goto no_probe;
+    }
+
+    allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
+
+    if (rsc->exclusive_discover || top->exclusive_discover) {
+        // Exclusive discovery is enabled ...
+
+        if (allowed == NULL) {
+            // ... but this node is not allowed to run the resource
+            reason = "resource has exclusive discovery but is not allowed "
+                     "on node";
+            goto no_probe;
+
+        } else if (allowed->rsc_discover_mode != pe_discover_exclusive) {
+            // ... but no constraint marks this node for discovery of resource
+            reason = "resource has exclusive discovery but is not enabled "
+                     "on node";
+            goto no_probe;
+        }
+    }
+
+    if (allowed == NULL) {
+        allowed = node;
+    }
+    if (allowed->rsc_discover_mode == pe_discover_never) {
+        reason = "node has discovery disabled";
+        goto no_probe;
+    }
+
+    if (pe__is_guest_node(node)) {
+        pe_resource_t *guest = node->details->remote_rsc->container;
+
+        if (guest->role == RSC_ROLE_STOPPED) {
+            // The guest is stopped, so we know no resource is active there
+            reason = "node's guest is stopped";
+            probe_then_start(guest, top);
+            goto no_probe;
+
+        } else if (guest_resource_will_stop(node)) {
+            reason = "node's guest will stop";
+
+            // Order resource start after guest stop (in case it's restarting)
+            pcmk__new_ordering(guest, pcmk__op_key(guest->id, RSC_STOP, 0),
+                               NULL, top, pcmk__op_key(top->id, RSC_START, 0),
+                               NULL, pe_order_optional, rsc->cluster);
+            goto no_probe;
+        }
+    }
+
+    // We've eliminated all cases where a probe is not needed, so now it is
+    probe = probe_action(rsc, node);
+
+    /* Below, we will order the probe relative to start or reload.
If this is a + * clone instance, the start or reload is for the entire clone rather than + * just the instance. Otherwise, the start or reload is for the resource + * itself. + */ + if (!pe_rsc_is_clone(top)) { + top = rsc; + } + + /* Prevent a start if the resource can't be probed, but don't cause the + * resource or entire clone to stop if already active. + */ + if (!pcmk_is_set(probe->flags, pe_action_runnable) + && (top->running_on == NULL)) { + pe__set_order_flags(flags, pe_order_runnable_left); + } + + // Start or reload after probing the resource + pcmk__new_ordering(rsc, NULL, probe, + top, pcmk__op_key(top->id, RSC_START, 0), NULL, + flags, rsc->cluster); + pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL, + pe_order_optional, rsc->cluster); + + return true; + +no_probe: + pe_rsc_trace(rsc, + "Skipping probe for %s on %s because %s", + rsc->id, node->details->id, reason); + return false; +} + +/*! + * \internal + * \brief Check whether a probe should be ordered before another action + * + * \param[in] probe Probe action to check + * \param[in] then Other action to check + * + * \return true if \p probe should be ordered before \p then, otherwise false + */ +static bool +probe_needed_before_action(const pe_action_t *probe, const pe_action_t *then) +{ + // Probes on a node are performed after unfencing it, not before + if (pcmk__str_eq(then->task, CRM_OP_FENCE, pcmk__str_casei) + && (probe->node != NULL) && (then->node != NULL) + && (probe->node->details == then->node->details)) { + const char *op = g_hash_table_lookup(then->meta, "stonith_action"); + + if (pcmk__str_eq(op, "on", pcmk__str_casei)) { + return false; + } + } + + // Probes should be done on a node before shutting it down + if (pcmk__str_eq(then->task, CRM_OP_SHUTDOWN, pcmk__str_none) + && (probe->node != NULL) && (then->node != NULL) + && (probe->node->details != then->node->details)) { + return false; + } + + // Otherwise probes should always be done before any other action + return true; +} + +/*! + * \internal + * \brief Add implicit "probe then X" orderings for "stop then X" orderings + * + * If the state of a resource is not known yet, a probe will be scheduled, + * expecting a "not running" result. If the probe fails, a stop will not be + * scheduled until the next transition. Thus, if there are ordering constraints + * like "stop this resource then do something else that's not for the same + * resource", add implicit "probe this resource then do something" equivalents + * so the relation is upheld until we know whether a stop is needed. 
+ * + * \param[in,out] data_set Cluster working set + */ +static void +add_probe_orderings_for_stops(pe_working_set_t *data_set) +{ + for (GList *iter = data_set->ordering_constraints; iter != NULL; + iter = iter->next) { + + pe__ordering_t *order = iter->data; + uint32_t order_flags = pe_order_optional; + GList *probes = NULL; + GList *then_actions = NULL; + + // Skip disabled orderings + if (order->flags == pe_order_none) { + continue; + } + + // Skip non-resource orderings, and orderings for the same resource + if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) { + continue; + } + + // Skip invalid orderings (shouldn't be possible) + if (((order->lh_action == NULL) && (order->lh_action_task == NULL)) || + ((order->rh_action == NULL) && (order->rh_action_task == NULL))) { + continue; + } + + // Skip orderings for first actions other than stop + if ((order->lh_action != NULL) + && !pcmk__str_eq(order->lh_action->task, RSC_STOP, pcmk__str_none)) { + continue; + } else if ((order->lh_action == NULL) + && !pcmk__ends_with(order->lh_action_task, "_" RSC_STOP "_0")) { + continue; + } + + /* Do not imply a probe ordering for a resource inside of a stopping + * container. Otherwise, it might introduce a transition loop, since a + * probe could be scheduled after the container starts again. + */ + if ((order->rh_rsc != NULL) + && (order->lh_rsc->container == order->rh_rsc)) { + + if ((order->rh_action != NULL) + && pcmk__str_eq(order->rh_action->task, RSC_STOP, + pcmk__str_none)) { + continue; + } else if ((order->rh_action == NULL) + && pcmk__ends_with(order->rh_action_task, + "_" RSC_STOP "_0")) { + continue; + } + } + + // Preserve certain order options for future filtering + if (pcmk_is_set(order->flags, pe_order_apply_first_non_migratable)) { + pe__set_order_flags(order_flags, + pe_order_apply_first_non_migratable); + } + if (pcmk_is_set(order->flags, pe_order_same_node)) { + pe__set_order_flags(order_flags, pe_order_same_node); + } + + // Preserve certain order types for future filtering + if ((order->flags == pe_order_anti_colocation) + || (order->flags == pe_order_load)) { + order_flags = order->flags; + } + + // List all scheduled probes for the first resource + probes = pe__resource_actions(order->lh_rsc, NULL, RSC_STATUS, FALSE); + if (probes == NULL) { // There aren't any + continue; + } + + // List all relevant "then" actions + if (order->rh_action != NULL) { + then_actions = g_list_prepend(NULL, order->rh_action); + + } else if (order->rh_rsc != NULL) { + then_actions = find_actions(order->rh_rsc->actions, + order->rh_action_task, NULL); + if (then_actions == NULL) { // There aren't any + g_list_free(probes); + continue; + } + } + + crm_trace("Implying 'probe then' orderings for '%s then %s' " + "(id=%d, type=%.6x)", + order->lh_action? order->lh_action->uuid : order->lh_action_task, + order->rh_action? order->rh_action->uuid : order->rh_action_task, + order->id, order->flags); + + for (GList *probe_iter = probes; probe_iter != NULL; + probe_iter = probe_iter->next) { + + pe_action_t *probe = (pe_action_t *) probe_iter->data; + + for (GList *then_iter = then_actions; then_iter != NULL; + then_iter = then_iter->next) { + + pe_action_t *then = (pe_action_t *) then_iter->data; + + if (probe_needed_before_action(probe, then)) { + order_actions(probe, then, order_flags); + } + } + } + + g_list_free(then_actions); + g_list_free(probes); + } +} + +/*! 
+ * \internal
+ * \brief Add necessary orderings between a probe and the starts of clone
+ *        instances, in addition to the ordering with the parent resource
+ *        added when the probe was created
+ *
+ * \param[in,out] probe  Probe as 'first' action in an ordering
+ * \param[in,out] after  'then' action wrapper in the ordering
+ */
+static void
+add_start_orderings_for_probe(pe_action_t *probe, pe_action_wrapper_t *after)
+{
+    uint32_t flags = pe_order_optional|pe_order_runnable_left;
+
+    /* Although the ordering between the probe of the clone instance and the
+     * start of its parent has been added in pcmk__probe_rsc_on_node(), we
+     * avoided enforcing the `pe_order_runnable_left` order type for that as
+     * long as any of the clone instances are running, to prevent them from
+     * being unexpectedly stopped.
+     *
+     * On the other hand, we still need to prevent any inactive instances from
+     * starting unless the probe is runnable so that we don't risk starting too
+     * many instances before we know the state on all nodes.
+     */
+    if (after->action->rsc->variant <= pe_group
+        || pcmk_is_set(probe->flags, pe_action_runnable)
+        // The order type is already enforced for its parent.
+        || pcmk_is_set(after->type, pe_order_runnable_left)
+        || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
+        || !pcmk__str_eq(after->action->task, RSC_START, pcmk__str_none)) {
+        return;
+    }
+
+    crm_trace("Adding probe start orderings for '%s@%s (%s) "
+              "then instances of %s@%s'",
+              probe->uuid, pe__node_name(probe->node),
+              pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
+              after->action->uuid, pe__node_name(after->action->node));
+
+    for (GList *then_iter = after->action->actions_after; then_iter != NULL;
+         then_iter = then_iter->next) {
+
+        pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
+
+        if (then->action->rsc->running_on
+            || (pe__const_top_resource(then->action->rsc, false)
+                != after->action->rsc)
+            || !pcmk__str_eq(then->action->task, RSC_START, pcmk__str_none)) {
+            continue;
+        }
+
+        crm_trace("Adding probe start ordering for '%s@%s (%s) "
+                  "then %s@%s' (type=%#.6x)",
+                  probe->uuid, pe__node_name(probe->node),
+                  pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
+                  then->action->uuid, pe__node_name(then->action->node),
+                  flags);
+
+        /* Prevent the instance from starting if the probe is unrunnable, but
+         * don't cause any other instances to stop if already active.
+         */
+        order_actions(probe, then->action, flags);
+    }
+
+    return;
+}
+
+/*!
+ * \internal
+ * \brief Order probes before restarts and re-promotes
+ *
+ * If a given ordering is a "probe then start" or "probe then promote" ordering,
+ * add an implicit "probe then stop/demote" ordering in case the action is part
+ * of a restart/re-promote, and do the same recursively for all actions ordered
+ * after the "then" action.
+ * + * \param[in,out] probe Probe as 'first' action in an ordering + * \param[in,out] after 'then' action in the ordering + * \param[in,out] data_set Cluster working set + */ +static void +add_restart_orderings_for_probe(pe_action_t *probe, pe_action_t *after, + pe_working_set_t *data_set) +{ + GList *iter = NULL; + bool interleave = false; + pe_resource_t *compatible_rsc = NULL; + + // Validate that this is a resource probe followed by some action + if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL) + || (probe->rsc->variant != pe_native) + || !pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) { + return; + } + + // Avoid running into any possible loop + if (pcmk_is_set(after->flags, pe_action_tracking)) { + return; + } + pe__set_action_flags(after, pe_action_tracking); + + crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'", + probe->uuid, pe__node_name(probe->node), + after->uuid, pe__node_name(after->node)); + + /* Add restart orderings if "then" is for a different primitive. + * Orderings for collective resources will be added later. + */ + if ((after->rsc != NULL) && (after->rsc->variant == pe_native) + && (probe->rsc != after->rsc)) { + + GList *then_actions = NULL; + + if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) { + then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, + FALSE); + + } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) { + then_actions = pe__resource_actions(after->rsc, NULL, + RSC_DEMOTE, FALSE); + } + + for (iter = then_actions; iter != NULL; iter = iter->next) { + pe_action_t *then = (pe_action_t *) iter->data; + + // Skip pseudo-actions (for example, those implied by fencing) + if (!pcmk_is_set(then->flags, pe_action_pseudo)) { + order_actions(probe, then, pe_order_optional); + } + } + g_list_free(then_actions); + } + + /* Detect whether "then" is an interleaved clone action. For these, we want + * to add orderings only for the relevant instance. + */ + if ((after->rsc != NULL) + && (after->rsc->variant > pe_group)) { + const char *interleave_s = g_hash_table_lookup(after->rsc->meta, + XML_RSC_ATTR_INTERLEAVE); + + interleave = crm_is_true(interleave_s); + if (interleave) { + compatible_rsc = pcmk__find_compatible_instance(probe->rsc, + after->rsc, + RSC_ROLE_UNKNOWN, + false); + } + } + + /* Now recursively do the same for all actions ordered after "then". This + * also handles collective resources since the collective action will be + * ordered before its individual instances' actions. + */ + for (iter = after->actions_after; iter != NULL; iter = iter->next) { + pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) iter->data; + + /* pe_order_implies_then is the reason why a required A.start + * implies/enforces B.start to be required too, which is the cause of + * B.restart/re-promote. + * + * Not sure about pe_order_implies_then_on_node though. It's now only + * used for unfencing case, which tends to introduce transition + * loops... + */ + if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) { + /* The order type between a group/clone and its child such as + * B.start-> B_child.start is: + * pe_order_implies_first_printed | pe_order_runnable_left + * + * Proceed through the ordering chain and build dependencies with + * its children. 
+ */ + if ((after->rsc == NULL) + || (after->rsc->variant < pe_group) + || (probe->rsc->parent == after->rsc) + || (after_wrapper->action->rsc == NULL) + || (after_wrapper->action->rsc->variant > pe_group) + || (after->rsc != after_wrapper->action->rsc->parent)) { + continue; + } + + /* Proceed to the children of a group or a non-interleaved clone. + * For an interleaved clone, proceed only to the relevant child. + */ + if ((after->rsc->variant > pe_group) && interleave + && ((compatible_rsc == NULL) + || (compatible_rsc != after_wrapper->action->rsc))) { + continue; + } + } + + crm_trace("Recursively adding probe restart orderings for " + "'%s@%s then %s@%s' (type=%#.6x)", + after->uuid, pe__node_name(after->node), + after_wrapper->action->uuid, + pe__node_name(after_wrapper->action->node), + after_wrapper->type); + + add_restart_orderings_for_probe(probe, after_wrapper->action, data_set); + } +} + +/*! + * \internal + * \brief Clear the tracking flag on all scheduled actions + * + * \param[in,out] data_set Cluster working set + */ +static void +clear_actions_tracking_flag(pe_working_set_t *data_set) +{ + GList *gIter = NULL; + + for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + + pe__clear_action_flags(action, pe_action_tracking); + } +} + +/*! + * \internal + * \brief Add start and restart orderings for probes scheduled for a resource + * + * \param[in,out] rsc Resource whose probes should be ordered + * \param[in,out] data_set Cluster working set + */ +static void +add_start_restart_orderings_for_rsc(pe_resource_t *rsc, + pe_working_set_t *data_set) +{ + GList *probes = NULL; + + // For collective resources, order each instance recursively + if (rsc->variant != pe_native) { + g_list_foreach(rsc->children, + (GFunc) add_start_restart_orderings_for_rsc, data_set); + return; + } + + // Find all probes for given resource + probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); + + // Add probe restart orderings for each probe found + for (GList *iter = probes; iter != NULL; iter = iter->next) { + pe_action_t *probe = (pe_action_t *) iter->data; + + for (GList *then_iter = probe->actions_after; then_iter != NULL; + then_iter = then_iter->next) { + + pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data; + + add_start_orderings_for_probe(probe, then); + add_restart_orderings_for_probe(probe, then->action, data_set); + clear_actions_tracking_flag(data_set); + } + } + + g_list_free(probes); +} + +/*! + * \internal + * \brief Add "A then probe B" orderings for "A then B" orderings + * + * \param[in,out] data_set Cluster working set + * + * \note This function is currently disabled (see next comment). + */ +static void +order_then_probes(pe_working_set_t *data_set) +{ +#if 0 + /* Given an ordering "A then B", we would prefer to wait for A to be started + * before probing B. + * + * For example, if A is a filesystem which B can't even run without, it + * would be helpful if the author of B's agent could assume that A is + * running before B.monitor will be called. + * + * However, we can't _only_ probe after A is running, otherwise we wouldn't + * detect the state of B if A could not be started. 
We can't even do an + * opportunistic version of this, because B may be moving: + * + * A.stop -> A.start -> B.probe -> B.stop -> B.start + * + * and if we add B.stop -> A.stop here, we get a loop: + * + * A.stop -> A.start -> B.probe -> B.stop -> A.stop + * + * We could kill the "B.probe -> B.stop" dependency, but that could mean + * stopping B "too" soon, because B.start must wait for the probe, and + * we don't want to stop B if we can't start it. + * + * We could add the ordering only if A is an anonymous clone with + * clone-max == node-max (since we'll never be moving it). However, we could + * still be stopping one instance at the same time as starting another. + * + * The complexity of checking for allowed conditions combined with the ever + * narrowing use case suggests that this code should remain disabled until + * someone gets smarter. + */ + for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + pe_action_t *start = NULL; + GList *actions = NULL; + GList *probes = NULL; + + actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE); + + if (actions) { + start = actions->data; + g_list_free(actions); + } + + if (start == NULL) { + crm_err("No start action for %s", rsc->id); + continue; + } + + probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); + + for (actions = start->actions_before; actions != NULL; + actions = actions->next) { + + pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data; + + pe_action_t *first = before->action; + pe_resource_t *first_rsc = first->rsc; + + if (first->required_runnable_before) { + for (GList *clone_actions = first->actions_before; + clone_actions != NULL; + clone_actions = clone_actions->next) { + + before = (pe_action_wrapper_t *) clone_actions->data; + + crm_trace("Testing '%s then %s' for %s", + first->uuid, before->action->uuid, start->uuid); + + CRM_ASSERT(before->action->rsc != NULL); + first_rsc = before->action->rsc; + break; + } + + } else if (!pcmk__str_eq(first->task, RSC_START, pcmk__str_none)) { + crm_trace("Not a start op %s for %s", first->uuid, start->uuid); + } + + if (first_rsc == NULL) { + continue; + + } else if (pe__const_top_resource(first_rsc, false) + == pe__const_top_resource(start->rsc, false)) { + crm_trace("Same parent %s for %s", first_rsc->id, start->uuid); + continue; + + } else if (!pe_rsc_is_clone(pe__const_top_resource(first_rsc, + false))) { + crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid); + continue; + } + + crm_err("Applying %s before %s %d", first->uuid, start->uuid, + pe__const_top_resource(first_rsc, false)->variant); + + for (GList *probe_iter = probes; probe_iter != NULL; + probe_iter = probe_iter->next) { + + pe_action_t *probe = (pe_action_t *) probe_iter->data; + + crm_err("Ordering %s before %s", first->uuid, probe->uuid); + order_actions(first, probe, pe_order_optional); + } + } + } +#endif +} + +void +pcmk__order_probes(pe_working_set_t *data_set) +{ + // Add orderings for "probe then X" + g_list_foreach(data_set->resources, + (GFunc) add_start_restart_orderings_for_rsc, data_set); + add_probe_orderings_for_stops(data_set); + + order_then_probes(data_set); +} + +/*! + * \internal + * \brief Schedule any probes needed + * + * \param[in,out] data_set Cluster working set + * + * \note This may also schedule fencing of failed remote nodes. 
+ */ +void +pcmk__schedule_probes(pe_working_set_t *data_set) +{ + // Schedule probes on each node in the cluster as needed + for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { + pe_node_t *node = (pe_node_t *) iter->data; + const char *probed = NULL; + + if (!node->details->online) { // Don't probe offline nodes + if (pcmk__is_failed_remote_node(node)) { + pe_fence_node(data_set, node, + "the connection is unrecoverable", FALSE); + } + continue; + + } else if (node->details->unclean) { // ... or nodes that need fencing + continue; + + } else if (!node->details->rsc_discovery_enabled) { + // The user requested that probes not be done on this node + continue; + } + + /* This is no longer needed for live clusters, since the probe_complete + * node attribute will never be in the CIB. However this is still useful + * for processing old saved CIBs (< 1.1.14), including the + * reprobe-target_rc regression test. + */ + probed = pe_node_attribute_raw(node, CRM_OP_PROBED); + if (probed != NULL && crm_is_true(probed) == FALSE) { + pe_action_t *probe_op = NULL; + + probe_op = custom_action(NULL, + crm_strdup_printf("%s-%s", CRM_OP_REPROBE, + node->details->uname), + CRM_OP_REPROBE, node, FALSE, TRUE, + data_set); + add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, + XML_BOOLEAN_TRUE); + continue; + } + + // Probe each resource in the cluster on this node, as needed + pcmk__probe_resource_list(data_set->resources, node); + } +} diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c new file mode 100644 index 0000000..d12d017 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_promotable.c @@ -0,0 +1,1286 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +/*! + * \internal + * \brief Add implicit promotion ordering for a promotable instance + * + * \param[in,out] clone Clone resource + * \param[in,out] child Instance of \p clone being ordered + * \param[in,out] last Previous instance ordered (NULL if \p child is first) + */ +static void +order_instance_promotion(pe_resource_t *clone, pe_resource_t *child, + pe_resource_t *last) +{ + // "Promote clone" -> promote instance -> "clone promoted" + pcmk__order_resource_actions(clone, RSC_PROMOTE, child, RSC_PROMOTE, + pe_order_optional); + pcmk__order_resource_actions(child, RSC_PROMOTE, clone, RSC_PROMOTED, + pe_order_optional); + + // If clone is ordered, order this instance relative to last + if ((last != NULL) && pe__clone_is_ordered(clone)) { + pcmk__order_resource_actions(last, RSC_PROMOTE, child, RSC_PROMOTE, + pe_order_optional); + } +} + +/*! 
+ * \internal + * \brief Add implicit demotion ordering for a promotable instance + * + * \param[in,out] clone Clone resource + * \param[in,out] child Instance of \p clone being ordered + * \param[in] last Previous instance ordered (NULL if \p child is first) + */ +static void +order_instance_demotion(pe_resource_t *clone, pe_resource_t *child, + pe_resource_t *last) +{ + // "Demote clone" -> demote instance -> "clone demoted" + pcmk__order_resource_actions(clone, RSC_DEMOTE, child, RSC_DEMOTE, + pe_order_implies_first_printed); + pcmk__order_resource_actions(child, RSC_DEMOTE, clone, RSC_DEMOTED, + pe_order_implies_then_printed); + + // If clone is ordered, order this instance relative to last + if ((last != NULL) && pe__clone_is_ordered(clone)) { + pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE, + pe_order_optional); + } +} + +/*! + * \internal + * \brief Check whether an instance will be promoted or demoted + * + * \param[in] rsc Instance to check + * \param[out] demoting If \p rsc will be demoted, this will be set to true + * \param[out] promoting If \p rsc will be promoted, this will be set to true + */ +static void +check_for_role_change(const pe_resource_t *rsc, bool *demoting, bool *promoting) +{ + const GList *iter = NULL; + + // If this is a cloned group, check group members recursively + if (rsc->children != NULL) { + for (iter = rsc->children; iter != NULL; iter = iter->next) { + check_for_role_change((const pe_resource_t *) iter->data, + demoting, promoting); + } + return; + } + + for (iter = rsc->actions; iter != NULL; iter = iter->next) { + const pe_action_t *action = (const pe_action_t *) iter->data; + + if (*promoting && *demoting) { + return; + + } else if (pcmk_is_set(action->flags, pe_action_optional)) { + continue; + + } else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_none)) { + *demoting = true; + + } else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_none)) { + *promoting = true; + } + } +} + +/*! + * \internal + * \brief Add promoted-role location constraint scores to an instance's priority + * + * Adjust a promotable clone instance's promotion priority by the scores of any + * location constraints in a list that are both limited to the promoted role and + * for the node where the instance will be placed. + * + * \param[in,out] child Promotable clone instance + * \param[in] location_constraints List of location constraints to apply + * \param[in] chosen Node where \p child will be placed + */ +static void +apply_promoted_locations(pe_resource_t *child, + const GList *location_constraints, + const pe_node_t *chosen) +{ + for (const GList *iter = location_constraints; iter; iter = iter->next) { + const pe__location_t *location = iter->data; + pe_node_t *weighted_node = NULL; + + if (location->role_filter == RSC_ROLE_PROMOTED) { + weighted_node = pe_find_node_id(location->node_list_rh, + chosen->details->id); + } + if (weighted_node != NULL) { + int new_priority = pcmk__add_scores(child->priority, + weighted_node->weight); + + pe_rsc_trace(child, + "Applying location %s to %s promotion priority on %s: " + "%s + %s = %s", + location->id, child->id, pe__node_name(weighted_node), + pcmk_readable_score(child->priority), + pcmk_readable_score(weighted_node->weight), + pcmk_readable_score(new_priority)); + child->priority = new_priority; + } + } +} + +/*! 
+ * \internal + * \brief Get the node that an instance will be promoted on + * + * \param[in] rsc Promotable clone instance to check + * + * \return Node that \p rsc will be promoted on, or NULL if none + */ +static pe_node_t * +node_to_be_promoted_on(const pe_resource_t *rsc) +{ + pe_node_t *node = NULL; + pe_node_t *local_node = NULL; + const pe_resource_t *parent = NULL; + + // If this is a cloned group, bail if any group member can't be promoted + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child = (pe_resource_t *) iter->data; + + if (node_to_be_promoted_on(child) == NULL) { + pe_rsc_trace(rsc, + "%s can't be promoted because member %s can't", + rsc->id, child->id); + return NULL; + } + } + + node = rsc->fns->location(rsc, NULL, FALSE); + if (node == NULL) { + pe_rsc_trace(rsc, "%s can't be promoted because it won't be active", + rsc->id); + return NULL; + + } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { + if (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED) { + crm_notice("Unmanaged instance %s will be left promoted on %s", + rsc->id, pe__node_name(node)); + } else { + pe_rsc_trace(rsc, "%s can't be promoted because it is unmanaged", + rsc->id); + return NULL; + } + + } else if (rsc->priority < 0) { + pe_rsc_trace(rsc, + "%s can't be promoted because its promotion priority %d " + "is negative", + rsc->id, rsc->priority); + return NULL; + + } else if (!pcmk__node_available(node, false, true)) { + pe_rsc_trace(rsc, "%s can't be promoted because %s can't run resources", + rsc->id, pe__node_name(node)); + return NULL; + } + + parent = pe__const_top_resource(rsc, false); + local_node = pe_hash_table_lookup(parent->allowed_nodes, node->details->id); + + if (local_node == NULL) { + /* It should not be possible for the scheduler to have allocated the + * instance to a node where its parent is not allowed, but it's good to + * have a fail-safe. + */ + if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { + crm_warn("%s can't be promoted because %s is not allowed on %s " + "(scheduler bug?)", + rsc->id, parent->id, pe__node_name(node)); + } // else the instance is unmanaged and already promoted + return NULL; + + } else if ((local_node->count >= pe__clone_promoted_node_max(parent)) + && pcmk_is_set(rsc->flags, pe_rsc_managed)) { + pe_rsc_trace(rsc, + "%s can't be promoted because %s has " + "maximum promoted instances already", + rsc->id, pe__node_name(node)); + return NULL; + } + + return local_node; +} + +/*! 
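+ * (Editorial sketch: callers treat a NULL return as "do not promote here",
+ * as set_instance_role() does later in this file.)
+ * \code
+ *     pe_node_t *chosen = node_to_be_promoted_on(instance);
+ *
+ *     if (chosen == NULL) {
+ *         set_next_role_unpromoted(instance, NULL);
+ *     }
+ * \endcode
+ */
+
+/*!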
+ * \internal + * \brief Compare two promotable clone instances by promotion priority + * + * \param[in] a First instance to compare + * \param[in] b Second instance to compare + * + * \return A negative number if \p a has higher promotion priority, + * a positive number if \p b has higher promotion priority, + * or 0 if promotion priorities are equal + */ +static gint +cmp_promotable_instance(gconstpointer a, gconstpointer b) +{ + const pe_resource_t *rsc1 = (const pe_resource_t *) a; + const pe_resource_t *rsc2 = (const pe_resource_t *) b; + + enum rsc_role_e role1 = RSC_ROLE_UNKNOWN; + enum rsc_role_e role2 = RSC_ROLE_UNKNOWN; + + CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL)); + + // Check sort index set by pcmk__set_instance_roles() + if (rsc1->sort_index > rsc2->sort_index) { + pe_rsc_trace(rsc1, + "%s has higher promotion priority than %s " + "(sort index %d > %d)", + rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index); + return -1; + } else if (rsc1->sort_index < rsc2->sort_index) { + pe_rsc_trace(rsc1, + "%s has lower promotion priority than %s " + "(sort index %d < %d)", + rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index); + return 1; + } + + // If those are the same, prefer instance whose current role is higher + role1 = rsc1->fns->state(rsc1, TRUE); + role2 = rsc2->fns->state(rsc2, TRUE); + if (role1 > role2) { + pe_rsc_trace(rsc1, + "%s has higher promotion priority than %s " + "(higher current role)", + rsc1->id, rsc2->id); + return -1; + } else if (role1 < role2) { + pe_rsc_trace(rsc1, + "%s has lower promotion priority than %s " + "(lower current role)", + rsc1->id, rsc2->id); + return 1; + } + + // Finally, do normal clone instance sorting + return pcmk__cmp_instance(a, b); +} + +/*! + * \internal + * \brief Add a promotable clone instance's sort index to its node's weight + * + * Add a promotable clone instance's sort index (which sums its promotion + * preferences and scores of relevant location constraints for the promoted + * role) to the node weight of the instance's allocated node. + * + * \param[in] data Promotable clone instance + * \param[in,out] user_data Clone parent of \p data + */ +static void +add_sort_index_to_node_weight(gpointer data, gpointer user_data) +{ + const pe_resource_t *child = (const pe_resource_t *) data; + pe_resource_t *clone = (pe_resource_t *) user_data; + + pe_node_t *node = NULL; + const pe_node_t *chosen = NULL; + + if (child->sort_index < 0) { + pe_rsc_trace(clone, "Not adding sort index of %s: negative", child->id); + return; + } + + chosen = child->fns->location(child, NULL, FALSE); + if (chosen == NULL) { + pe_rsc_trace(clone, "Not adding sort index of %s: inactive", child->id); + return; + } + + node = (pe_node_t *) pe_hash_table_lookup(clone->allowed_nodes, + chosen->details->id); + CRM_ASSERT(node != NULL); + + node->weight = pcmk__add_scores(child->sort_index, node->weight); + pe_rsc_trace(clone, + "Added cumulative priority of %s (%s) to score on %s (now %s)", + child->id, pcmk_readable_score(child->sort_index), + pe__node_name(node), pcmk_readable_score(node->weight)); +} + +/*! 
+ * \internal + * \brief Apply colocation to dependent's node weights if for promoted role + * + * \param[in,out] data Colocation constraint to apply + * \param[in,out] user_data Promotable clone that is constraint's dependent + */ +static void +apply_coloc_to_dependent(gpointer data, gpointer user_data) +{ + pcmk__colocation_t *constraint = (pcmk__colocation_t *) data; + pe_resource_t *clone = (pe_resource_t *) user_data; + pe_resource_t *primary = constraint->primary; + uint32_t flags = pcmk__coloc_select_default; + float factor = constraint->score / (float) INFINITY; + + if (constraint->dependent_role != RSC_ROLE_PROMOTED) { + return; + } + if (constraint->score < INFINITY) { + flags = pcmk__coloc_select_active; + } + pe_rsc_trace(clone, "Applying colocation %s (promoted %s with %s) @%s", + constraint->id, constraint->dependent->id, + constraint->primary->id, + pcmk_readable_score(constraint->score)); + primary->cmds->add_colocated_node_scores(primary, clone->id, + &clone->allowed_nodes, + constraint->node_attribute, factor, + flags); +} + +/*! + * \internal + * \brief Apply colocation to primary's node weights if for promoted role + * + * \param[in,out] data Colocation constraint to apply + * \param[in,out] user_data Promotable clone that is constraint's primary + */ +static void +apply_coloc_to_primary(gpointer data, gpointer user_data) +{ + pcmk__colocation_t *constraint = (pcmk__colocation_t *) data; + pe_resource_t *clone = (pe_resource_t *) user_data; + pe_resource_t *dependent = constraint->dependent; + const float factor = constraint->score / (float) INFINITY; + const uint32_t flags = pcmk__coloc_select_active + |pcmk__coloc_select_nonnegative; + + if ((constraint->primary_role != RSC_ROLE_PROMOTED) + || !pcmk__colocation_has_influence(constraint, NULL)) { + return; + } + + pe_rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s", + constraint->id, constraint->dependent->id, + constraint->primary->id, + pcmk_readable_score(constraint->score)); + dependent->cmds->add_colocated_node_scores(dependent, clone->id, + &clone->allowed_nodes, + constraint->node_attribute, + factor, flags); +} + +/*! + * \internal + * \brief Set clone instance's sort index to its node's weight + * + * \param[in,out] data Promotable clone instance + * \param[in] user_data Parent clone of \p data + */ +static void +set_sort_index_to_node_weight(gpointer data, gpointer user_data) +{ + pe_resource_t *child = (pe_resource_t *) data; + const pe_resource_t *clone = (const pe_resource_t *) user_data; + + pe_node_t *chosen = child->fns->location(child, NULL, FALSE); + + if (!pcmk_is_set(child->flags, pe_rsc_managed) + && (child->next_role == RSC_ROLE_PROMOTED)) { + child->sort_index = INFINITY; + pe_rsc_trace(clone, + "Final sort index for %s is INFINITY (unmanaged promoted)", + child->id); + + } else if ((chosen == NULL) || (child->sort_index < 0)) { + pe_rsc_trace(clone, + "Final sort index for %s is %d (ignoring node weight)", + child->id, child->sort_index); + + } else { + const pe_node_t *node = NULL; + + node = pe_hash_table_lookup(clone->allowed_nodes, chosen->details->id); + CRM_ASSERT(node != NULL); + + child->sort_index = node->weight; + pe_rsc_trace(clone, + "Merging weights for %s: final sort index for %s is %d", + clone->id, child->id, child->sort_index); + } +} + +/*! 
+ * \internal
+ * \brief Sort a promotable clone's instances by descending promotion priority
+ *
+ * \param[in,out] clone  Promotable clone to sort
+ */
+static void
+sort_promotable_instances(pe_resource_t *clone)
+{
+    if (pe__set_clone_flag(clone, pe__clone_promotion_constrained)
+        == pcmk_rc_already) {
+        return;
+    }
+    pe__set_resource_flags(clone, pe_rsc_merging);
+
+    for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
+        pe_resource_t *child = (pe_resource_t *) iter->data;
+
+        pe_rsc_trace(clone,
+                     "Merging weights for %s: initial sort index for %s is %d",
+                     clone->id, child->id, child->sort_index);
+    }
+    pe__show_node_weights(true, clone, "Before", clone->allowed_nodes,
+                          clone->cluster);
+
+    /* Because the this_with_colocations() and with_this_colocations() methods
+     * boil down to copies of rsc_cons and rsc_cons_lhs for clones, we can use
+     * those here directly for efficiency.
+     */
+    g_list_foreach(clone->children, add_sort_index_to_node_weight, clone);
+    g_list_foreach(clone->rsc_cons, apply_coloc_to_dependent, clone);
+    g_list_foreach(clone->rsc_cons_lhs, apply_coloc_to_primary, clone);
+
+    // Ban resource from all nodes if it needs a ticket but doesn't have it
+    pcmk__require_promotion_tickets(clone);
+
+    pe__show_node_weights(true, clone, "After", clone->allowed_nodes,
+                          clone->cluster);
+
+    // Reset sort indexes to final node weights
+    g_list_foreach(clone->children, set_sort_index_to_node_weight, clone);
+
+    // Finally, sort instances in descending order of promotion priority
+    clone->children = g_list_sort(clone->children, cmp_promotable_instance);
+    pe__clear_resource_flags(clone, pe_rsc_merging);
+}
+
+/*!
+ * \internal
+ * \brief Find the active instance (if any) of an anonymous clone on a node
+ *
+ * \param[in] clone  Anonymous clone to check
+ * \param[in] id     Instance ID (without instance number) to check
+ * \param[in] node   Node to check
+ *
+ * \return Instance of \p clone matching \p id that is active on \p node,
+ *         or NULL if none
+ */
+static pe_resource_t *
+find_active_anon_instance(const pe_resource_t *clone, const char *id,
+                          const pe_node_t *node)
+{
+    for (GList *iter = clone->children; iter; iter = iter->next) {
+        pe_resource_t *child = iter->data;
+        pe_resource_t *active = NULL;
+
+        // Use ->find_rsc() in case this is a cloned group
+        active = clone->fns->find_rsc(child, id, node,
+                                      pe_find_clone|pe_find_current);
+        if (active != NULL) {
+            return active;
+        }
+    }
+    return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Check whether an anonymous clone instance is known on a node
+ *
+ * \param[in] clone  Anonymous clone to check
+ * \param[in] id     Instance ID (without instance number) to check
+ * \param[in] node   Node to check
+ *
+ * \return true if \p id instance of \p clone is known on \p node,
+ *         otherwise false
+ */
+static bool
+anonymous_known_on(const pe_resource_t *clone, const char *id,
+                   const pe_node_t *node)
+{
+    for (GList *iter = clone->children; iter; iter = iter->next) {
+        pe_resource_t *child = iter->data;
+
+        /* Use ->find_rsc() because this might be a cloned group, and knowing
+         * that other members of the group are known here implies nothing.
+         */
+        child = clone->fns->find_rsc(child, id, NULL, pe_find_clone);
+        CRM_LOG_ASSERT(child != NULL);
+        if (child != NULL) {
+            if (g_hash_table_lookup(child->known_on, node->details->id)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+/*!
+ * \internal + * \brief Check whether a node is allowed to run a resource + * + * \param[in] rsc Resource to check + * \param[in] node Node to check + * + * \return true if \p node is allowed to run \p rsc, otherwise false + */ +static bool +is_allowed(const pe_resource_t *rsc, const pe_node_t *node) +{ + pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes, + node->details->id); + + return (allowed != NULL) && (allowed->weight >= 0); +} + +/*! + * \brief Check whether a clone instance's promotion score should be considered + * + * \param[in] rsc Promotable clone instance to check + * \param[in] node Node where score would be applied + * + * \return true if \p rsc's promotion score should be considered on \p node, + * otherwise false + */ +static bool +promotion_score_applies(const pe_resource_t *rsc, const pe_node_t *node) +{ + char *id = clone_strip(rsc->id); + const pe_resource_t *parent = pe__const_top_resource(rsc, false); + pe_resource_t *active = NULL; + const char *reason = "allowed"; + + // Some checks apply only to anonymous clone instances + if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { + + // If instance is active on the node, its score definitely applies + active = find_active_anon_instance(parent, id, node); + if (active == rsc) { + reason = "active"; + goto check_allowed; + } + + /* If *no* instance is active on this node, this instance's score will + * count if it has been probed on this node. + */ + if ((active == NULL) && anonymous_known_on(parent, id, node)) { + reason = "probed"; + goto check_allowed; + } + } + + /* If this clone's status is unknown on *all* nodes (e.g. cluster startup), + * take all instances' scores into account, to make sure we use any + * permanent promotion scores. + */ + if ((rsc->running_on == NULL) && (g_hash_table_size(rsc->known_on) == 0)) { + reason = "none probed"; + goto check_allowed; + } + + /* Otherwise, we've probed and/or started the resource *somewhere*, so + * consider promotion scores on nodes where we know the status. + */ + if ((pe_hash_table_lookup(rsc->known_on, node->details->id) != NULL) + || (pe_find_node_id(rsc->running_on, node->details->id) != NULL)) { + reason = "known"; + } else { + pe_rsc_trace(rsc, + "Ignoring %s promotion score (for %s) on %s: not probed", + rsc->id, id, pe__node_name(node)); + free(id); + return false; + } + +check_allowed: + if (is_allowed(rsc, node)) { + pe_rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s", + rsc->id, id, pe__node_name(node), reason); + free(id); + return true; + } + + pe_rsc_trace(rsc, "Ignoring %s promotion score (for %s) on %s: not allowed", + rsc->id, id, pe__node_name(node)); + free(id); + return false; +} + +/*! + * \internal + * \brief Get the value of a promotion score node attribute + * + * \param[in] rsc Promotable clone instance to get promotion score for + * \param[in] node Node to get promotion score for + * \param[in] name Resource name to use in promotion score attribute name + * + * \return Value of promotion score node attribute for \p rsc on \p node + */ +static const char * +promotion_attr_value(const pe_resource_t *rsc, const pe_node_t *node, + const char *name) +{ + char *attr_name = NULL; + const char *attr_value = NULL; + + CRM_CHECK((rsc != NULL) && (node != NULL) && (name != NULL), return NULL); + + attr_name = pcmk_promotion_score_name(name); + attr_value = pe_node_attribute_calculated(node, attr_name, rsc); + free(attr_name); + return attr_value; +} + +/*! 
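+ * (Editorial note: pcmk_promotion_score_name() has historically yielded
+ * "master-<name>" for backward compatibility, so for a resource known as
+ * "db" in resource history, the attribute consulted here could be set on a
+ * node with something like the following; the resource and node names are
+ * hypothetical.)
+ * \code
+ *     crm_attribute --node node1 --name master-db --lifetime reboot --update 100
+ * \endcode
+ */
+
+/*!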
+ * \internal + * \brief Get the promotion score for a clone instance on a node + * + * \param[in] rsc Promotable clone instance to get score for + * \param[in] node Node to get score for + * \param[out] is_default If non-NULL, will be set true if no score available + * + * \return Promotion score for \p rsc on \p node (or 0 if none) + */ +static int +promotion_score(const pe_resource_t *rsc, const pe_node_t *node, + bool *is_default) +{ + char *name = NULL; + const char *attr_value = NULL; + + if (is_default != NULL) { + *is_default = true; + } + + CRM_CHECK((rsc != NULL) && (node != NULL), return 0); + + /* If this is an instance of a cloned group, the promotion score is the sum + * of all members' promotion scores. + */ + if (rsc->children != NULL) { + int score = 0; + + for (const GList *iter = rsc->children; + iter != NULL; iter = iter->next) { + + const pe_resource_t *child = (const pe_resource_t *) iter->data; + bool child_default = false; + int child_score = promotion_score(child, node, &child_default); + + if (!child_default && (is_default != NULL)) { + *is_default = false; + } + score += child_score; + } + return score; + } + + if (!promotion_score_applies(rsc, node)) { + return 0; + } + + /* For the promotion score attribute name, use the name the resource is + * known as in resource history, since that's what crm_attribute --promotion + * would have used. + */ + name = (rsc->clone_name == NULL)? rsc->id : rsc->clone_name; + + attr_value = promotion_attr_value(rsc, node, name); + if (attr_value != NULL) { + pe_rsc_trace(rsc, "Promotion score for %s on %s = %s", + name, pe__node_name(node), pcmk__s(attr_value, "(unset)")); + } else if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { + /* If we don't have any resource history yet, we won't have clone_name. + * In that case, for anonymous clones, try the resource name without + * any instance number. + */ + name = clone_strip(rsc->id); + if (strcmp(rsc->id, name) != 0) { + attr_value = promotion_attr_value(rsc, node, name); + pe_rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s", + name, pe__node_name(node), rsc->id, + pcmk__s(attr_value, "(unset)")); + } + free(name); + } + + if (attr_value == NULL) { + return 0; + } + + if (is_default != NULL) { + *is_default = false; + } + return char2score(attr_value); +} + +/*! + * \internal + * \brief Include promotion scores in instances' node weights and priorities + * + * \param[in,out] rsc Promotable clone resource to update + */ +void +pcmk__add_promotion_scores(pe_resource_t *rsc) +{ + if (pe__set_clone_flag(rsc, pe__clone_promotion_added) == pcmk_rc_already) { + return; + } + + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) iter->data; + + GHashTableIter iter; + pe_node_t *node = NULL; + int score, new_score; + + g_hash_table_iter_init(&iter, child_rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (!pcmk__node_available(node, false, false)) { + /* This node will never be promoted, so don't apply the + * promotion score, as that may lead to clone shuffling. 
+ */ + continue; + } + + score = promotion_score(child_rsc, node, NULL); + if (score > 0) { + new_score = pcmk__add_scores(node->weight, score); + if (new_score != node->weight) { // Could remain INFINITY + node->weight = new_score; + pe_rsc_trace(rsc, + "Added %s promotion priority (%s) to score " + "on %s (now %s)", + child_rsc->id, pcmk_readable_score(score), + pe__node_name(node), + pcmk_readable_score(new_score)); + } + } + + if (score > child_rsc->priority) { + pe_rsc_trace(rsc, + "Updating %s priority to promotion score (%d->%d)", + child_rsc->id, child_rsc->priority, score); + child_rsc->priority = score; + } + } + } +} + +/*! + * \internal + * \brief If a resource's current role is started, change it to unpromoted + * + * \param[in,out] data Resource to update + * \param[in] user_data Ignored + */ +static void +set_current_role_unpromoted(void *data, void *user_data) +{ + pe_resource_t *rsc = (pe_resource_t *) data; + + if (rsc->role == RSC_ROLE_STARTED) { + // Promotable clones should use unpromoted role instead of started + rsc->role = RSC_ROLE_UNPROMOTED; + } + g_list_foreach(rsc->children, set_current_role_unpromoted, NULL); +} + +/*! + * \internal + * \brief Set a resource's next role to unpromoted (or stopped if unassigned) + * + * \param[in,out] data Resource to update + * \param[in] user_data Ignored + */ +static void +set_next_role_unpromoted(void *data, void *user_data) +{ + pe_resource_t *rsc = (pe_resource_t *) data; + GList *assigned = NULL; + + rsc->fns->location(rsc, &assigned, FALSE); + if (assigned == NULL) { + pe__set_next_role(rsc, RSC_ROLE_STOPPED, "stopped instance"); + } else { + pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED, "unpromoted instance"); + g_list_free(assigned); + } + g_list_foreach(rsc->children, set_next_role_unpromoted, NULL); +} + +/*! + * \internal + * \brief Set a resource's next role to promoted if not already set + * + * \param[in,out] data Resource to update + * \param[in] user_data Ignored + */ +static void +set_next_role_promoted(void *data, gpointer user_data) +{ + pe_resource_t *rsc = (pe_resource_t *) data; + + if (rsc->next_role == RSC_ROLE_UNKNOWN) { + pe__set_next_role(rsc, RSC_ROLE_PROMOTED, "promoted instance"); + } + g_list_foreach(rsc->children, set_next_role_promoted, NULL); +} + +/*! + * \internal + * \brief Show instance's promotion score on node where it will be active + * + * \param[in,out] instance Promotable clone instance to show + */ +static void +show_promotion_score(pe_resource_t *instance) +{ + pe_node_t *chosen = instance->fns->location(instance, NULL, FALSE); + + if (pcmk_is_set(instance->cluster->flags, pe_flag_show_scores) + && !pcmk__is_daemon && (instance->cluster->priv != NULL)) { + + pcmk__output_t *out = instance->cluster->priv; + + out->message(out, "promotion-score", instance, chosen, + pcmk_readable_score(instance->sort_index)); + } else { + pe_rsc_debug(pe__const_top_resource(instance, false), + "%s promotion score on %s: sort=%s priority=%s", + instance->id, + ((chosen == NULL)? "none" : pe__node_name(chosen)), + pcmk_readable_score(instance->sort_index), + pcmk_readable_score(instance->priority)); + } +} + +/*! 
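+ * (Editorial note: the "promotion-score" message above is what score-showing
+ * tools surface; for example, "crm_simulate -sL" output includes lines along
+ * the lines of the following, with hypothetical names.)
+ * \code
+ *     my-clone:0 promotion score on node1: 100
+ * \endcode
+ */
+
+/*!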
+ * \internal + * \brief Set a clone instance's promotion priority + * + * \param[in,out] data Promotable clone instance to update + * \param[in] user_data Instance's parent clone + */ +static void +set_instance_priority(gpointer data, gpointer user_data) +{ + pe_resource_t *instance = (pe_resource_t *) data; + const pe_resource_t *clone = (const pe_resource_t *) user_data; + const pe_node_t *chosen = NULL; + enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; + GList *list = NULL; + + pe_rsc_trace(clone, "Assigning priority for %s: %s", instance->id, + role2text(instance->next_role)); + + if (instance->fns->state(instance, TRUE) == RSC_ROLE_STARTED) { + set_current_role_unpromoted(instance, NULL); + } + + // Only an instance that will be active can be promoted + chosen = instance->fns->location(instance, &list, FALSE); + if (pcmk__list_of_multiple(list)) { + pcmk__config_err("Cannot promote non-colocated child %s", + instance->id); + } + g_list_free(list); + if (chosen == NULL) { + return; + } + + next_role = instance->fns->state(instance, FALSE); + switch (next_role) { + case RSC_ROLE_STARTED: + case RSC_ROLE_UNKNOWN: + // Set instance priority to its promotion score (or -1 if none) + { + bool is_default = false; + + instance->priority = promotion_score(instance, chosen, + &is_default); + if (is_default) { + /* + * Default to -1 if no value is set. This allows + * instances eligible for promotion to be specified + * based solely on rsc_location constraints, but + * prevents any instance from being promoted if neither + * a constraint nor a promotion score is present + */ + instance->priority = -1; + } + } + break; + + case RSC_ROLE_UNPROMOTED: + case RSC_ROLE_STOPPED: + // Instance can't be promoted + instance->priority = -INFINITY; + break; + + case RSC_ROLE_PROMOTED: + // Nothing needed (re-creating actions after scheduling fencing) + break; + + default: + CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s", + next_role, instance->id)); + } + + // Add relevant location constraint scores for promoted role + apply_promoted_locations(instance, instance->rsc_location, chosen); + apply_promoted_locations(instance, clone->rsc_location, chosen); + + // Consider instance's role-based colocations with other resources + list = pcmk__this_with_colocations(instance); + for (GList *iter = list; iter != NULL; iter = iter->next) { + pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data; + + instance->cmds->apply_coloc_score(instance, cons->primary, cons, true); + } + g_list_free(list); + + instance->sort_index = instance->priority; + if (next_role == RSC_ROLE_PROMOTED) { + instance->sort_index = INFINITY; + } + pe_rsc_trace(clone, "Assigning %s priority = %d", + instance->id, instance->priority); +} + +/*! 
+ * \internal + * \brief Set a promotable clone instance's role + * + * \param[in,out] data Promotable clone instance to update + * \param[in,out] user_data Pointer to count of instances chosen for promotion + */ +static void +set_instance_role(gpointer data, gpointer user_data) +{ + pe_resource_t *instance = (pe_resource_t *) data; + int *count = (int *) user_data; + + const pe_resource_t *clone = pe__const_top_resource(instance, false); + pe_node_t *chosen = NULL; + + show_promotion_score(instance); + + if (instance->sort_index < 0) { + pe_rsc_trace(clone, "Not supposed to promote instance %s", + instance->id); + + } else if ((*count < pe__clone_promoted_max(instance)) + || !pcmk_is_set(clone->flags, pe_rsc_managed)) { + chosen = node_to_be_promoted_on(instance); + } + + if (chosen == NULL) { + set_next_role_unpromoted(instance, NULL); + return; + } + + if ((instance->role < RSC_ROLE_PROMOTED) + && !pcmk_is_set(instance->cluster->flags, pe_flag_have_quorum) + && (instance->cluster->no_quorum_policy == no_quorum_freeze)) { + crm_notice("Clone instance %s cannot be promoted without quorum", + instance->id); + set_next_role_unpromoted(instance, NULL); + return; + } + + chosen->count++; + pe_rsc_info(clone, "Choosing %s (%s) on %s for promotion", + instance->id, role2text(instance->role), + pe__node_name(chosen)); + set_next_role_promoted(instance, NULL); + (*count)++; +} + +/*! + * \internal + * \brief Set roles for all instances of a promotable clone + * + * \param[in,out] rsc Promotable clone resource to update + */ +void +pcmk__set_instance_roles(pe_resource_t *rsc) +{ + int promoted = 0; + GHashTableIter iter; + pe_node_t *node = NULL; + + // Repurpose count to track the number of promoted instances allocated + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + node->count = 0; + } + + // Set instances' promotion priorities and sort by highest priority first + g_list_foreach(rsc->children, set_instance_priority, rsc); + sort_promotable_instances(rsc); + + // Choose the first N eligible instances to be promoted + g_list_foreach(rsc->children, set_instance_role, &promoted); + pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d", + rsc->id, promoted, pe__clone_promoted_max(rsc)); +} + +/*! + * + * \internal + * \brief Create actions for promotable clone instances + * + * \param[in,out] clone Promotable clone to create actions for + * \param[out] any_promoting Will be set true if any instance is promoting + * \param[out] any_demoting Will be set true if any instance is demoting + */ +static void +create_promotable_instance_actions(pe_resource_t *clone, + bool *any_promoting, bool *any_demoting) +{ + for (GList *iter = clone->children; iter != NULL; iter = iter->next) { + pe_resource_t *instance = (pe_resource_t *) iter->data; + + instance->cmds->create_actions(instance); + check_for_role_change(instance, any_demoting, any_promoting); + } +} + +/*! + * \internal + * \brief Reset each promotable instance's resource priority + * + * Reset the priority of each instance of a promotable clone to the clone's + * priority (after promotion actions are scheduled, when instance priorities + * were repurposed as promotion scores). 
+ *
+ * \param[in,out] clone  Promotable clone to reset
+ */
+static void
+reset_instance_priorities(pe_resource_t *clone)
+{
+    for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
+        pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+        instance->priority = clone->priority;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Create actions specific to promotable clones
+ *
+ * \param[in,out] clone  Promotable clone to create actions for
+ */
+void
+pcmk__create_promotable_actions(pe_resource_t *clone)
+{
+    bool any_promoting = false;
+    bool any_demoting = false;
+
+    // Create actions for each clone instance individually
+    create_promotable_instance_actions(clone, &any_promoting, &any_demoting);
+
+    // Create pseudo-actions for clone as a whole
+    pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting);
+
+    // Undo our temporary repurposing of resource priority for instances
+    reset_instance_priorities(clone);
+}
+
+/*!
+ * \internal
+ * \brief Create internal orderings for a promotable clone's instances
+ *
+ * \param[in,out] clone  Promotable clone whose instances should be ordered
+ */
+void
+pcmk__order_promotable_instances(pe_resource_t *clone)
+{
+    pe_resource_t *previous = NULL; // Needed for ordered clones
+
+    pcmk__promotable_restart_ordering(clone);
+
+    for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
+        pe_resource_t *instance = (pe_resource_t *) iter->data;
+
+        // Demote before promote
+        pcmk__order_resource_actions(instance, RSC_DEMOTE,
+                                     instance, RSC_PROMOTE,
+                                     pe_order_optional);
+
+        order_instance_promotion(clone, instance, previous);
+        order_instance_demotion(clone, instance, previous);
+        previous = instance;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Update dependent's allowed nodes for colocation with promotable
+ *
+ * \param[in,out] dependent     Dependent resource to update
+ * \param[in]     primary_node  Node where an instance of the primary will be
+ * \param[in]     colocation    Colocation constraint to apply
+ */
+static void
+update_dependent_allowed_nodes(pe_resource_t *dependent,
+                               const pe_node_t *primary_node,
+                               const pcmk__colocation_t *colocation)
+{
+    GHashTableIter iter;
+    pe_node_t *node = NULL;
+    const char *primary_value = NULL;
+    const char *attr = NULL;
+
+    if (colocation->score >= INFINITY) {
+        return; // Colocation is mandatory, so allowed node scores don't matter
+    }
+
+    // Get value of primary's colocation node attribute
+    attr = colocation->node_attribute;
+    if (attr == NULL) {
+        attr = CRM_ATTR_UNAME;
+    }
+    primary_value = pe_node_attribute_raw(primary_node, attr);
+
+    pe_rsc_trace(colocation->primary,
+                 "Applying %s (%s with %s on %s by %s @%d) to %s",
+                 colocation->id, colocation->dependent->id,
+                 colocation->primary->id, pe__node_name(primary_node), attr,
+                 colocation->score, dependent->id);
+
+    g_hash_table_iter_init(&iter, dependent->allowed_nodes);
+    while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
+        const char *dependent_value = pe_node_attribute_raw(node, attr);
+
+        if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) {
+            node->weight = pcmk__add_scores(node->weight, colocation->score);
+            pe_rsc_trace(colocation->primary,
+                         "Added %s score (%s) to %s (now %s)",
+                         colocation->id,
+                         pcmk_readable_score(colocation->score),
+                         pe__node_name(node),
+                         pcmk_readable_score(node->weight));
+        }
+    }
+}
+
+/*!
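+ * (Editorial illustration: an optional colocation that reaches this code
+ * might be configured in the CIB as follows, with hypothetical resource
+ * names; a score below INFINITY keeps it non-mandatory.)
+ * \code
+ *     <rsc_colocation id="web-with-promoted-db" rsc="web"
+ *                     with-rsc="db-clone" with-rsc-role="Promoted"
+ *                     score="100"/>
+ * \endcode
+ */
+
+/*!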
+ * \brief Update dependent for a colocation with a promotable clone + * + * \param[in] primary Primary resource in the colocation + * \param[in,out] dependent Dependent resource in the colocation + * \param[in] colocation Colocation constraint to apply + */ +void +pcmk__update_dependent_with_promotable(const pe_resource_t *primary, + pe_resource_t *dependent, + const pcmk__colocation_t *colocation) +{ + GList *affected_nodes = NULL; + + /* Build a list of all nodes where an instance of the primary will be, and + * (for optional colocations) update the dependent's allowed node scores for + * each one. + */ + for (GList *iter = primary->children; iter != NULL; iter = iter->next) { + pe_resource_t *instance = (pe_resource_t *) iter->data; + pe_node_t *node = instance->fns->location(instance, NULL, FALSE); + + if (node == NULL) { + continue; + } + if (instance->fns->state(instance, FALSE) == colocation->primary_role) { + update_dependent_allowed_nodes(dependent, node, colocation); + affected_nodes = g_list_prepend(affected_nodes, node); + } + } + + /* For mandatory colocations, add the primary's node weight to the + * dependent's node weight for each affected node, and ban the dependent + * from all other nodes. + * + * However, skip this for promoted-with-promoted colocations, otherwise + * inactive dependent instances can't start (in the unpromoted role). + */ + if ((colocation->score >= INFINITY) + && ((colocation->dependent_role != RSC_ROLE_PROMOTED) + || (colocation->primary_role != RSC_ROLE_PROMOTED))) { + + pe_rsc_trace(colocation->primary, + "Applying %s (mandatory %s with %s) to %s", + colocation->id, colocation->dependent->id, + colocation->primary->id, dependent->id); + node_list_exclude(dependent->allowed_nodes, affected_nodes, + TRUE); + } + g_list_free(affected_nodes); +} + +/*! 
+ * \internal + * \brief Update dependent priority for colocation with promotable + * + * \param[in] primary Primary resource in the colocation + * \param[in,out] dependent Dependent resource in the colocation + * \param[in] colocation Colocation constraint to apply + */ +void +pcmk__update_promotable_dependent_priority(const pe_resource_t *primary, + pe_resource_t *dependent, + const pcmk__colocation_t *colocation) +{ + pe_resource_t *primary_instance = NULL; + + // Look for a primary instance where dependent will be + primary_instance = pcmk__find_compatible_instance(dependent, primary, + colocation->primary_role, + false); + + if (primary_instance != NULL) { + // Add primary instance's priority to dependent's + int new_priority = pcmk__add_scores(dependent->priority, + colocation->score); + + pe_rsc_trace(colocation->primary, + "Applying %s (%s with %s) to %s priority (%s + %s = %s)", + colocation->id, colocation->dependent->id, + colocation->primary->id, dependent->id, + pcmk_readable_score(dependent->priority), + pcmk_readable_score(colocation->score), + pcmk_readable_score(new_priority)); + dependent->priority = new_priority; + + } else if (colocation->score >= INFINITY) { + // Mandatory colocation, but primary won't be here + pe_rsc_trace(colocation->primary, + "Applying %s (%s with %s) to %s: can't be promoted", + colocation->id, colocation->dependent->id, + colocation->primary->id, dependent->id); + dependent->priority = -INFINITY; + } +} diff --git a/lib/pacemaker/pcmk_sched_recurring.c b/lib/pacemaker/pcmk_sched_recurring.c new file mode 100644 index 0000000..c1b929b --- /dev/null +++ b/lib/pacemaker/pcmk_sched_recurring.c @@ -0,0 +1,716 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> + +#include <stdbool.h> + +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +// Information parsed from an operation history entry in the CIB +struct op_history { + // XML attributes + const char *id; // ID of history entry + const char *name; // Action name + + // Parsed information + char *key; // Operation key for action + enum rsc_role_e role; // Action role (or RSC_ROLE_UNKNOWN for default) + guint interval_ms; // Action interval +}; + +/*! + * \internal + * \brief Parse an interval from XML + * + * \param[in] xml XML containing an interval attribute + * + * \return Interval parsed from XML (or 0 as default) + */ +static guint +xe_interval(const xmlNode *xml) +{ + return crm_parse_interval_spec(crm_element_value(xml, + XML_LRM_ATTR_INTERVAL)); +} + +/*! 
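+ * (Editorial note: crm_parse_interval_spec() accepts bare milliseconds,
+ * common unit suffixes, and ISO 8601 durations, so the following all yield
+ * the same interval.)
+ * \code
+ *     crm_parse_interval_spec("10000");   // 10000ms
+ *     crm_parse_interval_spec("10s");     // 10000ms
+ *     crm_parse_interval_spec("PT10S");   // 10000ms
+ * \endcode
+ */
+
+/*!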
+ * \internal + * \brief Check whether an operation exists multiple times in resource history + * + * \param[in] rsc Resource with history to search + * \param[in] name Name of action to search for + * \param[in] interval_ms Interval (in milliseconds) of action to search for + * + * \return true if an operation with \p name and \p interval_ms exists more than + * once in the operation history of \p rsc, otherwise false + */ +static bool +is_op_dup(const pe_resource_t *rsc, const char *name, guint interval_ms) +{ + const char *id = NULL; + + for (xmlNode *op = first_named_child(rsc->ops_xml, "op"); + op != NULL; op = crm_next_same_xml(op)) { + + // Check whether action name and interval match + if (!pcmk__str_eq(crm_element_value(op, "name"), + name, pcmk__str_none) + || (xe_interval(op) != interval_ms)) { + continue; + } + + if (ID(op) == NULL) { + continue; // Shouldn't be possible + } + + if (id == NULL) { + id = ID(op); // First matching op + } else { + pcmk__config_err("Operation %s is duplicate of %s (do not use " + "same name and interval combination more " + "than once per resource)", ID(op), id); + return true; + } + } + return false; +} + +/*! + * \internal + * \brief Check whether an action name is one that can be recurring + * + * \param[in] name Action name to check + * + * \return true if \p name is an action known to be unsuitable as a recurring + * operation, otherwise false + * + * \note Pacemaker's current philosophy is to allow users to configure recurring + * operations except for a short list of actions known not to be suitable + * for that (as opposed to allowing only actions known to be suitable, + * which includes only monitor). Among other things, this approach allows + * users to define their own custom operations and make them recurring, + * though that use case is not well tested. + */ +static bool +op_cannot_recur(const char *name) +{ + return pcmk__str_any_of(name, RSC_STOP, RSC_START, RSC_DEMOTE, RSC_PROMOTE, + CRMD_ACTION_RELOAD_AGENT, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL); +} + +/*! 
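+ * (Editorial illustration: given the rule above, the first operation below
+ * would be accepted as recurring while the second would be rejected; IDs
+ * and names are hypothetical.)
+ * \code
+ *     <op id="db-monitor-10s" name="monitor" interval="10s"/>
+ *     <op id="db-start-10s" name="start" interval="10s"/>
+ * \endcode
+ */
+
+/*!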
+ * \internal
+ * \brief Check whether a resource history entry is for a recurring action
+ *
+ * \param[in]  rsc  Resource that history entry is for
+ * \param[in]  xml  XML of resource history entry to check
+ * \param[out] op   Where to store parsed info if recurring
+ *
+ * \return true if \p xml is for a recurring action, otherwise false
+ */
+static bool
+is_recurring_history(const pe_resource_t *rsc, const xmlNode *xml,
+                     struct op_history *op)
+{
+    const char *role = NULL;
+
+    op->interval_ms = xe_interval(xml);
+    if (op->interval_ms == 0) {
+        return false; // Not recurring
+    }
+
+    op->id = ID(xml);
+    if (pcmk__str_empty(op->id)) {
+        pcmk__config_err("Ignoring resource history entry without ID");
+        return false; // Shouldn't be possible (unless CIB was manually edited)
+    }
+
+    op->name = crm_element_value(xml, "name");
+    if (op_cannot_recur(op->name)) {
+        pcmk__config_err("Ignoring %s because %s action cannot be recurring",
+                         op->id, pcmk__s(op->name, "unnamed"));
+        return false;
+    }
+
+    // There should only be one recurring operation per action/interval
+    if (is_op_dup(rsc, op->name, op->interval_ms)) {
+        return false;
+    }
+
+    // Ensure role is valid if specified
+    role = crm_element_value(xml, "role");
+    if (role == NULL) {
+        op->role = RSC_ROLE_UNKNOWN;
+    } else {
+        op->role = text2role(role);
+        if (op->role == RSC_ROLE_UNKNOWN) {
+            pcmk__config_err("Ignoring %s because %s is not a valid role",
+                             op->id, role);
+            return false; // Actually ignore the entry, as the message says
+        }
+    }
+
+    // Disabled resources don't get monitored
+    op->key = pcmk__op_key(rsc->id, op->name, op->interval_ms);
+    if (find_rsc_op_entry(rsc, op->key) == NULL) {
+        crm_trace("Not creating recurring action %s for disabled resource %s",
+                  op->id, rsc->id);
+        free(op->key);
+        return false;
+    }
+
+    return true;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a recurring action for an active role should be optional
+ *
+ * \param[in]     rsc    Resource that recurring action is for
+ * \param[in]     node   Node that \p rsc will be active on (if any)
+ * \param[in]     key    Operation key for recurring action to check
+ * \param[in,out] start  Start action for \p rsc
+ *
+ * \return true if recurring action should be optional, otherwise false
+ */
+static bool
+active_recurring_should_be_optional(const pe_resource_t *rsc,
+                                    const pe_node_t *node, const char *key,
+                                    pe_action_t *start)
+{
+    GList *possible_matches = NULL;
+
+    if (node == NULL) { // Should only be possible if unmanaged and stopped
+        pe_rsc_trace(rsc, "%s will be mandatory because resource is unmanaged",
+                     key);
+        return false;
+    }
+
+    if (!pcmk_is_set(rsc->cmds->action_flags(start, NULL),
+                     pe_action_optional)) {
+        pe_rsc_trace(rsc, "%s will be mandatory because %s is",
+                     key, start->uuid);
+        return false;
+    }
+
+    possible_matches = find_actions_exact(rsc->actions, key, node);
+    if (possible_matches == NULL) {
+        pe_rsc_trace(rsc, "%s will be mandatory because it is not active on %s",
+                     key, pe__node_name(node));
+        return false;
+    }
+
+    for (const GList *iter = possible_matches;
+         iter != NULL; iter = iter->next) {
+
+        const pe_action_t *op = (const pe_action_t *) iter->data;
+
+        if (pcmk_is_set(op->flags, pe_action_reschedule)) {
+            pe_rsc_trace(rsc,
+                         "%s will be mandatory because "
+                         "it needs to be rescheduled", key);
+            g_list_free(possible_matches);
+            return false;
+        }
+    }
+
+    g_list_free(possible_matches);
+    return true;
+}
+
+/*!
+ * \internal + * \brief Create recurring action from resource history entry for an active role + * + * \param[in,out] rsc Resource that resource history is for + * \param[in,out] start Start action for \p rsc on \p node + * \param[in] node Node that resource will be active on (if any) + * \param[in] op Resource history entry + */ +static void +recurring_op_for_active(pe_resource_t *rsc, pe_action_t *start, + const pe_node_t *node, const struct op_history *op) +{ + pe_action_t *mon = NULL; + bool is_optional = true; + + // We're only interested in recurring actions for active roles + if (op->role == RSC_ROLE_STOPPED) { + return; + } + + is_optional = active_recurring_should_be_optional(rsc, node, op->key, + start); + + if (((op->role != RSC_ROLE_UNKNOWN) && (rsc->next_role != op->role)) + || ((op->role == RSC_ROLE_UNKNOWN) + && (rsc->next_role == RSC_ROLE_PROMOTED))) { + // Configured monitor role doesn't match role resource will have + + if (is_optional) { // It's running, so cancel it + char *after_key = NULL; + pe_action_t *cancel_op = pcmk__new_cancel_action(rsc, op->name, + op->interval_ms, + node); + + switch (rsc->role) { + case RSC_ROLE_UNPROMOTED: + case RSC_ROLE_STARTED: + if (rsc->next_role == RSC_ROLE_PROMOTED) { + after_key = promote_key(rsc); + + } else if (rsc->next_role == RSC_ROLE_STOPPED) { + after_key = stop_key(rsc); + } + + break; + case RSC_ROLE_PROMOTED: + after_key = demote_key(rsc); + break; + default: + break; + } + + if (after_key) { + pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL, + pe_order_runnable_left, rsc->cluster); + } + } + + do_crm_log((is_optional? LOG_INFO : LOG_TRACE), + "%s recurring action %s because %s configured for %s role " + "(not %s)", + (is_optional? "Cancelling" : "Ignoring"), op->key, op->id, + role2text((op->role == RSC_ROLE_UNKNOWN)? RSC_ROLE_UNPROMOTED : op->role), + role2text(rsc->next_role)); + return; + } + + pe_rsc_trace(rsc, + "Creating %s recurring action %s for %s (%s %s on %s)", + (is_optional? 
"optional" : "mandatory"), op->key, + op->id, rsc->id, role2text(rsc->next_role), + pe__node_name(node)); + + mon = custom_action(rsc, strdup(op->key), op->name, node, is_optional, TRUE, + rsc->cluster); + + if (!pcmk_is_set(start->flags, pe_action_runnable)) { + pe_rsc_trace(rsc, "%s is unrunnable because start is", mon->uuid); + pe__clear_action_flags(mon, pe_action_runnable); + + } else if ((node == NULL) || !node->details->online + || node->details->unclean) { + pe_rsc_trace(rsc, "%s is unrunnable because no node is available", + mon->uuid); + pe__clear_action_flags(mon, pe_action_runnable); + + } else if (!pcmk_is_set(mon->flags, pe_action_optional)) { + pe_rsc_info(rsc, "Start %s-interval %s for %s on %s", + pcmk__readable_interval(op->interval_ms), mon->task, + rsc->id, pe__node_name(node)); + } + + if (rsc->next_role == RSC_ROLE_PROMOTED) { + pe__add_action_expected_result(mon, CRM_EX_PROMOTED); + } + + // Order monitor relative to other actions + if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) { + pcmk__new_ordering(rsc, start_key(rsc), NULL, + NULL, strdup(mon->uuid), mon, + pe_order_implies_then|pe_order_runnable_left, + rsc->cluster); + + pcmk__new_ordering(rsc, reload_key(rsc), NULL, + NULL, strdup(mon->uuid), mon, + pe_order_implies_then|pe_order_runnable_left, + rsc->cluster); + + if (rsc->next_role == RSC_ROLE_PROMOTED) { + pcmk__new_ordering(rsc, promote_key(rsc), NULL, + rsc, NULL, mon, + pe_order_optional|pe_order_runnable_left, + rsc->cluster); + + } else if (rsc->role == RSC_ROLE_PROMOTED) { + pcmk__new_ordering(rsc, demote_key(rsc), NULL, + rsc, NULL, mon, + pe_order_optional|pe_order_runnable_left, + rsc->cluster); + } + } +} + +/*! + * \internal + * \brief Cancel a recurring action if running on a node + * + * \param[in,out] rsc Resource that action is for + * \param[in] node Node to cancel action on + * \param[in] key Operation key for action + * \param[in] name Action name + * \param[in] interval_ms Action interval (in milliseconds) + */ +static void +cancel_if_running(pe_resource_t *rsc, const pe_node_t *node, const char *key, + const char *name, guint interval_ms) +{ + GList *possible_matches = find_actions_exact(rsc->actions, key, node); + pe_action_t *cancel_op = NULL; + + if (possible_matches == NULL) { + return; // Recurring action isn't running on this node + } + g_list_free(possible_matches); + + cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node); + + switch (rsc->next_role) { + case RSC_ROLE_STARTED: + case RSC_ROLE_UNPROMOTED: + /* Order starts after cancel. If the current role is + * stopped, this cancels the monitor before the resource + * starts; if the current role is started, then this cancels + * the monitor on a migration target before starting there. + */ + pcmk__new_ordering(rsc, NULL, cancel_op, + rsc, start_key(rsc), NULL, + pe_order_runnable_left, rsc->cluster); + break; + default: + break; + } + pe_rsc_info(rsc, + "Cancelling %s-interval %s action for %s on %s because " + "configured for " RSC_ROLE_STOPPED_S " role (not %s)", + pcmk__readable_interval(interval_ms), name, rsc->id, + pe__node_name(node), role2text(rsc->next_role)); +} + +/*! 
+ * \internal + * \brief Order an action after all probes of a resource on a node + * + * \param[in,out] rsc Resource to check for probes + * \param[in] node Node to check for probes of \p rsc + * \param[in,out] action Action to order after probes of \p rsc on \p node + */ +static void +order_after_probes(pe_resource_t *rsc, const pe_node_t *node, + pe_action_t *action) +{ + GList *probes = pe__resource_actions(rsc, node, RSC_STATUS, FALSE); + + for (GList *iter = probes; iter != NULL; iter = iter->next) { + order_actions((pe_action_t *) iter->data, action, + pe_order_runnable_left); + } + g_list_free(probes); +} + +/*! + * \internal + * \brief Order an action after all stops of a resource on a node + * + * \param[in,out] rsc Resource to check for stops + * \param[in] node Node to check for stops of \p rsc + * \param[in,out] action Action to order after stops of \p rsc on \p node + */ +static void +order_after_stops(pe_resource_t *rsc, const pe_node_t *node, + pe_action_t *action) +{ + GList *stop_ops = pe__resource_actions(rsc, node, RSC_STOP, TRUE); + + for (GList *iter = stop_ops; iter != NULL; iter = iter->next) { + pe_action_t *stop = (pe_action_t *) iter->data; + + if (!pcmk_is_set(stop->flags, pe_action_optional) + && !pcmk_is_set(action->flags, pe_action_optional) + && !pcmk_is_set(rsc->flags, pe_rsc_managed)) { + pe_rsc_trace(rsc, "%s optional on %s: unmanaged", + action->uuid, pe__node_name(node)); + pe__set_action_flags(action, pe_action_optional); + } + + if (!pcmk_is_set(stop->flags, pe_action_runnable)) { + crm_debug("%s unrunnable on %s: stop is unrunnable", + action->uuid, pe__node_name(node)); + pe__clear_action_flags(action, pe_action_runnable); + } + + if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { + pcmk__new_ordering(rsc, stop_key(rsc), stop, + NULL, NULL, action, + pe_order_implies_then|pe_order_runnable_left, + rsc->cluster); + } + } + g_list_free(stop_ops); +} + +/*! 
+ * \internal
+ * \brief Create recurring action from resource history entry for inactive role
+ *
+ * \param[in,out] rsc   Resource that resource history is for
+ * \param[in]     node  Node that resource will be active on (if any)
+ * \param[in]     op    Resource history entry
+ */
+static void
+recurring_op_for_inactive(pe_resource_t *rsc, const pe_node_t *node,
+                          const struct op_history *op)
+{
+    GList *possible_matches = NULL;
+
+    // We're only interested in recurring actions for the inactive role
+    if (op->role != RSC_ROLE_STOPPED) {
+        return;
+    }
+
+    if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
+        crm_notice("Ignoring %s (recurring monitors for " RSC_ROLE_STOPPED_S
+                   " role are not supported for anonymous clones)", op->id);
+        return; // @TODO add support
+    }
+
+    pe_rsc_trace(rsc, "Creating recurring action %s for %s on nodes "
+                 "where it should not be running", op->id, rsc->id);
+
+    for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) {
+        pe_node_t *stop_node = (pe_node_t *) iter->data;
+
+        bool is_optional = true;
+        pe_action_t *stopped_mon = NULL;
+
+        // Cancel action on node where resource will be active
+        if ((node != NULL)
+            && pcmk__str_eq(stop_node->details->uname, node->details->uname,
+                            pcmk__str_casei)) {
+            cancel_if_running(rsc, node, op->key, op->name, op->interval_ms);
+            continue;
+        }
+
+        // Recurring action on this node is optional if it's already active here
+        possible_matches = find_actions_exact(rsc->actions, op->key, stop_node);
+        is_optional = (possible_matches != NULL);
+        g_list_free(possible_matches);
+
+        pe_rsc_trace(rsc,
+                     "Creating %s recurring action %s for %s (%s "
+                     RSC_ROLE_STOPPED_S " on %s)",
+                     (is_optional? "optional" : "mandatory"),
+                     op->key, op->id, rsc->id, pe__node_name(stop_node));
+
+        stopped_mon = custom_action(rsc, strdup(op->key), op->name, stop_node,
+                                    is_optional, TRUE, rsc->cluster);
+
+        pe__add_action_expected_result(stopped_mon, CRM_EX_NOT_RUNNING);
+
+        if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+            order_after_probes(rsc, stop_node, stopped_mon);
+        }
+
+        /* The recurring action is for the inactive role, so it shouldn't be
+         * performed until the resource is inactive.
+         */
+        order_after_stops(rsc, stop_node, stopped_mon);
+
+        if (!stop_node->details->online || stop_node->details->unclean) {
+            pe_rsc_debug(rsc, "%s unrunnable on %s: node unavailable",
+                         stopped_mon->uuid, pe__node_name(stop_node));
+            pe__clear_action_flags(stopped_mon, pe_action_runnable);
+        }
+
+        if (pcmk_is_set(stopped_mon->flags, pe_action_runnable)
+            && !pcmk_is_set(stopped_mon->flags, pe_action_optional)) {
+            crm_notice("Start recurring %s-interval %s for "
+                       RSC_ROLE_STOPPED_S " %s on %s",
+                       pcmk__readable_interval(op->interval_ms),
+                       stopped_mon->task, rsc->id, pe__node_name(stop_node));
+        }
+    }
+}
+
+/*!
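+ * (Editorial illustration: a Stopped-role monitor that this code schedules
+ * would be configured like the following, with hypothetical names; as noted
+ * above, it requires a unique rather than anonymous clone.)
+ * \code
+ *     <op id="db-monitor-stopped" name="monitor" interval="60s"
+ *         role="Stopped"/>
+ * \endcode
+ */
+
+/*!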
+ * \internal + * \brief Create recurring actions for a resource + * + * \param[in,out] rsc Resource to create recurring actions for + */ +void +pcmk__create_recurring_actions(pe_resource_t *rsc) +{ + pe_action_t *start = NULL; + + if (pcmk_is_set(rsc->flags, pe_rsc_block)) { + pe_rsc_trace(rsc, "Skipping recurring actions for blocked resource %s", + rsc->id); + return; + } + + if (pcmk_is_set(rsc->flags, pe_rsc_maintenance)) { + pe_rsc_trace(rsc, "Skipping recurring actions for %s " + "in maintenance mode", rsc->id); + return; + } + + if (rsc->allocated_to == NULL) { + // Recurring actions for active roles not needed + + } else if (rsc->allocated_to->details->maintenance) { + pe_rsc_trace(rsc, + "Skipping recurring actions for %s on %s " + "in maintenance mode", + rsc->id, pe__node_name(rsc->allocated_to)); + + } else if ((rsc->next_role != RSC_ROLE_STOPPED) + || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { + // Recurring actions for active roles needed + start = start_action(rsc, rsc->allocated_to, TRUE); + } + + pe_rsc_trace(rsc, "Creating any recurring actions needed for %s", rsc->id); + + for (xmlNode *op = first_named_child(rsc->ops_xml, "op"); + op != NULL; op = crm_next_same_xml(op)) { + + struct op_history op_history = { NULL, }; + + if (!is_recurring_history(rsc, op, &op_history)) { + continue; + } + + if (start != NULL) { + recurring_op_for_active(rsc, start, rsc->allocated_to, &op_history); + } + recurring_op_for_inactive(rsc, rsc->allocated_to, &op_history); + + free(op_history.key); + } +} + +/*! + * \internal + * \brief Create an executor cancel action + * + * \param[in,out] rsc Resource of action to cancel + * \param[in] task Name of action to cancel + * \param[in] interval_ms Interval of action to cancel + * \param[in] node Node of action to cancel + * + * \return Created op + */ +pe_action_t * +pcmk__new_cancel_action(pe_resource_t *rsc, const char *task, guint interval_ms, + const pe_node_t *node) +{ + pe_action_t *cancel_op = NULL; + char *key = NULL; + char *interval_ms_s = NULL; + + CRM_ASSERT((rsc != NULL) && (task != NULL) && (node != NULL)); + + // @TODO dangerous if possible to schedule another action with this key + key = pcmk__op_key(rsc->id, task, interval_ms); + + cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE, + rsc->cluster); + + pcmk__str_update(&cancel_op->task, RSC_CANCEL); + pcmk__str_update(&cancel_op->cancel_task, task); + + interval_ms_s = crm_strdup_printf("%u", interval_ms); + add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task); + add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s); + free(interval_ms_s); + + return cancel_op; +} + +/*! 
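+ * (Editorial sketch of typical use, mirroring cancel_if_running() above:
+ * create the cancellation, then order it before the resource's start.)
+ * \code
+ *     pe_action_t *cancel = pcmk__new_cancel_action(rsc, "monitor",
+ *                                                   10000, node); // 10s
+ *     pcmk__new_ordering(rsc, NULL, cancel, rsc, start_key(rsc), NULL,
+ *                        pe_order_runnable_left, rsc->cluster);
+ * \endcode
+ */
+
+/*!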
+ * \internal + * \brief Schedule cancellation of a recurring action + * + * \param[in,out] rsc Resource that action is for + * \param[in] call_id Action's call ID from history + * \param[in] task Action name + * \param[in] interval_ms Action interval + * \param[in] node Node that history entry is for + * \param[in] reason Short description of why action is being cancelled + */ +void +pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task, + guint interval_ms, const pe_node_t *node, + const char *reason) +{ + pe_action_t *cancel = NULL; + + CRM_CHECK((rsc != NULL) && (task != NULL) + && (node != NULL) && (reason != NULL), + return); + + crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s", + pcmk__readable_interval(interval_ms), task, rsc->id, + pe__node_name(node), reason); + cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node); + add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); + + // Cancellations happen after stops + pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, + pe_order_optional, rsc->cluster); +} + +/*! + * \internal + * \brief Reschedule a recurring action + * + * \param[in,out] rsc Resource that action is for + * \param[in] task Name of action being rescheduled + * \param[in] interval_ms Action interval (in milliseconds) + * \param[in,out] node Node where action should be rescheduled + */ +void +pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task, + guint interval_ms, pe_node_t *node) +{ + pe_action_t *op = NULL; + + trigger_unfencing(rsc, node, "Device parameters changed (reschedule)", + NULL, rsc->cluster); + op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms), + task, node, TRUE, TRUE, rsc->cluster); + pe__set_action_flags(op, pe_action_reschedule); +} + +/*! + * \internal + * \brief Check whether an action is recurring + * + * \param[in] action Action to check + * + * \return true if \p action has a nonzero interval, otherwise false + */ +bool +pcmk__action_is_recurring(const pe_action_t *action) +{ + guint interval_ms = 0; + + if (pcmk__guint_from_hash(action->meta, + XML_LRM_ATTR_INTERVAL_MS, 0, + &interval_ms) != pcmk_rc_ok) { + return false; + } + return (interval_ms > 0); +} diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c new file mode 100644 index 0000000..6adb5d4 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_remote.c @@ -0,0 +1,729 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <sys/param.h> + +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> + +#include <glib.h> + +#include <crm/pengine/status.h> +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +enum remote_connection_state { + remote_state_unknown = 0, + remote_state_alive = 1, + remote_state_resting = 2, + remote_state_failed = 3, + remote_state_stopped = 4 +}; + +static const char * +state2text(enum remote_connection_state state) +{ + switch (state) { + case remote_state_unknown: + return "unknown"; + case remote_state_alive: + return "alive"; + case remote_state_resting: + return "resting"; + case remote_state_failed: + return "failed"; + case remote_state_stopped: + return "stopped"; + } + + return "impossible"; +} + +/* We always use pe_order_preserve with these convenience functions to exempt + * internally generated constraints from the prohibition of user constraints + * involving remote connection resources. + * + * The start ordering additionally uses pe_order_runnable_left so that the + * specified action is not runnable if the start is not runnable. + */ + +static inline void +order_start_then_action(pe_resource_t *first_rsc, pe_action_t *then_action, + uint32_t extra, pe_working_set_t *data_set) +{ + if ((first_rsc != NULL) && (then_action != NULL) && (data_set != NULL)) { + pcmk__new_ordering(first_rsc, start_key(first_rsc), NULL, + then_action->rsc, NULL, then_action, + pe_order_preserve|pe_order_runnable_left|extra, + data_set); + } +} + +static inline void +order_action_then_stop(pe_action_t *first_action, pe_resource_t *then_rsc, + uint32_t extra, pe_working_set_t *data_set) +{ + if ((first_action != NULL) && (then_rsc != NULL) && (data_set != NULL)) { + pcmk__new_ordering(first_action->rsc, NULL, first_action, + then_rsc, stop_key(then_rsc), NULL, + pe_order_preserve|extra, data_set); + } +} + +static enum remote_connection_state +get_remote_node_state(const pe_node_t *node) +{ + const pe_resource_t *remote_rsc = NULL; + const pe_node_t *cluster_node = NULL; + + CRM_ASSERT(node != NULL); + + remote_rsc = node->details->remote_rsc; + CRM_ASSERT(remote_rsc != NULL); + + cluster_node = pe__current_node(remote_rsc); + + /* If the cluster node the remote connection resource resides on + * is unclean or went offline, we can't process any operations + * on that remote node until after it starts elsewhere. + */ + if ((remote_rsc->next_role == RSC_ROLE_STOPPED) + || (remote_rsc->allocated_to == NULL)) { + + // The connection resource is not going to run anywhere + + if ((cluster_node != NULL) && cluster_node->details->unclean) { + /* The remote connection is failed because its resource is on a + * failed node and can't be recovered elsewhere, so we must fence. + */ + return remote_state_failed; + } + + if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) { + /* Connection resource is cleanly stopped */ + return remote_state_stopped; + } + + /* Connection resource is failed */ + + if ((remote_rsc->next_role == RSC_ROLE_STOPPED) + && remote_rsc->remote_reconnect_ms + && node->details->remote_was_fenced + && !pe__shutdown_requested(node)) { + + /* We won't know whether the connection is recoverable until the + * reconnect interval expires and we reattempt connection. + */ + return remote_state_unknown; + } + + /* The remote connection is in a failed state. 
If there are any + * resources known to be active on it (stop) or in an unknown state + * (probe), we must assume the worst and fence it. + */ + return remote_state_failed; + + } else if (cluster_node == NULL) { + /* Connection is recoverable but not currently running anywhere, so see + * if we can recover it first + */ + return remote_state_unknown; + + } else if (cluster_node->details->unclean + || !(cluster_node->details->online)) { + // Connection is running on a dead node, see if we can recover it first + return remote_state_resting; + + } else if (pcmk__list_of_multiple(remote_rsc->running_on) + && (remote_rsc->partial_migration_source != NULL) + && (remote_rsc->partial_migration_target != NULL)) { + /* We're in the middle of migrating a connection resource, so wait until + * after the migration completes before performing any actions. + */ + return remote_state_resting; + + } + return remote_state_alive; +} + +/*! + * \internal + * \brief Order actions on remote node relative to actions for the connection + * + * \param[in,out] action An action scheduled on a Pacemaker Remote node + */ +static void +apply_remote_ordering(pe_action_t *action) +{ + pe_resource_t *remote_rsc = NULL; + enum action_tasks task = text2task(action->task); + enum remote_connection_state state = get_remote_node_state(action->node); + + uint32_t order_opts = pe_order_none; + + if (action->rsc == NULL) { + return; + } + + CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); + + remote_rsc = action->node->details->remote_rsc; + CRM_ASSERT(remote_rsc != NULL); + + crm_trace("Order %s action %s relative to %s%s (state: %s)", + action->task, action->uuid, + pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", + remote_rsc->id, state2text(state)); + + if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL)) { + /* Migration ops map to "no_action", but we need to apply the same + * ordering as for stop or demote (see get_router_node()). + */ + task = stop_rsc; + } + + switch (task) { + case start_rsc: + case action_promote: + order_opts = pe_order_none; + + if (state == remote_state_failed) { + /* Force recovery, by making this action required */ + pe__set_order_flags(order_opts, pe_order_implies_then); + } + + /* Ensure connection is up before running this action */ + order_start_then_action(remote_rsc, action, order_opts, + remote_rsc->cluster); + break; + + case stop_rsc: + if (state == remote_state_alive) { + order_action_then_stop(action, remote_rsc, + pe_order_implies_first, + remote_rsc->cluster); + + } else if (state == remote_state_failed) { + /* The resource is active on the node, but since we don't have a + * valid connection, the only way to stop the resource is by + * fencing the node. There is no need to order the stop relative + * to the remote connection, since the stop will become implied + * by the fencing. + */ + pe_fence_node(remote_rsc->cluster, action->node, + "resources are active but connection is unrecoverable", + FALSE); + + } else if (remote_rsc->next_role == RSC_ROLE_STOPPED) { + /* State must be remote_state_unknown or remote_state_stopped. + * Since the connection is not coming back up in this + * transition, stop this resource first. + */ + order_action_then_stop(action, remote_rsc, + pe_order_implies_first, + remote_rsc->cluster); + + } else { + /* The connection is going to be started somewhere else, so + * stop this resource after that completes. 
+ */ + order_start_then_action(remote_rsc, action, pe_order_none, + remote_rsc->cluster); + } + break; + + case action_demote: + /* Only order this demote relative to the connection start if the + * connection isn't being torn down. Otherwise, the demote would be + * blocked because the connection start would not be allowed. + */ + if ((state == remote_state_resting) + || (state == remote_state_unknown)) { + + order_start_then_action(remote_rsc, action, pe_order_none, + remote_rsc->cluster); + } /* Otherwise we can rely on the stop ordering */ + break; + + default: + /* Wait for the connection resource to be up */ + if (pcmk__action_is_recurring(action)) { + /* In case we ever get the recovery logic wrong, force + * recurring monitors to be restarted, even if just + * the connection was re-established + */ + order_start_then_action(remote_rsc, action, + pe_order_implies_then, + remote_rsc->cluster); + + } else { + pe_node_t *cluster_node = pe__current_node(remote_rsc); + + if ((task == monitor_rsc) && (state == remote_state_failed)) { + /* We would only be here if we do not know the state of the + * resource on the remote node. Since we have no way to find + * out, it is necessary to fence the node. + */ + pe_fence_node(remote_rsc->cluster, action->node, + "resources are in unknown state " + "and connection is unrecoverable", FALSE); + } + + if ((cluster_node != NULL) && (state == remote_state_stopped)) { + /* The connection is currently up, but is going down + * permanently. Make sure we check services are actually + * stopped _before_ we let the connection get closed. + */ + order_action_then_stop(action, remote_rsc, + pe_order_runnable_left, + remote_rsc->cluster); + + } else { + order_start_then_action(remote_rsc, action, pe_order_none, + remote_rsc->cluster); + } + } + break; + } +} + +static void +apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set) +{ + /* VMs are also classified as containers for these purposes... in + * that they both involve a 'thing' running on a real or remote + * cluster node. + * + * This allows us to be smarter about the type and extent of + * recovery actions required in various scenarios + */ + pe_resource_t *remote_rsc = NULL; + pe_resource_t *container = NULL; + enum action_tasks task = text2task(action->task); + + CRM_ASSERT(action->rsc != NULL); + CRM_ASSERT(action->node != NULL); + CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); + + remote_rsc = action->node->details->remote_rsc; + CRM_ASSERT(remote_rsc != NULL); + + container = remote_rsc->container; + CRM_ASSERT(container != NULL); + + if (pcmk_is_set(container->flags, pe_rsc_failed)) { + pe_fence_node(data_set, action->node, "container failed", FALSE); + } + + crm_trace("Order %s action %s relative to %s%s for %s%s", + action->task, action->uuid, + pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", + remote_rsc->id, + pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "", + container->id); + + if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, + CRMD_ACTION_MIGRATED, NULL)) { + /* Migration ops map to "no_action", but we need to apply the same + * ordering as for stop or demote (see get_router_node()). 
+ */ + task = stop_rsc; + } + + switch (task) { + case start_rsc: + case action_promote: + // Force resource recovery if the container is recovered + order_start_then_action(container, action, pe_order_implies_then, + data_set); + + // Wait for the connection resource to be up, too + order_start_then_action(remote_rsc, action, pe_order_none, + data_set); + break; + + case stop_rsc: + case action_demote: + if (pcmk_is_set(container->flags, pe_rsc_failed)) { + /* When the container representing a guest node fails, any stop + * or demote actions for resources running on the guest node + * are implied by the container stopping. This is similar to + * how fencing operations work for cluster nodes and remote + * nodes. + */ + } else { + /* Ensure the operation happens before the connection is brought + * down. + * + * If we really wanted to, we could order these after the + * connection start, IFF the container's current role was + * stopped (otherwise we re-introduce an ordering loop when the + * connection is restarting). + */ + order_action_then_stop(action, remote_rsc, pe_order_none, + data_set); + } + break; + + default: + /* Wait for the connection resource to be up */ + if (pcmk__action_is_recurring(action)) { + /* In case we ever get the recovery logic wrong, force + * recurring monitors to be restarted, even if just + * the connection was re-established + */ + if(task != no_action) { + order_start_then_action(remote_rsc, action, + pe_order_implies_then, data_set); + } + } else { + order_start_then_action(remote_rsc, action, pe_order_none, + data_set); + } + break; + } +} + +/*! + * \internal + * \brief Order all relevant actions relative to remote connection actions + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__order_remote_connection_actions(pe_working_set_t *data_set) +{ + if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { + return; + } + + crm_trace("Creating remote connection orderings"); + + for (GList *gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + pe_resource_t *remote = NULL; + + // We are only interested in resource actions + if (action->rsc == NULL) { + continue; + } + + /* Special case: If we are clearing the failcount of an actual + * remote connection resource, then make sure this happens before + * any start of the resource in this transition. + */ + if (action->rsc->is_remote_node && + pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) { + + pcmk__new_ordering(action->rsc, NULL, action, action->rsc, + pcmk__op_key(action->rsc->id, RSC_START, 0), + NULL, pe_order_optional, data_set); + + continue; + } + + // We are only interested in actions allocated to a node + if (action->node == NULL) { + continue; + } + + if (!pe__is_guest_or_remote_node(action->node)) { + continue; + } + + /* We are only interested in real actions. + * + * @TODO This is probably wrong; pseudo-actions might be converted to + * real actions and vice versa later in update_actions() at the end of + * pcmk__apply_orderings(). + */ + if (pcmk_is_set(action->flags, pe_action_pseudo)) { + continue; + } + + remote = action->node->details->remote_rsc; + if (remote == NULL) { + // Orphaned + continue; + } + + /* Another special case: if a resource is moving to a Pacemaker Remote + * node, order the stop on the original node after any start of the + * remote connection. This ensures that if the connection fails to + * start, we leave the resource running on the original node. 
+ */ + if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { + for (GList *item = action->rsc->actions; item != NULL; + item = item->next) { + pe_action_t *rsc_action = item->data; + + if ((rsc_action->node->details != action->node->details) + && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) { + pcmk__new_ordering(remote, start_key(remote), NULL, + action->rsc, NULL, rsc_action, + pe_order_optional, data_set); + } + } + } + + /* The action occurs across a remote connection, so create + * ordering constraints that guarantee the action occurs while the node + * is active (after start, before stop ... things like that). + * + * This is somewhat brittle in that we need to make sure the results of + * this ordering are compatible with the result of get_router_node(). + * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part + * of this logic rather than create_graph_action(). + */ + if (remote->container) { + crm_trace("Container ordering for %s", action->uuid); + apply_container_ordering(action, data_set); + + } else { + crm_trace("Remote ordering for %s", action->uuid); + apply_remote_ordering(action); + } + } +} + +/*! + * \internal + * \brief Check whether a node is a failed remote node + * + * \param[in] node Node to check + * + * \return true if \p node is a failed remote node, false otherwise + */ +bool +pcmk__is_failed_remote_node(const pe_node_t *node) +{ + return pe__is_remote_node(node) && (node->details->remote_rsc != NULL) + && (get_remote_node_state(node) == remote_state_failed); +} + +/*! + * \internal + * \brief Check whether a given resource corresponds to a given node as guest + * + * \param[in] rsc Resource to check + * \param[in] node Node to check + * + * \return true if \p node is a guest node and \p rsc is its containing + * resource, otherwise false + */ +bool +pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc, const pe_node_t *node) +{ + return (rsc != NULL) && (rsc->fillers != NULL) && (node != NULL) + && (node->details->remote_rsc != NULL) + && (node->details->remote_rsc->container == rsc); +} + +/*! + * \internal + * \brief Get proper connection host that a remote action must be routed through + * + * A remote connection resource might be starting, stopping, or migrating in the + * same transition that an action needs to be executed on its Pacemaker Remote + * node. Determine the proper node that the remote action should be routed + * through. + * + * \param[in] action (Potentially remote) action to route + * + * \return Connection host that action should be routed through if remote, + * otherwise NULL + */ +pe_node_t * +pcmk__connection_host_for_action(const pe_action_t *action) +{ + pe_node_t *began_on = NULL; + pe_node_t *ended_on = NULL; + bool partial_migration = false; + const char *task = action->task; + + if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei) + || !pe__is_guest_or_remote_node(action->node)) { + return NULL; + } + + CRM_ASSERT(action->node->details->remote_rsc != NULL); + + began_on = pe__current_node(action->node->details->remote_rsc); + ended_on = action->node->details->remote_rsc->allocated_to; + if (action->node->details->remote_rsc + && (action->node->details->remote_rsc->container == NULL) + && action->node->details->remote_rsc->partial_migration_target) { + partial_migration = true; + } + + if (began_on == NULL) { + crm_trace("Routing %s for %s through remote connection's " + "next node %s (starting)%s", + action->task, (action->rsc? action->rsc->id : "no resource"), + (ended_on? 
ended_on->details->uname : "none"), + partial_migration? " (partial migration)" : ""); + return ended_on; + } + + if (ended_on == NULL) { + crm_trace("Routing %s for %s through remote connection's " + "current node %s (stopping)%s", + action->task, (action->rsc? action->rsc->id : "no resource"), + (began_on? began_on->details->uname : "none"), + partial_migration? " (partial migration)" : ""); + return began_on; + } + + if (began_on->details == ended_on->details) { + crm_trace("Routing %s for %s through remote connection's " + "current node %s (not moving)%s", + action->task, (action->rsc? action->rsc->id : "no resource"), + (began_on? began_on->details->uname : "none"), + partial_migration? " (partial migration)" : ""); + return began_on; + } + + /* If we get here, the remote connection is moving during this transition. + * This means some actions for resources behind the connection will get + * routed through the cluster node the connection resource is currently on, + * and others are routed through the cluster node the connection will end up + * on. + */ + + if (pcmk__str_eq(task, "notify", pcmk__str_casei)) { + task = g_hash_table_lookup(action->meta, "notify_operation"); + } + + /* + * Stop, demote, and migration actions must occur before the connection can + * move (these actions are required before the remote resource can stop). In + * this case, we know these actions have to be routed through the initial + * cluster node the connection resource lived on before the move takes + * place. + * + * The exception is a partial migration of a (non-guest) remote connection + * resource; in that case, all actions (even these) will be ordered after + * the connection's pseudo-start on the migration target, so the target is + * the router node. + */ + if (pcmk__strcase_any_of(task, "cancel", "stop", "demote", "migrate_from", + "migrate_to", NULL) && !partial_migration) { + crm_trace("Routing %s for %s through remote connection's " + "current node %s (moving)%s", + action->task, (action->rsc? action->rsc->id : "no resource"), + (began_on? began_on->details->uname : "none"), + partial_migration? " (partial migration)" : ""); + return began_on; + } + + /* Everything else (start, promote, monitor, probe, refresh, + * clear failcount, delete, ...) must occur after the connection starts on + * the node it is moving to. + */ + crm_trace("Routing %s for %s through remote connection's " + "next node %s (moving)%s", + action->task, (action->rsc? action->rsc->id : "no resource"), + (ended_on? ended_on->details->uname : "none"), + partial_migration? " (partial migration)" : ""); + return ended_on; +} + +/*! + * \internal + * \brief Replace remote connection's addr="#uname" with actual address + * + * REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource + * with its "addr" parameter set to "#uname", pull the actual value from the + * parameters evaluated without a node (which was put there earlier in + * pcmk__create_graph() when the bundle's expand() method was called). 
+ * + * \param[in,out] rsc Resource to check + * \param[in,out] params Resource parameters evaluated per node + */ +void +pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params) +{ + const char *remote_addr = g_hash_table_lookup(params, + XML_RSC_ATTR_REMOTE_RA_ADDR); + + if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) { + GHashTable *base = pe_rsc_params(rsc, NULL, rsc->cluster); + + remote_addr = g_hash_table_lookup(base, XML_RSC_ATTR_REMOTE_RA_ADDR); + if (remote_addr != NULL) { + g_hash_table_insert(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), + strdup(remote_addr)); + } + } +} + +/*! + * \brief Add special bundle meta-attributes to XML + * + * If a given action will be executed on a guest node (including a bundle), + * add the special bundle meta-attribute "container-attribute-target" and + * environment variable "physical_host" as XML attributes (using meta-attribute + * naming). + * + * \param[in,out] args_xml XML to add attributes to + * \param[in] action Action to check + */ +void +pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action) +{ + const pe_node_t *host = NULL; + enum action_tasks task; + + if (!pe__is_guest_node(action->node)) { + return; + } + + task = text2task(action->task); + if ((task == action_notify) || (task == action_notified)) { + task = text2task(g_hash_table_lookup(action->meta, "notify_operation")); + } + + switch (task) { + case stop_rsc: + case stopped_rsc: + case action_demote: + case action_demoted: + // "Down" actions take place on guest's current host + host = pe__current_node(action->node->details->remote_rsc->container); + break; + + case start_rsc: + case started_rsc: + case monitor_rsc: + case action_promote: + case action_promoted: + // "Up" actions take place on guest's next host + host = action->node->details->remote_rsc->container->allocated_to; + break; + + default: + break; + } + + if (host != NULL) { + hash2metafield((gpointer) XML_RSC_ATTR_TARGET, + (gpointer) g_hash_table_lookup(action->rsc->meta, + XML_RSC_ATTR_TARGET), + (gpointer) args_xml); + hash2metafield((gpointer) PCMK__ENV_PHYSICAL_HOST, + (gpointer) host->details->uname, + (gpointer) args_xml); + } +} diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c new file mode 100644 index 0000000..b855499 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_resource.c @@ -0,0 +1,722 @@ +/* + * Copyright 2014-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <stdlib.h> +#include <string.h> +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +// Resource allocation methods that vary by resource variant +static resource_alloc_functions_t allocation_methods[] = { + { + pcmk__primitive_assign, + pcmk__primitive_create_actions, + pcmk__probe_rsc_on_node, + pcmk__primitive_internal_constraints, + pcmk__primitive_apply_coloc_score, + pcmk__colocated_resources, + pcmk__with_primitive_colocations, + pcmk__primitive_with_colocations, + pcmk__add_colocated_node_scores, + pcmk__apply_location, + pcmk__primitive_action_flags, + pcmk__update_ordered_actions, + pcmk__output_resource_actions, + pcmk__add_rsc_actions_to_graph, + pcmk__primitive_add_graph_meta, + pcmk__primitive_add_utilization, + pcmk__primitive_shutdown_lock, + }, + { + pcmk__group_assign, + pcmk__group_create_actions, + pcmk__probe_rsc_on_node, + pcmk__group_internal_constraints, + pcmk__group_apply_coloc_score, + pcmk__group_colocated_resources, + pcmk__with_group_colocations, + pcmk__group_with_colocations, + pcmk__group_add_colocated_node_scores, + pcmk__group_apply_location, + pcmk__group_action_flags, + pcmk__group_update_ordered_actions, + pcmk__output_resource_actions, + pcmk__add_rsc_actions_to_graph, + pcmk__noop_add_graph_meta, + pcmk__group_add_utilization, + pcmk__group_shutdown_lock, + }, + { + pcmk__clone_assign, + clone_create_actions, + clone_create_probe, + clone_internal_constraints, + pcmk__clone_apply_coloc_score, + pcmk__colocated_resources, + pcmk__with_clone_colocations, + pcmk__clone_with_colocations, + pcmk__add_colocated_node_scores, + clone_rsc_location, + clone_action_flags, + pcmk__instance_update_ordered_actions, + pcmk__output_resource_actions, + clone_expand, + clone_append_meta, + pcmk__clone_add_utilization, + pcmk__clone_shutdown_lock, + }, + { + pcmk__bundle_allocate, + pcmk__bundle_create_actions, + pcmk__bundle_create_probe, + pcmk__bundle_internal_constraints, + pcmk__bundle_apply_coloc_score, + pcmk__colocated_resources, + pcmk__with_bundle_colocations, + pcmk__bundle_with_colocations, + pcmk__add_colocated_node_scores, + pcmk__bundle_rsc_location, + pcmk__bundle_action_flags, + pcmk__instance_update_ordered_actions, + pcmk__output_bundle_actions, + pcmk__bundle_expand, + pcmk__noop_add_graph_meta, + pcmk__bundle_add_utilization, + pcmk__bundle_shutdown_lock, + } +}; + +/*! 
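+ * \internal
+ * \brief How the table above is consumed (illustrative sketch)
+ *
+ * Scheduler code does not index allocation_methods directly; each resource's
+ * \c cmds pointer is set to the row matching its variant (see
+ * set_allocation_methods_for_rsc() below), and variant-specific behavior is
+ * dispatched through that pointer. A minimal sketch, assuming \p rsc has
+ * already been unpacked into the working set:
+ *
+ * \code
+ *     rsc->cmds = &allocation_methods[rsc->variant];
+ *     rsc->cmds->output_actions(rsc);  // runs the variant-specific method
+ * \endcode
+ */
+
+/*!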
+ * \internal + * \brief Check whether a resource's agent standard, provider, or type changed + * + * \param[in,out] rsc Resource to check + * \param[in,out] node Node needing unfencing if agent changed + * \param[in] rsc_entry XML with previously known agent information + * \param[in] active_on_node Whether \p rsc is active on \p node + * + * \return true if agent for \p rsc changed, otherwise false + */ +bool +pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node, + const xmlNode *rsc_entry, bool active_on_node) +{ + bool changed = false; + const char *attr_list[] = { + XML_ATTR_TYPE, + XML_AGENT_ATTR_CLASS, + XML_AGENT_ATTR_PROVIDER + }; + + for (int i = 0; i < PCMK__NELEM(attr_list); i++) { + const char *value = crm_element_value(rsc->xml, attr_list[i]); + const char *old_value = crm_element_value(rsc_entry, attr_list[i]); + + if (!pcmk__str_eq(value, old_value, pcmk__str_none)) { + changed = true; + trigger_unfencing(rsc, node, "Device definition changed", NULL, + rsc->cluster); + if (active_on_node) { + crm_notice("Forcing restart of %s on %s " + "because %s changed from '%s' to '%s'", + rsc->id, pe__node_name(node), attr_list[i], + pcmk__s(old_value, ""), pcmk__s(value, "")); + } + } + } + if (changed && active_on_node) { + // Make sure the resource is restarted + custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE, + rsc->cluster); + pe__set_resource_flags(rsc, pe_rsc_start_pending); + } + return changed; +} + +/*! + * \internal + * \brief Add resource (and any matching children) to list if it matches ID + * + * \param[in] result List to add resource to + * \param[in] rsc Resource to check + * \param[in] id ID to match + * + * \return (Possibly new) head of list + */ +static GList * +add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id) +{ + if ((strcmp(rsc->id, id) == 0) + || ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) { + result = g_list_prepend(result, rsc); + } + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child = (pe_resource_t *) iter->data; + + result = add_rsc_if_matching(result, child, id); + } + return result; +} + +/*! + * \internal + * \brief Find all resources matching a given ID by either ID or clone name + * + * \param[in] id Resource ID to check + * \param[in] data_set Cluster working set + * + * \return List of all resources that match \p id + * \note The caller is responsible for freeing the return value with + * g_list_free(). + */ +GList * +pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set) +{ + GList *result = NULL; + + CRM_CHECK((id != NULL) && (data_set != NULL), return NULL); + for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { + result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id); + } + return result; +} + +/*! + * \internal + * \brief Set the variant-appropriate allocation methods for a resource + * + * \param[in,out] rsc Resource to set allocation methods for + * \param[in] ignored Here so function can be used with g_list_foreach() + */ +static void +set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored) +{ + rsc->cmds = &allocation_methods[rsc->variant]; + g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL); +} + +/*! 
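+ * \internal
+ * \brief Example use of pcmk__rscs_matching_id() (illustrative sketch)
+ *
+ * A hypothetical caller, assuming a populated working set and a hypothetical
+ * ID "my-rsc"; the returned list borrows the resource objects, so only the
+ * list itself is freed:
+ *
+ * \code
+ *     GList *matches = pcmk__rscs_matching_id("my-rsc", data_set);
+ *
+ *     for (GList *iter = matches; iter != NULL; iter = iter->next) {
+ *         pe_resource_t *match = iter->data;
+ *
+ *         crm_trace("Found %s", match->id);
+ *     }
+ *     g_list_free(matches);
+ * \endcode
+ */
+
+/*!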
+ * \internal + * \brief Set the variant-appropriate allocation methods for all resources + * + * \param[in,out] data_set Cluster working set + */ +void +pcmk__set_allocation_methods(pe_working_set_t *data_set) +{ + g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc, + NULL); +} + +// Shared implementation of resource_alloc_functions_t:colocated_resources() +GList * +pcmk__colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, + GList *colocated_rscs) +{ + const GList *iter = NULL; + GList *colocations = NULL; + + if (orig_rsc == NULL) { + orig_rsc = rsc; + } + + if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) { + return colocated_rscs; + } + + pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s", + rsc->id, orig_rsc->id); + colocated_rscs = g_list_prepend(colocated_rscs, (gpointer) rsc); + + // Follow colocations where this resource is the dependent resource + colocations = pcmk__this_with_colocations(rsc); + for (iter = colocations; iter != NULL; iter = iter->next) { + const pcmk__colocation_t *constraint = iter->data; + const pe_resource_t *primary = constraint->primary; + + if (primary == orig_rsc) { + continue; // Break colocation loop + } + + if ((constraint->score == INFINITY) && + (pcmk__colocation_affects(rsc, primary, constraint, + true) == pcmk__coloc_affects_location)) { + + colocated_rscs = primary->cmds->colocated_resources(primary, + orig_rsc, + colocated_rscs); + } + } + g_list_free(colocations); + + // Follow colocations where this resource is the primary resource + colocations = pcmk__with_this_colocations(rsc); + for (iter = colocations; iter != NULL; iter = iter->next) { + const pcmk__colocation_t *constraint = iter->data; + const pe_resource_t *dependent = constraint->dependent; + + if (dependent == orig_rsc) { + continue; // Break colocation loop + } + + if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) { + continue; // We can't be sure whether dependent will be colocated + } + + if ((constraint->score == INFINITY) && + (pcmk__colocation_affects(dependent, rsc, constraint, + true) == pcmk__coloc_affects_location)) { + + colocated_rscs = dependent->cmds->colocated_resources(dependent, + orig_rsc, + colocated_rscs); + } + } + g_list_free(colocations); + + return colocated_rscs; +} + +// No-op function for variants that don't need to implement add_graph_meta() +void +pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml) +{ +} + +void +pcmk__output_resource_actions(pe_resource_t *rsc) +{ + pcmk__output_t *out = rsc->cluster->priv; + + pe_node_t *next = NULL; + pe_node_t *current = NULL; + + if (rsc->children != NULL) { + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child = (pe_resource_t *) iter->data; + + child->cmds->output_actions(child); + } + return; + } + + next = rsc->allocated_to; + if (rsc->running_on) { + current = pe__current_node(rsc); + if (rsc->role == RSC_ROLE_STOPPED) { + /* This can occur when resources are being recovered because + * the current role can change in pcmk__primitive_create_actions() + */ + rsc->role = RSC_ROLE_STARTED; + } + } + + if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) { + /* Don't log stopped orphans */ + return; + } + + out->message(out, "rsc-action", rsc, current, next); +} + +/*! 
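+ * \internal
+ * \brief Example use of the colocated_resources() method (illustrative)
+ *
+ * A sketch of how callers such as pcmk__ban_insufficient_capacity() walk a
+ * colocation chain; passing NULL for both \c orig_rsc and the accumulator
+ * starts a fresh traversal rooted at \p rsc:
+ *
+ * \code
+ *     GList *colocated = rsc->cmds->colocated_resources(rsc, NULL, NULL);
+ *
+ *     // ... use the list (entries are borrowed, not copied) ...
+ *     g_list_free(colocated);
+ * \endcode
+ */
+
+/*!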
+ * \internal + * \brief Assign a specified primitive resource to a node + * + * Assign a specified primitive resource to a specified node, if the node can + * run the resource (or unconditionally, if \p force is true). Mark the resource + * as no longer provisional. If the primitive can't be assigned (or \p chosen is + * NULL), unassign any previous assignment for it, set its next role to stopped, + * and update any existing actions scheduled for it. This is not done + * recursively for children, so it should be called only for primitives. + * + * \param[in,out] rsc Resource to assign + * \param[in,out] chosen Node to assign \p rsc to + * \param[in] force If true, assign to \p chosen even if unavailable + * + * \return true if \p rsc could be assigned, otherwise false + * + * \note Assigning a resource to the NULL node using this function is different + * from calling pcmk__unassign_resource(), in that it will also update any + * actions created for the resource. + */ +bool +pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, bool force) +{ + pcmk__output_t *out = rsc->cluster->priv; + + CRM_ASSERT(rsc->variant == pe_native); + + if (!force && (chosen != NULL)) { + if ((chosen->weight < 0) + // Allow the graph to assume that guest node connections will come up + || (!pcmk__node_available(chosen, true, false) + && !pe__is_guest_node(chosen))) { + + crm_debug("All nodes for resource %s are unavailable, unclean or " + "shutting down (%s can%s run resources, with weight %d)", + rsc->id, pe__node_name(chosen), + (pcmk__node_available(chosen, true, false)? "" : "not"), + chosen->weight); + pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); + chosen = NULL; + } + } + + pcmk__unassign_resource(rsc); + pe__clear_resource_flags(rsc, pe_rsc_provisional); + + if (chosen == NULL) { + crm_debug("Could not allocate a node for %s", rsc->id); + pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate"); + + for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) { + pe_action_t *op = (pe_action_t *) iter->data; + + crm_debug("Updating %s for allocation failure", op->uuid); + + if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) { + pe__clear_action_flags(op, pe_action_optional); + + } else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) { + pe__clear_action_flags(op, pe_action_runnable); + //pe__set_resource_flags(rsc, pe_rsc_block); + + } else { + // Cancel recurring actions, unless for stopped state + const char *interval_ms_s = NULL; + const char *target_rc_s = NULL; + char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING); + + interval_ms_s = g_hash_table_lookup(op->meta, + XML_LRM_ATTR_INTERVAL_MS); + target_rc_s = g_hash_table_lookup(op->meta, + XML_ATTR_TE_TARGET_RC); + if ((interval_ms_s != NULL) + && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none) + && !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) { + pe__clear_action_flags(op, pe_action_runnable); + } + free(rc_stopped); + } + } + return false; + } + + crm_debug("Assigning %s to %s", rsc->id, pe__node_name(chosen)); + rsc->allocated_to = pe__copy_node(chosen); + + chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, + rsc); + chosen->details->num_resources++; + chosen->count++; + pcmk__consume_node_capacity(chosen->details->utilization, rsc); + + if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) { + out->message(out, "resource-util", rsc, chosen, __func__); + } + return true; +} + +/*! 
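+ * \internal
+ * \brief Example use of pcmk__finalize_assignment() (illustrative sketch)
+ *
+ * A hypothetical assignment step for a primitive, where \c chosen may be
+ * NULL if no node was found; in that case the function sets the next role to
+ * stopped and updates pending actions rather than assigning:
+ *
+ * \code
+ *     if (!pcmk__finalize_assignment(rsc, chosen, false)) {
+ *         crm_trace("%s will be stopped", rsc->id);
+ *     }
+ * \endcode
+ */
+
+/*!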
+ * \internal + * \brief Assign a specified resource (of any variant) to a node + * + * Assign a specified resource and its children (if any) to a specified node, if + * the node can run the resource (or unconditionally, if \p force is true). Mark + * the resources as no longer provisional. If the resources can't be assigned + * (or \p chosen is NULL), unassign any previous assignments, set next role to + * stopped, and update any existing actions scheduled for them. + * + * \param[in,out] rsc Resource to assign + * \param[in,out] chosen Node to assign \p rsc to + * \param[in] force If true, assign to \p chosen even if unavailable + * + * \return true if \p rsc could be assigned, otherwise false + * + * \note Assigning a resource to the NULL node using this function is different + * from calling pcmk__unassign_resource(), in that it will also update any + * actions created for the resource. + */ +bool +pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) +{ + bool changed = false; + + if (rsc->children == NULL) { + if (rsc->allocated_to != NULL) { + changed = true; + } + pcmk__finalize_assignment(rsc, node, force); + + } else { + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) iter->data; + + changed |= pcmk__assign_resource(child_rsc, node, force); + } + } + return changed; +} + +/*! + * \internal + * \brief Remove any assignment of a specified resource to a node + * + * If a specified resource has been assigned to a node, remove that assignment + * and mark the resource as provisional again. This is not done recursively for + * children, so it should be called only for primitives. + * + * \param[in,out] rsc Resource to unassign + */ +void +pcmk__unassign_resource(pe_resource_t *rsc) +{ + pe_node_t *old = rsc->allocated_to; + + if (old == NULL) { + return; + } + + crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old)); + pe__set_resource_flags(rsc, pe_rsc_provisional); + rsc->allocated_to = NULL; + + /* We're going to free the pe_node_t, but its details member is shared and + * will remain, so update that appropriately first. + */ + old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, + rsc); + old->details->num_resources--; + pcmk__release_node_capacity(old->details->utilization, rsc); + free(old); +} + +/*! 
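+ * \internal
+ * \brief How assignment and unassignment pair up (illustrative sketch)
+ *
+ * pcmk__unassign_resource() returns a primitive to the provisional state, so
+ * a hypothetical "try another node" sequence (with \c other_node standing in
+ * for some candidate) looks like:
+ *
+ * \code
+ *     pcmk__unassign_resource(rsc);                   // provisional again
+ *     pcmk__assign_resource(rsc, other_node, false);  // retry elsewhere
+ * \endcode
+ *
+ * Unlike assigning to a NULL node, unassignment alone does not touch any
+ * actions already created for the resource.
+ */
+
+/*!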
+ * \internal + * \brief Check whether a resource has reached its migration threshold on a node + * + * \param[in,out] rsc Resource to check + * \param[in] node Node to check + * \param[out] failed If threshold has been reached, this will be set to + * resource that failed (possibly a parent of \p rsc) + * + * \return true if the migration threshold has been reached, false otherwise + */ +bool +pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node, + pe_resource_t **failed) +{ + int fail_count, remaining_tries; + pe_resource_t *rsc_to_ban = rsc; + + // Migration threshold of 0 means never force away + if (rsc->migration_threshold == 0) { + return false; + } + + // If we're ignoring failures, also ignore the migration threshold + if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) { + return false; + } + + // If there are no failures, there's no need to force away + fail_count = pe_get_failcount(node, rsc, NULL, + pe_fc_effective|pe_fc_fillers, NULL); + if (fail_count <= 0) { + return false; + } + + // If failed resource is anonymous clone instance, we'll force clone away + if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { + rsc_to_ban = uber_parent(rsc); + } + + // How many more times recovery will be tried on this node + remaining_tries = rsc->migration_threshold - fail_count; + + if (remaining_tries <= 0) { + crm_warn("%s cannot run on %s due to reaching migration threshold " + "(clean up resource to allow again)" + CRM_XS " failures=%d migration-threshold=%d", + rsc_to_ban->id, pe__node_name(node), fail_count, + rsc->migration_threshold); + if (failed != NULL) { + *failed = rsc_to_ban; + } + return true; + } + + crm_info("%s can fail %d more time%s on " + "%s before reaching migration threshold (%d)", + rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries), + pe__node_name(node), rsc->migration_threshold); + return false; +} + +static void * +convert_const_pointer(const void *ptr) +{ + /* Worst function ever */ + return (void *)ptr; +} + +/*! + * \internal + * \brief Get a node's weight + * + * \param[in] node Unweighted node to check (for node ID) + * \param[in] nodes List of weighted nodes to look for \p node in + * + * \return Node's weight, or -INFINITY if not found + */ +static int +get_node_weight(const pe_node_t *node, GHashTable *nodes) +{ + pe_node_t *weighted_node = NULL; + + if ((node != NULL) && (nodes != NULL)) { + weighted_node = g_hash_table_lookup(nodes, node->details->id); + } + return (weighted_node == NULL)? -INFINITY : weighted_node->weight; +} + +/*! 
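+ * \internal
+ * \brief Worked example for pcmk__threshold_reached() (illustrative)
+ *
+ * With migration-threshold=3 and an effective fail count of 2, one recovery
+ * attempt remains, so the check below returns false; a third failure would
+ * make it return true and set \c failed to the resource (or anonymous clone
+ * parent) to ban:
+ *
+ * \code
+ *     pe_resource_t *failed = NULL;
+ *
+ *     if (pcmk__threshold_reached(rsc, node, &failed)) {
+ *         // ban failed from node, e.g. via resource_location()
+ *     }
+ * \endcode
+ */
+
+/*!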
+ * \internal
+ * \brief Compare two resources according to which should be allocated first
+ *
+ * \param[in] a     First resource to compare
+ * \param[in] b     Second resource to compare
+ * \param[in] data  Sorted list of all nodes in cluster
+ *
+ * \return -1 if \p a should be allocated before \p b, 0 if they are equal,
+ *         or +1 if \p a should be allocated after \p b
+ */
+static gint
+cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
+{
+    const pe_resource_t *resource1 = a;
+    const pe_resource_t *resource2 = b;
+    const GList *nodes = (const GList *) data;
+
+    int rc = 0;
+    int r1_weight = -INFINITY;
+    int r2_weight = -INFINITY;
+    pe_node_t *r1_node = NULL;
+    pe_node_t *r2_node = NULL;
+    GHashTable *r1_nodes = NULL;
+    GHashTable *r2_nodes = NULL;
+    const char *reason = NULL;
+
+    // Resources with highest priority should be allocated first
+    reason = "priority";
+    r1_weight = resource1->priority;
+    r2_weight = resource2->priority;
+    if (r1_weight > r2_weight) {
+        rc = -1;
+        goto done;
+    }
+    if (r1_weight < r2_weight) {
+        rc = 1;
+        goto done;
+    }
+
+    // We need nodes to make any other useful comparisons
+    reason = "no node list";
+    if (nodes == NULL) {
+        goto done;
+    }
+
+    // Calculate and log node weights
+    resource1->cmds->add_colocated_node_scores(convert_const_pointer(resource1),
+                                               resource1->id, &r1_nodes, NULL,
+                                               1, pcmk__coloc_select_this_with);
+    resource2->cmds->add_colocated_node_scores(convert_const_pointer(resource2),
+                                               resource2->id, &r2_nodes, NULL,
+                                               1, pcmk__coloc_select_this_with);
+    pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
+                          resource1->cluster);
+    pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
+                          resource2->cluster);
+
+    // The resource with highest score on its current node goes first
+    reason = "current location";
+    if (resource1->running_on != NULL) {
+        r1_node = pe__current_node(resource1);
+    }
+    if (resource2->running_on != NULL) {
+        r2_node = pe__current_node(resource2);
+    }
+    r1_weight = get_node_weight(r1_node, r1_nodes);
+    r2_weight = get_node_weight(r2_node, r2_nodes);
+    if (r1_weight > r2_weight) {
+        rc = -1;
+        goto done;
+    }
+    if (r1_weight < r2_weight) {
+        rc = 1;
+        goto done;
+    }
+
+    // Otherwise a higher weight on any node will do
+    reason = "score";
+    for (const GList *iter = nodes; iter != NULL; iter = iter->next) {
+        const pe_node_t *node = (const pe_node_t *) iter->data;
+
+        r1_weight = get_node_weight(node, r1_nodes);
+        r2_weight = get_node_weight(node, r2_nodes);
+        if (r1_weight > r2_weight) {
+            rc = -1;
+            goto done;
+        }
+        if (r1_weight < r2_weight) {
+            rc = 1;
+            goto done;
+        }
+    }
+
+done:
+    crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
+              resource1->id, r1_weight,
+              ((r1_node == NULL)? "" : " on "),
+              ((r1_node == NULL)? "" : r1_node->details->id),
+              ((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
+              resource2->id, r2_weight,
+              ((r2_node == NULL)? "" : " on "),
+              ((r2_node == NULL)? "" : r2_node->details->id),
+              reason);
+    if (r1_nodes != NULL) {
+        g_hash_table_destroy(r1_nodes);
+    }
+    if (r2_nodes != NULL) {
+        g_hash_table_destroy(r2_nodes);
+    }
+    return rc;
+}
+
+/*!
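+ * \internal
+ * \brief How cmp_resources() is wired up (illustrative sketch)
+ *
+ * The comparator is only meaningful with the sorted node list as user data,
+ * mirroring what pcmk__sort_resources() below does:
+ *
+ * \code
+ *     GList *nodes = pcmk__sort_nodes(g_list_copy(data_set->nodes), NULL);
+ *
+ *     data_set->resources = g_list_sort_with_data(data_set->resources,
+ *                                                 cmp_resources, nodes);
+ *     g_list_free(nodes);
+ * \endcode
+ */
+
+/*!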
+ * \internal
+ * \brief Sort resources in the order they should be allocated to nodes
+ *
+ * \param[in,out] data_set  Cluster working set
+ */
+void
+pcmk__sort_resources(pe_working_set_t *data_set)
+{
+    GList *nodes = g_list_copy(data_set->nodes);

+    nodes = pcmk__sort_nodes(nodes, NULL);
+    data_set->resources = g_list_sort_with_data(data_set->resources,
+                                                cmp_resources, nodes);
+    g_list_free(nodes);
+}
diff --git a/lib/pacemaker/pcmk_sched_tickets.c b/lib/pacemaker/pcmk_sched_tickets.c
new file mode 100644
index 0000000..30206d7
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_tickets.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include "libpacemaker_private.h"
+
+enum loss_ticket_policy {
+    loss_ticket_stop,
+    loss_ticket_demote,
+    loss_ticket_fence,
+    loss_ticket_freeze
+};
+
+typedef struct {
+    const char *id;
+    pe_resource_t *rsc;
+    pe_ticket_t *ticket;
+    enum loss_ticket_policy loss_policy;
+    int role;
+} rsc_ticket_t;
+
+/*!
+ * \brief Check whether a ticket constraint matches a resource by role
+ *
+ * \param[in] rsc         Resource to compare with ticket
+ * \param[in] rsc_ticket  Ticket constraint
+ *
+ * \return true if constraint has no role or resource's role matches
+ *         constraint's, otherwise false
+ */
+static bool
+ticket_role_matches(const pe_resource_t *rsc, const rsc_ticket_t *rsc_ticket)
+{
+    if ((rsc_ticket->role == RSC_ROLE_UNKNOWN)
+        || (rsc_ticket->role == rsc->role)) {
+        return true;
+    }
+    pe_rsc_trace(rsc, "Skipping constraint: \"%s\" state filter",
+                 role2text(rsc_ticket->role));
+    return false;
+}
+
+/*!
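+ * \internal
+ * \brief Configuration shape handled by this file (illustrative)
+ *
+ * A typical rsc_ticket constraint, with illustrative IDs; losing (or putting
+ * in standby) ticketA would demote rsc1 wherever it is promoted:
+ *
+ * \code
+ *     <rsc_ticket id="rsc1-req-ticketA" rsc="rsc1" rsc-role="Promoted"
+ *                 ticket="ticketA" loss-policy="demote"/>
+ * \endcode
+ */
+
+/*!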
+ * \brief Create location constraints and fencing as needed for a ticket + * + * \param[in,out] rsc Resource affected by ticket + * \param[in] rsc_ticket Ticket + * \param[in,out] data_set Cluster working set + */ +static void +constraints_for_ticket(pe_resource_t *rsc, const rsc_ticket_t *rsc_ticket, + pe_working_set_t *data_set) +{ + GList *gIter = NULL; + + CRM_CHECK((rsc != NULL) && (rsc_ticket != NULL), return); + + if (rsc_ticket->ticket->granted && !rsc_ticket->ticket->standby) { + return; + } + + if (rsc->children) { + pe_rsc_trace(rsc, "Processing ticket dependencies from %s", rsc->id); + for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + constraints_for_ticket((pe_resource_t *) gIter->data, rsc_ticket, + data_set); + } + return; + } + + pe_rsc_trace(rsc, "%s: Processing ticket dependency on %s (%s, %s)", + rsc->id, rsc_ticket->ticket->id, rsc_ticket->id, + role2text(rsc_ticket->role)); + + if (!rsc_ticket->ticket->granted && (rsc->running_on != NULL)) { + + switch (rsc_ticket->loss_policy) { + case loss_ticket_stop: + resource_location(rsc, NULL, -INFINITY, "__loss_of_ticket__", + data_set); + break; + + case loss_ticket_demote: + // Promotion score will be set to -INFINITY in promotion_order() + if (rsc_ticket->role != RSC_ROLE_PROMOTED) { + resource_location(rsc, NULL, -INFINITY, + "__loss_of_ticket__", data_set); + } + break; + + case loss_ticket_fence: + if (!ticket_role_matches(rsc, rsc_ticket)) { + return; + } + + resource_location(rsc, NULL, -INFINITY, "__loss_of_ticket__", + data_set); + + for (gIter = rsc->running_on; gIter != NULL; + gIter = gIter->next) { + pe_fence_node(data_set, (pe_node_t *) gIter->data, + "deadman ticket was lost", FALSE); + } + break; + + case loss_ticket_freeze: + if (!ticket_role_matches(rsc, rsc_ticket)) { + return; + } + if (rsc->running_on != NULL) { + pe__clear_resource_flags(rsc, pe_rsc_managed); + pe__set_resource_flags(rsc, pe_rsc_block); + } + break; + } + + } else if (!rsc_ticket->ticket->granted) { + + if ((rsc_ticket->role != RSC_ROLE_PROMOTED) + || (rsc_ticket->loss_policy == loss_ticket_stop)) { + resource_location(rsc, NULL, -INFINITY, "__no_ticket__", + data_set); + } + + } else if (rsc_ticket->ticket->standby) { + + if ((rsc_ticket->role != RSC_ROLE_PROMOTED) + || (rsc_ticket->loss_policy == loss_ticket_stop)) { + resource_location(rsc, NULL, -INFINITY, "__ticket_standby__", + data_set); + } + } +} + +static void +rsc_ticket_new(const char *id, pe_resource_t *rsc, pe_ticket_t *ticket, + const char *state, const char *loss_policy, + pe_working_set_t *data_set) +{ + rsc_ticket_t *new_rsc_ticket = NULL; + + if (rsc == NULL) { + pcmk__config_err("Ignoring ticket '%s' because resource " + "does not exist", id); + return; + } + + new_rsc_ticket = calloc(1, sizeof(rsc_ticket_t)); + if (new_rsc_ticket == NULL) { + return; + } + + if (pcmk__str_eq(state, RSC_ROLE_STARTED_S, + pcmk__str_null_matches|pcmk__str_casei)) { + state = RSC_ROLE_UNKNOWN_S; + } + + new_rsc_ticket->id = id; + new_rsc_ticket->ticket = ticket; + new_rsc_ticket->rsc = rsc; + new_rsc_ticket->role = text2role(state); + + if (pcmk__str_eq(loss_policy, "fence", pcmk__str_casei)) { + if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { + new_rsc_ticket->loss_policy = loss_ticket_fence; + } else { + pcmk__config_err("Resetting '" XML_TICKET_ATTR_LOSS_POLICY + "' for ticket '%s' to 'stop' " + "because fencing is not configured", ticket->id); + loss_policy = "stop"; + } + } + + if (new_rsc_ticket->loss_policy == loss_ticket_fence) { + crm_debug("On 
loss of ticket '%s': Fence the nodes running %s (%s)", + new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id, + role2text(new_rsc_ticket->role)); + + } else if (pcmk__str_eq(loss_policy, "freeze", pcmk__str_casei)) { + crm_debug("On loss of ticket '%s': Freeze %s (%s)", + new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id, + role2text(new_rsc_ticket->role)); + new_rsc_ticket->loss_policy = loss_ticket_freeze; + + } else if (pcmk__str_eq(loss_policy, "demote", pcmk__str_casei)) { + crm_debug("On loss of ticket '%s': Demote %s (%s)", + new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id, + role2text(new_rsc_ticket->role)); + new_rsc_ticket->loss_policy = loss_ticket_demote; + + } else if (pcmk__str_eq(loss_policy, "stop", pcmk__str_casei)) { + crm_debug("On loss of ticket '%s': Stop %s (%s)", + new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id, + role2text(new_rsc_ticket->role)); + new_rsc_ticket->loss_policy = loss_ticket_stop; + + } else { + if (new_rsc_ticket->role == RSC_ROLE_PROMOTED) { + crm_debug("On loss of ticket '%s': Default to demote %s (%s)", + new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id, + role2text(new_rsc_ticket->role)); + new_rsc_ticket->loss_policy = loss_ticket_demote; + + } else { + crm_debug("On loss of ticket '%s': Default to stop %s (%s)", + new_rsc_ticket->ticket->id, new_rsc_ticket->rsc->id, + role2text(new_rsc_ticket->role)); + new_rsc_ticket->loss_policy = loss_ticket_stop; + } + } + + pe_rsc_trace(rsc, "%s (%s) ==> %s", + rsc->id, role2text(new_rsc_ticket->role), ticket->id); + + rsc->rsc_tickets = g_list_append(rsc->rsc_tickets, new_rsc_ticket); + + data_set->ticket_constraints = g_list_append(data_set->ticket_constraints, + new_rsc_ticket); + + if (!(new_rsc_ticket->ticket->granted) || new_rsc_ticket->ticket->standby) { + constraints_for_ticket(rsc, new_rsc_ticket, data_set); + } +} + +// \return Standard Pacemaker return code +static int +unpack_rsc_ticket_set(xmlNode *set, pe_ticket_t *ticket, + const char *loss_policy, pe_working_set_t *data_set) +{ + const char *set_id = NULL; + const char *role = NULL; + + CRM_CHECK(set != NULL, return EINVAL); + CRM_CHECK(ticket != NULL, return EINVAL); + + set_id = ID(set); + if (set_id == NULL) { + pcmk__config_err("Ignoring <" XML_CONS_TAG_RSC_SET "> without " + XML_ATTR_ID); + return pcmk_rc_unpack_error; + } + + role = crm_element_value(set, "role"); + + for (xmlNode *xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + pe_resource_t *resource = NULL; + + resource = pcmk__find_constraint_resource(data_set->resources, + ID(xml_rsc)); + if (resource == NULL) { + pcmk__config_err("%s: No resource found for %s", + set_id, ID(xml_rsc)); + return pcmk_rc_unpack_error; + } + pe_rsc_trace(resource, "Resource '%s' depends on ticket '%s'", + resource->id, ticket->id); + rsc_ticket_new(set_id, resource, ticket, role, loss_policy, data_set); + } + + return pcmk_rc_ok; +} + +static void +unpack_simple_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set) +{ + const char *id = NULL; + const char *ticket_str = crm_element_value(xml_obj, XML_TICKET_ATTR_TICKET); + const char *loss_policy = crm_element_value(xml_obj, + XML_TICKET_ATTR_LOSS_POLICY); + + pe_ticket_t *ticket = NULL; + + const char *rsc_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); + const char *state = crm_element_value(xml_obj, + XML_COLOC_ATTR_SOURCE_ROLE); + + // @COMPAT: Deprecated since 2.1.5 + const char *instance = crm_element_value(xml_obj, + XML_COLOC_ATTR_SOURCE_INSTANCE); 
+
+    pe_resource_t *rsc = NULL;
+
+    if (instance != NULL) {
+        pe_warn_once(pe_wo_coloc_inst,
+                     "Support for " XML_COLOC_ATTR_SOURCE_INSTANCE " is "
+                     "deprecated and will be removed in a future release.");
+    }
+
+    CRM_CHECK(xml_obj != NULL, return);
+
+    id = ID(xml_obj);
+    if (id == NULL) {
+        pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+                         crm_element_name(xml_obj));
+        return;
+    }
+
+    if (ticket_str == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' without ticket specified",
+                         id);
+        return;
+    } else {
+        ticket = g_hash_table_lookup(data_set->tickets, ticket_str);
+    }
+
+    if (ticket == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' because ticket '%s' "
+                         "does not exist", id, ticket_str);
+        return;
+    }
+
+    if (rsc_id == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' without resource", id);
+        return;
+    } else {
+        rsc = pcmk__find_constraint_resource(data_set->resources, rsc_id);
+    }
+
+    if (rsc == NULL) {
+        pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                         "does not exist", id, rsc_id);
+        return;
+
+    } else if ((instance != NULL) && !pe_rsc_is_clone(rsc)) {
+        pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
+                         "is not a clone but instance '%s' was requested",
+                         id, rsc_id, instance);
+        return;
+    }
+
+    if (instance != NULL) {
+        rsc = find_clone_instance(rsc, instance);
+        if (rsc == NULL) {
+            pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
+                              "does not have an instance '%s'",
+                              id, rsc_id, instance);
+            return;
+        }
+    }
+
+    rsc_ticket_new(id, rsc, ticket, state, loss_policy, data_set);
+}
+
+// \return Standard Pacemaker return code
+static int
+unpack_rsc_ticket_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
+                       pe_working_set_t *data_set)
+{
+    const char *id = NULL;
+    const char *rsc_id = NULL;
+    const char *state = NULL;
+
+    pe_resource_t *rsc = NULL;
+    pe_tag_t *tag = NULL;
+
+    xmlNode *rsc_set = NULL;
+
+    *expanded_xml = NULL;
+
+    CRM_CHECK(xml_obj != NULL, return EINVAL);
+
+    id = ID(xml_obj);
+    if (id == NULL) {
+        pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
+                         crm_element_name(xml_obj));
+        return pcmk_rc_unpack_error;
+    }
+
+    // Check whether there are any resource sets with template or tag references
+    *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set);
+    if (*expanded_xml != NULL) {
+        crm_log_xml_trace(*expanded_xml, "Expanded rsc_ticket");
+        return pcmk_rc_ok;
+    }
+
+    rsc_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE);
+    if (rsc_id == NULL) {
+        return pcmk_rc_ok;
+    }
+
+    if (!pcmk__valid_resource_or_tag(data_set, rsc_id, &rsc, &tag)) {
+        pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
+                         "valid resource or tag", id, rsc_id);
+        return pcmk_rc_unpack_error;
+
+    } else if (rsc != NULL) {
+        // No template or tag is referenced
+        return pcmk_rc_ok;
+    }
+
+    state = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE);
+
+    *expanded_xml = copy_xml(xml_obj);
+
+    // Convert template/tag reference in "rsc" into resource_set under rsc_ticket
+    if (!pcmk__tag_to_set(*expanded_xml, &rsc_set, XML_COLOC_ATTR_SOURCE,
+                          false, data_set)) {
+        free_xml(*expanded_xml);
+        *expanded_xml = NULL;
+        return pcmk_rc_unpack_error;
+    }
+
+    if (rsc_set != NULL) {
+        if (state != NULL) {
+            // Move "rsc-role" into converted resource_set as a "role" attribute
+            crm_xml_add(rsc_set, "role", state);
+            xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE);
+        }
+
+    } else {
+        free_xml(*expanded_xml);
+        *expanded_xml = NULL;
+    }
+
+    return pcmk_rc_ok;
+}
+
+void
+pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set) +{ + xmlNode *set = NULL; + bool any_sets = false; + + const char *id = NULL; + const char *ticket_str = crm_element_value(xml_obj, XML_TICKET_ATTR_TICKET); + const char *loss_policy = crm_element_value(xml_obj, XML_TICKET_ATTR_LOSS_POLICY); + + pe_ticket_t *ticket = NULL; + + xmlNode *orig_xml = NULL; + xmlNode *expanded_xml = NULL; + + CRM_CHECK(xml_obj != NULL, return); + + id = ID(xml_obj); + if (id == NULL) { + pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, + crm_element_name(xml_obj)); + return; + } + + if (data_set->tickets == NULL) { + data_set->tickets = pcmk__strkey_table(free, destroy_ticket); + } + + if (ticket_str == NULL) { + pcmk__config_err("Ignoring constraint '%s' without ticket", id); + return; + } else { + ticket = g_hash_table_lookup(data_set->tickets, ticket_str); + } + + if (ticket == NULL) { + ticket = ticket_new(ticket_str, data_set); + if (ticket == NULL) { + return; + } + } + + if (unpack_rsc_ticket_tags(xml_obj, &expanded_xml, + data_set) != pcmk_rc_ok) { + return; + } + if (expanded_xml != NULL) { + orig_xml = xml_obj; + xml_obj = expanded_xml; + } + + for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; + set = crm_next_same_xml(set)) { + + any_sets = true; + set = expand_idref(set, data_set->input); + if ((set == NULL) // Configuration error, message already logged + || (unpack_rsc_ticket_set(set, ticket, loss_policy, + data_set) != pcmk_rc_ok)) { + if (expanded_xml != NULL) { + free_xml(expanded_xml); + } + return; + } + } + + if (expanded_xml) { + free_xml(expanded_xml); + xml_obj = orig_xml; + } + + if (!any_sets) { + unpack_simple_rsc_ticket(xml_obj, data_set); + } +} + +/*! + * \internal + * \brief Ban resource from a node if it doesn't have a promotion ticket + * + * If a resource has tickets for the promoted role, and the ticket is either not + * granted or set to standby, then ban the resource from all nodes. + * + * \param[in,out] rsc Resource to check + */ +void +pcmk__require_promotion_tickets(pe_resource_t *rsc) +{ + for (GList *item = rsc->rsc_tickets; item != NULL; item = item->next) { + rsc_ticket_t *rsc_ticket = (rsc_ticket_t *) item->data; + + if ((rsc_ticket->role == RSC_ROLE_PROMOTED) + && (!rsc_ticket->ticket->granted || rsc_ticket->ticket->standby)) { + resource_location(rsc, NULL, -INFINITY, + "__stateful_without_ticket__", rsc->cluster); + } + } +} diff --git a/lib/pacemaker/pcmk_sched_utilization.c b/lib/pacemaker/pcmk_sched_utilization.c new file mode 100644 index 0000000..0a4bec3 --- /dev/null +++ b/lib/pacemaker/pcmk_sched_utilization.c @@ -0,0 +1,469 @@ +/* + * Copyright 2014-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include <crm/msg_xml.h> +#include <pacemaker-internal.h> + +#include "libpacemaker_private.h" + +// Name for a pseudo-op to use in ordering constraints for utilization +#define LOAD_STOPPED "load_stopped" + +/*! 
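+ * \internal
+ * \brief Configuration shape handled by this file (illustrative)
+ *
+ * Utilization only affects placement when the placement-strategy cluster
+ * option is not "default". Node capacity and resource usage are both plain
+ * name/value integers, for example (IDs and names illustrative):
+ *
+ * \code
+ *     <node id="1" uname="node1">
+ *       <utilization id="node1-utilization">
+ *         <nvpair id="node1-utilization-cpu" name="cpu" value="8"/>
+ *       </utilization>
+ *     </node>
+ * \endcode
+ */
+
+/*!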
+ * \internal
+ * \brief Get integer utilization from a string
+ *
+ * \param[in] s  String representation of a node utilization value
+ *
+ * \return Integer equivalent of \p s
+ * \todo It would make sense to restrict utilization values to nonnegative
+ *       integers, but the documentation just says "integers" and we didn't
+ *       restrict them initially, so for backward compatibility, allow any
+ *       integer.
+ */
+static int
+utilization_value(const char *s)
+{
+    int value = 0;
+
+    if ((s != NULL) && (pcmk__scan_min_int(s, &value, INT_MIN) == EINVAL)) {
+        pe_warn("Using 0 for utilization instead of invalid value '%s'", s);
+        value = 0;
+    }
+    return value;
+}
+
+
+/*
+ * Functions for comparing node capacities
+ */
+
+struct compare_data {
+    const pe_node_t *node1;
+    const pe_node_t *node2;
+    bool node2_only;
+    int result;
+};
+
+/*!
+ * \internal
+ * \brief Compare a single utilization attribute for two nodes
+ *
+ * Compare one utilization attribute for two nodes, incrementing the result if
+ * the first node has greater capacity, and decrementing it if the second node
+ * has greater capacity.
+ *
+ * \param[in]     key        Utilization attribute name to compare
+ * \param[in]     value      Utilization attribute value to compare
+ * \param[in,out] user_data  Comparison data (as struct compare_data*)
+ */
+static void
+compare_utilization_value(gpointer key, gpointer value, gpointer user_data)
+{
+    int node1_capacity = 0;
+    int node2_capacity = 0;
+    struct compare_data *data = user_data;
+    const char *node2_value = NULL;
+
+    if (data->node2_only) {
+        if (g_hash_table_lookup(data->node1->details->utilization, key)) {
+            return; // We've already compared this attribute
+        }
+    } else {
+        node1_capacity = utilization_value((const char *) value);
+    }
+
+    node2_value = g_hash_table_lookup(data->node2->details->utilization, key);
+    node2_capacity = utilization_value(node2_value);
+
+    if (node1_capacity > node2_capacity) {
+        data->result--;
+    } else if (node1_capacity < node2_capacity) {
+        data->result++;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Compare utilization capacities of two nodes
+ *
+ * \param[in] node1  First node to compare
+ * \param[in] node2  Second node to compare
+ *
+ * \return Negative integer if node1 has more free capacity,
+ *         0 if the capacities are equal, or a positive integer
+ *         if node2 has more free capacity
+ */
+int
+pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2)
+{
+    struct compare_data data = {
+        .node1      = node1,
+        .node2      = node2,
+        .node2_only = false,
+        .result     = 0,
+    };
+
+    // Compare utilization values that node1 and maybe node2 have
+    g_hash_table_foreach(node1->details->utilization, compare_utilization_value,
+                         &data);
+
+    // Compare utilization values that only node2 has
+    data.node2_only = true;
+    g_hash_table_foreach(node2->details->utilization, compare_utilization_value,
+                         &data);
+
+    return data.result;
+}
+
+
+/*
+ * Functions for updating node capacities
+ */
+
+struct calculate_data {
+    GHashTable *current_utilization;
+    bool plus;
+};
+
+/*!
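+ * \internal
+ * \brief Example use of pcmk__compare_node_capacities() (illustrative)
+ *
+ * A sketch of choosing the freer of two nodes, following the return-value
+ * convention documented above:
+ *
+ * \code
+ *     if (pcmk__compare_node_capacities(node1, node2) < 0) {
+ *         // node1 has more free capacity
+ *     }
+ * \endcode
+ */
+
+/*!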
+ * \internal
+ * \brief Update a single utilization attribute with a new value
+ *
+ * \param[in]     key        Name of utilization attribute to update
+ * \param[in]     value      Value to add or subtract
+ * \param[in,out] user_data  Calculation data (as struct calculate_data *)
+ */
+static void
+update_utilization_value(gpointer key, gpointer value, gpointer user_data)
+{
+    int result = 0;
+    const char *current = NULL;
+    struct calculate_data *data = user_data;
+
+    current = g_hash_table_lookup(data->current_utilization, key);
+    if (data->plus) {
+        result = utilization_value(current) + utilization_value(value);
+    } else if (current) {
+        result = utilization_value(current) - utilization_value(value);
+    }
+    g_hash_table_replace(data->current_utilization,
+                         strdup(key), pcmk__itoa(result));
+}
+
+/*!
+ * \internal
+ * \brief Subtract a resource's utilization from node capacity
+ *
+ * \param[in,out] current_utilization  Current node utilization attributes
+ * \param[in]     rsc                  Resource with utilization to subtract
+ */
+void
+pcmk__consume_node_capacity(GHashTable *current_utilization,
+                            const pe_resource_t *rsc)
+{
+    struct calculate_data data = {
+        .current_utilization = current_utilization,
+        .plus = false,
+    };
+
+    g_hash_table_foreach(rsc->utilization, update_utilization_value, &data);
+}
+
+/*!
+ * \internal
+ * \brief Add a resource's utilization to node capacity
+ *
+ * \param[in,out] current_utilization  Current node utilization attributes
+ * \param[in]     rsc                  Resource with utilization to add
+ */
+void
+pcmk__release_node_capacity(GHashTable *current_utilization,
+                            const pe_resource_t *rsc)
+{
+    struct calculate_data data = {
+        .current_utilization = current_utilization,
+        .plus = true,
+    };
+
+    g_hash_table_foreach(rsc->utilization, update_utilization_value, &data);
+}
+
+
+/*
+ * Functions for checking for sufficient node capacity
+ */
+
+struct capacity_data {
+    const pe_node_t *node;
+    const char *rsc_id;
+    bool is_enough;
+};
+
+/*!
+ * \internal
+ * \brief Check whether a single utilization attribute has sufficient capacity
+ *
+ * \param[in]     key        Name of utilization attribute to check
+ * \param[in]     value      Amount of utilization required
+ * \param[in,out] user_data  Capacity data (as struct capacity_data *)
+ */
+static void
+check_capacity(gpointer key, gpointer value, gpointer user_data)
+{
+    int required = 0;
+    int remaining = 0;
+    const char *node_value_s = NULL;
+    struct capacity_data *data = user_data;
+
+    node_value_s = g_hash_table_lookup(data->node->details->utilization, key);
+
+    required = utilization_value(value);
+    remaining = utilization_value(node_value_s);
+
+    if (required > remaining) {
+        crm_debug("Remaining capacity for %s on %s (%d) is insufficient "
+                  "for resource %s usage (%d)",
+                  (const char *) key, pe__node_name(data->node), remaining,
+                  data->rsc_id, required);
+        data->is_enough = false;
+    }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node has sufficient capacity for a resource
+ *
+ * \param[in] node         Node to check
+ * \param[in] rsc_id       ID of resource to check (for debug logs only)
+ * \param[in] utilization  Required utilization amounts
+ *
+ * \return true if node has sufficient capacity for resource, otherwise false
+ */
+static bool
+have_enough_capacity(const pe_node_t *node, const char *rsc_id,
+                     GHashTable *utilization)
+{
+    struct capacity_data data = {
+        .node = node,
+        .rsc_id = rsc_id,
+        .is_enough = true,
+    };
+
+    g_hash_table_foreach(utilization, check_capacity, &data);
+    return data.is_enough;
+}
+
+/*!
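+ * \internal
+ * \brief Sketch of the intended consume/release pairing
+ *
+ * A hypothetical illustration, guarded out of the build: a resource's
+ * utilization is subtracted from a node's capacity while it is placed there,
+ * and the same amounts are added back when it moves away.
+ */
+#ifdef PCMK__EDITORIAL_EXAMPLES /* hypothetical guard; never defined */
+static void
+example_capacity_round_trip(GHashTable *node_utilization,
+                            const pe_resource_t *rsc)
+{
+    pcmk__consume_node_capacity(node_utilization, rsc);   // subtract usage
+    pcmk__release_node_capacity(node_utilization, rsc);   // add it back
+}
+#endif
+
+/*!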
+ * \internal + * \brief Sum the utilization requirements of a list of resources + * + * \param[in] orig_rsc Resource being allocated (for logging purposes) + * \param[in] rscs Resources whose utilization should be summed + * + * \return Newly allocated hash table with sum of all utilization values + * \note It is the caller's responsibility to free the return value using + * g_hash_table_destroy(). + */ +static GHashTable * +sum_resource_utilization(const pe_resource_t *orig_rsc, GList *rscs) +{ + GHashTable *utilization = pcmk__strkey_table(free, free); + + for (GList *iter = rscs; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + rsc->cmds->add_utilization(rsc, orig_rsc, rscs, utilization); + } + return utilization; +} + +/*! + * \internal + * \brief Ban resource from nodes with insufficient utilization capacity + * + * \param[in,out] rsc Resource to check + * + * \return Allowed node for \p rsc with most spare capacity, if there are no + * nodes with enough capacity for \p rsc and all its colocated resources + */ +const pe_node_t * +pcmk__ban_insufficient_capacity(pe_resource_t *rsc) +{ + bool any_capable = false; + char *rscs_id = NULL; + pe_node_t *node = NULL; + const pe_node_t *most_capable_node = NULL; + GList *colocated_rscs = NULL; + GHashTable *unallocated_utilization = NULL; + GHashTableIter iter; + + CRM_CHECK(rsc != NULL, return NULL); + + // The default placement strategy ignores utilization + if (pcmk__str_eq(rsc->cluster->placement_strategy, "default", + pcmk__str_casei)) { + return NULL; + } + + // Check whether any resources are colocated with this one + colocated_rscs = rsc->cmds->colocated_resources(rsc, NULL, NULL); + if (colocated_rscs == NULL) { + return NULL; + } + + rscs_id = crm_strdup_printf("%s and its colocated resources", rsc->id); + + // If rsc isn't in the list, add it so we include its utilization + if (g_list_find(colocated_rscs, rsc) == NULL) { + colocated_rscs = g_list_append(colocated_rscs, rsc); + } + + // Sum utilization of colocated resources that haven't been allocated yet + unallocated_utilization = sum_resource_utilization(rsc, colocated_rscs); + + // Check whether any node has enough capacity for all the resources + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (!pcmk__node_available(node, true, false)) { + continue; + } + + if (have_enough_capacity(node, rscs_id, unallocated_utilization)) { + any_capable = true; + } + + // Keep track of node with most free capacity + if ((most_capable_node == NULL) + || (pcmk__compare_node_capacities(node, most_capable_node) < 0)) { + most_capable_node = node; + } + } + + if (any_capable) { + // If so, ban resource from any node with insufficient capacity + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (pcmk__node_available(node, true, false) + && !have_enough_capacity(node, rscs_id, + unallocated_utilization)) { + pe_rsc_debug(rsc, "%s does not have enough capacity for %s", + pe__node_name(node), rscs_id); + resource_location(rsc, node, -INFINITY, "__limit_utilization__", + rsc->cluster); + } + } + most_capable_node = NULL; + + } else { + // Otherwise, ban from nodes with insufficient capacity for rsc alone + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (pcmk__node_available(node, true, false) + && !have_enough_capacity(node, rsc->id, 
rsc->utilization)) { + pe_rsc_debug(rsc, "%s does not have enough capacity for %s", + pe__node_name(node), rsc->id); + resource_location(rsc, node, -INFINITY, "__limit_utilization__", + rsc->cluster); + } + } + } + + g_hash_table_destroy(unallocated_utilization); + g_list_free(colocated_rscs); + free(rscs_id); + + pe__show_node_weights(true, rsc, "Post-utilization", + rsc->allowed_nodes, rsc->cluster); + return most_capable_node; +} + +/*! + * \internal + * \brief Create a new load_stopped pseudo-op for a node + * + * \param[in] node Node to create op for + * \param[in,out] data_set Cluster working set + * + * \return Newly created load_stopped op + */ +static pe_action_t * +new_load_stopped_op(const pe_node_t *node, pe_working_set_t *data_set) +{ + char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s", + node->details->uname); + pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); + + if (load_stopped->node == NULL) { + load_stopped->node = pe__copy_node(node); + pe__clear_action_flags(load_stopped, pe_action_optional); + } + free(load_stopped_task); + return load_stopped; +} + +/*! + * \internal + * \brief Create utilization-related internal constraints for a resource + * + * \param[in,out] rsc Resource to create constraints for + * \param[in] allowed_nodes List of allowed next nodes for \p rsc + */ +void +pcmk__create_utilization_constraints(pe_resource_t *rsc, + const GList *allowed_nodes) +{ + const GList *iter = NULL; + const pe_node_t *node = NULL; + pe_action_t *load_stopped = NULL; + + pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", + rsc->id, rsc->cluster->placement_strategy); + + // "stop rsc then load_stopped" constraints for current nodes + for (iter = rsc->running_on; iter != NULL; iter = iter->next) { + node = (const pe_node_t *) iter->data; + load_stopped = new_load_stopped_op(node, rsc->cluster); + pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, NULL, load_stopped, + pe_order_load, rsc->cluster); + } + + // "load_stopped then start/migrate_to rsc" constraints for allowed nodes + for (iter = allowed_nodes; iter; iter = iter->next) { + node = (const pe_node_t *) iter->data; + load_stopped = new_load_stopped_op(node, rsc->cluster); + pcmk__new_ordering(NULL, NULL, load_stopped, rsc, start_key(rsc), NULL, + pe_order_load, rsc->cluster); + pcmk__new_ordering(NULL, NULL, load_stopped, + rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, + pe_order_load, rsc->cluster); + } +} + +/*! + * \internal + * \brief Output node capacities if enabled + * + * \param[in] desc Prefix for output + * \param[in,out] data_set Cluster working set + */ +void +pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set) +{ + if (!pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { + return; + } + for (const GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { + const pe_node_t *node = (const pe_node_t *) iter->data; + pcmk__output_t *out = data_set->priv; + + out->message(out, "node-capacity", node, desc); + } +} diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c new file mode 100644 index 0000000..b4e670d --- /dev/null +++ b/lib/pacemaker/pcmk_scheduler.c @@ -0,0 +1,811 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + +#include <crm_internal.h> + +#include <crm/crm.h> +#include <crm/cib.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> +#include <crm/common/xml_internal.h> + +#include <glib.h> + +#include <crm/pengine/status.h> +#include <pacemaker-internal.h> +#include "libpacemaker_private.h" + +CRM_TRACE_INIT_DATA(pacemaker); + +/*! + * \internal + * \brief Do deferred action checks after allocation + * + * When unpacking the resource history, the scheduler checks for resource + * configurations that have changed since an action was run. However, at that + * time, bundles using the REMOTE_CONTAINER_HACK don't have their final + * parameter information, so instead they add a deferred check to a list. This + * function processes one entry in that list. + * + * \param[in,out] rsc Resource that action history is for + * \param[in,out] node Node that action history is for + * \param[in] rsc_op Action history entry + * \param[in] check Type of deferred check to do + */ +static void +check_params(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_op, + enum pe_check_parameters check) +{ + const char *reason = NULL; + op_digest_cache_t *digest_data = NULL; + + switch (check) { + case pe_check_active: + if (pcmk__check_action_config(rsc, node, rsc_op) + && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL)) { + reason = "action definition changed"; + } + break; + + case pe_check_last_failure: + digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, + rsc->cluster); + switch (digest_data->rc) { + case RSC_DIGEST_UNKNOWN: + crm_trace("Resource %s history entry %s on %s has " + "no digest to compare", + rsc->id, ID(rsc_op), node->details->id); + break; + case RSC_DIGEST_MATCH: + break; + default: + reason = "resource parameters have changed"; + break; + } + break; + } + if (reason != NULL) { + pe__clear_failcount(rsc, node, reason, rsc->cluster); + } +} + +/*! + * \internal + * \brief Check whether a resource has failcount clearing scheduled on a node + * + * \param[in] node Node to check + * \param[in] rsc Resource to check + * + * \return true if \p rsc has failcount clearing scheduled on \p node, + * otherwise false + */ +static bool +failcount_clear_action_exists(const pe_node_t *node, const pe_resource_t *rsc) +{ + GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE); + + if (list != NULL) { + g_list_free(list); + return true; + } + return false; +} + +/*! + * \internal + * \brief Ban a resource from a node if it reached its failure threshold there + * + * \param[in,out] rsc Resource to check failure threshold for + * \param[in] node Node to check \p rsc on + */ +static void +check_failure_threshold(pe_resource_t *rsc, const pe_node_t *node) +{ + // If this is a collective resource, apply recursively to children instead + if (rsc->children != NULL) { + g_list_foreach(rsc->children, (GFunc) check_failure_threshold, + (gpointer) node); + return; + + } else if (failcount_clear_action_exists(node, rsc)) { + /* Don't force the resource away from this node due to a failcount + * that's going to be cleared. + * + * @TODO Failcount clearing can be scheduled in + * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in + * schedule_resource_actions() via check_params(). This runs well before + * then, so it cannot detect those, meaning we might check the migration + * threshold when we shouldn't. Worst case, we stop or move the + * resource, then move it back in the next transition. 
+ */ + return; + + } else { + pe_resource_t *failed = NULL; + + if (pcmk__threshold_reached(rsc, node, &failed)) { + resource_location(failed, node, -INFINITY, "__fail_limit__", + rsc->cluster); + } + } +} + +/*! + * \internal + * \brief If resource has exclusive discovery, ban node if not allowed + * + * Location constraints have a resource-discovery option that allows users to + * specify where probes are done for the affected resource. If this is set to + * exclusive, probes will only be done on nodes listed in exclusive constraints. + * This function bans the resource from the node if the node is not listed. + * + * \param[in,out] rsc Resource to check + * \param[in] node Node to check \p rsc on + */ +static void +apply_exclusive_discovery(pe_resource_t *rsc, const pe_node_t *node) +{ + if (rsc->exclusive_discover + || pe__const_top_resource(rsc, false)->exclusive_discover) { + pe_node_t *match = NULL; + + // If this is a collective resource, apply recursively to children + g_list_foreach(rsc->children, (GFunc) apply_exclusive_discovery, + (gpointer) node); + + match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); + if ((match != NULL) + && (match->rsc_discover_mode != pe_discover_exclusive)) { + match->weight = -INFINITY; + } + } +} + +/*! + * \internal + * \brief Apply stickiness to a resource if appropriate + * + * \param[in,out] rsc Resource to check for stickiness + * \param[in,out] data_set Cluster working set + */ +static void +apply_stickiness(pe_resource_t *rsc, pe_working_set_t *data_set) +{ + pe_node_t *node = NULL; + + // If this is a collective resource, apply recursively to children instead + if (rsc->children != NULL) { + g_list_foreach(rsc->children, (GFunc) apply_stickiness, data_set); + return; + } + + /* A resource is sticky if it is managed, has stickiness configured, and is + * active on a single node. + */ + if (!pcmk_is_set(rsc->flags, pe_rsc_managed) + || (rsc->stickiness < 1) || !pcmk__list_of_1(rsc->running_on)) { + return; + } + + node = rsc->running_on->data; + + /* In a symmetric cluster, stickiness can always be used. In an + * asymmetric cluster, we have to check whether the resource is still + * allowed on the node, so we don't keep the resource somewhere it is no + * longer explicitly enabled. + */ + if (!pcmk_is_set(rsc->cluster->flags, pe_flag_symmetric_cluster) + && (pe_hash_table_lookup(rsc->allowed_nodes, + node->details->id) == NULL)) { + pe_rsc_debug(rsc, + "Ignoring %s stickiness because the cluster is " + "asymmetric and %s is not explicitly allowed", + rsc->id, pe__node_name(node)); + return; + } + + pe_rsc_debug(rsc, "Resource %s has %d stickiness on %s", + rsc->id, rsc->stickiness, pe__node_name(node)); + resource_location(rsc, node, rsc->stickiness, "stickiness", data_set); +} + +/*! + * \internal + * \brief Apply shutdown locks for all resources as appropriate + * + * \param[in,out] data_set Cluster working set + */ +static void +apply_shutdown_locks(pe_working_set_t *data_set) +{ + if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { + return; + } + for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + rsc->cmds->shutdown_lock(rsc); + } +} + +/*! 
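+ * \internal
+ * \brief Sketch of how stickiness translates into a location preference
+ *
+ * A hypothetical illustration, guarded out of the build, assuming \p rsc is
+ * managed, has positive stickiness, and is active on exactly one node: the
+ * stickiness score simply becomes a location preference for the current node,
+ * as in apply_stickiness() above.
+ */
+#ifdef PCMK__EDITORIAL_EXAMPLES /* hypothetical guard; never defined */
+static void
+example_stickiness_effect(pe_resource_t *rsc)
+{
+    pe_node_t *current = rsc->running_on->data;
+
+    resource_location(rsc, current, rsc->stickiness, "stickiness",
+                      rsc->cluster);
+}
+#endif
+
+/*!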
+ * \internal
+ * \brief Calculate the number of available nodes in the cluster
+ *
+ * \param[in,out] data_set  Cluster working set
+ */
+static void
+count_available_nodes(pe_working_set_t *data_set)
+{
+    if (pcmk_is_set(data_set->flags, pe_flag_no_compat)) {
+        return;
+    }
+
+    // @COMPAT for API backward compatibility only (cluster does not use value)
+    for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+        pe_node_t *node = (pe_node_t *) iter->data;
+
+        if ((node != NULL) && (node->weight >= 0) && node->details->online
+            && (node->details->type != node_ping)) {
+            data_set->max_valid_nodes++;
+        }
+    }
+    crm_trace("Online node count: %d", data_set->max_valid_nodes);
+}
+
+/*!
+ * \internal
+ * \brief Apply node-specific scheduling criteria
+ *
+ * After the CIB has been unpacked, process node-specific scheduling criteria
+ * including shutdown locks, location constraints, resource stickiness,
+ * migration thresholds, and exclusive resource discovery.
+ *
+ * \param[in,out] data_set  Cluster working set
+ */
+static void
+apply_node_criteria(pe_working_set_t *data_set)
+{
+    crm_trace("Applying node-specific scheduling criteria");
+    apply_shutdown_locks(data_set);
+    count_available_nodes(data_set);
+    pcmk__apply_locations(data_set);
+    g_list_foreach(data_set->resources, (GFunc) apply_stickiness, data_set);
+
+    for (GList *node_iter = data_set->nodes; node_iter != NULL;
+         node_iter = node_iter->next) {
+        for (GList *rsc_iter = data_set->resources; rsc_iter != NULL;
+             rsc_iter = rsc_iter->next) {
+            pe_node_t *node = (pe_node_t *) node_iter->data;
+            pe_resource_t *rsc = (pe_resource_t *) rsc_iter->data;
+
+            check_failure_threshold(rsc, node);
+            apply_exclusive_discovery(rsc, node);
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Allocate resources to nodes
+ *
+ * \param[in,out] data_set  Cluster working set
+ */
+static void
+allocate_resources(pe_working_set_t *data_set)
+{
+    GList *iter = NULL;
+
+    crm_trace("Allocating resources to nodes");
+
+    if (!pcmk__str_eq(data_set->placement_strategy, "default",
+                      pcmk__str_casei)) {
+        pcmk__sort_resources(data_set);
+    }
+    pcmk__show_node_capacities("Original", data_set);
+
+    if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
+        /* Allocate remote connection resources first (which will also allocate
+         * any colocation dependencies). If the connection is migrating, always
+         * prefer the partial migration target.
+         */
+        for (iter = data_set->resources; iter != NULL; iter = iter->next) {
+            pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+            if (rsc->is_remote_node) {
+                pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
+                             rsc->id);
+                rsc->cmds->assign(rsc, rsc->partial_migration_target);
+            }
+        }
+    }
+
+    // Now allocate the rest of the resources
+    for (iter = data_set->resources; iter != NULL; iter = iter->next) {
+        pe_resource_t *rsc = (pe_resource_t *) iter->data;
+
+        if (!rsc->is_remote_node) {
+            pe_rsc_trace(rsc, "Allocating %s resource '%s'",
+                         crm_element_name(rsc->xml), rsc->id);
+            rsc->cmds->assign(rsc, NULL);
+        }
+    }
+
+    pcmk__show_node_capacities("Remaining", data_set);
+}
+
+/*!
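+ * \internal
+ * \brief Worked example of the allocation order above
+ *
+ * Hypothetical cluster: given resources { remote1 (a remote connection),
+ * fence-dev, web }, allocate_resources() assigns remote1 first, together with
+ * its colocation dependencies, preferring its partial migration target if the
+ * connection is mid-migration; only then are fence-dev and web assigned in
+ * list order.
+ */
+
+/*!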
+ * \internal + * \brief Schedule fail count clearing on online nodes if resource is orphaned + * + * \param[in,out] rsc Resource to check + * \param[in,out] data_set Cluster working set + */ +static void +clear_failcounts_if_orphaned(pe_resource_t *rsc, pe_working_set_t *data_set) +{ + if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { + return; + } + crm_trace("Clear fail counts for orphaned resource %s", rsc->id); + + /* There's no need to recurse into rsc->children because those + * should just be unallocated clone instances. + */ + + for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { + pe_node_t *node = (pe_node_t *) iter->data; + pe_action_t *clear_op = NULL; + + if (!node->details->online) { + continue; + } + if (pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL) == 0) { + continue; + } + + clear_op = pe__clear_failcount(rsc, node, "it is orphaned", data_set); + + /* We can't use order_action_then_stop() here because its + * pe_order_preserve breaks things + */ + pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), + NULL, pe_order_optional, data_set); + } +} + +/*! + * \internal + * \brief Schedule any resource actions needed + * + * \param[in,out] data_set Cluster working set + */ +static void +schedule_resource_actions(pe_working_set_t *data_set) +{ + // Process deferred action checks + pe__foreach_param_check(data_set, check_params); + pe__free_param_checks(data_set); + + if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { + crm_trace("Scheduling probes"); + pcmk__schedule_probes(data_set); + } + + if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { + g_list_foreach(data_set->resources, + (GFunc) clear_failcounts_if_orphaned, data_set); + } + + crm_trace("Scheduling resource actions"); + for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { + pe_resource_t *rsc = (pe_resource_t *) iter->data; + + rsc->cmds->create_actions(rsc); + } +} + +/*! + * \internal + * \brief Check whether a resource or any of its descendants are managed + * + * \param[in] rsc Resource to check + * + * \return true if resource or any descendant is managed, otherwise false + */ +static bool +is_managed(const pe_resource_t *rsc) +{ + if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { + return true; + } + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + if (is_managed((pe_resource_t *) iter->data)) { + return true; + } + } + return false; +} + +/*! + * \internal + * \brief Check whether any resources in the cluster are managed + * + * \param[in] data_set Cluster working set + * + * \return true if any resource is managed, otherwise false + */ +static bool +any_managed_resources(const pe_working_set_t *data_set) +{ + for (const GList *iter = data_set->resources; + iter != NULL; iter = iter->next) { + if (is_managed((const pe_resource_t *) iter->data)) { + return true; + } + } + return false; +} + +/*! + * \internal + * \brief Check whether a node requires fencing + * + * \param[in] node Node to check + * \param[in] have_managed Whether any resource in cluster is managed + * \param[in] data_set Cluster working set + * + * \return true if \p node should be fenced, otherwise false + */ +static bool +needs_fencing(const pe_node_t *node, bool have_managed, + const pe_working_set_t *data_set) +{ + return have_managed && node->details->unclean + && pe_can_fence(data_set, node); +} + +/*! 
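+ * \internal
+ * \brief Sketch combining the managed-resource and fencing checks above
+ *
+ * A hypothetical illustration, guarded out of the build: decide whether a
+ * single node should be fenced, computing the "any managed resources" input
+ * the same way schedule_fencing_and_shutdowns() below does.
+ */
+#ifdef PCMK__EDITORIAL_EXAMPLES /* hypothetical guard; never defined */
+static bool
+example_node_needs_fencing(const pe_node_t *node,
+                           const pe_working_set_t *data_set)
+{
+    return needs_fencing(node, any_managed_resources(data_set), data_set);
+}
+#endif
+
+/*!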
+ * \internal + * \brief Check whether a node requires shutdown + * + * \param[in] node Node to check + * + * \return true if \p node should be shut down, otherwise false + */ +static bool +needs_shutdown(const pe_node_t *node) +{ + if (pe__is_guest_or_remote_node(node)) { + /* Do not send shutdown actions for Pacemaker Remote nodes. + * @TODO We might come up with a good use for this in the future. + */ + return false; + } + return node->details->online && node->details->shutdown; +} + +/*! + * \internal + * \brief Track and order non-DC fencing + * + * \param[in,out] list List of existing non-DC fencing actions + * \param[in,out] action Fencing action to prepend to \p list + * \param[in] data_set Cluster working set + * + * \return (Possibly new) head of \p list + */ +static GList * +add_nondc_fencing(GList *list, pe_action_t *action, + const pe_working_set_t *data_set) +{ + if (!pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing) + && (list != NULL)) { + /* Concurrent fencing is disabled, so order each non-DC + * fencing in a chain. If there is any DC fencing or + * shutdown, it will be ordered after the last action in the + * chain later. + */ + order_actions((pe_action_t *) list->data, action, pe_order_optional); + } + return g_list_prepend(list, action); +} + +/*! + * \internal + * \brief Schedule a node for fencing + * + * \param[in,out] node Node that requires fencing + * \param[in,out] data_set Cluster working set + */ +static pe_action_t * +schedule_fencing(pe_node_t *node, pe_working_set_t *data_set) +{ + pe_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean", + FALSE, data_set); + + pe_warn("Scheduling node %s for fencing", pe__node_name(node)); + pcmk__order_vs_fence(fencing, data_set); + return fencing; +} + +/*! + * \internal + * \brief Create and order node fencing and shutdown actions + * + * \param[in,out] data_set Cluster working set + */ +static void +schedule_fencing_and_shutdowns(pe_working_set_t *data_set) +{ + pe_action_t *dc_down = NULL; + bool integrity_lost = false; + bool have_managed = any_managed_resources(data_set); + GList *fencing_ops = NULL; + GList *shutdown_ops = NULL; + + crm_trace("Scheduling fencing and shutdowns as needed"); + if (!have_managed) { + crm_notice("No fencing will be done until there are resources to manage"); + } + + // Check each node for whether it needs fencing or shutdown + for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { + pe_node_t *node = (pe_node_t *) iter->data; + pe_action_t *fencing = NULL; + + /* Guest nodes are "fenced" by recovering their container resource, + * so handle them separately. 
+ */ + if (pe__is_guest_node(node)) { + if (node->details->remote_requires_reset && have_managed + && pe_can_fence(data_set, node)) { + pcmk__fence_guest(node); + } + continue; + } + + if (needs_fencing(node, have_managed, data_set)) { + fencing = schedule_fencing(node, data_set); + + // Track DC and non-DC fence actions separately + if (node->details->is_dc) { + dc_down = fencing; + } else { + fencing_ops = add_nondc_fencing(fencing_ops, fencing, data_set); + } + + } else if (needs_shutdown(node)) { + pe_action_t *down_op = pcmk__new_shutdown_action(node); + + // Track DC and non-DC shutdown actions separately + if (node->details->is_dc) { + dc_down = down_op; + } else { + shutdown_ops = g_list_prepend(shutdown_ops, down_op); + } + } + + if ((fencing == NULL) && node->details->unclean) { + integrity_lost = true; + pe_warn("Node %s is unclean but cannot be fenced", + pe__node_name(node)); + } + } + + if (integrity_lost) { + if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { + pe_warn("Resource functionality and data integrity cannot be " + "guaranteed (configure, enable, and test fencing to " + "correct this)"); + + } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { + crm_notice("Unclean nodes will not be fenced until quorum is " + "attained or no-quorum-policy is set to ignore"); + } + } + + if (dc_down != NULL) { + /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated + * DC elections. However, we don't want to order non-DC shutdowns before + * a DC *fencing*, because even though we don't want a node that's + * shutting down to become DC, the DC fencing could be ordered before a + * clone stop that's also ordered before the shutdowns, thus leading to + * a graph loop. + */ + if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_none)) { + pcmk__order_after_each(dc_down, shutdown_ops); + } + + // Order any non-DC fencing before any DC fencing or shutdown + + if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) { + /* With concurrent fencing, order each non-DC fencing action + * separately before any DC fencing or shutdown. + */ + pcmk__order_after_each(dc_down, fencing_ops); + } else if (fencing_ops != NULL) { + /* Without concurrent fencing, the non-DC fencing actions are + * already ordered relative to each other, so we just need to order + * the DC fencing after the last action in the chain (which is the + * first item in the list). + */ + order_actions((pe_action_t *) fencing_ops->data, dc_down, + pe_order_optional); + } + } + g_list_free(fencing_ops); + g_list_free(shutdown_ops); +} + +static void +log_resource_details(pe_working_set_t *data_set) +{ + pcmk__output_t *out = data_set->priv; + GList *all = NULL; + + /* We need a list of nodes that we are allowed to output information for. + * This is necessary because out->message for all the resource-related + * messages expects such a list, due to the `crm_mon --node=` feature. Here, + * we just make it a list of all the nodes. 
+ */ + all = g_list_prepend(all, (gpointer) "*"); + + for (GList *item = data_set->resources; item != NULL; item = item->next) { + pe_resource_t *rsc = (pe_resource_t *) item->data; + + // Log all resources except inactive orphans + if (!pcmk_is_set(rsc->flags, pe_rsc_orphan) + || (rsc->role != RSC_ROLE_STOPPED)) { + out->message(out, crm_map_element_name(rsc->xml), 0, rsc, all, all); + } + } + + g_list_free(all); +} + +static void +log_all_actions(pe_working_set_t *data_set) +{ + /* This only ever outputs to the log, so ignore whatever output object was + * previously set and just log instead. + */ + pcmk__output_t *prev_out = data_set->priv; + pcmk__output_t *out = NULL; + + if (pcmk__log_output_new(&out) != pcmk_rc_ok) { + return; + } + + pe__register_messages(out); + pcmk__register_lib_messages(out); + pcmk__output_set_log_level(out, LOG_NOTICE); + data_set->priv = out; + + out->begin_list(out, NULL, NULL, "Actions"); + pcmk__output_actions(data_set); + out->end_list(out); + out->finish(out, CRM_EX_OK, true, NULL); + pcmk__output_free(out); + + data_set->priv = prev_out; +} + +/*! + * \internal + * \brief Log all required but unrunnable actions at trace level + * + * \param[in] data_set Cluster working set + */ +static void +log_unrunnable_actions(const pe_working_set_t *data_set) +{ + const uint64_t flags = pe_action_optional|pe_action_runnable|pe_action_pseudo; + + crm_trace("Required but unrunnable actions:"); + for (const GList *iter = data_set->actions; + iter != NULL; iter = iter->next) { + + const pe_action_t *action = (const pe_action_t *) iter->data; + + if (!pcmk_any_flags_set(action->flags, flags)) { + pcmk__log_action("\t", action, true); + } + } +} + +/*! + * \internal + * \brief Unpack the CIB for scheduling + * + * \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked) + * \param[in] flags Working set flags to set in addition to defaults + * \param[in,out] data_set Cluster working set + */ +static void +unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) +{ + const char* localhost_save = NULL; + + if (pcmk_is_set(data_set->flags, pe_flag_have_status)) { + crm_trace("Reusing previously calculated cluster status"); + pe__set_working_set_flags(data_set, flags); + return; + } + + if (data_set->localhost) { + localhost_save = data_set->localhost; + } + + CRM_ASSERT(cib != NULL); + crm_trace("Calculating cluster status"); + + /* This will zero the entire struct without freeing anything first, so + * callers should never call pcmk__schedule_actions() with a populated data + * set unless pe_flag_have_status is set (i.e. cluster_status() was + * previously called, whether directly or via pcmk__schedule_actions()). + */ + set_working_set_defaults(data_set); + + if (localhost_save) { + data_set->localhost = localhost_save; + } + + pe__set_working_set_flags(data_set, flags); + data_set->input = cib; + cluster_status(data_set); // Sets pe_flag_have_status +} + +/*! 
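+ * \internal
+ * \brief Sketch of the output-object swap used by log_all_actions() above
+ *
+ * A hypothetical illustration, guarded out of the build: temporarily point
+ * the working set at a log-only output object, emit messages, then restore
+ * the caller's output object.
+ */
+#ifdef PCMK__EDITORIAL_EXAMPLES /* hypothetical guard; never defined */
+static void
+example_with_log_output(pe_working_set_t *data_set)
+{
+    pcmk__output_t *prev_out = data_set->priv;
+    pcmk__output_t *out = NULL;
+
+    if (pcmk__log_output_new(&out) != pcmk_rc_ok) {
+        return;
+    }
+    pcmk__output_set_log_level(out, LOG_INFO);
+    data_set->priv = out;
+
+    // ... any out->message() or out->info() calls here go to the log ...
+
+    out->finish(out, CRM_EX_OK, true, NULL);
+    pcmk__output_free(out);
+    data_set->priv = prev_out;
+}
+#endif
+
+/*!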
+ * \internal + * \brief Run the scheduler for a given CIB + * + * \param[in,out] cib CIB XML to use as scheduler input + * \param[in] flags Working set flags to set in addition to defaults + * \param[in,out] data_set Cluster working set + */ +void +pcmk__schedule_actions(xmlNode *cib, unsigned long long flags, + pe_working_set_t *data_set) +{ + unpack_cib(cib, flags, data_set); + pcmk__set_allocation_methods(data_set); + pcmk__apply_node_health(data_set); + pcmk__unpack_constraints(data_set); + if (pcmk_is_set(data_set->flags, pe_flag_check_config)) { + return; + } + + if (!pcmk_is_set(data_set->flags, pe_flag_quick_location) && + pcmk__is_daemon) { + log_resource_details(data_set); + } + + apply_node_criteria(data_set); + + if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { + return; + } + + pcmk__create_internal_constraints(data_set); + pcmk__handle_rsc_config_changes(data_set); + allocate_resources(data_set); + schedule_resource_actions(data_set); + + /* Remote ordering constraints need to happen prior to calculating fencing + * because it is one more place we can mark nodes as needing fencing. + */ + pcmk__order_remote_connection_actions(data_set); + + schedule_fencing_and_shutdowns(data_set); + pcmk__apply_orderings(data_set); + log_all_actions(data_set); + pcmk__create_graph(data_set); + + if (get_crm_log_level() == LOG_TRACE) { + log_unrunnable_actions(data_set); + } +} diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c new file mode 100644 index 0000000..165c7d3 --- /dev/null +++ b/lib/pacemaker/pcmk_simulate.c @@ -0,0 +1,999 @@ +/* + * Copyright 2021-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include <crm_internal.h> +#include <crm/cib/internal.h> +#include <crm/common/output.h> +#include <crm/common/results.h> +#include <crm/pengine/pe_types.h> +#include <pacemaker-internal.h> +#include <pacemaker.h> + +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "libpacemaker_private.h" + +static pcmk__output_t *out = NULL; +static cib_t *fake_cib = NULL; +static GList *fake_resource_list = NULL; +static const GList *fake_op_fail_list = NULL; + +static void set_effective_date(pe_working_set_t *data_set, bool print_original, + const char *use_date); + +/*! + * \internal + * \brief Create an action name for use in a dot graph + * + * \param[in] action Action to create name for + * \param[in] verbose If true, add action ID to name + * + * \return Newly allocated string with action name + * \note It is the caller's responsibility to free the result. 
+ */ +static char * +create_action_name(const pe_action_t *action, bool verbose) +{ + char *action_name = NULL; + const char *prefix = ""; + const char *action_host = NULL; + const char *clone_name = NULL; + const char *task = action->task; + + if (action->node != NULL) { + action_host = action->node->details->uname; + } else if (!pcmk_is_set(action->flags, pe_action_pseudo)) { + action_host = "<none>"; + } + + if (pcmk__str_eq(action->task, RSC_CANCEL, pcmk__str_none)) { + prefix = "Cancel "; + task = action->cancel_task; + } + + if (action->rsc != NULL) { + clone_name = action->rsc->clone_name; + } + + if (clone_name != NULL) { + char *key = NULL; + guint interval_ms = 0; + + if (pcmk__guint_from_hash(action->meta, + XML_LRM_ATTR_INTERVAL_MS, 0, + &interval_ms) != pcmk_rc_ok) { + interval_ms = 0; + } + + if (pcmk__strcase_any_of(action->task, RSC_NOTIFY, RSC_NOTIFIED, + NULL)) { + const char *n_type = g_hash_table_lookup(action->meta, + "notify_key_type"); + const char *n_task = g_hash_table_lookup(action->meta, + "notify_key_operation"); + + CRM_ASSERT(n_type != NULL); + CRM_ASSERT(n_task != NULL); + key = pcmk__notify_key(clone_name, n_type, n_task); + } else { + key = pcmk__op_key(clone_name, task, interval_ms); + } + + if (action_host != NULL) { + action_name = crm_strdup_printf("%s%s %s", + prefix, key, action_host); + } else { + action_name = crm_strdup_printf("%s%s", prefix, key); + } + free(key); + + } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { + const char *op = g_hash_table_lookup(action->meta, "stonith_action"); + + action_name = crm_strdup_printf("%s%s '%s' %s", + prefix, action->task, op, action_host); + + } else if (action->rsc && action_host) { + action_name = crm_strdup_printf("%s%s %s", + prefix, action->uuid, action_host); + + } else if (action_host) { + action_name = crm_strdup_printf("%s%s %s", + prefix, action->task, action_host); + + } else { + action_name = crm_strdup_printf("%s", action->uuid); + } + + if (verbose) { + char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); + + free(action_name); + action_name = with_id; + } + return action_name; +} + +/*! + * \internal + * \brief Display the status of a cluster + * + * \param[in,out] data_set Cluster working set + * \param[in] show_opts How to modify display (as pcmk_show_opt_e flags) + * \param[in] section_opts Sections to display (as pcmk_section_e flags) + * \param[in] title What to use as list title + * \param[in] print_spacer Whether to display a spacer first + */ +static void +print_cluster_status(pe_working_set_t *data_set, uint32_t show_opts, + uint32_t section_opts, const char *title, bool print_spacer) +{ + pcmk__output_t *out = data_set->priv; + GList *all = NULL; + crm_exit_t stonith_rc = 0; + enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; + + section_opts |= pcmk_section_nodes | pcmk_section_resources; + show_opts |= pcmk_show_inactive_rscs | pcmk_show_failed_detail; + + all = g_list_prepend(all, (gpointer) "*"); + + PCMK__OUTPUT_SPACER_IF(out, print_spacer); + out->begin_list(out, NULL, NULL, "%s", title); + out->message(out, "cluster-status", + data_set, state, stonith_rc, NULL, + false, section_opts, show_opts, NULL, all, all); + out->end_list(out); + + g_list_free(all); +} + +/*! 
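+ * \internal
+ * \brief Sample names produced by create_action_name() above
+ *
+ * Illustrative values inferred from the format strings (resource, node, and
+ * action ID are hypothetical):
+ *
+ *   "vip_start_0 node1"        resource action on a node
+ *   "stonith 'reboot' node2"   fencing action
+ *   "all_stopped"              pseudo-action without a node
+ *   "vip_start_0 node1 (4)"    verbose form, with the action ID appended
+ */
+
+/*!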
+ * \internal + * \brief Display a summary of all actions scheduled in a transition + * + * \param[in,out] data_set Cluster working set (fully scheduled) + * \param[in] print_spacer Whether to display a spacer first + */ +static void +print_transition_summary(pe_working_set_t *data_set, bool print_spacer) +{ + pcmk__output_t *out = data_set->priv; + + PCMK__OUTPUT_SPACER_IF(out, print_spacer); + out->begin_list(out, NULL, NULL, "Transition Summary"); + pcmk__output_actions(data_set); + out->end_list(out); +} + +/*! + * \internal + * \brief Reset a cluster working set's input, output, date, and flags + * + * \param[in,out] data_set Cluster working set + * \param[in] input What to set as cluster input + * \param[in] out What to set as cluster output object + * \param[in] use_date What to set as cluster's current timestamp + * \param[in] flags Cluster flags to add (pe_flag_*) + */ +static void +reset(pe_working_set_t *data_set, xmlNodePtr input, pcmk__output_t *out, + const char *use_date, unsigned int flags) +{ + data_set->input = input; + data_set->priv = out; + set_effective_date(data_set, true, use_date); + if (pcmk_is_set(flags, pcmk_sim_sanitized)) { + pe__set_working_set_flags(data_set, pe_flag_sanitized); + } + if (pcmk_is_set(flags, pcmk_sim_show_scores)) { + pe__set_working_set_flags(data_set, pe_flag_show_scores); + } + if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { + pe__set_working_set_flags(data_set, pe_flag_show_utilization); + } +} + +/*! + * \brief Write out a file in dot(1) format describing the actions that will + * be taken by the scheduler in response to an input CIB file. + * + * \param[in,out] data_set Working set for the cluster + * \param[in] dot_file The filename to write + * \param[in] all_actions Write all actions, even those that are optional + * or are on unmanaged resources + * \param[in] verbose Add extra information, such as action IDs, to the + * output + * + * \return Standard Pacemaker return code + */ +static int +write_sim_dotfile(pe_working_set_t *data_set, const char *dot_file, + bool all_actions, bool verbose) +{ + GList *gIter = NULL; + FILE *dot_strm = fopen(dot_file, "w"); + + if (dot_strm == NULL) { + return errno; + } + + fprintf(dot_strm, " digraph \"g\" {\n"); + for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + const char *style = "dashed"; + const char *font = "black"; + const char *color = "black"; + char *action_name = create_action_name(action, verbose); + + if (pcmk_is_set(action->flags, pe_action_pseudo)) { + font = "orange"; + } + + if (pcmk_is_set(action->flags, pe_action_dumped)) { + style = "bold"; + color = "green"; + + } else if ((action->rsc != NULL) + && !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) { + color = "red"; + font = "purple"; + if (!all_actions) { + goto do_not_write; + } + + } else if (pcmk_is_set(action->flags, pe_action_optional)) { + color = "blue"; + if (!all_actions) { + goto do_not_write; + } + + } else { + color = "red"; + CRM_LOG_ASSERT(!pcmk_is_set(action->flags, pe_action_runnable)); + } + + pe__set_action_flags(action, pe_action_dumped); + fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", + action_name, style, color, font); + do_not_write: + free(action_name); + } + + for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + + GList *gIter2 = NULL; + + for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) { + 
pe_action_wrapper_t *before = (pe_action_wrapper_t *) gIter2->data; + + char *before_name = NULL; + char *after_name = NULL; + const char *style = "dashed"; + bool optional = true; + + if (before->state == pe_link_dumped) { + optional = false; + style = "bold"; + } else if (before->type == pe_order_none) { + continue; + } else if (pcmk_is_set(before->action->flags, pe_action_dumped) + && pcmk_is_set(action->flags, pe_action_dumped) + && before->type != pe_order_load) { + optional = false; + } + + if (all_actions || !optional) { + before_name = create_action_name(before->action, verbose); + after_name = create_action_name(action, verbose); + fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", + before_name, after_name, style); + free(before_name); + free(after_name); + } + } + } + + fprintf(dot_strm, "}\n"); + fflush(dot_strm); + fclose(dot_strm); + return pcmk_rc_ok; +} + +/*! + * \brief Profile the configuration updates and scheduler actions in a single + * CIB file, printing the profiling timings. + * + * \note \p data_set->priv must have been set to a valid \p pcmk__output_t + * object before this function is called. + * + * \param[in] xml_file The CIB file to profile + * \param[in] repeat Number of times to run + * \param[in,out] data_set Working set for the cluster + * \param[in] use_date The date to set the cluster's time to (may be NULL) + */ +static void +profile_file(const char *xml_file, long long repeat, pe_working_set_t *data_set, + const char *use_date) +{ + pcmk__output_t *out = data_set->priv; + xmlNode *cib_object = NULL; + clock_t start = 0; + clock_t end; + unsigned long long data_set_flags = pe_flag_no_compat; + + CRM_ASSERT(out != NULL); + + cib_object = filename2xml(xml_file); + start = clock(); + + if (pcmk_find_cib_element(cib_object, XML_CIB_TAG_STATUS) == NULL) { + create_xml_node(cib_object, XML_CIB_TAG_STATUS); + } + + if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { + free_xml(cib_object); + return; + } + + if (validate_xml(cib_object, NULL, FALSE) != TRUE) { + free_xml(cib_object); + return; + } + + if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) { + data_set_flags |= pe_flag_show_scores; + } + if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { + data_set_flags |= pe_flag_show_utilization; + } + + for (int i = 0; i < repeat; ++i) { + xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object); + + data_set->input = input; + set_effective_date(data_set, false, use_date); + pcmk__schedule_actions(input, data_set_flags, data_set); + pe_reset_working_set(data_set); + } + + end = clock(); + out->message(out, "profile", xml_file, start, end); +} + +void +pcmk__profile_dir(const char *dir, long long repeat, pe_working_set_t *data_set, + const char *use_date) +{ + pcmk__output_t *out = data_set->priv; + struct dirent **namelist; + + int file_num = scandir(dir, &namelist, 0, alphasort); + + CRM_ASSERT(out != NULL); + + if (file_num > 0) { + struct stat prop; + char buffer[FILENAME_MAX]; + + out->begin_list(out, NULL, NULL, "Timings"); + + while (file_num--) { + if ('.' 
== namelist[file_num]->d_name[0]) { + free(namelist[file_num]); + continue; + + } else if (!pcmk__ends_with_ext(namelist[file_num]->d_name, + ".xml")) { + free(namelist[file_num]); + continue; + } + snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name); + if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { + profile_file(buffer, repeat, data_set, use_date); + } + free(namelist[file_num]); + } + free(namelist); + + out->end_list(out); + } +} + +/*! + * \brief Set the date of the cluster, either to the value given by + * \p use_date, or to the "execution-date" value in the CIB. + * + * \note \p data_set->priv must have been set to a valid \p pcmk__output_t + * object before this function is called. + * + * \param[in,out] data_set Working set for the cluster + * \param[in] print_original If \p true, the "execution-date" should + * also be printed + * \param[in] use_date The date to set the cluster's time to + * (may be NULL) + */ +static void +set_effective_date(pe_working_set_t *data_set, bool print_original, + const char *use_date) +{ + pcmk__output_t *out = data_set->priv; + time_t original_date = 0; + + CRM_ASSERT(out != NULL); + + crm_element_value_epoch(data_set->input, "execution-date", &original_date); + + if (use_date) { + data_set->now = crm_time_new(use_date); + out->info(out, "Setting effective cluster time: %s", use_date); + crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now, + crm_time_log_date | crm_time_log_timeofday); + + } else if (original_date != 0) { + data_set->now = pcmk__copy_timet(original_date); + + if (print_original) { + char *when = crm_time_as_string(data_set->now, + crm_time_log_date|crm_time_log_timeofday); + + out->info(out, "Using the original execution date of: %s", when); + free(when); + } + } +} + +/*! + * \internal + * \brief Simulate successfully executing a pseudo-action in a graph + * + * \param[in,out] graph Graph to update with pseudo-action result + * \param[in,out] action Pseudo-action to simulate executing + * + * \return Standard Pacemaker return code + */ +static int +simulate_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + out->message(out, "inject-pseudo-action", node, task); + + pcmk__update_graph(graph, action); + return pcmk_rc_ok; +} + +/*! 
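+ * \internal
+ * \brief Example of effective-date selection by set_effective_date() above
+ *
+ * Hypothetical values: use_date = "2023-01-01 12:00:00" pins the simulated
+ * cluster clock to that time; with use_date = NULL, the clock falls back to
+ * the CIB's "execution-date" property when one is present.
+ */
+
+/*!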
+ * \internal + * \brief Simulate executing a resource action in a graph + * + * \param[in,out] graph Graph to update with resource action result + * \param[in,out] action Resource action to simulate executing + * + * \return Standard Pacemaker return code + */ +static int +simulate_resource_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + int rc; + lrmd_event_data_t *op = NULL; + int target_outcome = PCMK_OCF_OK; + + const char *rtype = NULL; + const char *rclass = NULL; + const char *resource = NULL; + const char *rprovider = NULL; + const char *resource_config_name = NULL; + const char *operation = crm_element_value(action->xml, "operation"); + const char *target_rc_s = crm_meta_value(action->params, + XML_ATTR_TE_TARGET_RC); + + xmlNode *cib_node = NULL; + xmlNode *cib_resource = NULL; + xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); + + char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); + char *uuid = NULL; + const char *router_node = crm_element_value(action->xml, + XML_LRM_ATTR_ROUTER_NODE); + + // Certain actions don't need to be displayed or history entries + if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { + crm_debug("No history injection for %s op on %s", operation, node); + goto done; // Confirm action and update graph + } + + if (action_rsc == NULL) { // Shouldn't be possible + crm_log_xml_err(action->xml, "Bad"); + free(node); + return EPROTO; + } + + /* A resource might be known by different names in the configuration and in + * the action (for example, a clone instance). Grab the configuration name + * (which is preferred when writing history), and if necessary, the instance + * name. + */ + resource_config_name = crm_element_value(action_rsc, XML_ATTR_ID); + if (resource_config_name == NULL) { // Shouldn't be possible + crm_log_xml_err(action->xml, "No ID"); + free(node); + return EPROTO; + } + resource = resource_config_name; + if (pe_find_resource(fake_resource_list, resource) == NULL) { + const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG); + + if ((longname != NULL) + && (pe_find_resource(fake_resource_list, longname) != NULL)) { + resource = longname; + } + } + + // Certain actions need to be displayed but don't need history entries + if (pcmk__strcase_any_of(operation, "delete", RSC_METADATA, NULL)) { + out->message(out, "inject-rsc-action", resource, operation, node, + (guint) 0); + goto done; // Confirm action and update graph + } + + rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS); + rtype = crm_element_value(action_rsc, XML_ATTR_TYPE); + rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER); + + pcmk__scan_min_int(target_rc_s, &target_outcome, 0); + + CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, + cib_sync_call|cib_scope_local) == pcmk_ok); + + // Ensure the action node is in the CIB + uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID); + cib_node = pcmk__inject_node(fake_cib, node, + ((router_node == NULL)? 
uuid: node)); + free(uuid); + CRM_ASSERT(cib_node != NULL); + + // Add a history entry for the action + cib_resource = pcmk__inject_resource_history(out, cib_node, resource, + resource_config_name, + rclass, rtype, rprovider); + if (cib_resource == NULL) { + crm_err("Could not simulate action %d history for resource %s", + action->id, resource); + free(node); + free_xml(cib_node); + return EINVAL; + } + + // Simulate and display an executor event for the action result + op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE, + target_outcome, "User-injected result"); + out->message(out, "inject-rsc-action", resource, op->op_type, node, + op->interval_ms); + + // Check whether action is in a list of desired simulated failures + for (const GList *iter = fake_op_fail_list; + iter != NULL; iter = iter->next) { + const char *spec = (const char *) iter->data; + char *key = NULL; + const char *match_name = NULL; + + // Allow user to specify anonymous clone with or without instance number + key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type, + op->interval_ms, node); + if (strncasecmp(key, spec, strlen(key)) == 0) { + match_name = resource; + } + free(key); + + // If not found, try the resource's name in the configuration + if ((match_name == NULL) + && (strcmp(resource, resource_config_name) != 0)) { + + key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource_config_name, + op->op_type, op->interval_ms, node); + if (strncasecmp(key, spec, strlen(key)) == 0) { + match_name = resource_config_name; + } + free(key); + } + + if (match_name == NULL) { + continue; // This failed action entry doesn't match + } + + // ${match_name}_${task}_${interval_in_ms}@${node}=${rc} + rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); + if (rc != 1) { + out->err(out, "Invalid failed operation '%s' " + "(result code must be integer)", spec); + continue; // Keep checking other list entries + } + + out->info(out, "Pretending action %d failed with rc=%d", + action->id, op->rc); + pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); + graph->abort_priority = INFINITY; + pcmk__inject_failcount(out, cib_node, match_name, op->op_type, + op->interval_ms, op->rc); + break; + } + + pcmk__inject_action_result(cib_resource, op, target_outcome); + lrmd_free_event(op); + rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node, + cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == pcmk_ok); + + done: + free(node); + free_xml(cib_node); + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + pcmk__update_graph(graph, action); + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Simulate successfully executing a cluster action + * + * \param[in,out] graph Graph to update with action result + * \param[in,out] action Cluster action to simulate + * + * \return Standard Pacemaker return code + */ +static int +simulate_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); + + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + out->message(out, "inject-cluster-action", node, task, rsc); + pcmk__update_graph(graph, action); + return pcmk_rc_ok; +} + +/*! 
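+ * \internal
+ * \brief Example of a simulated-failure specification
+ *
+ * A hypothetical entry in the fail list matched by simulate_resource_action()
+ * above, following the ${resource}_${task}_${interval_in_ms}@${node}=${rc}
+ * format:
+ *
+ *   vip_monitor_10000@node1=7
+ *
+ * makes the 10-second recurring monitor of "vip" on "node1" report result
+ * code 7 (not running). The resource may be given by either its instance
+ * name or its configuration name.
+ */
+
+/*!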
+ * \internal + * \brief Simulate successfully executing a fencing action + * + * \param[in,out] graph Graph to update with action result + * \param[in,out] action Fencing action to simulate + * + * \return Standard Pacemaker return code + */ +static int +simulate_fencing_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) +{ + const char *op = crm_meta_value(action->params, "stonith_action"); + char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); + + out->message(out, "inject-fencing-action", target, op); + + if (!pcmk__str_eq(op, "on", pcmk__str_casei)) { + int rc = pcmk_ok; + GString *xpath = g_string_sized_new(512); + + // Set node state to offline + xmlNode *cib_node = pcmk__inject_node_state_change(fake_cib, target, + false); + + CRM_ASSERT(cib_node != NULL); + crm_xml_add(cib_node, XML_ATTR_ORIGIN, __func__); + rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node, + cib_sync_call|cib_scope_local); + CRM_ASSERT(rc == pcmk_ok); + + // Simulate controller clearing node's resource history and attributes + pcmk__g_strcat(xpath, + "//" XML_CIB_TAG_STATE + "[@" XML_ATTR_UNAME "='", target, "']/" XML_CIB_TAG_LRM, + NULL); + fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, + cib_xpath|cib_sync_call|cib_scope_local); + + g_string_truncate(xpath, 0); + pcmk__g_strcat(xpath, + "//" XML_CIB_TAG_STATE + "[@" XML_ATTR_UNAME "='", target, "']" + "/" XML_TAG_TRANSIENT_NODEATTRS, NULL); + fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, + cib_xpath|cib_sync_call|cib_scope_local); + + free_xml(cib_node); + g_string_free(xpath, TRUE); + } + + pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); + pcmk__update_graph(graph, action); + free(target); + return pcmk_rc_ok; +} + +enum pcmk__graph_status +pcmk__simulate_transition(pe_working_set_t *data_set, cib_t *cib, + const GList *op_fail_list) +{ + pcmk__graph_t *transition = NULL; + enum pcmk__graph_status graph_rc; + + pcmk__graph_functions_t simulation_fns = { + simulate_pseudo_action, + simulate_resource_action, + simulate_cluster_action, + simulate_fencing_action, + }; + + out = data_set->priv; + + fake_cib = cib; + fake_op_fail_list = op_fail_list; + + if (!out->is_quiet(out)) { + out->begin_list(out, NULL, NULL, "Executing Cluster Transition"); + } + + pcmk__set_graph_functions(&simulation_fns); + transition = pcmk__unpack_graph(data_set->graph, crm_system_name); + pcmk__log_graph(LOG_DEBUG, transition); + + fake_resource_list = data_set->resources; + do { + graph_rc = pcmk__execute_graph(transition); + } while (graph_rc == pcmk__graph_active); + fake_resource_list = NULL; + + if (graph_rc != pcmk__graph_complete) { + out->err(out, "Transition failed: %s", + pcmk__graph_status2text(graph_rc)); + pcmk__log_graph(LOG_ERR, transition); + out->err(out, "An invalid transition was produced"); + } + pcmk__free_graph(transition); + + if (!out->is_quiet(out)) { + // If not quiet, we'll need the resulting CIB for later display + xmlNode *cib_object = NULL; + int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, + cib_sync_call|cib_scope_local); + + CRM_ASSERT(rc == pcmk_ok); + pe_reset_working_set(data_set); + data_set->input = cib_object; + out->end_list(out); + } + return graph_rc; +} + +int +pcmk__simulate(pe_working_set_t *data_set, pcmk__output_t *out, + const pcmk_injections_t *injections, unsigned int flags, + uint32_t section_opts, const char *use_date, + const char *input_file, const char *graph_file, + const char *dot_file) +{ + int printed = 
+int
+pcmk__simulate(pe_working_set_t *data_set, pcmk__output_t *out,
+               const pcmk_injections_t *injections, unsigned int flags,
+               uint32_t section_opts, const char *use_date,
+               const char *input_file, const char *graph_file,
+               const char *dot_file)
+{
+    int printed = pcmk_rc_no_output;
+    int rc = pcmk_rc_ok;
+    xmlNodePtr input = NULL;
+    cib_t *cib = NULL;
+
+    rc = cib__signon_query(out, &cib, &input);
+    if (rc != pcmk_rc_ok) {
+        goto simulate_done;
+    }
+
+    reset(data_set, input, out, use_date, flags);
+    cluster_status(data_set);
+
+    if ((cib->variant == cib_native)
+        && pcmk_is_set(section_opts, pcmk_section_times)) {
+        if (pcmk__our_nodename == NULL) {
+            // Currently used only in the times section
+            pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0);
+        }
+        data_set->localhost = pcmk__our_nodename;
+    }
+
+    if (!out->is_quiet(out)) {
+        if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) {
+            printed = out->message(out, "maint-mode", data_set->flags);
+        }
+
+        if (data_set->disabled_resources || data_set->blocked_resources) {
+            PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+            printed = out->info(out,
+                                "%d of %d resource instances DISABLED and "
+                                "%d BLOCKED from further action due to failure",
+                                data_set->disabled_resources,
+                                data_set->ninstances,
+                                data_set->blocked_resources);
+        }
+
+        /* Most formatted output headers use caps for each word, but this one
+         * only has the first word capitalized for compatibility with pcs.
+         */
+        print_cluster_status(data_set,
+                             pcmk_is_set(flags, pcmk_sim_show_pending)? pcmk_show_pending : 0,
+                             section_opts, "Current cluster status",
+                             (printed == pcmk_rc_ok));
+        printed = pcmk_rc_ok;
+    }
+
+    // If the user requested any injections, handle them
+    if ((injections->node_down != NULL)
+        || (injections->node_fail != NULL)
+        || (injections->node_up != NULL)
+        || (injections->op_inject != NULL)
+        || (injections->ticket_activate != NULL)
+        || (injections->ticket_grant != NULL)
+        || (injections->ticket_revoke != NULL)
+        || (injections->ticket_standby != NULL)
+        || (injections->watchdog != NULL)) {
+
+        PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+        pcmk__inject_scheduler_input(data_set, cib, injections);
+        printed = pcmk_rc_ok;
+
+        rc = cib->cmds->query(cib, NULL, &input, cib_sync_call);
+        if (rc != pcmk_rc_ok) {
+            rc = pcmk_legacy2rc(rc);
+            goto simulate_done;
+        }
+
+        cleanup_calculations(data_set);
+        reset(data_set, input, out, use_date, flags);
+        cluster_status(data_set);
+    }
+
+    if (input_file != NULL) {
+        rc = write_xml_file(input, input_file, FALSE);
+        if (rc < 0) {
+            rc = pcmk_legacy2rc(rc);
+            goto simulate_done;
+        }
+    }
+
+    if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) {
+        pcmk__output_t *logger_out = NULL;
+        unsigned long long data_set_flags = pe_flag_no_compat;
+
+        if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
+            data_set_flags |= pe_flag_show_scores;
+        }
+        if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
+            data_set_flags |= pe_flag_show_utilization;
+        }
+
+        if (pcmk_all_flags_set(data_set->flags,
+                               pe_flag_show_scores|pe_flag_show_utilization)) {
+            PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+            out->begin_list(out, NULL, NULL,
+                            "Allocation Scores and Utilization Information");
+            printed = pcmk_rc_ok;
+
+        } else if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
+            PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+            out->begin_list(out, NULL, NULL, "Allocation Scores");
+            printed = pcmk_rc_ok;
+
+        } else if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
+            PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+            out->begin_list(out, NULL, NULL, "Utilization Information");
+            printed = pcmk_rc_ok;
+
+        } else {
+            rc = pcmk__log_output_new(&logger_out);
+            if (rc != pcmk_rc_ok) {
+                goto simulate_done;
+            }
+
+            pe__register_messages(logger_out);
+            pcmk__register_lib_messages(logger_out);
+            data_set->priv = logger_out;
+        }
+
+        pcmk__schedule_actions(input, data_set_flags, data_set);
+
+        if (logger_out == NULL) {
+            out->end_list(out);
+        } else {
+            logger_out->finish(logger_out, CRM_EX_OK, true, NULL);
+            pcmk__output_free(logger_out);
+            data_set->priv = out;
+        }
+
+        input = NULL; /* Don't try to free it twice */
+
+        if (graph_file != NULL) {
+            rc = write_xml_file(data_set->graph, graph_file, FALSE);
+            if (rc < 0) {
+                rc = pcmk_rc_graph_error;
+                goto simulate_done;
+            }
+        }
+
+        if (dot_file != NULL) {
+            rc = write_sim_dotfile(data_set, dot_file,
+                                   pcmk_is_set(flags, pcmk_sim_all_actions),
+                                   pcmk_is_set(flags, pcmk_sim_verbose));
+            if (rc != pcmk_rc_ok) {
+                rc = pcmk_rc_dot_error;
+                goto simulate_done;
+            }
+        }
+
+        if (!out->is_quiet(out)) {
+            print_transition_summary(data_set, printed == pcmk_rc_ok);
+        }
+    }
+
+    rc = pcmk_rc_ok;
+
+    if (!pcmk_is_set(flags, pcmk_sim_simulate)) {
+        goto simulate_done;
+    }
+
+    PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
+    if (pcmk__simulate_transition(data_set, cib,
+                                  injections->op_fail) != pcmk__graph_complete) {
+        rc = pcmk_rc_invalid_transition;
+    }
+
+    if (out->is_quiet(out)) {
+        goto simulate_done;
+    }
+
+    set_effective_date(data_set, true, use_date);
+
+    if (pcmk_is_set(flags, pcmk_sim_show_scores)) {
+        pe__set_working_set_flags(data_set, pe_flag_show_scores);
+    }
+    if (pcmk_is_set(flags, pcmk_sim_show_utilization)) {
+        pe__set_working_set_flags(data_set, pe_flag_show_utilization);
+    }
+
+    cluster_status(data_set);
+    print_cluster_status(data_set, 0, section_opts, "Revised Cluster Status",
+                         true);
+
+simulate_done:
+    cib__clean_up_connection(&cib);
+    return rc;
+}
+
+int
+pcmk_simulate(xmlNodePtr *xml, pe_working_set_t *data_set,
+              const pcmk_injections_t *injections, unsigned int flags,
+              unsigned int section_opts, const char *use_date,
+              const char *input_file, const char *graph_file,
+              const char *dot_file)
+{
+    pcmk__output_t *out = NULL;
+    int rc = pcmk_rc_ok;
+
+    rc = pcmk__xml_output_new(&out, xml);
+    if (rc != pcmk_rc_ok) {
+        return rc;
+    }
+
+    pe__register_messages(out);
+    pcmk__register_lib_messages(out);
+
+    rc = pcmk__simulate(data_set, out, injections, flags, section_opts,
+                        use_date, input_file, graph_file, dot_file);
+    pcmk__xml_output_finish(out, xml);
+    return rc;
+}
diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c
new file mode 100644
index 0000000..0e82633
--- /dev/null
+++ b/lib/pacemaker/pcmk_status.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <crm/cib/internal.h>
+#include <crm/common/output.h>
+#include <crm/common/results.h>
+#include <crm/fencing/internal.h>
+#include <crm/stonith-ng.h>
+#include <pacemaker.h>
+#include <pacemaker-internal.h>
+
+static stonith_t *
+fencing_connect(void)
+{
+    stonith_t *st = stonith_api_new();
+    int rc = pcmk_rc_ok;
+
+    if (st == NULL) {
+        return NULL;
+    }
+
+    rc = st->cmds->connect(st, crm_system_name, NULL);
+    if (rc == pcmk_rc_ok) {
+        return st;
+    } else {
+        stonith_api_delete(st);
+        return NULL;
+    }
+}
+
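fencing_connect() above collapses "connected" and "unavailable" into a single nullable handle, so callers can treat the fencer as optional. A hedged caller sketch; the surrounding code is illustrative, not part of this commit:

    stonith_t *stonith = fencing_connect();

    if (stonith != NULL) {
        // Fencer is reachable; e.g., fetch fencing history before reporting
        // ... use stonith->cmds ...
        stonith_api_delete(stonith);    // always release the API object
    } else {
        // Proceed without fencing history, as pcmk__status() does below
    }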
+/*!
+ * \internal
+ * \brief Output the cluster status given a fencer and CIB connection
+ *
+ * \param[in,out] out                  Output object
+ * \param[in,out] stonith              Fencer connection
+ * \param[in,out] cib                  CIB connection
+ * \param[in]     current_cib          Current CIB XML
+ * \param[in]     pcmkd_state          \p pacemakerd state
+ * \param[in]     fence_history        How much of the fencing history to output
+ * \param[in]     show                 Group of \p pcmk_section_e flags
+ * \param[in]     show_opts            Group of \p pcmk_show_opt_e flags
+ * \param[in]     only_node            If a node name or tag, include only the
+ *                                     matching node(s) (if any) in the output.
+ *                                     If \p "*" or \p NULL, include all nodes
+ *                                     in the output.
+ * \param[in]     only_rsc             If a resource ID or tag, include only the
+ *                                     matching resource(s) (if any) in the
+ *                                     output. If \p "*" or \p NULL, include all
+ *                                     resources in the output.
+ * \param[in]     neg_location_prefix  Prefix denoting a ban in a constraint ID
+ * \param[in]     simple_output        Whether to use a simple output format.
+ *                                     Note: This is for use by \p crm_mon only
+ *                                     and is planned to be deprecated.
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, cib_t *cib,
+                            xmlNode *current_cib,
+                            enum pcmk_pacemakerd_state pcmkd_state,
+                            enum pcmk__fence_history fence_history,
+                            uint32_t show, uint32_t show_opts,
+                            const char *only_node, const char *only_rsc,
+                            const char *neg_location_prefix, bool simple_output)
+{
+    xmlNode *cib_copy = copy_xml(current_cib);
+    stonith_history_t *stonith_history = NULL;
+    int history_rc = 0;
+    pe_working_set_t *data_set = NULL;
+    GList *unames = NULL;
+    GList *resources = NULL;
+
+    int rc = pcmk_rc_ok;
+
+    if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
+        cib__clean_up_connection(&cib);
+        free_xml(cib_copy);
+        rc = pcmk_rc_schema_validation;
+        out->err(out, "Upgrade failed: %s", pcmk_rc_str(rc));
+        return rc;
+    }
+
+    /* get the stonith-history if there is evidence we need it */
+    if (fence_history != pcmk__fence_history_none) {
+        history_rc = pcmk__get_fencing_history(stonith, &stonith_history,
+                                               fence_history);
+    }
+
+    data_set = pe_new_working_set();
+    CRM_ASSERT(data_set != NULL);
+    pe__set_working_set_flags(data_set, pe_flag_no_compat);
+
+    data_set->input = cib_copy;
+    data_set->priv = out;
+    cluster_status(data_set);
+
+    if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) {
+        if (pcmk__our_nodename == NULL) {
+            // Currently used only in the times section
+            pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0);
+        }
+        data_set->localhost = pcmk__our_nodename;
+    }
+
+    /* Unpack constraints if any section will need them
+     * (tickets may be referenced in constraints but not granted yet,
+     * and bans need negative location constraints) */
+    if (pcmk_is_set(show, pcmk_section_bans) || pcmk_is_set(show, pcmk_section_tickets)) {
+        pcmk__unpack_constraints(data_set);
+    }
+
+    unames = pe__build_node_name_list(data_set, only_node);
+    resources = pe__build_rsc_list(data_set, only_rsc);
+
+    /* Always print DC if NULL. */
+    if (data_set->dc_node == NULL) {
+        show |= pcmk_section_dc;
+    }
+
+    if (simple_output) {
+        rc = pcmk__output_simple_status(out, data_set);
+    } else {
+        out->message(out, "cluster-status",
+                     data_set, pcmkd_state, pcmk_rc2exitc(history_rc),
+                     stonith_history, fence_history, show, show_opts,
+                     neg_location_prefix, unames, resources);
+    }
+
+    g_list_free_full(unames, free);
+    g_list_free_full(resources, free);
+
+    stonith_history_free(stonith_history);
+    stonith_history = NULL;
+    pe_free_working_set(data_set);
+    return rc;
+}
+
+int
+pcmk_status(xmlNodePtr *xml)
+{
+    cib_t *cib = NULL;
+    pcmk__output_t *out = NULL;
+    int rc = pcmk_rc_ok;
+
+    uint32_t show_opts = pcmk_show_pending | pcmk_show_inactive_rscs | pcmk_show_timing;
+
+    cib = cib_new();
+
+    if (cib == NULL) {
+        return pcmk_rc_cib_corrupt;
+    }
+
+    rc = pcmk__xml_output_new(&out, xml);
+    if (rc != pcmk_rc_ok) {
+        cib_delete(cib);
+        return rc;
+    }
+
+    pcmk__register_lib_messages(out);
+    pe__register_messages(out);
+    stonith__register_messages(out);
+
+    rc = pcmk__status(out, cib, pcmk__fence_history_full, pcmk_section_all,
+                      show_opts, NULL, NULL, NULL, false, 0);
+    pcmk__xml_output_finish(out, xml);
+
+    cib_delete(cib);
+    return rc;
+}
+
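pcmk_status() above is part of the public libpacemaker API: it connects to the local CIB, renders the full status as XML, and hands the tree back to the caller. A hedged caller sketch, assuming Pacemaker's public headers and omitting detailed error handling:

    #include <crm/common/results.h>   // pcmk_rc_ok
    #include <crm/common/xml.h>       // free_xml()
    #include <pacemaker.h>            // pcmk_status()

    int
    show_status(void)
    {
        xmlNodePtr xml = NULL;
        int rc = pcmk_status(&xml);   // standard Pacemaker return code

        if (rc == pcmk_rc_ok) {
            // ... consume or serialize the XML status tree ...
        }
        free_xml(xml);                // caller owns the returned XML
        return rc;
    }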
+/*!
+ * \internal
+ * \brief Query and output the cluster status
+ *
+ * The operation is considered a success if we're able to get the \p pacemakerd
+ * state. If possible, we'll also try to connect to the fencer and CIB and
+ * output their respective status information.
+ *
+ * \param[in,out] out                  Output object
+ * \param[in,out] cib                  CIB connection
+ * \param[in]     fence_history        How much of the fencing history to output
+ * \param[in]     show                 Group of \p pcmk_section_e flags
+ * \param[in]     show_opts            Group of \p pcmk_show_opt_e flags
+ * \param[in]     only_node            If a node name or tag, include only the
+ *                                     matching node(s) (if any) in the output.
+ *                                     If \p "*" or \p NULL, include all nodes
+ *                                     in the output.
+ * \param[in]     only_rsc             If a resource ID or tag, include only the
+ *                                     matching resource(s) (if any) in the
+ *                                     output. If \p "*" or \p NULL, include all
+ *                                     resources in the output.
+ * \param[in]     neg_location_prefix  Prefix denoting a ban in a constraint ID
+ * \param[in]     simple_output        Whether to use a simple output format.
+ *                                     Note: This is for use by \p crm_mon only
+ *                                     and is planned to be deprecated.
+ * \param[in]     timeout_ms           How long to wait for a reply from the
+ *                                     \p pacemakerd API. If 0,
+ *                                     \p pcmk_ipc_dispatch_sync will be used.
+ *                                     If positive, \p pcmk_ipc_dispatch_main
+ *                                     will be used, and a new mainloop will be
+ *                                     created for this purpose (freed before
+ *                                     return).
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__status(pcmk__output_t *out, cib_t *cib,
+             enum pcmk__fence_history fence_history, uint32_t show,
+             uint32_t show_opts, const char *only_node, const char *only_rsc,
+             const char *neg_location_prefix, bool simple_output,
+             unsigned int timeout_ms)
+{
+    xmlNode *current_cib = NULL;
+    int rc = pcmk_rc_ok;
+    stonith_t *stonith = NULL;
+    enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
+    time_t last_updated = 0;
+
+    if (cib == NULL) {
+        return ENOTCONN;
+    }
+
+    if (cib->variant == cib_native) {
+        rc = pcmk__pacemakerd_status(out, crm_system_name, timeout_ms, false,
+                                     &pcmkd_state);
+        if (rc != pcmk_rc_ok) {
+            return rc;
+        }
+
+        last_updated = time(NULL);
+
+        switch (pcmkd_state) {
+            case pcmk_pacemakerd_state_running:
+            case pcmk_pacemakerd_state_shutting_down:
+            case pcmk_pacemakerd_state_remote:
+                /* Fencer and CIB may still be available while shutting down or
+                 * running on a Pacemaker Remote node
+                 */
+                break;
+            default:
+                // Fencer and CIB are definitely unavailable
+                out->message(out, "pacemakerd-health",
+                             NULL, pcmkd_state, NULL, last_updated);
+                return rc;
+        }
+
+        if (fence_history != pcmk__fence_history_none) {
+            stonith = fencing_connect();
+        }
+    }
+
+    rc = cib__signon_query(out, &cib, &current_cib);
+    if (rc != pcmk_rc_ok) {
+        if (pcmkd_state != pcmk_pacemakerd_state_invalid) {
+            // Invalid at this point means we didn't query the pcmkd state
+            out->message(out, "pacemakerd-health",
+                         NULL, pcmkd_state, NULL, last_updated);
+        }
+        goto done;
+    }
+
+    rc = pcmk__output_cluster_status(out, stonith, cib, current_cib,
+                                     pcmkd_state, fence_history, show,
+                                     show_opts, only_node, only_rsc,
+                                     neg_location_prefix, simple_output);
+    if (rc != pcmk_rc_ok) {
+        out->err(out, "Error outputting status info from the fencer or CIB");
+    }
+
+done:
+    stonith_api_delete(stonith);
+    free_xml(current_cib);
+    return pcmk_rc_ok;
+}
+
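Note that pcmk__status() deliberately returns pcmk_rc_ok once the pacemakerd state has been obtained, even if rendering the fencer or CIB status fails, per the doxygen contract above. A hedged call sketch, assuming an internal caller that has already set up out, cib, and show_opts as pcmk_status() does above; argument values are illustrative:

    rc = pcmk__status(out, cib, pcmk__fence_history_full, pcmk_section_all,
                      show_opts, "*" /* all nodes */, "*" /* all resources */,
                      NULL /* no ban prefix */, false /* full format */,
                      0 /* timeout_ms == 0: synchronous pacemakerd query */);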
+/* This is an internal-only function that is planned to be deprecated and
+ * removed. It should only ever be called from crm_mon.
+ */
+int
+pcmk__output_simple_status(pcmk__output_t *out,
+                           const pe_working_set_t *data_set)
+{
+    int nodes_online = 0;
+    int nodes_standby = 0;
+    int nodes_maintenance = 0;
+    GString *offline_nodes = NULL;
+    bool no_dc = false;
+    bool offline = false;
+    bool has_warnings = false;
+
+    if (data_set->dc_node == NULL) {
+        has_warnings = true;
+        no_dc = true;
+    }
+
+    for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
+        pe_node_t *node = (pe_node_t *) iter->data;
+
+        if (node->details->standby && node->details->online) {
+            nodes_standby++;
+        } else if (node->details->maintenance && node->details->online) {
+            nodes_maintenance++;
+        } else if (node->details->online) {
+            nodes_online++;
+        } else {
+            pcmk__add_word(&offline_nodes, 1024, "offline node:");
+            pcmk__add_word(&offline_nodes, 0, pe__node_name(node));
+            has_warnings = true;
+            offline = true;
+        }
+    }
+
+    if (has_warnings) {
+        out->info(out, "CLUSTER WARN: %s%s%s",
+                  no_dc ? "No DC" : "",
+                  no_dc && offline ? ", " : "",
+                  (offline? (const char *) offline_nodes->str : ""));
+
+        if (offline_nodes != NULL) {
+            g_string_free(offline_nodes, TRUE);
+        }
+
+    } else {
+        char *nodes_standby_s = NULL;
+        char *nodes_maint_s = NULL;
+
+        if (nodes_standby > 0) {
+            nodes_standby_s = crm_strdup_printf(", %d standby node%s",
+                                                nodes_standby,
+                                                pcmk__plural_s(nodes_standby));
+        }
+
+        if (nodes_maintenance > 0) {
+            nodes_maint_s = crm_strdup_printf(", %d maintenance node%s",
+                                              nodes_maintenance,
+                                              pcmk__plural_s(nodes_maintenance));
+        }
+
+        out->info(out, "CLUSTER OK: %d node%s online%s%s, "
+                  "%d resource instance%s configured",
+                  nodes_online, pcmk__plural_s(nodes_online),
+                  nodes_standby_s != NULL ? nodes_standby_s : "",
+                  nodes_maint_s != NULL ? nodes_maint_s : "",
+                  data_set->ninstances, pcmk__plural_s(data_set->ninstances));
+
+        free(nodes_standby_s);
+        free(nodes_maint_s);
+    }
+
+    if (has_warnings) {
+        return pcmk_rc_error;
+    } else {
+        return pcmk_rc_ok;
+    }
+    /* coverity[leaked_storage] False positive */
+}
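Given the format strings above, the simple status output (used only by crm_mon, per the comment preceding the function) would look roughly like the following for a healthy cluster and for a degraded one; the counts and node name are illustrative:

    CLUSTER OK: 2 nodes online, 1 standby node, 5 resource instances configured
    CLUSTER WARN: No DC, offline node: node3

The warning path returns pcmk_rc_error so that callers can map it to a nonzero exit status.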