Diffstat (limited to 'lib/pengine/unpack.c')
-rw-r--r-- | lib/pengine/unpack.c | 1794
1 file changed, 1022 insertions, 772 deletions
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 2bd6707..3429d56 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -29,8 +29,8 @@ CRM_TRACE_INIT_DATA(pe_status); // A (parsed) resource action history entry struct action_history { - pe_resource_t *rsc; // Resource that history is for - pe_node_t *node; // Node that history is for + pcmk_resource_t *rsc; // Resource that history is for + pcmk_node_t *node; // Node that history is for xmlNode *xml; // History entry XML // Parsed from entry XML @@ -49,43 +49,40 @@ struct action_history { * use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the * flag is stringified more readably in log messages. */ -#define set_config_flag(data_set, option, flag) do { \ - const char *scf_value = pe_pref((data_set)->config_hash, (option)); \ - if (scf_value != NULL) { \ - if (crm_is_true(scf_value)) { \ - (data_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \ - LOG_TRACE, "Working set", \ - crm_system_name, (data_set)->flags, \ - (flag), #flag); \ - } else { \ - (data_set)->flags = pcmk__clear_flags_as(__func__, __LINE__,\ - LOG_TRACE, "Working set", \ - crm_system_name, (data_set)->flags, \ - (flag), #flag); \ - } \ - } \ +#define set_config_flag(scheduler, option, flag) do { \ + const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \ + if (scf_value != NULL) { \ + if (crm_is_true(scf_value)) { \ + (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ + LOG_TRACE, "Scheduler", \ + crm_system_name, (scheduler)->flags, \ + (flag), #flag); \ + } else { \ + (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ + LOG_TRACE, "Scheduler", \ + crm_system_name, (scheduler)->flags, \ + (flag), #flag); \ + } \ + } \ } while(0) -static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - xmlNode **last_failure, +static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, + xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed); -static void determine_remote_online_status(pe_working_set_t *data_set, - pe_node_t *this_node); -static void add_node_attrs(const xmlNode *xml_obj, pe_node_t *node, - bool overwrite, pe_working_set_t *data_set); +static void determine_remote_online_status(pcmk_scheduler_t *scheduler, + pcmk_node_t *this_node); +static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, + bool overwrite, pcmk_scheduler_t *scheduler); static void determine_online_status(const xmlNode *node_state, - pe_node_t *this_node, - pe_working_set_t *data_set); + pcmk_node_t *this_node, + pcmk_scheduler_t *scheduler); -static void unpack_node_lrm(pe_node_t *node, const xmlNode *xml, - pe_working_set_t *data_set); +static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, + pcmk_scheduler_t *scheduler); -// Bitmask for warnings we only want to print once -uint32_t pe_wo = 0; - static gboolean -is_dangling_guest_node(pe_node_t *node) +is_dangling_guest_node(pcmk_node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared @@ -94,7 +91,7 @@ is_dangling_guest_node(pe_node_t *node) node->details->remote_rsc && node->details->remote_rsc->container == NULL && pcmk_is_set(node->details->remote_rsc->flags, - pe_rsc_orphan_container_filler)) { + pcmk_rsc_removed_filler)) { return TRUE; } @@ -104,23 +101,23 @@ is_dangling_guest_node(pe_node_t *node) /*! 
* \brief Schedule a fence action for a node * - * \param[in,out] data_set Current working set of cluster - * \param[in,out] node Node to fence - * \param[in] reason Text description of why fencing is needed + * \param[in,out] scheduler Scheduler data + * \param[in,out] node Node to fence + * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider `priority-fencing-delay` */ void -pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, +pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { - pe_resource_t *rsc = node->details->remote_rsc->container; + pcmk_resource_t *rsc = node->details->remote_rsc->container; - if (!pcmk_is_set(rsc->flags, pe_rsc_failed)) { - if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { + if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { + if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", @@ -135,7 +132,8 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__set_resource_flags(rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } } @@ -145,12 +143,12 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, "and guest resource no longer exists", pe__node_name(node), reason); pe__set_resource_flags(node->details->remote_rsc, - pe_rsc_failed|pe_rsc_stop); + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } else if (pe__is_remote_node(node)) { - pe_resource_t *rsc = node->details->remote_rsc; + pcmk_resource_t *rsc = node->details->remote_rsc; - if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed)) { + if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", pe__node_name(node), reason); @@ -158,26 +156,26 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, node->details->remote_requires_reset = TRUE; crm_warn("Remote node %s %s: %s", pe__node_name(node), - pe_can_fence(data_set, node)? "will be fenced" : "is unclean", + pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply `priority-fencing-delay` for remote nodes - pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set); + pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", pe__node_name(node), - pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean", + pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean", reason); } else { crm_warn("Cluster node %s %s: %s", pe__node_name(node), - pe_can_fence(data_set, node)? "will be fenced" : "is unclean", + pe_can_fence(scheduler, node)? 
"will be fenced" : "is unclean", reason); node->details->unclean = TRUE; - pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set); + pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler); } } @@ -197,215 +195,258 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void -set_if_xpath(uint64_t flag, const char *xpath, pe_working_set_t *data_set) +set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler) { xmlXPathObjectPtr result = NULL; - if (!pcmk_is_set(data_set->flags, flag)) { - result = xpath_search(data_set->input, xpath); + if (!pcmk_is_set(scheduler->flags, flag)) { + result = xpath_search(scheduler->input, xpath); if (result && (numXpathResults(result) > 0)) { - pe__set_working_set_flags(data_set, flag); + pe__set_working_set_flags(scheduler, flag); } freeXpathObject(result); } } gboolean -unpack_config(xmlNode * config, pe_working_set_t * data_set) +unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) { const char *value = NULL; GHashTable *config_hash = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = NULL, - .role = RSC_ROLE_UNKNOWN, - .now = data_set->now, + .role = pcmk_role_unknown, + .now = scheduler->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; - data_set->config_hash = config_hash; + scheduler->config_hash = config_hash; pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, - CIB_OPTIONS_FIRST, FALSE, data_set); + CIB_OPTIONS_FIRST, FALSE, scheduler); - verify_pe_options(data_set->config_hash); + verify_pe_options(scheduler->config_hash); - set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); - if (!pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { + set_config_flag(scheduler, "enable-startup-probes", + pcmk_sched_probe_resources); + if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) { crm_info("Startup probes: disabled (dangerous)"); } - value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); + value = pe_pref(scheduler->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_info("Watchdog-based self-fencing will be performed via SBD if " "fencing is required and stonith-watchdog-timeout is nonzero"); - pe__set_working_set_flags(data_set, pe_flag_have_stonith_resource); + pe__set_working_set_flags(scheduler, pcmk_sched_have_fencing); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ - set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set); + set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING, + scheduler); - value = pe_pref(data_set->config_hash, "stonith-timeout"); - data_set->stonith_timeout = (int) crm_parse_interval_spec(value); - crm_debug("STONITH timeout: %d", data_set->stonith_timeout); + value = pe_pref(scheduler->config_hash, "stonith-timeout"); + scheduler->stonith_timeout = (int) crm_parse_interval_spec(value); + crm_debug("STONITH timeout: %d", scheduler->stonith_timeout); - set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); - crm_debug("STONITH of failed nodes is %s", - pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)? 
"enabled" : "disabled"); + set_config_flag(scheduler, "stonith-enabled", pcmk_sched_fencing_enabled); + if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { + crm_debug("STONITH of failed nodes is enabled"); + } else { + crm_debug("STONITH of failed nodes is disabled"); + } - data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); - if (!strcmp(data_set->stonith_action, "poweroff")) { - pe_warn_once(pe_wo_poweroff, + scheduler->stonith_action = pe_pref(scheduler->config_hash, + "stonith-action"); + if (!strcmp(scheduler->stonith_action, "poweroff")) { + pe_warn_once(pcmk__wo_poweroff, "Support for stonith-action of 'poweroff' is deprecated " "and will be removed in a future release (use 'off' instead)"); - data_set->stonith_action = "off"; + scheduler->stonith_action = PCMK_ACTION_OFF; } - crm_trace("STONITH will %s nodes", data_set->stonith_action); + crm_trace("STONITH will %s nodes", scheduler->stonith_action); - set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); - crm_debug("Concurrent fencing is %s", - pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)? "enabled" : "disabled"); + set_config_flag(scheduler, "concurrent-fencing", + pcmk_sched_concurrent_fencing); + if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) { + crm_debug("Concurrent fencing is enabled"); + } else { + crm_debug("Concurrent fencing is disabled"); + } - value = pe_pref(data_set->config_hash, + value = pe_pref(scheduler->config_hash, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); if (value) { - data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000; - crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay); + scheduler->priority_fencing_delay = crm_parse_interval_spec(value) + / 1000; + crm_trace("Priority fencing delay is %ds", + scheduler->priority_fencing_delay); } - set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); + set_config_flag(scheduler, "stop-all-resources", pcmk_sched_stop_all); crm_debug("Stop all active resources: %s", - pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stop_everything))); + pcmk__btoa(pcmk_is_set(scheduler->flags, pcmk_sched_stop_all))); - set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); - if (pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) { + set_config_flag(scheduler, "symmetric-cluster", + pcmk_sched_symmetric_cluster); + if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } - value = pe_pref(data_set->config_hash, "no-quorum-policy"); + value = pe_pref(scheduler->config_hash, "no-quorum-policy"); if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) { - data_set->no_quorum_policy = no_quorum_ignore; + scheduler->no_quorum_policy = pcmk_no_quorum_ignore; } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) { - data_set->no_quorum_policy = no_quorum_freeze; + scheduler->no_quorum_policy = pcmk_no_quorum_freeze; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { - data_set->no_quorum_policy = no_quorum_demote; + scheduler->no_quorum_policy = pcmk_no_quorum_demote; } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) { - if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { + if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { int do_panic = 0; - crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, + crm_element_value_int(scheduler->input, XML_ATTR_QUORUM_PANIC, &do_panic); - 
if (do_panic || pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { - data_set->no_quorum_policy = no_quorum_suicide; + if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) { + scheduler->no_quorum_policy = pcmk_no_quorum_fence; } else { crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum"); - data_set->no_quorum_policy = no_quorum_stop; + scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { pcmk__config_err("Resetting no-quorum-policy to 'stop' because " "fencing is disabled"); - data_set->no_quorum_policy = no_quorum_stop; + scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { - data_set->no_quorum_policy = no_quorum_stop; + scheduler->no_quorum_policy = pcmk_no_quorum_stop; } - switch (data_set->no_quorum_policy) { - case no_quorum_freeze: + switch (scheduler->no_quorum_policy) { + case pcmk_no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; - case no_quorum_stop: + case pcmk_no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; - case no_quorum_demote: + case pcmk_no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; - case no_quorum_suicide: + case pcmk_no_quorum_fence: crm_notice("On loss of quorum: Fence all remaining nodes"); break; - case no_quorum_ignore: + case pcmk_no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } - set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); - crm_trace("Orphan resources are %s", - pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)? "stopped" : "ignored"); + set_config_flag(scheduler, "stop-orphan-resources", + pcmk_sched_stop_removed_resources); + if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) { + crm_trace("Orphan resources are stopped"); + } else { + crm_trace("Orphan resources are ignored"); + } - set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); - crm_trace("Orphan resource actions are %s", - pcmk_is_set(data_set->flags, pe_flag_stop_action_orphans)? 
"stopped" : "ignored"); + set_config_flag(scheduler, "stop-orphan-actions", + pcmk_sched_cancel_removed_actions); + if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) { + crm_trace("Orphan resource actions are stopped"); + } else { + crm_trace("Orphan resource actions are ignored"); + } - value = pe_pref(data_set->config_hash, "remove-after-stop"); + value = pe_pref(scheduler->config_hash, "remove-after-stop"); if (value != NULL) { if (crm_is_true(value)) { - pe__set_working_set_flags(data_set, pe_flag_remove_after_stop); + pe__set_working_set_flags(scheduler, pcmk_sched_remove_after_stop); #ifndef PCMK__COMPAT_2_0 - pe_warn_once(pe_wo_remove_after, + pe_warn_once(pcmk__wo_remove_after, "Support for the remove-after-stop cluster property is" " deprecated and will be removed in a future release"); #endif } else { - pe__clear_working_set_flags(data_set, pe_flag_remove_after_stop); + pe__clear_working_set_flags(scheduler, + pcmk_sched_remove_after_stop); } } - set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); + set_config_flag(scheduler, "maintenance-mode", pcmk_sched_in_maintenance); crm_trace("Maintenance mode: %s", - pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_maintenance_mode))); + pcmk__btoa(pcmk_is_set(scheduler->flags, + pcmk_sched_in_maintenance))); - set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); - crm_trace("Start failures are %s", - pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)? "always fatal" : "handled by failcount"); + set_config_flag(scheduler, "start-failure-is-fatal", + pcmk_sched_start_failure_fatal); + if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) { + crm_trace("Start failures are always fatal"); + } else { + crm_trace("Start failures are handled by failcount"); + } - if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { - set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing); + if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { + set_config_flag(scheduler, "startup-fencing", + pcmk_sched_startup_fencing); } - if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) { + if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { - pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes"); + pe_warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes"); } - pe__unpack_node_health_scores(data_set); + pe__unpack_node_health_scores(scheduler); - data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); - crm_trace("Placement strategy: %s", data_set->placement_strategy); + scheduler->placement_strategy = pe_pref(scheduler->config_hash, + "placement-strategy"); + crm_trace("Placement strategy: %s", scheduler->placement_strategy); - set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock); - crm_trace("Resources will%s be locked to cleanly shut down nodes", - (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)? 
"" : " not")); - if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { - value = pe_pref(data_set->config_hash, + set_config_flag(scheduler, "shutdown-lock", pcmk_sched_shutdown_lock); + if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { + value = pe_pref(scheduler->config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); - data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000; - crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock); + scheduler->shutdown_lock = crm_parse_interval_spec(value) / 1000; + crm_trace("Resources will be locked to nodes that were cleanly " + "shut down (locks expire after %s)", + pcmk__readable_interval(scheduler->shutdown_lock)); + } else { + crm_trace("Resources will not be locked to nodes that were cleanly " + "shut down"); + } + + value = pe_pref(scheduler->config_hash, + XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT); + scheduler->node_pending_timeout = crm_parse_interval_spec(value) / 1000; + if (scheduler->node_pending_timeout == 0) { + crm_trace("Do not fence pending nodes"); + } else { + crm_trace("Fence pending nodes after %s", + pcmk__readable_interval(scheduler->node_pending_timeout + * 1000)); } return TRUE; } -pe_node_t * +pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, - const char *score, pe_working_set_t * data_set) + const char *score, pcmk_scheduler_t *scheduler) { - pe_node_t *new_node = NULL; + pcmk_node_t *new_node = NULL; - if (pe_find_node(data_set->nodes, uname) != NULL) { + if (pe_find_node(scheduler->nodes, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } - new_node = calloc(1, sizeof(pe_node_t)); + new_node = calloc(1, sizeof(pcmk_node_t)); if (new_node == NULL) { return NULL; } @@ -425,14 +466,14 @@ pe_create_node(const char *id, const char *uname, const char *type, new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; - new_node->details->data_set = data_set; + new_node->details->data_set = scheduler; if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) { - new_node->details->type = node_member; + new_node->details->type = pcmk_node_variant_cluster; } else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) { - new_node->details->type = node_remote; - pe__set_working_set_flags(data_set, pe_flag_have_remote_nodes); + new_node->details->type = pcmk_node_variant_remote; + pe__set_working_set_flags(scheduler, pcmk_sched_have_remote_nodes); } else { /* @COMPAT 'ping' is the default for backward compatibility, but it @@ -443,7 +484,7 @@ pe_create_node(const char *id, const char *uname, const char *type, "assuming 'ping'", pcmk__s(uname, "without name"), type); } - pe_warn_once(pe_wo_ping_node, + pe_warn_once(pcmk__wo_ping_node, "Support for nodes of type 'ping' (such as %s) is " "deprecated and will be removed in a future release", pcmk__s(uname, "unnamed node")); @@ -464,13 +505,13 @@ pe_create_node(const char *id, const char *uname, const char *type, new_node->details->digest_cache = pcmk__strkey_table(free, pe__free_digests); - data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, - pe__cmp_node_name); + scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node, + pe__cmp_node_name); return new_node; } static const char * -expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) +expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data) { xmlNode *attr_set = NULL; xmlNode *attr 
= NULL; @@ -527,9 +568,10 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data } static void -handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node) +handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node) { - if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { + if ((new_node->details->type == pcmk_node_variant_remote) + && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. @@ -537,7 +579,7 @@ handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node) return; } - if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) { + if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; @@ -552,10 +594,10 @@ handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node) } gboolean -unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) +unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; - pe_node_t *new_node = NULL; + pcmk_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; @@ -578,46 +620,48 @@ unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) "> entry in configuration without id"); continue; } - new_node = pe_create_node(id, uname, type, score, data_set); + new_node = pe_create_node(id, uname, type, score, scheduler); if (new_node == NULL) { return FALSE; } - handle_startup_fencing(data_set, new_node); + handle_startup_fencing(scheduler, new_node); - add_node_attrs(xml_obj, new_node, FALSE, data_set); + add_node_attrs(xml_obj, new_node, FALSE, scheduler); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } - if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { + if (scheduler->localhost + && (pe_find_node(scheduler->nodes, scheduler->localhost) == NULL)) { crm_info("Creating a fake local node"); - pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, - data_set); + pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0, + scheduler); } return TRUE; } static void -setup_container(pe_resource_t * rsc, pe_working_set_t * data_set) +setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { const char *container_id = NULL; if (rsc->children) { - g_list_foreach(rsc->children, (GFunc) setup_container, data_set); + g_list_foreach(rsc->children, (GFunc) setup_container, scheduler); return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) { - pe_resource_t *container = pe_find_resource(data_set->resources, container_id); + pcmk_resource_t *container = pe_find_resource(scheduler->resources, + container_id); if (container) { rsc->container = container; - pe__set_resource_flags(container, pe_rsc_is_container); + pe__set_resource_flags(container, pcmk_rsc_has_filler); container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { @@ -627,7 +671,7 @@ setup_container(pe_resource_t * rsc, pe_working_set_t * data_set) } gboolean -unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) 
+unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; @@ -646,11 +690,12 @@ unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ - if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { + if (new_node_id + && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, - data_set); + scheduler); } continue; } @@ -663,12 +708,14 @@ unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) * configuration for the guest node's connection, to be unpacked * later. */ - new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); - if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { + new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, + scheduler); + if (new_node_id + && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, - data_set); + scheduler); } continue; } @@ -681,13 +728,15 @@ unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { - new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); + new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, + scheduler); - if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { + if (new_node_id + && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, - data_set); + scheduler); } } } @@ -704,20 +753,20 @@ unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) * easy access to the connection resource during the scheduler calculations. */ static void -link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc) +link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc) { - pe_node_t *remote_node = NULL; + pcmk_node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } - if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { + if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } - remote_node = pe_find_node(data_set->nodes, new_rsc->id); + remote_node = pe_find_node(scheduler->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return); pe_rsc_trace(new_rsc, "Linking remote connection resource %s to %s", @@ -728,7 +777,7 @@ link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc) /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. 
*/ - handle_startup_fencing(data_set, remote_node); + handle_startup_fencing(scheduler, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now @@ -742,7 +791,7 @@ link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc) static void destroy_tag(gpointer data) { - pe_tag_t *tag = data; + pcmk_tag_t *tag = data; if (tag) { free(tag->id); @@ -756,7 +805,7 @@ destroy_tag(gpointer data) * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML - * \param[in,out] data_set Where to put resource information + * \param[in,out] scheduler Scheduler data * * \return TRUE * @@ -764,63 +813,64 @@ destroy_tag(gpointer data) * be used when pe__unpack_resource() calls resource_location() */ gboolean -unpack_resources(const xmlNode *xml_resources, pe_working_set_t * data_set) +unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; GList *gIter = NULL; - data_set->template_rsc_sets = pcmk__strkey_table(free, destroy_tag); + scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag); for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { - pe_resource_t *new_rsc = NULL; + pcmk_resource_t *new_rsc = NULL; const char *id = ID(xml_obj); if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring <%s> resource without ID", - crm_element_name(xml_obj)); + xml_obj->name); continue; } if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, pcmk__str_none)) { - if (g_hash_table_lookup_extended(data_set->template_rsc_sets, id, + if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. 
*/ - g_hash_table_insert(data_set->template_rsc_sets, strdup(id), NULL); + g_hash_table_insert(scheduler->template_rsc_sets, strdup(id), + NULL); } continue; } crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>", - crm_element_name(xml_obj), id); + xml_obj->name, id); if (pe__unpack_resource(xml_obj, &new_rsc, NULL, - data_set) == pcmk_rc_ok) { - data_set->resources = g_list_append(data_set->resources, new_rsc); + scheduler) == pcmk_rc_ok) { + scheduler->resources = g_list_append(scheduler->resources, new_rsc); pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", - crm_element_name(xml_obj), id); + xml_obj->name, id); } } - for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { - pe_resource_t *rsc = (pe_resource_t *) gIter->data; + for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) { + pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; - setup_container(rsc, data_set); - link_rsc2remotenode(data_set, rsc); + setup_container(rsc, scheduler); + link_rsc2remotenode(scheduler, rsc); } - data_set->resources = g_list_sort(data_set->resources, + scheduler->resources = g_list_sort(scheduler->resources, pe__cmp_rsc_priority); - if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { + if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) { /* Ignore */ - } else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) - && !pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) { + } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled) + && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option"); @@ -831,11 +881,11 @@ unpack_resources(const xmlNode *xml_resources, pe_working_set_t * data_set) } gboolean -unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) +unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler) { xmlNode *xml_tag = NULL; - data_set->tags = pcmk__strkey_table(free, destroy_tag); + scheduler->tags = pcmk__strkey_table(free, destroy_tag); for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) { @@ -849,7 +899,7 @@ unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID, - crm_element_name(xml_tag)); + (const char *) xml_tag->name); continue; } @@ -864,11 +914,11 @@ unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID, - crm_element_name(xml_obj_ref), tag_id); + xml_obj_ref->name, tag_id); continue; } - if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { + if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } @@ -880,7 +930,7 @@ unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean -unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) +unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler) { const char *ticket_id = NULL; const char *granted = NULL; @@ -888,7 +938,7 @@ unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) const char *standby = NULL; xmlAttrPtr xIter = NULL; - pe_ticket_t *ticket = NULL; + pcmk_ticket_t *ticket = 
NULL; ticket_id = ID(xml_ticket); if (pcmk__str_empty(ticket_id)) { @@ -897,9 +947,9 @@ unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) crm_trace("Processing ticket state for %s", ticket_id); - ticket = g_hash_table_lookup(data_set->tickets, ticket_id); + ticket = g_hash_table_lookup(scheduler->tickets, ticket_id); if (ticket == NULL) { - ticket = ticket_new(ticket_id, data_set); + ticket = ticket_new(ticket_id, scheduler); if (ticket == NULL) { return FALSE; } @@ -907,7 +957,7 @@ unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; - const char *prop_value = crm_element_value(xml_ticket, prop_name); + const char *prop_value = pcmk__xml_attr_value(xIter); if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) { continue; @@ -948,7 +998,7 @@ unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) } static gboolean -unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) +unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; @@ -958,19 +1008,19 @@ unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) { continue; } - unpack_ticket_state(xml_obj, data_set); + unpack_ticket_state(xml_obj, scheduler); } return TRUE; } static void -unpack_handle_remote_attrs(pe_node_t *this_node, const xmlNode *state, - pe_working_set_t *data_set) +unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state, + pcmk_scheduler_t *scheduler) { const char *resource_discovery_enabled = NULL; const xmlNode *attrs = NULL; - pe_resource_t *rsc = NULL; + pcmk_resource_t *rsc = NULL; if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { return; @@ -990,7 +1040,7 @@ unpack_handle_remote_attrs(pe_node_t *this_node, const xmlNode *state, this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); - add_node_attrs(attrs, this_node, TRUE, data_set); + add_node_attrs(attrs, this_node, TRUE, scheduler); if (pe__shutdown_requested(this_node)) { crm_info("%s is shutting down", pe__node_name(this_node)); @@ -1003,7 +1053,7 @@ unpack_handle_remote_attrs(pe_node_t *this_node, const xmlNode *state, } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || - ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed))) { + ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) { crm_info("%s is in maintenance mode", pe__node_name(this_node)); this_node->details->maintenance = TRUE; } @@ -1011,7 +1061,7 @@ unpack_handle_remote_attrs(pe_node_t *this_node, const xmlNode *state, resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (pe__is_remote_node(this_node) - && !pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { + && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute on Pacemaker Remote node %s" " because fencing is disabled", @@ -1033,19 +1083,19 @@ unpack_handle_remote_attrs(pe_node_t *this_node, const xmlNode *state, * \internal * \brief Unpack a cluster node's transient attributes * - * \param[in] state CIB node state XML - * \param[in,out] node Cluster node whose attributes are being unpacked 
- * \param[in,out] data_set Cluster working set + * \param[in] state CIB node state XML + * \param[in,out] node Cluster node whose attributes are being unpacked + * \param[in,out] scheduler Scheduler data */ static void -unpack_transient_attributes(const xmlNode *state, pe_node_t *node, - pe_working_set_t *data_set) +unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node, + pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); - add_node_attrs(attrs, node, TRUE, data_set); + add_node_attrs(attrs, node, TRUE, scheduler); if (crm_is_true(pe_node_attribute_raw(node, "standby"))) { crm_info("%s is in standby mode", pe__node_name(node)); @@ -1074,15 +1124,15 @@ unpack_transient_attributes(const xmlNode *state, pe_node_t *node, * resource history inside it. Multiple passes through the status are needed to * fully unpack everything. * - * \param[in] state CIB node state XML - * \param[in,out] data_set Cluster working set + * \param[in] state CIB node state XML + * \param[in,out] scheduler Scheduler data */ static void -unpack_node_state(const xmlNode *state, pe_working_set_t *data_set) +unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *id = NULL; const char *uname = NULL; - pe_node_t *this_node = NULL; + pcmk_node_t *this_node = NULL; id = crm_element_value(state, XML_ATTR_ID); if (id == NULL) { @@ -1093,15 +1143,21 @@ unpack_node_state(const xmlNode *state, pe_working_set_t *data_set) uname = crm_element_value(state, XML_ATTR_UNAME); if (uname == NULL) { - crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without " - XML_ATTR_UNAME); - return; + /* If a joining peer makes the cluster acquire the quorum from corosync + * meanwhile it has not joined CPG membership of pacemaker-controld yet, + * it's possible that the created node_state entry doesn't have an uname + * yet. We should recognize the node as `pending` and wait for it to + * join CPG. + */ + crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without " + XML_ATTR_UNAME, id); } - this_node = pe_find_node_any(data_set->nodes, id, uname); + this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { - pcmk__config_warn("Ignoring recorded node state for '%s' because " - "it is no longer in the configuration", uname); + pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) " + "because it is no longer in the configuration", + id, pcmk__s(uname, "uname unknown")); return; } @@ -1116,7 +1172,7 @@ unpack_node_state(const xmlNode *state, pe_working_set_t *data_set) return; } - unpack_transient_attributes(state, this_node, data_set); + unpack_transient_attributes(state, this_node, scheduler); /* Provisionally mark this cluster node as clean. We have at least seen it * in the current cluster's lifetime. 
@@ -1126,16 +1182,16 @@ unpack_node_state(const xmlNode *state, pe_working_set_t *data_set) crm_trace("Determining online status of cluster node %s (id %s)", pe__node_name(this_node), id); - determine_online_status(state, this_node, data_set); + determine_online_status(state, this_node, scheduler); - if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum) + if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate) && this_node->details->online - && (data_set->no_quorum_policy == no_quorum_suicide)) { + && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ - pe_fence_node(data_set, this_node, "cluster does not have quorum", + pe_fence_node(scheduler, this_node, "cluster does not have quorum", FALSE); } } @@ -1150,16 +1206,16 @@ unpack_node_state(const xmlNode *state, pe_working_set_t *data_set) * in another node's history, so it might take multiple passes to unpack * everything. * - * \param[in] status CIB XML status section - * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen - * \param[in,out] data_set Cluster working set + * \param[in] status CIB XML status section + * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen + * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done, * or EAGAIN if more unpacking remains to be done) */ static int unpack_node_history(const xmlNode *status, bool fence, - pe_working_set_t *data_set) + pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; @@ -1169,7 +1225,7 @@ unpack_node_history(const xmlNode *status, bool fence, const char *id = ID(state); const char *uname = crm_element_value(state, XML_ATTR_UNAME); - pe_node_t *this_node = NULL; + pcmk_node_t *this_node = NULL; if ((id == NULL) || (uname == NULL)) { // Warning already logged in first pass through status section @@ -1178,7 +1234,7 @@ unpack_node_history(const xmlNode *status, bool fence, continue; } - this_node = pe_find_node_any(data_set->nodes, id, uname); + this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history for node %s because " @@ -1200,10 +1256,10 @@ unpack_node_history(const xmlNode *status, bool fence, * other resource history to the point that we know that the node's * connection and containing resource are both up. */ - pe_resource_t *rsc = this_node->details->remote_rsc; + pcmk_resource_t *rsc = this_node->details->remote_rsc; - if ((rsc == NULL) || (rsc->role != RSC_ROLE_STARTED) - || (rsc->container->role != RSC_ROLE_STARTED)) { + if ((rsc == NULL) || (rsc->role != pcmk_role_started) + || (rsc->container->role != pcmk_role_started)) { crm_trace("Not unpacking resource history for guest node %s " "because container and connection are not known to " "be up", id); @@ -1216,11 +1272,11 @@ unpack_node_history(const xmlNode *status, bool fence, * connection is up, with the exception of when shutdown locks are * in use. 
*/ - pe_resource_t *rsc = this_node->details->remote_rsc; + pcmk_resource_t *rsc = this_node->details->remote_rsc; if ((rsc == NULL) - || (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock) - && (rsc->role != RSC_ROLE_STARTED))) { + || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock) + && (rsc->role != pcmk_role_started))) { crm_trace("Not unpacking resource history for remote node %s " "because connection is not known to be up", id); continue; @@ -1231,8 +1287,9 @@ unpack_node_history(const xmlNode *status, bool fence, * nodes have been unpacked. This allows us to number active clone * instances first. */ - } else if (!pcmk_any_flags_set(data_set->flags, pe_flag_stonith_enabled - |pe_flag_shutdown_lock) + } else if (!pcmk_any_flags_set(scheduler->flags, + pcmk_sched_fencing_enabled + |pcmk_sched_shutdown_lock) && !this_node->details->online) { crm_trace("Not unpacking resource history for offline " "cluster node %s", id); @@ -1240,15 +1297,15 @@ unpack_node_history(const xmlNode *status, bool fence, } if (pe__is_guest_or_remote_node(this_node)) { - determine_remote_online_status(data_set, this_node); - unpack_handle_remote_attrs(this_node, state, data_set); + determine_remote_online_status(scheduler, this_node); + unpack_handle_remote_attrs(this_node, state, scheduler); } crm_trace("Unpacking resource history for %snode %s", (fence? "unseen " : ""), id); this_node->details->unpacked = TRUE; - unpack_node_lrm(this_node, state, data_set); + unpack_node_lrm(this_node, state, scheduler); rc = EAGAIN; // Other node histories might depend on this one } @@ -1259,172 +1316,324 @@ unpack_node_history(const xmlNode *status, bool fence, /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? 
*/ gboolean -unpack_status(xmlNode * status, pe_working_set_t * data_set) +unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler) { xmlNode *state = NULL; crm_trace("Beginning unpack"); - if (data_set->tickets == NULL) { - data_set->tickets = pcmk__strkey_table(free, destroy_ticket); + if (scheduler->tickets == NULL) { + scheduler->tickets = pcmk__strkey_table(free, destroy_ticket); } for (state = pcmk__xe_first_child(status); state != NULL; state = pcmk__xe_next(state)) { if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { - unpack_tickets_state((xmlNode *) state, data_set); + unpack_tickets_state((xmlNode *) state, scheduler); } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { - unpack_node_state(state, data_set); + unpack_node_state(state, scheduler); } } - while (unpack_node_history(status, FALSE, data_set) == EAGAIN) { + while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) { crm_trace("Another pass through node resource histories is needed"); } // Now catch any nodes we didn't see unpack_node_history(status, - pcmk_is_set(data_set->flags, pe_flag_stonith_enabled), - data_set); + pcmk_is_set(scheduler->flags, + pcmk_sched_fencing_enabled), + scheduler); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ - if (data_set->stop_needed != NULL) { - for (GList *item = data_set->stop_needed; item; item = item->next) { - pe_resource_t *container = item->data; - pe_node_t *node = pe__current_node(container); + if (scheduler->stop_needed != NULL) { + for (GList *item = scheduler->stop_needed; item; item = item->next) { + pcmk_resource_t *container = item->data; + pcmk_node_t *node = pe__current_node(container); if (node) { stop_action(container, node, FALSE); } } - g_list_free(data_set->stop_needed); - data_set->stop_needed = NULL; + g_list_free(scheduler->stop_needed); + scheduler->stop_needed = NULL; } /* Now that we know status of all Pacemaker Remote connections and nodes, * we can stop connections for node shutdowns, and check the online status * of remote/guest nodes that didn't have any node history to unpack. */ - for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - pe_node_t *this_node = gIter->data; + for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { + pcmk_node_t *this_node = gIter->data; if (!pe__is_guest_or_remote_node(this_node)) { continue; } if (this_node->details->shutdown && (this_node->details->remote_rsc != NULL)) { - pe__set_next_role(this_node->details->remote_rsc, RSC_ROLE_STOPPED, + pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped, "remote shutdown"); } if (!this_node->details->unpacked) { - determine_remote_online_status(data_set, this_node); + determine_remote_online_status(scheduler, this_node); } } return TRUE; } +/*! 
+ * \internal + * \brief Unpack node's time when it became a member at the cluster layer + * + * \param[in] node_state Node's node_state entry + * \param[in,out] scheduler Scheduler data + * + * \return Epoch time when node became a cluster member + * (or scheduler effective time for legacy entries) if a member, + * 0 if not a member, or -1 if no valid information available + */ +static long long +unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler) +{ + const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM); + int member = 0; + + if (member_time == NULL) { + return -1LL; + + } else if (crm_str_to_boolean(member_time, &member) == 1) { + /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was + * recorded as a boolean for a DC < 2.1.7, or the node is pending + * shutdown and has left the CPG, in which case it was set to 1 to avoid + * fencing for node-pending-timeout. + * + * We return the effective time for in_ccm=1 because what's important to + * avoid fencing is that effective time minus this value is less than + * the pending node timeout. + */ + return member? (long long) get_effective_time(scheduler) : 0LL; + + } else { + long long when_member = 0LL; + + if ((pcmk__scan_ll(member_time, &when_member, + 0LL) != pcmk_rc_ok) || (when_member < 0LL)) { + crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM + " in " XML_CIB_TAG_STATE " entry", member_time); + return -1LL; + } + return when_member; + } +} + +/*! + * \internal + * \brief Unpack node's time when it became online in process group + * + * \param[in] node_state Node's node_state entry + * + * \return Epoch time when node became online in process group (or 0 if not + * online, or 1 for legacy online entries) + */ +static long long +unpack_node_online(const xmlNode *node_state) +{ + const char *peer_time = crm_element_value(node_state, PCMK__XA_CRMD); + + // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline" + if (pcmk__str_eq(peer_time, OFFLINESTATUS, + pcmk__str_casei|pcmk__str_null_matches)) { + return 0LL; + + } else if (pcmk__str_eq(peer_time, ONLINESTATUS, pcmk__str_casei)) { + return 1LL; + + } else { + long long when_online = 0LL; + + if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok) + || (when_online < 0)) { + crm_warn("Unrecognized value '%s' for " PCMK__XA_CRMD " in " + XML_CIB_TAG_STATE " entry, assuming offline", peer_time); + return 0LL; + } + return when_online; + } +} + +/*! 
+ * \internal + * \brief Unpack node attribute for user-requested fencing + * + * \param[in] node Node to check + * \param[in] node_state Node's node_state entry in CIB status + * + * \return \c true if fencing has been requested for \p node, otherwise \c false + */ +static bool +unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state) +{ + long long value = 0LL; + int value_i = 0; + const char *value_s = pe_node_attribute_raw(node, PCMK_NODE_ATTR_TERMINATE); + + // Value may be boolean or an epoch time + if (crm_str_to_boolean(value_s, &value_i) == 1) { + return (value_i != 0); + } + if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) { + return (value > 0); + } + crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE + "node attribute for %s", value_s, pe__node_name(node)); + return false; +} + static gboolean -determine_online_status_no_fencing(pe_working_set_t *data_set, +determine_online_status_no_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, - pe_node_t *this_node) + pcmk_node_t *this_node) { gboolean online = FALSE; - const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); - const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); - const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); - const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); + const char *join = crm_element_value(node_state, PCMK__XA_JOIN); + const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); + long long when_member = unpack_node_member(node_state, scheduler); + long long when_online = unpack_node_online(node_state); - if (!crm_is_true(in_cluster)) { - crm_trace("Node is down: in_cluster=%s", - pcmk__s(in_cluster, "<null>")); + if (when_member <= 0) { + crm_trace("Node %s is %sdown", pe__node_name(this_node), + ((when_member < 0)? "presumed " : "")); - } else if (pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei)) { + } else if (when_online > 0) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { - crm_debug("Node is not ready to run resources: %s", join); + crm_debug("Node %s is not ready to run resources: %s", + pe__node_name(this_node), join); } } else if (this_node->details->expected_up == FALSE) { - crm_trace("Controller is down: " - "in_cluster=%s is_peer=%s join=%s expected=%s", - pcmk__s(in_cluster, "<null>"), pcmk__s(is_peer, "<null>"), + crm_trace("Node %s controller is down: " + "member@%lld online@%lld join=%s expected=%s", + pe__node_name(this_node), when_member, when_online, pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>")); } else { /* mark it unclean */ - pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE); - crm_info("in_cluster=%s is_peer=%s join=%s expected=%s", - pcmk__s(in_cluster, "<null>"), pcmk__s(is_peer, "<null>"), + pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE); + crm_info("Node %s member@%lld online@%lld join=%s expected=%s", + pe__node_name(this_node), when_member, when_online, pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>")); } return online; } -static gboolean -determine_online_status_fencing(pe_working_set_t *data_set, - const xmlNode *node_state, pe_node_t *this_node) +/*! 
+ * \internal + * \brief Check whether a node has taken too long to join controller group + * + * \param[in,out] scheduler Scheduler data + * \param[in] node Node to check + * \param[in] when_member Epoch time when node became a cluster member + * \param[in] when_online Epoch time when node joined controller group + * + * \return true if node has been pending (on the way up) longer than + * node-pending-timeout, otherwise false + * \note This will also update the cluster's recheck time if appropriate. + */ +static inline bool +pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, + long long when_member, long long when_online) { - gboolean online = FALSE; - gboolean do_terminate = FALSE; - bool crmd_online = FALSE; - const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); - const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); - const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); - const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); - const char *terminate = pe_node_attribute_raw(this_node, "terminate"); - -/* - - XML_NODE_IN_CLUSTER ::= true|false - - XML_NODE_IS_PEER ::= online|offline - - XML_NODE_JOIN_STATE ::= member|down|pending|banned - - XML_NODE_EXPECTED ::= member|down -*/ + if ((scheduler->node_pending_timeout > 0) + && (when_member > 0) && (when_online <= 0)) { + // There is a timeout on pending nodes, and node is pending - if (crm_is_true(terminate)) { - do_terminate = TRUE; + time_t timeout = when_member + scheduler->node_pending_timeout; - } else if (terminate != NULL && strlen(terminate) > 0) { - /* could be a time() value */ - char t = terminate[0]; - - if (t != '0' && isdigit(t)) { - do_terminate = TRUE; + if (get_effective_time(node->details->data_set) >= timeout) { + return true; // Node has timed out } + + // Node is pending, but still has time + pe__update_recheck_time(timeout, scheduler, "pending node timeout"); } + return false; +} + +static bool +determine_online_status_fencing(pcmk_scheduler_t *scheduler, + const xmlNode *node_state, + pcmk_node_t *this_node) +{ + bool termination_requested = unpack_node_terminate(this_node, node_state); + const char *join = crm_element_value(node_state, PCMK__XA_JOIN); + const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); + long long when_member = unpack_node_member(node_state, scheduler); + long long when_online = unpack_node_online(node_state); + +/* + - PCMK__XA_JOIN ::= member|down|pending|banned + - PCMK__XA_EXPECTED ::= member|down - crm_trace("%s: in_cluster=%s is_peer=%s join=%s expected=%s term=%d", - pe__node_name(this_node), pcmk__s(in_cluster, "<null>"), - pcmk__s(is_peer, "<null>"), pcmk__s(join, "<null>"), - pcmk__s(exp_state, "<null>"), do_terminate); + @COMPAT with entries recorded for DCs < 2.1.7 + - PCMK__XA_IN_CCM ::= true|false + - PCMK__XA_CRMD ::= online|offline - online = crm_is_true(in_cluster); - crmd_online = pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei); - if (exp_state == NULL) { - exp_state = CRMD_JOINSTATE_DOWN; - } + Since crm_feature_set 3.18.0 (pacemaker-2.1.7): + - PCMK__XA_IN_CCM ::= <timestamp>|0 + Since when node has been a cluster member. A value 0 of means the node is not + a cluster member. + + - PCMK__XA_CRMD ::= <timestamp>|0 + Since when peer has been online in CPG. A value 0 means the peer is offline + in CPG. 
+*/ + + crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s", + pe__node_name(this_node), when_member, when_online, + pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"), + (termination_requested? " (termination requested)" : "")); if (this_node->details->shutdown) { crm_debug("%s is shutting down", pe__node_name(this_node)); /* Slightly different criteria since we can't shut down a dead peer */ - online = crmd_online; + return (when_online > 0); + } - } else if (in_cluster == NULL) { - pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE); + if (when_member < 0) { + pe_fence_node(scheduler, this_node, + "peer has not been seen by the cluster", FALSE); + return false; + } - } else if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_casei)) { - pe_fence_node(data_set, this_node, + if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) { + pe_fence_node(scheduler, this_node, "peer failed Pacemaker membership criteria", FALSE); - } else if (do_terminate == FALSE && pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_casei)) { + } else if (termination_requested) { + if ((when_member <= 0) && (when_online <= 0) + && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) { + crm_info("%s was fenced as requested", pe__node_name(this_node)); + return false; + } + pe_fence_node(scheduler, this_node, "fencing was requested", false); + + } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, + pcmk__str_null_matches)) { - if (crm_is_true(in_cluster) || crmd_online) { + if (pending_too_long(scheduler, this_node, when_member, when_online)) { + pe_fence_node(scheduler, this_node, + "peer pending timed out on joining the process group", + FALSE); + + } else if ((when_member > 0) || (when_online > 0)) { crm_info("- %s is not ready to run resources", pe__node_name(this_node)); this_node->details->standby = TRUE; @@ -1435,48 +1644,41 @@ determine_online_status_fencing(pe_working_set_t *data_set, pe__node_name(this_node)); } - } else if (do_terminate && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_casei) - && crm_is_true(in_cluster) == FALSE && !crmd_online) { - crm_info("%s was just shot", pe__node_name(this_node)); - online = FALSE; - - } else if (crm_is_true(in_cluster) == FALSE) { + } else if (when_member <= 0) { // Consider `priority-fencing-delay` for lost nodes - pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE); + pe_fence_node(scheduler, this_node, + "peer is no longer part of the cluster", TRUE); - } else if (!crmd_online) { - pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE); + } else if (when_online <= 0) { + pe_fence_node(scheduler, this_node, + "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ - } else if (do_terminate) { - pe_fence_node(data_set, this_node, "termination was requested", FALSE); - } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { + } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) { crm_info("%s is active", pe__node_name(this_node)); - } else if (pcmk__strcase_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { + } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING, + CRMD_JOINSTATE_DOWN, NULL)) { crm_info("%s is not ready to run resources", pe__node_name(this_node)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { - pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE); - 
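/* Illustrative aside, not part of this changeset: the node-pending-timeout
 * arithmetic from pending_too_long() above, reduced to a standalone sketch.
 * The helper below is hypothetical; times are epoch seconds, matching the
 * member@/online@ values logged here, and the recheck-time update performed
 * by the real function is omitted.
 */
#include <stdbool.h>
#include <time.h>

static bool
pending_has_timed_out(time_t now, long long when_member, long long when_online,
                      long long pending_timeout)
{
    // Only nodes that are cluster members but have not yet joined the
    // controller process group can be considered pending
    if ((pending_timeout <= 0) || (when_member <= 0) || (when_online > 0)) {
        return false;
    }
    return ((long long) now) >= (when_member + pending_timeout);
}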
crm_warn("%s: in-cluster=%s is-peer=%s join=%s expected=%s term=%d shutdown=%d", - pe__node_name(this_node), pcmk__s(in_cluster, "<null>"), - pcmk__s(is_peer, "<null>"), pcmk__s(join, "<null>"), - pcmk__s(exp_state, "<null>"), do_terminate, - this_node->details->shutdown); + pe_fence_node(scheduler, this_node, "peer was in an unknown state", + FALSE); } - return online; + return (when_member > 0); } static void -determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_node) +determine_remote_online_status(pcmk_scheduler_t *scheduler, + pcmk_node_t *this_node) { - pe_resource_t *rsc = this_node->details->remote_rsc; - pe_resource_t *container = NULL; - pe_node_t *host = NULL; + pcmk_resource_t *rsc = this_node->details->remote_rsc; + pcmk_resource_t *container = NULL; + pcmk_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will @@ -1494,33 +1696,36 @@ determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_nod } /* If the resource is currently started, mark it online. */ - if (rsc->role == RSC_ROLE_STARTED) { + if (rsc->role == pcmk_role_started) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ - if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { + if ((rsc->role == pcmk_role_started) + && (rsc->next_role == pcmk_role_stopped)) { + crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ - if(container && pcmk_is_set(container->flags, pe_rsc_failed)) { + if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; - } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { + } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; - } else if (rsc->role == RSC_ROLE_STOPPED - || (container && container->role == RSC_ROLE_STOPPED)) { + } else if ((rsc->role == pcmk_role_stopped) + || ((container != NULL) + && (container->role == pcmk_role_stopped))) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? 
"Guest" : "Remote"), this_node->details->id); @@ -1541,11 +1746,11 @@ remote_online_done: } static void -determine_online_status(const xmlNode *node_state, pe_node_t *this_node, - pe_working_set_t *data_set) +determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, + pcmk_scheduler_t *scheduler) { gboolean online = FALSE; - const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); + const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); CRM_CHECK(this_node != NULL, return); @@ -1566,11 +1771,13 @@ determine_online_status(const xmlNode *node_state, pe_node_t *this_node, * Anyone caught abusing this logic will be shot */ - } else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { - online = determine_online_status_no_fencing(data_set, node_state, this_node); + } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { + online = determine_online_status_no_fencing(scheduler, node_state, + this_node); } else { - online = determine_online_status_fencing(data_set, node_state, this_node); + online = determine_online_status_fencing(scheduler, node_state, + this_node); } if (online) { @@ -1692,30 +1899,30 @@ clone_zero(const char *last_rsc_id) return zero; } -static pe_resource_t * +static pcmk_resource_t * create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, - pe_working_set_t *data_set) + pcmk_scheduler_t *scheduler) { - pe_resource_t *rsc = NULL; + pcmk_resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); - if (pe__unpack_resource(xml_rsc, &rsc, NULL, data_set) != pcmk_rc_ok) { + if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { - pe_node_t *node; + pcmk_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); - node = pe_find_node(data_set->nodes, rsc_id); + node = pe_find_node(scheduler->nodes, rsc_id); if (node == NULL) { - node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set); + node = pe_create_node(rsc_id, rsc_id, "remote", NULL, scheduler); } - link_rsc2remotenode(data_set, rsc); + link_rsc2remotenode(scheduler, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); @@ -1726,10 +1933,10 @@ create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. 
*/ crm_trace("Detected orphaned container filler %s", rsc_id); - pe__set_resource_flags(rsc, pe_rsc_orphan_container_filler); + pe__set_resource_flags(rsc, pcmk_rsc_removed_filler); } - pe__set_resource_flags(rsc, pe_rsc_orphan); - data_set->resources = g_list_append(data_set->resources, rsc); + pe__set_resource_flags(rsc, pcmk_rsc_removed); + scheduler->resources = g_list_append(scheduler->resources, rsc); return rsc; } @@ -1737,21 +1944,22 @@ create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, * \internal * \brief Create orphan instance for anonymous clone resource history * - * \param[in,out] parent Clone resource that orphan will be added to - * \param[in] rsc_id Orphan's resource ID - * \param[in] node Where orphan is active (for logging only) - * \param[in,out] data_set Cluster working set + * \param[in,out] parent Clone resource that orphan will be added to + * \param[in] rsc_id Orphan's resource ID + * \param[in] node Where orphan is active (for logging only) + * \param[in,out] scheduler Scheduler data * * \return Newly added orphaned instance of \p parent */ -static pe_resource_t * -create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, - const pe_node_t *node, pe_working_set_t *data_set) +static pcmk_resource_t * +create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id, + const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { - pe_resource_t *top = pe__create_clone_child(parent, data_set); + pcmk_resource_t *top = pe__create_clone_child(parent, scheduler); // find_rsc() because we might be a cloned group - pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); + pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, + pcmk_rsc_match_clone_only); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, pe__node_name(node)); @@ -1767,30 +1975,30 @@ create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). 
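 *
 * Illustrative aside, not part of this changeset: history entries for clone
 * instances use IDs of the form "name:N".  A simplified, hypothetical sketch
 * of deriving the base name follows; the clone_zero()/clone_strip() helpers
 * in this file perform additional validation.
 *
 *     #include <stdlib.h>
 *     #include <string.h>
 *
 *     static char *
 *     clone_base_name(const char *rsc_id)   // caller frees the result
 *     {
 *         const char *colon = strrchr(rsc_id, ':');
 *         size_t len = (colon != NULL)? (size_t) (colon - rsc_id)
 *                                     : strlen(rsc_id);
 *         char *base = malloc(len + 1);
 *
 *         if (base != NULL) {
 *             memcpy(base, rsc_id, len);
 *             base[len] = '\0';
 *         }
 *         return base;
 *     }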
* - * \param[in,out] data_set Cluster information - * \param[in] node Node on which to check for instance - * \param[in,out] parent Clone to check - * \param[in] rsc_id Name of cloned resource in history (without instance) + * \param[in,out] scheduler Scheduler data + * \param[in] node Node on which to check for instance + * \param[in,out] parent Clone to check + * \param[in] rsc_id Name of cloned resource in history (no instance) */ -static pe_resource_t * -find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node, - pe_resource_t *parent, const char *rsc_id) +static pcmk_resource_t * +find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, + pcmk_resource_t *parent, const char *rsc_id) { GList *rIter = NULL; - pe_resource_t *rsc = NULL; - pe_resource_t *inactive_instance = NULL; + pcmk_resource_t *rsc = NULL; + pcmk_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); - CRM_ASSERT(!pcmk_is_set(parent->flags, pe_rsc_unique)); + CRM_ASSERT(!pcmk_is_set(parent->flags, pcmk_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, pe__node_name(node), parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GList *locations = NULL; - pe_resource_t *child = rIter->data; + pcmk_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called @@ -1804,8 +2012,8 @@ find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node, * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and - * (3) when we re-run calculations on the same data set as part of a - * simulation. + * (3) when we re-run calculations on the same scheduler data as part of + * a simulation. */ child->fns->location(child, &locations, 2); if (locations) { @@ -1815,7 +2023,7 @@ find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node, */ CRM_LOG_ASSERT(locations->next == NULL); - if (((pe_node_t *)locations->data)->details == node->details) { + if (((pcmk_node_t *) locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we @@ -1823,7 +2031,8 @@ find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node, * * If the history entry is orphaned, rsc will be NULL. */ - rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); + rsc = parent->fns->find_rsc(child, rsc_id, NULL, + pcmk_rsc_match_clone_only); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can @@ -1848,10 +2057,10 @@ find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node, } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance - && !pcmk_is_set(child->flags, pe_rsc_block)) { + && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, - pe_find_clone); + pcmk_rsc_match_clone_only); /* ... 
but don't use it if it was already associated with a * pending action on another node @@ -1881,30 +2090,30 @@ find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node, * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ - if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_needs_fencing) + if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) - && !pe__is_universal_clone(parent, data_set)) { + && !pe__is_universal_clone(parent, scheduler)) { rsc = NULL; } if (rsc == NULL) { - rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); + rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler); pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } -static pe_resource_t * -unpack_find_resource(pe_working_set_t *data_set, const pe_node_t *node, +static pcmk_resource_t * +unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, const char *rsc_id) { - pe_resource_t *rsc = NULL; - pe_resource_t *parent = NULL; + pcmk_resource_t *rsc = NULL; + pcmk_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); - rsc = pe_find_resource(data_set->resources, rsc_id); + rsc = pe_find_resource(scheduler->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, @@ -1912,9 +2121,10 @@ unpack_find_resource(pe_working_set_t *data_set, const pe_node_t *node, * a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); - pe_resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); + pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources, + clone0_id); - if (clone0 && !pcmk_is_set(clone0->flags, pe_rsc_unique)) { + if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); @@ -1924,7 +2134,7 @@ unpack_find_resource(pe_working_set_t *data_set, const pe_node_t *node, } free(clone0_id); - } else if (rsc->variant > pe_native) { + } else if (rsc->variant > pcmk_rsc_variant_primitive) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; @@ -1940,7 +2150,7 @@ unpack_find_resource(pe_working_set_t *data_set, const pe_node_t *node, } else { char *base = clone_strip(rsc_id); - rsc = find_anonymous_clone(data_set, node, parent, base); + rsc = find_anonymous_clone(scheduler, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } @@ -1952,42 +2162,43 @@ unpack_find_resource(pe_working_set_t *data_set, const pe_node_t *node, pcmk__str_update(&rsc->clone_name, rsc_id); pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, pe__node_name(node), rsc->id, - (pcmk_is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : "")); + (pcmk_is_set(rsc->flags, pcmk_rsc_removed)? 
" (ORPHAN)" : "")); } return rsc; } -static pe_resource_t * -process_orphan_resource(const xmlNode *rsc_entry, const pe_node_t *node, - pe_working_set_t *data_set) +static pcmk_resource_t * +process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node, + pcmk_scheduler_t *scheduler) { - pe_resource_t *rsc = NULL; + pcmk_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node)); - rsc = create_fake_resource(rsc_id, rsc_entry, data_set); + rsc = create_fake_resource(rsc_id, rsc_entry, scheduler); if (rsc == NULL) { return NULL; } - if (!pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { - pe__clear_resource_flags(rsc, pe_rsc_managed); + if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) { + pe__clear_resource_flags(rsc, pcmk_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pe_rsc_trace(rsc, "Added orphan %s", rsc->id); - resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set); + resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", + scheduler); } return rsc; } static void -process_rsc_state(pe_resource_t * rsc, pe_node_t * node, +process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node, enum action_fail_response on_fail) { - pe_node_t *tmpnode = NULL; + pcmk_node_t *tmpnode = NULL; char *reason = NULL; - enum action_fail_response save_on_fail = action_fail_ignore; + enum action_fail_response save_on_fail = pcmk_on_fail_ignore; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", @@ -1995,12 +2206,12 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, fail2text(on_fail)); /* process current state */ - if (rsc->role != RSC_ROLE_UNKNOWN) { - pe_resource_t *iter = rsc; + if (rsc->role != pcmk_role_unknown) { + pcmk_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { - pe_node_t *n = pe__copy_node(node); + pcmk_node_t *n = pe__copy_node(node); pe_rsc_trace(rsc, "%s%s%s known on %s", rsc->id, @@ -2009,7 +2220,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, pe__node_name(n)); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } - if (pcmk_is_set(iter->flags, pe_rsc_unique)) { + if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) { break; } iter = iter->parent; @@ -2017,10 +2228,10 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, } /* If a managed resource is believed to be running, but node is down ... */ - if (rsc->role > RSC_ROLE_STOPPED + if ((rsc->role > pcmk_role_stopped) && node->details->online == FALSE && node->details->maintenance == FALSE - && pcmk_is_set(rsc->flags, pe_rsc_managed)) { + && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { gboolean should_fence = FALSE; @@ -2032,12 +2243,15 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, * resource to run again once we are sure we know its state. 
*/ if (pe__is_guest_node(node)) { - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__set_resource_flags(rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); should_fence = TRUE; - } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + } else if (pcmk_is_set(rsc->cluster->flags, + pcmk_sched_fencing_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc - && !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) { + && !pcmk_is_set(node->details->remote_rsc->flags, + pcmk_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start @@ -2070,20 +2284,20 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, /* No extra processing needed * Also allows resources to be started again after a node is shot */ - on_fail = action_fail_ignore; + on_fail = pcmk_on_fail_ignore; } switch (on_fail) { - case action_fail_ignore: + case pcmk_on_fail_ignore: /* nothing to do */ break; - case action_fail_demote: - pe__set_resource_flags(rsc, pe_rsc_failed); + case pcmk_on_fail_demote: + pe__set_resource_flags(rsc, pcmk_rsc_failed); demote_action(rsc, node, FALSE); break; - case action_fail_fence: + case pcmk_on_fail_fence_node: /* treat it as if it is still running * but also mark the node as unclean */ @@ -2092,20 +2306,20 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, free(reason); break; - case action_fail_standby: + case pcmk_on_fail_standby_node: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; - case action_fail_block: + case pcmk_on_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ - pe__clear_resource_flags(rsc, pe_rsc_managed); - pe__set_resource_flags(rsc, pe_rsc_block); + pe__clear_resource_flags(rsc, pcmk_rsc_managed); + pe__set_resource_flags(rsc, pcmk_rsc_blocked); break; - case action_fail_migrate: + case pcmk_on_fail_ban: /* make sure it comes up somewhere else * or not at all */ @@ -2113,19 +2327,22 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, rsc->cluster); break; - case action_fail_stop: - pe__set_next_role(rsc, RSC_ROLE_STOPPED, "on-fail=stop"); + case pcmk_on_fail_stop: + pe__set_next_role(rsc, pcmk_role_stopped, "on-fail=stop"); break; - case action_fail_recover: - if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + case pcmk_on_fail_restart: + if ((rsc->role != pcmk_role_stopped) + && (rsc->role != pcmk_role_unknown)) { + pe__set_resource_flags(rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); stop_action(rsc, node, FALSE); } break; - case action_fail_restart_container: - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + case pcmk_on_fail_restart_container: + pe__set_resource_flags(rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. 
We don't necessarily know where the @@ -2136,14 +2353,16 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, g_list_prepend(rsc->cluster->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); - } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { + } else if ((rsc->role != pcmk_role_stopped) + && (rsc->role != pcmk_role_unknown)) { stop_action(rsc, node, FALSE); } break; - case action_fail_reset_remote: - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + case pcmk_on_fail_reset_remote: + pe__set_resource_flags(rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); + if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); @@ -2161,14 +2380,14 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, } /* require the stop action regardless if fencing is occurring or not. */ - if (rsc->role > RSC_ROLE_STOPPED) { + if (rsc->role > pcmk_role_stopped) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { - pe__set_next_role(rsc, RSC_ROLE_STOPPED, "remote reset"); + pe__set_next_role(rsc, pcmk_role_stopped, "remote reset"); } break; } @@ -2177,16 +2396,17 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ - if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { + if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } - if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { - if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { - if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { + if ((rsc->role != pcmk_role_stopped) + && (rsc->role != pcmk_role_unknown)) { + if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) { + if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { pcmk__config_warn("Detected active orphan %s running on %s", rsc->id, pe__node_name(node)); } else { @@ -2198,16 +2418,17 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, } native_add_running(rsc, node, rsc->cluster, - (save_on_fail != action_fail_ignore)); + (save_on_fail != pcmk_on_fail_ignore)); switch (on_fail) { - case action_fail_ignore: + case pcmk_on_fail_ignore: break; - case action_fail_demote: - case action_fail_block: - pe__set_resource_flags(rsc, pe_rsc_failed); + case pcmk_on_fail_demote: + case pcmk_on_fail_block: + pe__set_resource_flags(rsc, pcmk_rsc_failed); break; default: - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__set_resource_flags(rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); break; } @@ -2220,14 +2441,14 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, rsc->clone_name = NULL; } else { - GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP, - FALSE); + GList *possible_matches = pe__resource_actions(rsc, node, + PCMK_ACTION_STOP, FALSE); GList *gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { - pe_action_t *stop = (pe_action_t *) gIter->data; + pcmk_action_t 
*stop = (pcmk_action_t *) gIter->data; - pe__set_action_flags(stop, pe_action_optional); + pe__set_action_flags(stop, pcmk_action_optional); } g_list_free(possible_matches); @@ -2236,21 +2457,21 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, /* A successful stop after migrate_to on the migration source doesn't make * the partially migrated resource stopped on the migration target. */ - if (rsc->role == RSC_ROLE_STOPPED + if ((rsc->role == pcmk_role_stopped) && rsc->partial_migration_source && rsc->partial_migration_source->details == node->details && rsc->partial_migration_target && rsc->running_on) { - rsc->role = RSC_ROLE_STARTED; + rsc->role = pcmk_role_started; } } /* create active recurring operations as optional */ static void -process_recurring(pe_node_t * node, pe_resource_t * rsc, +process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc, int start_index, int stop_index, - GList *sorted_op_list, pe_working_set_t * data_set) + GList *sorted_op_list, pcmk_scheduler_t *scheduler) { int counter = -1; const char *task = NULL; @@ -2303,7 +2524,7 @@ process_recurring(pe_node_t * node, pe_resource_t * rsc, /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node)); - custom_action(rsc, key, task, node, TRUE, TRUE, data_set); + custom_action(rsc, key, task, node, TRUE, scheduler); } } @@ -2328,20 +2549,24 @@ calculate_active_ops(const GList *sorted_op_list, int *start_index, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); - if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei) + if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; - } else if (pcmk__strcase_any_of(task, CRMD_ACTION_START, CRMD_ACTION_MIGRATED, NULL)) { + } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START, + PCMK_ACTION_MIGRATE_FROM, NULL)) { *start_index = counter; - } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { + } else if ((implied_monitor_start <= *stop_index) + && pcmk__str_eq(task, PCMK_ACTION_MONITOR, + pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } - } else if (pcmk__strcase_any_of(task, CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE, NULL)) { + } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE, + PCMK_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } @@ -2357,26 +2582,26 @@ calculate_active_ops(const GList *sorted_op_list, int *start_index, // If resource history entry has shutdown lock, remember lock node and time static void -unpack_shutdown_lock(const xmlNode *rsc_entry, pe_resource_t *rsc, - const pe_node_t *node, pe_working_set_t *data_set) +unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc, + const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { time_t lock_time = 0; // When lock started (i.e. 
node shutdown time) if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { - if ((data_set->shutdown_lock > 0) - && (get_effective_time(data_set) - > (lock_time + data_set->shutdown_lock))) { + if ((scheduler->shutdown_lock > 0) + && (get_effective_time(scheduler) + > (lock_time + scheduler->shutdown_lock))) { pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, pe__node_name(node)); - pe__clear_resource_history(rsc, node, data_set); + pe__clear_resource_history(rsc, node); } else { /* @COMPAT I don't like breaking const signatures, but * rsc->lock_node should really be const -- we just can't change it * until the next API compatibility break. */ - rsc->lock_node = (pe_node_t *) node; + rsc->lock_node = (pcmk_node_t *) node; rsc->lock_time = lock_time; } } @@ -2388,30 +2613,30 @@ unpack_shutdown_lock(const xmlNode *rsc_entry, pe_resource_t *rsc, * * \param[in,out] node Node whose status is being unpacked * \param[in] rsc_entry lrm_resource XML being unpacked - * \param[in,out] data_set Cluster working set + * \param[in,out] scheduler Scheduler data * * \return Resource corresponding to the entry, or NULL if no operation history */ -static pe_resource_t * -unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource, - pe_working_set_t *data_set) +static pcmk_resource_t * +unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource, + pcmk_scheduler_t *scheduler) { GList *gIter = NULL; int stop_index = -1; int start_index = -1; - enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; + enum rsc_role_e req_role = pcmk_role_unknown; const char *rsc_id = ID(lrm_resource); - pe_resource_t *rsc = NULL; + pcmk_resource_t *rsc = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; - enum action_fail_response on_fail = action_fail_ignore; - enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; + enum action_fail_response on_fail = pcmk_on_fail_ignore; + enum rsc_role_e saved_role = pcmk_role_unknown; if (rsc_id == NULL) { crm_warn("Ignoring malformed " XML_LRM_TAG_RESOURCE @@ -2428,7 +2653,7 @@ unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource, op_list = g_list_prepend(op_list, rsc_op); } - if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { + if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; @@ -2436,25 +2661,25 @@ unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource, } /* find the resource */ - rsc = unpack_find_resource(data_set, node, rsc_id); + rsc = unpack_find_resource(scheduler, node, rsc_id); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { - rsc = process_orphan_resource(lrm_resource, node, data_set); + rsc = process_orphan_resource(lrm_resource, node, scheduler); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node - if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { - unpack_shutdown_lock(lrm_resource, rsc, node, data_set); + if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { + unpack_shutdown_lock(lrm_resource, rsc, node, scheduler); } /* process operations */ saved_role = rsc->role; - rsc->role = RSC_ROLE_UNKNOWN; + rsc->role = pcmk_role_unknown; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { @@ -2465,7 +2690,8 @@ 
unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource, /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); - process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); + process_recurring(node, rsc, start_index, stop_index, sorted_op_list, + scheduler); /* no need to free the contents */ g_list_free(sorted_op_list); @@ -2473,7 +2699,9 @@ unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource, process_rsc_state(rsc, node, on_fail); if (get_target_role(rsc, &req_role)) { - if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { + if ((rsc->next_role == pcmk_role_unknown) + || (req_role < rsc->next_role)) { + pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE); } else if (req_role > rsc->next_role) { @@ -2492,13 +2720,13 @@ unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource, static void handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list, - pe_working_set_t *data_set) + pcmk_scheduler_t *scheduler) { for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { - pe_resource_t *rsc; - pe_resource_t *container; + pcmk_resource_t *rsc; + pcmk_resource_t *container; const char *rsc_id; const char *container_id; @@ -2512,15 +2740,14 @@ handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list, continue; } - container = pe_find_resource(data_set->resources, container_id); + container = pe_find_resource(scheduler->resources, container_id); if (container == NULL) { continue; } - rsc = pe_find_resource(data_set->resources, rsc_id); - if (rsc == NULL || - !pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler) || - rsc->container != NULL) { + rsc = pe_find_resource(scheduler->resources, rsc_id); + if ((rsc == NULL) || (rsc->container != NULL) + || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) { continue; } @@ -2535,12 +2762,13 @@ handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list, * \internal * \brief Unpack one node's lrm status section * - * \param[in,out] node Node whose status is being unpacked - * \param[in] xml CIB node state XML - * \param[in,out] data_set Cluster working set + * \param[in,out] node Node whose status is being unpacked + * \param[in] xml CIB node state XML + * \param[in,out] scheduler Scheduler data */ static void -unpack_node_lrm(pe_node_t *node, const xmlNode *xml, pe_working_set_t *data_set) +unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, + pcmk_scheduler_t *scheduler) { bool found_orphaned_container_filler = false; @@ -2558,10 +2786,10 @@ unpack_node_lrm(pe_node_t *node, const xmlNode *xml, pe_working_set_t *data_set) for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { - pe_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, data_set); + pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler); if ((rsc != NULL) - && pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler)) { + && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) { found_orphaned_container_filler = true; } } @@ -2570,26 +2798,26 @@ unpack_node_lrm(pe_node_t *node, const xmlNode *xml, pe_working_set_t *data_set) * orphaned container fillers to their container resource. 
*/ if (found_orphaned_container_filler) { - handle_orphaned_container_fillers(xml, data_set); + handle_orphaned_container_fillers(xml, scheduler); } } static void -set_active(pe_resource_t * rsc) +set_active(pcmk_resource_t *rsc) { - const pe_resource_t *top = pe__const_top_resource(rsc, false); + const pcmk_resource_t *top = pe__const_top_resource(rsc, false); - if (top && pcmk_is_set(top->flags, pe_rsc_promotable)) { - rsc->role = RSC_ROLE_UNPROMOTED; + if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) { + rsc->role = pcmk_role_unpromoted; } else { - rsc->role = RSC_ROLE_STARTED; + rsc->role = pcmk_role_started; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { - pe_node_t *node = value; + pcmk_node_t *node = value; int *score = user_data; node->weight = *score; @@ -2604,7 +2832,7 @@ set_node_score(gpointer key, gpointer value, gpointer user_data) static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, - int target_rc, pe_working_set_t *data_set) + int target_rc, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; @@ -2620,12 +2848,13 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * NULL); /* Need to check against transition_magic too? */ - if ((source != NULL) && (strcmp(op, CRMD_ACTION_MIGRATE) == 0)) { + if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) { pcmk__g_strcat(xpath, " and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']", NULL); - } else if ((source != NULL) && (strcmp(op, CRMD_ACTION_MIGRATED) == 0)) { + } else if ((source != NULL) + && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) { pcmk__g_strcat(xpath, " and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']", NULL); @@ -2633,7 +2862,7 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * g_string_append_c(xpath, ']'); } - xml = get_xpath_object((const char *) xpath->str, data_set->input, + xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); @@ -2652,7 +2881,7 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * static xmlNode * find_lrm_resource(const char *rsc_id, const char *node_name, - pe_working_set_t *data_set) + pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; @@ -2665,7 +2894,7 @@ find_lrm_resource(const char *rsc_id, const char *node_name, SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", NULL); - xml = get_xpath_object((const char *) xpath->str, data_set->input, + xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); @@ -2682,7 +2911,7 @@ find_lrm_resource(const char *rsc_id, const char *node_name, * \return true if \p rsc_id is unknown on \p node_name, otherwise false */ static bool -unknown_on_node(pe_resource_t *rsc, const char *node_name) +unknown_on_node(pcmk_resource_t *rsc, const char *node_name) { bool result = false; xmlXPathObjectPtr search; @@ -2708,20 +2937,20 @@ unknown_on_node(pe_resource_t *rsc, const char *node_name) * \param[in] node_name Node being checked * \param[in] xml_op Event that monitor is being compared to * \param[in] same_node Whether the operations are on the same node - * \param[in,out] data_set Cluster working set + * \param[in,out] scheduler Scheduler data * * \return true if such a monitor happened after event, false otherwise */ static bool monitor_not_running_after(const char *rsc_id, const char *node_name, const 
xmlNode *xml_op, bool same_node, - pe_working_set_t *data_set) + pcmk_scheduler_t *scheduler) { /* Any probe/monitor operation on the node indicating it was not running * there */ - xmlNode *monitor = find_lrm_op(rsc_id, CRMD_ACTION_STATUS, node_name, - NULL, PCMK_OCF_NOT_RUNNING, data_set); + xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name, + NULL, PCMK_OCF_NOT_RUNNING, scheduler); return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0); } @@ -2730,22 +2959,22 @@ monitor_not_running_after(const char *rsc_id, const char *node_name, * \brief Check whether any non-monitor operation on a node happened after some * event * - * \param[in] rsc_id Resource being checked - * \param[in] node_name Node being checked - * \param[in] xml_op Event that non-monitor is being compared to - * \param[in] same_node Whether the operations are on the same node - * \param[in,out] data_set Cluster working set + * \param[in] rsc_id Resource being checked + * \param[in] node_name Node being checked + * \param[in] xml_op Event that non-monitor is being compared to + * \param[in] same_node Whether the operations are on the same node + * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool non_monitor_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, bool same_node, - pe_working_set_t *data_set) + pcmk_scheduler_t *scheduler) { xmlNode *lrm_resource = NULL; - lrm_resource = find_lrm_resource(rsc_id, node_name, data_set); + lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler); if (lrm_resource == NULL) { return false; } @@ -2760,8 +2989,9 @@ non_monitor_after(const char *rsc_id, const char *node_name, task = crm_element_value(op, XML_LRM_ATTR_TASK); - if (pcmk__str_any_of(task, CRMD_ACTION_START, CRMD_ACTION_STOP, - CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL) + if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP, + PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, + NULL) && pe__is_newer_op(op, xml_op, same_node) > 0) { return true; } @@ -2774,11 +3004,11 @@ non_monitor_after(const char *rsc_id, const char *node_name, * \brief Check whether the resource has newer state on a node after a migration * attempt * - * \param[in] rsc_id Resource being checked - * \param[in] node_name Node being checked - * \param[in] migrate_to Any migrate_to event that is being compared to - * \param[in] migrate_from Any migrate_from event that is being compared to - * \param[in,out] data_set Cluster working set + * \param[in] rsc_id Resource being checked + * \param[in] node_name Node being checked + * \param[in] migrate_to Any migrate_to event that is being compared to + * \param[in] migrate_from Any migrate_from event that is being compared to + * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ @@ -2786,7 +3016,7 @@ static bool newer_state_after_migrate(const char *rsc_id, const char *node_name, const xmlNode *migrate_to, const xmlNode *migrate_from, - pe_working_set_t *data_set) + pcmk_scheduler_t *scheduler) { const xmlNode *xml_op = migrate_to; const char *source = NULL; @@ -2826,9 +3056,9 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, * probe/monitor operation on the node indicating it was not running there, * the migration events potentially no longer matter for the node. 
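 *
 * Illustrative aside, not part of this changeset: a simplified, hypothetical
 * sketch of the "which history entry is newer?" comparison assumed here.  It
 * assumes each entry carries a per-node monotonic call id and an epoch
 * last-change time; the real pe__is_newer_op() may weigh these differently.
 *
 *     #include <stdbool.h>
 *     #include <time.h>
 *
 *     // Returns >0 if entry A is newer than entry B, <0 if older, 0 if tied
 *     static int
 *     compare_history_entries(long long call_id_a, time_t last_change_a,
 *                             long long call_id_b, time_t last_change_b,
 *                             bool same_node)
 *     {
 *         if (same_node) {
 *             // Call ids increase monotonically on a given node
 *             return (call_id_a > call_id_b) - (call_id_a < call_id_b);
 *         }
 *         // Across nodes, fall back to the recorded timestamps
 *         return (last_change_a > last_change_b)
 *                - (last_change_a < last_change_b);
 *     }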
*/ - return non_monitor_after(rsc_id, node_name, xml_op, same_node, data_set) + return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler) || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, - data_set); + scheduler); } /*! @@ -2844,8 +3074,8 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, * \return Standard Pacemaker return code */ static int -get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, - const pe_node_t *target_node, +get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node, + const pcmk_node_t *target_node, const char **source_name, const char **target_name) { *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); @@ -2891,11 +3121,11 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, * \param[in] node Migration source */ static void -add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) +add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node) { pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s", rsc->id, pe__node_name(node)); - rsc->role = RSC_ROLE_STOPPED; + rsc->role = pcmk_role_stopped; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, (gpointer) node); } @@ -2942,7 +3172,7 @@ unpack_migrate_to_success(struct action_history *history) */ int from_rc = PCMK_OCF_OK; int from_status = PCMK_EXEC_PENDING; - pe_node_t *target_node = NULL; + pcmk_node_t *target_node = NULL; xmlNode *migrate_from = NULL; const char *source = NULL; const char *target = NULL; @@ -2961,8 +3191,8 @@ unpack_migrate_to_success(struct action_history *history) true, history->rsc->cluster); // Check for a migrate_from action from this source on the target - migrate_from = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATED, target, - source, -1, history->rsc->cluster); + migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, + target, source, -1, history->rsc->cluster); if (migrate_from != NULL) { if (source_newer_op) { /* There's a newer non-monitor operation on the source and a @@ -2998,7 +3228,7 @@ unpack_migrate_to_success(struct action_history *history) /* Without newer state, this migrate_to implies the resource is active. * (Clones are not allowed to migrate, so role can't be promoted.) 
*/ - history->rsc->role = RSC_ROLE_STARTED; + history->rsc->role = pcmk_role_started; target_node = pe_find_node(history->rsc->cluster->nodes, target); active_on_target = !target_newer_state && (target_node != NULL) @@ -3010,8 +3240,9 @@ unpack_migrate_to_success(struct action_history *history) TRUE); } else { // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(history->rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(history->rsc, pe_rsc_allow_migrate); + pe__set_resource_flags(history->rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); + pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable); } return; } @@ -3028,8 +3259,8 @@ unpack_migrate_to_success(struct action_history *history) } if (active_on_target) { - pe_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, - source); + pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, + source); native_add_running(history->rsc, target_node, history->rsc->cluster, FALSE); @@ -3046,8 +3277,9 @@ unpack_migrate_to_success(struct action_history *history) } else if (!source_newer_op) { // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(history->rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(history->rsc, pe_rsc_allow_migrate); + pe__set_resource_flags(history->rsc, + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); + pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable); } } @@ -3073,12 +3305,12 @@ unpack_migrate_to_failure(struct action_history *history) /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ - history->rsc->role = RSC_ROLE_STARTED; + history->rsc->role = pcmk_role_started; // Check for migrate_from on the target - target_migrate_from = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATED, - target, source, PCMK_OCF_OK, - history->rsc->cluster); + target_migrate_from = find_lrm_op(history->rsc->id, + PCMK_ACTION_MIGRATE_FROM, target, source, + PCMK_OCF_OK, history->rsc->cluster); if (/* If the resource state is unknown on the target, it will likely be * probed there. @@ -3096,8 +3328,8 @@ unpack_migrate_to_failure(struct action_history *history) * active there. * (if it is up). */ - pe_node_t *target_node = pe_find_node(history->rsc->cluster->nodes, - target); + pcmk_node_t *target_node = pe_find_node(history->rsc->cluster->nodes, + target); if (target_node && target_node->details->online) { native_add_running(history->rsc, target_node, history->rsc->cluster, @@ -3140,10 +3372,10 @@ unpack_migrate_from_failure(struct action_history *history) /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ - history->rsc->role = RSC_ROLE_STARTED; + history->rsc->role = pcmk_role_started; // Check for a migrate_to on the source - source_migrate_to = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATE, + source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO, source, target, PCMK_OCF_OK, history->rsc->cluster); @@ -3162,8 +3394,8 @@ unpack_migrate_from_failure(struct action_history *history) /* The resource has no newer state on the source, so assume it's still * active there (if it is up). 
*/ - pe_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, - source); + pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, + source); if (source_node && source_node->details->online) { native_add_running(history->rsc, source_node, history->rsc->cluster, @@ -3250,38 +3482,38 @@ static int cmp_on_fail(enum action_fail_response first, enum action_fail_response second) { switch (first) { - case action_fail_demote: + case pcmk_on_fail_demote: switch (second) { - case action_fail_ignore: + case pcmk_on_fail_ignore: return 1; - case action_fail_demote: + case pcmk_on_fail_demote: return 0; default: return -1; } break; - case action_fail_reset_remote: + case pcmk_on_fail_reset_remote: switch (second) { - case action_fail_ignore: - case action_fail_demote: - case action_fail_recover: + case pcmk_on_fail_ignore: + case pcmk_on_fail_demote: + case pcmk_on_fail_restart: return 1; - case action_fail_reset_remote: + case pcmk_on_fail_reset_remote: return 0; default: return -1; } break; - case action_fail_restart_container: + case pcmk_on_fail_restart_container: switch (second) { - case action_fail_ignore: - case action_fail_demote: - case action_fail_recover: - case action_fail_reset_remote: + case pcmk_on_fail_ignore: + case pcmk_on_fail_demote: + case pcmk_on_fail_restart: + case pcmk_on_fail_reset_remote: return 1; - case action_fail_restart_container: + case pcmk_on_fail_restart_container: return 0; default: return -1; @@ -3292,26 +3524,26 @@ cmp_on_fail(enum action_fail_response first, enum action_fail_response second) break; } switch (second) { - case action_fail_demote: - return (first == action_fail_ignore)? -1 : 1; + case pcmk_on_fail_demote: + return (first == pcmk_on_fail_ignore)? -1 : 1; - case action_fail_reset_remote: + case pcmk_on_fail_reset_remote: switch (first) { - case action_fail_ignore: - case action_fail_demote: - case action_fail_recover: + case pcmk_on_fail_ignore: + case pcmk_on_fail_demote: + case pcmk_on_fail_restart: return -1; default: return 1; } break; - case action_fail_restart_container: + case pcmk_on_fail_restart_container: switch (first) { - case action_fail_ignore: - case action_fail_demote: - case action_fail_recover: - case action_fail_reset_remote: + case pcmk_on_fail_ignore: + case pcmk_on_fail_demote: + case pcmk_on_fail_restart: + case pcmk_on_fail_reset_remote: return -1; default: return 1; @@ -3331,13 +3563,13 @@ cmp_on_fail(enum action_fail_response first, enum action_fail_response second) * \param[in,out] rsc Resource to ban */ static void -ban_from_all_nodes(pe_resource_t *rsc) +ban_from_all_nodes(pcmk_resource_t *rsc) { int score = -INFINITY; - pe_resource_t *fail_rsc = rsc; + pcmk_resource_t *fail_rsc = rsc; if (fail_rsc->parent != NULL) { - pe_resource_t *parent = uber_parent(fail_rsc); + pcmk_resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_anon_clone(parent)) { /* For anonymous clones, if an operation with on-fail=stop fails for @@ -3358,18 +3590,50 @@ ban_from_all_nodes(pe_resource_t *rsc) /*! 
* \internal + * \brief Get configured failure handling and role after failure for an action + * + * \param[in,out] history Unpacked action history entry + * \param[out] on_fail Where to set configured failure handling + * \param[out] fail_role Where to set to role after failure + */ +static void +unpack_failure_handling(struct action_history *history, + enum action_fail_response *on_fail, + enum rsc_role_e *fail_role) +{ + xmlNode *config = pcmk__find_action_config(history->rsc, history->task, + history->interval_ms, true); + + GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node, + history->task, + history->interval_ms, config); + + const char *on_fail_str = g_hash_table_lookup(meta, XML_OP_ATTR_ON_FAIL); + + *on_fail = pcmk__parse_on_fail(history->rsc, history->task, + history->interval_ms, on_fail_str); + *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail, + meta); + g_hash_table_destroy(meta); +} + +/*! + * \internal * \brief Update resource role, failure handling, etc., after a failed action * - * \param[in,out] history Parsed action result history - * \param[out] last_failure Set this to action XML - * \param[in,out] on_fail What should be done about the result + * \param[in,out] history Parsed action result history + * \param[in] config_on_fail Action failure handling from configuration + * \param[in] fail_role Resource's role after failure of this action + * \param[out] last_failure This will be set to the history XML + * \param[in,out] on_fail Actual handling of action result */ static void -unpack_rsc_op_failure(struct action_history *history, xmlNode **last_failure, +unpack_rsc_op_failure(struct action_history *history, + enum action_fail_response config_on_fail, + enum rsc_role_e fail_role, xmlNode **last_failure, enum action_fail_response *on_fail) { bool is_probe = false; - pe_action_t *action = NULL; char *last_change_s = NULL; *last_failure = history->xml; @@ -3377,7 +3641,7 @@ unpack_rsc_op_failure(struct action_history *history, xmlNode **last_failure, is_probe = pcmk_xe_is_probe(history->xml); last_change_s = last_change_str(history->xml); - if (!pcmk_is_set(history->rsc->cluster->flags, pe_flag_symmetric_cluster) + if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster) && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s", @@ -3414,36 +3678,34 @@ unpack_rsc_op_failure(struct action_history *history, xmlNode **last_failure, free(last_change_s); - action = custom_action(history->rsc, strdup(history->key), history->task, - NULL, TRUE, FALSE, history->rsc->cluster); - if (cmp_on_fail(*on_fail, action->on_fail) < 0) { - pe_rsc_trace(history->rsc, "on-fail %s -> %s for %s (%s)", - fail2text(*on_fail), fail2text(action->on_fail), - action->uuid, history->key); - *on_fail = action->on_fail; + if (cmp_on_fail(*on_fail, config_on_fail) < 0) { + pe_rsc_trace(history->rsc, "on-fail %s -> %s for %s", + fail2text(*on_fail), fail2text(config_on_fail), + history->key); + *on_fail = config_on_fail; } - if (strcmp(history->task, CRMD_ACTION_STOP) == 0) { + if (strcmp(history->task, PCMK_ACTION_STOP) == 0) { resource_location(history->rsc, history->node, -INFINITY, "__stop_fail__", history->rsc->cluster); - } else if (strcmp(history->task, CRMD_ACTION_MIGRATE) == 0) { + } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) { unpack_migrate_to_failure(history); - } else if (strcmp(history->task, 
CRMD_ACTION_MIGRATED) == 0) { + } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) { unpack_migrate_from_failure(history); - } else if (strcmp(history->task, CRMD_ACTION_PROMOTE) == 0) { - history->rsc->role = RSC_ROLE_PROMOTED; + } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { + history->rsc->role = pcmk_role_promoted; - } else if (strcmp(history->task, CRMD_ACTION_DEMOTE) == 0) { - if (action->on_fail == action_fail_block) { - history->rsc->role = RSC_ROLE_PROMOTED; - pe__set_next_role(history->rsc, RSC_ROLE_STOPPED, + } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) { + if (config_on_fail == pcmk_on_fail_block) { + history->rsc->role = pcmk_role_promoted; + pe__set_next_role(history->rsc, pcmk_role_stopped, "demote with on-fail=block"); } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) { - history->rsc->role = RSC_ROLE_STOPPED; + history->rsc->role = pcmk_role_stopped; } else { /* Staying in the promoted role would put the scheduler and @@ -3451,16 +3713,16 @@ unpack_rsc_op_failure(struct action_history *history, xmlNode **last_failure, * dangerous because the resource will be stopped as part of * recovery, and any promotion will be ordered after that stop. */ - history->rsc->role = RSC_ROLE_UNPROMOTED; + history->rsc->role = pcmk_role_unpromoted; } } if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { /* leave stopped */ pe_rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id); - history->rsc->role = RSC_ROLE_STOPPED; + history->rsc->role = pcmk_role_stopped; - } else if (history->rsc->role < RSC_ROLE_STARTED) { + } else if (history->rsc->role < pcmk_role_started) { pe_rsc_trace(history->rsc, "Setting %s active", history->rsc->id); set_active(history->rsc); } @@ -3469,18 +3731,16 @@ unpack_rsc_op_failure(struct action_history *history, xmlNode **last_failure, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", history->rsc->id, role2text(history->rsc->role), pcmk__btoa(history->node->details->unclean), - fail2text(action->on_fail), role2text(action->fail_role)); + fail2text(config_on_fail), role2text(fail_role)); - if ((action->fail_role != RSC_ROLE_STARTED) - && (history->rsc->next_role < action->fail_role)) { - pe__set_next_role(history->rsc, action->fail_role, "failure"); + if ((fail_role != pcmk_role_started) + && (history->rsc->next_role < fail_role)) { + pe__set_next_role(history->rsc, fail_role, "failure"); } - if (action->fail_role == RSC_ROLE_STOPPED) { + if (fail_role == pcmk_role_stopped) { ban_from_all_nodes(history->rsc); } - - pe_free_action(action); } /*! @@ -3497,7 +3757,7 @@ block_if_unrecoverable(struct action_history *history) { char *last_change_s = NULL; - if (strcmp(history->task, CRMD_ACTION_STOP) != 0) { + if (strcmp(history->task, PCMK_ACTION_STOP) != 0) { return; // All actions besides stop are always recoverable } if (pe_can_fence(history->node->details->data_set, history->node)) { @@ -3516,8 +3776,8 @@ block_if_unrecoverable(struct action_history *history) free(last_change_s); - pe__clear_resource_flags(history->rsc, pe_rsc_managed); - pe__set_resource_flags(history->rsc, pe_rsc_block); + pe__clear_resource_flags(history->rsc, pcmk_rsc_managed); + pe__set_resource_flags(history->rsc, pcmk_rsc_blocked); } /*! 
@@ -3556,8 +3816,8 @@ remap_because(struct action_history *history, const char **why, int value, * \param[in] expired Whether result is expired * * \note If the result is remapped and the node is not shutting down or failed, - * the operation will be recorded in the data set's list of failed operations - * to highlight it for the user. + * the operation will be recorded in the scheduler data's list of failed + * operations to highlight it for the user. * * \note This may update the resource's current and next role. */ @@ -3664,16 +3924,16 @@ remap_operation(struct action_history *history, case PCMK_OCF_NOT_RUNNING: if (is_probe || (history->expected_exit_status == history->exit_status) - || !pcmk_is_set(history->rsc->flags, pe_rsc_managed)) { + || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) { /* For probes, recurring monitors for the Stopped role, and * unmanaged resources, "not running" is not considered a * failure. */ remap_because(history, &why, PCMK_EXEC_DONE, "exit status"); - history->rsc->role = RSC_ROLE_STOPPED; - *on_fail = action_fail_ignore; - pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN, + history->rsc->role = pcmk_role_stopped; + *on_fail = pcmk_on_fail_ignore; + pe__set_next_role(history->rsc, pcmk_role_unknown, "not running"); } break; @@ -3692,13 +3952,13 @@ remap_operation(struct action_history *history, } if (!expired || (history->exit_status == history->expected_exit_status)) { - history->rsc->role = RSC_ROLE_PROMOTED; + history->rsc->role = pcmk_role_promoted; } break; case PCMK_OCF_FAILED_PROMOTED: if (!expired) { - history->rsc->role = RSC_ROLE_PROMOTED; + history->rsc->role = pcmk_role_promoted; } remap_because(history, &why, PCMK_EXEC_ERROR, "exit status"); break; @@ -3765,16 +4025,15 @@ remap_done: // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(const xmlNode *xml_op, const char *task, - pe_resource_t *rsc, pe_node_t *node) + pcmk_resource_t *rsc, pcmk_node_t *node) { - if (!strcmp(task, "start") || !strcmp(task, "monitor")) { - + if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. 
*/ - pe__add_param_check(xml_op, rsc, node, pe_check_last_failure, + pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure, rsc->cluster); } else { @@ -3783,13 +4042,13 @@ should_clear_for_param_change(const xmlNode *xml_op, const char *task, digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster); switch (digest_data->rc) { - case RSC_DIGEST_UNKNOWN: + case pcmk__digest_unknown: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, pe__xe_history_key(xml_op), node->details->id); break; - case RSC_DIGEST_MATCH: + case pcmk__digest_match: break; default: return TRUE; @@ -3801,21 +4060,21 @@ should_clear_for_param_change(const xmlNode *xml_op, const char *task, // Order action after fencing of remote node, given connection rsc static void -order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn, - pe_working_set_t *data_set) +order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn, + pcmk_scheduler_t *scheduler) { - pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id); + pcmk_node_t *remote_node = pe_find_node(scheduler->nodes, remote_conn->id); if (remote_node) { - pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, - FALSE, data_set); + pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, + FALSE, scheduler); - order_actions(fence, action, pe_order_implies_then); + order_actions(fence, action, pcmk__ar_first_implies_then); } } static bool -should_ignore_failure_timeout(const pe_resource_t *rsc, const char *task, +should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task, guint interval_ms, bool is_last_failure) { /* Clearing failures of recurring monitors has special concerns. The @@ -3839,10 +4098,11 @@ should_ignore_failure_timeout(const pe_resource_t *rsc, const char *task, * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms - && pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled) - && (interval_ms != 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { + && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled) + && (interval_ms != 0) + && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { - pe_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id); + pcmk_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { @@ -3909,7 +4169,8 @@ check_operation_expiry(struct action_history *history) // Does the resource as a whole have an unexpired fail count? 
unexpired_fail_count = pe_get_failcount(history->node, history->rsc, - &last_failure, pe_fc_effective, + &last_failure, + pcmk__fc_effective, history->xml); // Update scheduler recheck time according to *last* failure @@ -3920,13 +4181,14 @@ check_operation_expiry(struct action_history *history) history->rsc->failure_timeout, (long long) last_failure); last_failure += history->rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { - pe__update_recheck_time(last_failure, history->rsc->cluster); + pe__update_recheck_time(last_failure, history->rsc->cluster, + "fail count expiration"); } } if (expired) { - if (pe_get_failcount(history->node, history->rsc, NULL, pe_fc_default, - history->xml)) { + if (pe_get_failcount(history->node, history->rsc, NULL, + pcmk__fc_default, history->xml)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { @@ -3963,12 +4225,14 @@ check_operation_expiry(struct action_history *history) } if (clear_reason != NULL) { + pcmk_action_t *clear_op = NULL; + // Schedule clearing of the fail count - pe_action_t *clear_op = pe__clear_failcount(history->rsc, history->node, - clear_reason, - history->rsc->cluster); + clear_op = pe__clear_failcount(history->rsc, history->node, + clear_reason, history->rsc->cluster); - if (pcmk_is_set(history->rsc->cluster->flags, pe_flag_stonith_enabled) + if (pcmk_is_set(history->rsc->cluster->flags, + pcmk_sched_fencing_enabled) && (history->rsc->remote_reconnect_ms != 0)) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing @@ -3987,7 +4251,7 @@ check_operation_expiry(struct action_history *history) } if (expired && (history->interval_ms == 0) - && pcmk__str_eq(history->task, CRMD_ACTION_STATUS, pcmk__str_none)) { + && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { switch (history->exit_status) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: @@ -4022,27 +4286,6 @@ pe__target_rc_from_xml(const xmlNode *xml_op) /*! * \internal - * \brief Get the failure handling for an action - * - * \param[in,out] history Parsed action history entry - * - * \return Failure handling appropriate to action - */ -static enum action_fail_response -get_action_on_fail(struct action_history *history) -{ - enum action_fail_response result = action_fail_recover; - pe_action_t *action = custom_action(history->rsc, strdup(history->key), - history->task, NULL, TRUE, FALSE, - history->rsc->cluster); - - result = action->on_fail; - pe_free_action(action); - return result; -} - -/*! 
- * \internal * \brief Update a resource's state for an action result * * \param[in,out] history Parsed action history entry @@ -4060,53 +4303,53 @@ update_resource_state(struct action_history *history, int exit_status, if ((exit_status == PCMK_OCF_NOT_INSTALLED) || (!pe_rsc_is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml))) { - history->rsc->role = RSC_ROLE_STOPPED; + history->rsc->role = pcmk_role_stopped; } else if (exit_status == PCMK_OCF_NOT_RUNNING) { clear_past_failure = true; - } else if (pcmk__str_eq(history->task, CRMD_ACTION_STATUS, + } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { if ((last_failure != NULL) && pcmk__str_eq(history->key, pe__xe_history_key(last_failure), pcmk__str_none)) { clear_past_failure = true; } - if (history->rsc->role < RSC_ROLE_STARTED) { + if (history->rsc->role < pcmk_role_started) { set_active(history->rsc); } - } else if (pcmk__str_eq(history->task, CRMD_ACTION_START, pcmk__str_none)) { - history->rsc->role = RSC_ROLE_STARTED; + } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) { + history->rsc->role = pcmk_role_started; clear_past_failure = true; - } else if (pcmk__str_eq(history->task, CRMD_ACTION_STOP, pcmk__str_none)) { - history->rsc->role = RSC_ROLE_STOPPED; + } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) { + history->rsc->role = pcmk_role_stopped; clear_past_failure = true; - } else if (pcmk__str_eq(history->task, CRMD_ACTION_PROMOTE, + } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) { - history->rsc->role = RSC_ROLE_PROMOTED; + history->rsc->role = pcmk_role_promoted; clear_past_failure = true; - } else if (pcmk__str_eq(history->task, CRMD_ACTION_DEMOTE, + } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE, pcmk__str_none)) { - if (*on_fail == action_fail_demote) { + if (*on_fail == pcmk_on_fail_demote) { // Demote clears an error only if on-fail=demote clear_past_failure = true; } - history->rsc->role = RSC_ROLE_UNPROMOTED; + history->rsc->role = pcmk_role_unpromoted; - } else if (pcmk__str_eq(history->task, CRMD_ACTION_MIGRATED, + } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { - history->rsc->role = RSC_ROLE_STARTED; + history->rsc->role = pcmk_role_started; clear_past_failure = true; - } else if (pcmk__str_eq(history->task, CRMD_ACTION_MIGRATE, + } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { unpack_migrate_to_success(history); - } else if (history->rsc->role < RSC_ROLE_STARTED) { + } else if (history->rsc->role < pcmk_role_started) { pe_rsc_trace(history->rsc, "%s active on %s", history->rsc->id, pe__node_name(history->node)); set_active(history->rsc); @@ -4117,26 +4360,26 @@ update_resource_state(struct action_history *history, int exit_status, } switch (*on_fail) { - case action_fail_stop: - case action_fail_fence: - case action_fail_migrate: - case action_fail_standby: + case pcmk_on_fail_stop: + case pcmk_on_fail_ban: + case pcmk_on_fail_standby_node: + case pcmk_on_fail_fence_node: pe_rsc_trace(history->rsc, "%s (%s) is not cleared by a completed %s", history->rsc->id, fail2text(*on_fail), history->task); break; - case action_fail_block: - case action_fail_ignore: - case action_fail_demote: - case action_fail_recover: - case action_fail_restart_container: - *on_fail = action_fail_ignore; - pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN, + case pcmk_on_fail_block: + case pcmk_on_fail_ignore: + case pcmk_on_fail_demote: + 
case pcmk_on_fail_restart: + case pcmk_on_fail_restart_container: + *on_fail = pcmk_on_fail_ignore; + pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures"); break; - case action_fail_reset_remote: + case pcmk_on_fail_reset_remote: if (history->rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and @@ -4144,8 +4387,8 @@ update_resource_state(struct action_history *history, int exit_status, * for the failure to be cleared entirely before attempting * to reconnect.) */ - *on_fail = action_fail_ignore; - pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN, + *on_fail = pcmk_on_fail_ignore; + pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures and reset remote"); } break; @@ -4170,14 +4413,14 @@ can_affect_state(struct action_history *history) * Currently, unknown operations can affect whether a resource is considered * active and/or failed. */ - return pcmk__str_any_of(history->task, CRMD_ACTION_STATUS, - CRMD_ACTION_START, CRMD_ACTION_STOP, - CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE, - CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, + return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR, + PCMK_ACTION_START, PCMK_ACTION_STOP, + PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, + PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, "asyncmon", NULL); #else - return !pcmk__str_any_of(history->task, CRMD_ACTION_NOTIFY, - CRMD_ACTION_METADATA, NULL); + return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY, + PCMK_ACTION_META_DATA, NULL); #endif } @@ -4244,8 +4487,8 @@ process_expired_result(struct action_history *history, int orig_exit_status) && pcmk_xe_mask_probe_failure(history->xml) && (orig_exit_status != history->expected_exit_status)) { - if (history->rsc->role <= RSC_ROLE_STOPPED) { - history->rsc->role = RSC_ROLE_UNKNOWN; + if (history->rsc->role <= pcmk_role_stopped) { + history->rsc->role = pcmk_role_unknown; } crm_trace("Ignoring resource history entry %s for probe of %s on %s: " "Masked failure expired", @@ -4303,9 +4546,9 @@ mask_probe_failure(struct action_history *history, int orig_exit_status, const xmlNode *last_failure, enum action_fail_response *on_fail) { - pe_resource_t *ban_rsc = history->rsc; + pcmk_resource_t *ban_rsc = history->rsc; - if (!pcmk_is_set(history->rsc->flags, pe_rsc_unique)) { + if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) { ban_rsc = uber_parent(history->rsc); } @@ -4392,20 +4635,20 @@ process_pending_action(struct action_history *history, return; } - if (strcmp(history->task, CRMD_ACTION_START) == 0) { - pe__set_resource_flags(history->rsc, pe_rsc_start_pending); + if (strcmp(history->task, PCMK_ACTION_START) == 0) { + pe__set_resource_flags(history->rsc, pcmk_rsc_start_pending); set_active(history->rsc); - } else if (strcmp(history->task, CRMD_ACTION_PROMOTE) == 0) { - history->rsc->role = RSC_ROLE_PROMOTED; + } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { + history->rsc->role = pcmk_role_promoted; - } else if ((strcmp(history->task, CRMD_ACTION_MIGRATE) == 0) + } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) && history->node->details->unclean) { /* A migrate_to action is pending on a unclean source, so force a stop * on the target. 
*/ const char *migrate_target = NULL; - pe_node_t *target = NULL; + pcmk_node_t *target = NULL; migrate_target = crm_element_value(history->xml, XML_LRM_ATTR_MIGRATE_TARGET); @@ -4439,13 +4682,14 @@ process_pending_action(struct action_history *history, } static void -unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, +unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *on_fail) { int old_rc = 0; bool expired = false; - pe_resource_t *parent = rsc; - enum action_fail_response failure_strategy = action_fail_recover; + pcmk_resource_t *parent = rsc; + enum rsc_role_e fail_role = pcmk_role_unknown; + enum action_fail_response failure_strategy = pcmk_on_fail_restart; struct action_history history = { .rsc = rsc, @@ -4514,7 +4758,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, goto done; } - if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { + if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) { parent = uber_parent(rsc); } @@ -4529,25 +4773,29 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, goto done; case PCMK_EXEC_NOT_INSTALLED: - failure_strategy = get_action_on_fail(&history); - if (failure_strategy == action_fail_ignore) { + unpack_failure_handling(&history, &failure_strategy, &fail_role); + if (failure_strategy == pcmk_on_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " CRM_XS " status=%d rc=%d id=%s", history.task, rsc->id, pe__node_name(node), history.execution_status, history.exit_status, history.id); - /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ - *on_fail = action_fail_migrate; + /* Also for printing it as "FAILED" by marking it as + * pcmk_rsc_failed later + */ + *on_fail = pcmk_on_fail_ban; } resource_location(parent, node, -INFINITY, "hard-error", rsc->cluster); - unpack_rsc_op_failure(&history, last_failure, on_fail); + unpack_rsc_op_failure(&history, failure_strategy, fail_role, + last_failure, on_fail); goto done; case PCMK_EXEC_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) - && pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_managed)) { + && pcmk_is_set(node->details->remote_rsc->flags, + pcmk_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a @@ -4555,7 +4803,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, * that, ensure the remote connection is considered failed. 
*/ pe__set_resource_flags(node->details->remote_rsc, - pe_rsc_failed|pe_rsc_stop); + pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } break; // Not done, do error handling @@ -4571,10 +4819,10 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, break; } - failure_strategy = get_action_on_fail(&history); - if ((failure_strategy == action_fail_ignore) - || (failure_strategy == action_fail_restart_container - && (strcmp(history.task, CRMD_ACTION_STOP) == 0))) { + unpack_failure_handling(&history, &failure_strategy, &fail_role); + if ((failure_strategy == pcmk_on_fail_ignore) + || ((failure_strategy == pcmk_on_fail_restart_container) + && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) { char *last_change_s = last_change_str(xml_op); @@ -4589,17 +4837,18 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, update_resource_state(&history, history.expected_exit_status, *last_failure, on_fail); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); - pe__set_resource_flags(rsc, pe_rsc_failure_ignored); + pe__set_resource_flags(rsc, pcmk_rsc_ignore_failure); record_failed_op(&history); - if ((failure_strategy == action_fail_restart_container) - && cmp_on_fail(*on_fail, action_fail_recover) <= 0) { + if ((failure_strategy == pcmk_on_fail_restart_container) + && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) { *on_fail = failure_strategy; } } else { - unpack_rsc_op_failure(&history, last_failure, on_fail); + unpack_rsc_op_failure(&history, failure_strategy, fail_role, + last_failure, on_fail); if (history.execution_status == PCMK_EXEC_ERROR_HARD) { uint8_t log_level = LOG_ERR; @@ -4635,15 +4884,15 @@ done: } static void -add_node_attrs(const xmlNode *xml_obj, pe_node_t *node, bool overwrite, - pe_working_set_t *data_set) +add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, + pcmk_scheduler_t *scheduler) { const char *cluster_name = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, - .role = RSC_ROLE_UNKNOWN, - .now = data_set->now, + .role = pcmk_role_unknown, + .now = scheduler->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL @@ -4654,8 +4903,8 @@ add_node_attrs(const xmlNode *xml_obj, pe_node_t *node, bool overwrite, g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); - if (pcmk__str_eq(node->details->id, data_set->dc_uuid, pcmk__str_casei)) { - data_set->dc_node = node; + if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) { + scheduler->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); @@ -4664,18 +4913,19 @@ add_node_attrs(const xmlNode *xml_obj, pe_node_t *node, bool overwrite, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } - cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); + cluster_name = g_hash_table_lookup(scheduler->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data, - node->details->attrs, NULL, overwrite, data_set); + node->details->attrs, NULL, overwrite, + scheduler); pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data, node->details->utilization, NULL, - FALSE, data_set); + FALSE, scheduler); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); @@ -4760,15 +5010,15 @@ 
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gbool GList * find_operations(const char *rsc, const char *node, gboolean active_filter, - pe_working_set_t * data_set) + pcmk_scheduler_t *scheduler) { GList *output = NULL; GList *intermediate = NULL; xmlNode *tmp = NULL; - xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); + xmlNode *status = find_xml_node(scheduler->input, XML_CIB_TAG_STATUS, TRUE); - pe_node_t *this_node = NULL; + pcmk_node_t *this_node = NULL; xmlNode *node_state = NULL; @@ -4782,20 +5032,20 @@ find_operations(const char *rsc, const char *node, gboolean active_filter, continue; } - this_node = pe_find_node(data_set->nodes, uname); + this_node = pe_find_node(scheduler->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { - determine_remote_online_status(data_set, this_node); + determine_remote_online_status(scheduler, this_node); } else { - determine_online_status(node_state, this_node, data_set); + determine_online_status(node_state, this_node, scheduler); } if (this_node->details->online - || pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { + || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith |
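
The find_operations() hunk above proceeds for a node only if it is online or fencing is enabled; per the comment in the diff, offline nodes run no resources, but with fencing enabled their history still matters so that resource starts can be ordered after fencing. As a minimal standalone sketch of that filter, with invented demo_* types and names rather than Pacemaker's own:

/* Standalone sketch (not Pacemaker code): collect a node's operation
 * history only if the node is online, or if fencing is enabled.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_node {
    const char *name;
    bool online;
};

static bool
demo_should_collect(const struct demo_node *node, bool fencing_enabled)
{
    return node->online || fencing_enabled;
}

int
main(void)
{
    struct demo_node nodes[] = {
        { "node1", true  },
        { "node2", false },
    };
    bool fencing_enabled = false;

    for (size_t i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++) {
        printf("%s: %s\n", nodes[i].name,
               demo_should_collect(&nodes[i], fencing_enabled)
               ? "collect history" : "skip");
    }
    return 0;
}
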