diff options
Diffstat (limited to 'daemons/fenced')
-rw-r--r-- | daemons/fenced/Makefile.am | 33 | ||||
-rw-r--r-- | daemons/fenced/cts-fence-helper.c | 43 | ||||
-rw-r--r-- | daemons/fenced/fenced_cib.c | 734 | ||||
-rw-r--r-- | daemons/fenced/fenced_commands.c | 128 | ||||
-rw-r--r-- | daemons/fenced/fenced_remote.c | 61 | ||||
-rw-r--r-- | daemons/fenced/fenced_scheduler.c | 225 | ||||
-rw-r--r-- | daemons/fenced/pacemaker-fenced.c | 929 | ||||
-rw-r--r-- | daemons/fenced/pacemaker-fenced.h | 31 |
8 files changed, 1166 insertions, 1018 deletions
diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am index 2ca0088..62aa864 100644 --- a/daemons/fenced/Makefile.am +++ b/daemons/fenced/Makefile.am @@ -14,7 +14,8 @@ include $(top_srcdir)/mk/man.mk halibdir = $(CRM_DAEMON_DIR) -halib_PROGRAMS = pacemaker-fenced cts-fence-helper +halib_PROGRAMS = pacemaker-fenced \ + cts-fence-helper noinst_HEADERS = pacemaker-fenced.h @@ -23,30 +24,36 @@ man7_MANS = pacemaker-fenced.7 endif cts_fence_helper_SOURCES = cts-fence-helper.c -cts_fence_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/fencing/libstonithd.la +cts_fence_helper_LDADD = $(top_builddir)/lib/fencing/libstonithd.la +cts_fence_helper_LDADD += $(top_builddir)/lib/common/libcrmcommon.la pacemaker_fenced_YFLAGS = -d pacemaker_fenced_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_fenced_LDFLAGS = $(LDFLAGS_HARDENED_EXE) -pacemaker_fenced_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cib/libcib.la \ - $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ - $(top_builddir)/lib/pengine/libpe_status.la \ - $(top_builddir)/lib/pacemaker/libpacemaker.la \ - $(CLUSTERLIBS) -pacemaker_fenced_SOURCES = pacemaker-fenced.c \ - fenced_commands.c \ - fenced_remote.c \ + +pacemaker_fenced_LDADD = $(top_builddir)/lib/pacemaker/libpacemaker.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/pengine/libpe_status.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/cib/libcib.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/cluster/libcrmcluster.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/fencing/libstonithd.la +pacemaker_fenced_LDADD += $(top_builddir)/lib/common/libcrmcommon.la +pacemaker_fenced_LDADD += $(CLUSTERLIBS) + +pacemaker_fenced_SOURCES = pacemaker-fenced.c \ + fenced_cib.c \ + fenced_commands.c \ + fenced_remote.c \ + fenced_scheduler.c \ fenced_history.c CLEANFILES = $(man7_MANS) $(man8_MANS) if BUILD_LEGACY_LINKS +.PHONY: install-exec-hook 
install-exec-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd && $(LN_S) pacemaker-fenced stonithd +.PHONY: uninstall-hook uninstall-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd endif diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c index e18a1f4..07bd500 100644 --- a/daemons/fenced/cts-fence-helper.c +++ b/daemons/fenced/cts-fence-helper.c @@ -212,10 +212,12 @@ run_fence_failure_test(void) cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for failure test", 1, 0); - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, + 3, 0), "Fence failure results off", 1, -ENODATA); - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", + PCMK_ACTION_REBOOT, 3, 0), "Fence failure results reboot", 1, -ENODATA); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), @@ -246,11 +248,13 @@ run_fence_failure_rollover_test(void) cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params), "Register device2 for rollover test", 1, 0); - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, + 3, 0), "Fence rollover results off", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ - single_test(st->cmds->fence(st, st_opts, "false_1_node2", "on", 3, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_ON, 3, + 0), "Fence rollover results on", 1, -ENODEV); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), @@ -278,7 +282,8 @@ run_standard_test(void) stonith_key_value_freeall(params, 1, 1); params = NULL; - single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), "list", 1, 0); + single_test(st->cmds->list(st, st_opts, 
"test-id", NULL, 1), + PCMK_ACTION_LIST, 1, 0); single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0); @@ -288,14 +293,17 @@ run_standard_test(void) single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1), "Status false_1_node1", 1, 0); - single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0), + single_test(st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, + 1, 0), "Fence unknown-host (expected failure)", 0, -ENODEV); - single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, + 1, 0), "Fence false_1_node1", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ - single_test(st->cmds->fence(st, st_opts, "false_1_node1", "on", 1, 0), + single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 1, + 0), "Unfence false_1_node1", 1, -ENODEV); /* Confirm that an invalid level index is rejected */ @@ -362,31 +370,31 @@ standard_dev_test(void) rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60, 0); + rc = st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence unknown-host: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "false_1_node1", "off", 60, 0); + rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); + rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", 
"false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); - rc = st->cmds->fence(st, st_opts, "some-host", "off", 10, 0); + rc = st->cmds->fence(st, st_opts, "some-host", PCMK_ACTION_OFF, 10, 0); crm_debug("Fence alias: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10); crm_debug("Status alias: %d", rc); - rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); + rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->remove_device(st, st_opts, "test-id"); @@ -426,7 +434,8 @@ test_async_fence_pass(int check_event) return; } - rc = st->cmds->fence(st, 0, "true_1_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + rc = st->cmds->fence(st, 0, "true_1_node1", PCMK_ACTION_OFF, + MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); @@ -459,7 +468,8 @@ test_async_fence_custom_timeout(int check_event) } begin = time(NULL); - rc = st->cmds->fence(st, 0, "custom_timeout_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + rc = st->cmds->fence(st, 0, "custom_timeout_node1", PCMK_ACTION_OFF, + MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); @@ -479,7 +489,8 @@ test_async_fence_timeout(int check_event) return; } - rc = st->cmds->fence(st, 0, "false_1_node2", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); + rc = st->cmds->fence(st, 0, "false_1_node2", PCMK_ACTION_OFF, + MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c new file mode 100644 index 0000000..e11bf68 --- /dev/null +++ b/daemons/fenced/fenced_cib.c @@ -0,0 +1,734 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +*/ + +#include <crm_internal.h> + +#include <stdbool.h> +#include <stdio.h> +#include <libxml/tree.h> +#include <libxml/xpath.h> + +#include <crm/crm.h> +#include <crm/msg_xml.h> +#include <crm/common/xml.h> + +#include <crm/cluster/internal.h> + +#include <crm/cib.h> +#include <crm/cib/internal.h> + +#include <pacemaker-fenced.h> + +static xmlNode *local_cib = NULL; +static cib_t *cib_api = NULL; +static bool have_cib_devices = FALSE; + +/*! + * \internal + * \brief Check whether a node has a specific attribute name/value + * + * \param[in] node Name of node to check + * \param[in] name Name of an attribute to look for + * \param[in] value The value the named attribute needs to be set to in order to be considered a match + * + * \return TRUE if the locally cached CIB has the specified node attribute + */ +gboolean +node_has_attr(const char *node, const char *name, const char *value) +{ + GString *xpath = NULL; + xmlNode *match; + + CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) + && (value != NULL), return FALSE); + + /* Search for the node's attributes in the CIB. While the schema allows + * multiple sets of instance attributes, and allows instance attributes to + * use id-ref to reference values elsewhere, that is intended for resources, + * so we ignore that here. 
+ */ + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, + "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE + "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS + "/" XML_CIB_TAG_NVPAIR + "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " + "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); + + match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); + + g_string_free(xpath, TRUE); + return (match != NULL); +} + +static void +add_topology_level(xmlNode *match) +{ + char *desc = NULL; + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + + CRM_CHECK(match != NULL, return); + + fenced_register_level(match, &desc, &result); + fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); + pcmk__reset_result(&result); + free(desc); +} + +static void +topology_remove_helper(const char *node, int level) +{ + char *desc = NULL; + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); + + crm_xml_add(data, F_STONITH_ORIGIN, __func__); + crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); + crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); + + fenced_unregister_level(data, &desc, &result); + fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); + pcmk__reset_result(&result); + free_xml(data); + free(desc); +} + +static void +remove_topology_level(xmlNode *match) +{ + int index = 0; + char *key = NULL; + + CRM_CHECK(match != NULL, return); + + key = stonith_level_key(match, fenced_target_by_unknown); + crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); + topology_remove_helper(key, index); + free(key); +} + +static void +register_fencing_topology(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + + remove_topology_level(match); + add_topology_level(match); + } +} + +/* Fencing +<diff crm_feature_set="3.0.6"> + <diff-removed> + 
<fencing-topology> + <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/> + <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/> + <fencing-level devices="disk,network" id="f-p2.1"/> + </fencing-topology> + </diff-removed> + <diff-added> + <fencing-topology> + <fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/> + <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/> + <fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/> + </fencing-topology> + </diff-added> +</diff> +*/ + +void +fencing_topology_init(void) +{ + xmlXPathObjectPtr xpathObj = NULL; + const char *xpath = "//" XML_TAG_FENCING_LEVEL; + + crm_trace("Full topology refresh"); + free_topology_list(); + init_topology_list(); + + /* Grab everything */ + xpathObj = xpath_search(local_cib, xpath); + register_fencing_topology(xpathObj); + + freeXpathObject(xpathObj); +} + +static void +remove_cib_device(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + const char *rsc_id = NULL; + const char *standard = NULL; + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if(match != NULL) { + standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); + } + + if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + continue; + } + + rsc_id = crm_element_value(match, XML_ATTR_ID); + + stonith_device_remove(rsc_id, true); + } +} + +static void +update_stonith_watchdog_timeout_ms(xmlNode *cib) +{ + long timeout_ms = 0; + xmlNode *stonith_watchdog_xml = NULL; + const char *value = NULL; + + stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", + cib, LOG_NEVER); + if (stonith_watchdog_xml) { + value = 
crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); + } + if (value) { + timeout_ms = crm_get_msec(value); + } + + if (timeout_ms < 0) { + timeout_ms = pcmk__auto_watchdog_timeout(); + } + + stonith_watchdog_timeout_ms = timeout_ms; +} + +/*! + * \internal + * \brief Update all STONITH device definitions based on current CIB + */ +static void +cib_devices_update(void) +{ + GHashTableIter iter; + stonith_device_t *device = NULL; + + crm_info("Updating devices to version %s.%s.%s", + crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), + crm_element_value(local_cib, XML_ATTR_GENERATION), + crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->cib_registered) { + device->dirty = TRUE; + } + } + + /* have list repopulated if cib has a watchdog-fencing-resource + TODO: keep a cached list for queries happening while we are refreshing + */ + g_list_free_full(stonith_watchdog_targets, free); + stonith_watchdog_targets = NULL; + + fenced_scheduler_run(local_cib); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->dirty) { + g_hash_table_iter_remove(&iter); + } + } +} + +static void +update_cib_stonith_devices_v1(const char *event, xmlNode * msg) +{ + const char *reason = "none"; + gboolean needs_update = FALSE; + xmlXPathObjectPtr xpath_obj = NULL; + + /* process new constraints */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + /* Safest and simplest to always recompute */ + needs_update = TRUE; + reason = "new location constraint"; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpath_obj, lpc); + + crm_log_xml_trace(match, "new constraint"); + } + } + freeXpathObject(xpath_obj); + + /* process 
deletions */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + remove_cib_device(xpath_obj); + } + freeXpathObject(xpath_obj); + + /* process additions */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + const char *rsc_id = NULL; + const char *standard = NULL; + xmlNode *match = getXpathResult(xpath_obj, lpc); + + rsc_id = crm_element_value(match, XML_ATTR_ID); + standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); + + if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + continue; + } + + crm_trace("Fencing resource %s was added or modified", rsc_id); + reason = "new resource"; + needs_update = TRUE; + } + } + freeXpathObject(xpath_obj); + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } +} + +static void +update_cib_stonith_devices_v2(const char *event, xmlNode * msg) +{ + xmlNode *change = NULL; + char *reason = NULL; + bool needs_update = FALSE; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + const char *shortpath = NULL; + + if ((op == NULL) || + (strcmp(op, "move") == 0) || + strstr(xpath, "/"XML_CIB_TAG_STATUS)) { + continue; + } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { + const char *rsc_id = NULL; + char *search = NULL; + char *mutable = NULL; + + if (strstr(xpath, XML_TAG_ATTR_SETS) || + strstr(xpath, XML_TAG_META_SETS)) { + needs_update = TRUE; + pcmk__str_update(&reason, + "(meta) 
attribute deleted from resource"); + break; + } + pcmk__str_update(&mutable, xpath); + rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); + if (rsc_id != NULL) { + rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); + search = strchr(rsc_id, '\''); + } + if (search != NULL) { + *search = 0; + stonith_device_remove(rsc_id, true); + /* watchdog_device_update called afterwards + to fall back to implicit definition if needed */ + } else { + crm_warn("Ignoring malformed CIB update (resource deletion)"); + } + free(mutable); + + } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || + strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || + strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { + shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); + reason = crm_strdup_printf("%s %s", op, shortpath+1); + needs_update = TRUE; + break; + } + } + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } else { + crm_trace("No updates for device list found in CIB"); + } + free(reason); +} + +static void +update_cib_stonith_devices(const char *event, xmlNode * msg) +{ + int format = 1; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); + switch(format) { + case 1: + update_cib_stonith_devices_v1(event, msg); + break; + case 2: + update_cib_stonith_devices_v2(event, msg); + break; + default: + crm_warn("Unknown patch format: %d", format); + } +} + +static void +watchdog_device_update(void) +{ + if (stonith_watchdog_timeout_ms > 0) { + if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && + !stonith_watchdog_targets) { + /* getting here watchdog-fencing enabled, no device there yet + and reason isn't stonith_watchdog_targets preventing that + */ + int rc; + xmlNode *xml; + + xml = create_device_registration_xml( + STONITH_WATCHDOG_ID, + st_namespace_internal, + STONITH_WATCHDOG_AGENT, + NULL, /* stonith_device_register will add our + 
own name as PCMK_STONITH_HOST_LIST param + so we can skip that here + */ + NULL); + rc = stonith_device_register(xml, TRUE); + free_xml(xml); + if (rc != pcmk_ok) { + rc = pcmk_legacy2rc(rc); + exit_code = CRM_EX_FATAL; + crm_crit("Cannot register watchdog pseudo fence agent: %s", + pcmk_rc_str(rc)); + stonith_shutdown(0); + } + } + + } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { + /* be silent if no device - todo parameter to stonith_device_remove */ + stonith_device_remove(STONITH_WATCHDOG_ID, true); + } +} + +/*! + * \internal + * \brief Query the full CIB + * + * \return Standard Pacemaker return code + */ +static int +fenced_query_cib(void) +{ + int rc = pcmk_ok; + + crm_trace("Re-requesting full CIB"); + rc = cib_api->cmds->query(cib_api, NULL, &local_cib, + cib_scope_local|cib_sync_call); + rc = pcmk_legacy2rc(rc); + if (rc == pcmk_rc_ok) { + CRM_ASSERT(local_cib != NULL); + } else { + crm_err("Couldn't retrieve the CIB: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); + } + return rc; +} + +static void +remove_fencing_topology(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if (match && crm_element_value(match, XML_DIFF_MARKER)) { + /* Deletion */ + int index = 0; + char *target = stonith_level_key(match, fenced_target_by_unknown); + + crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); + if (target == NULL) { + crm_err("Invalid fencing target in element %s", ID(match)); + + } else if (index <= 0) { + crm_err("Invalid level for %s in element %s", target, ID(match)); + + } else { + topology_remove_helper(target, index); + } + /* } else { Deal with modifications during the 'addition' stage */ + } + } +} + +static void +update_fencing_topology(const char *event, xmlNode * msg) +{ + int format = 1; + const char *xpath; + xmlXPathObjectPtr xpathObj = NULL; + xmlNode 
*patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); + + if(format == 1) { + /* Process deletions (only) */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + remove_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + /* Process additions and changes */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + register_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + } else if(format == 2) { + xmlNode *change = NULL; + int add[] = { 0, 0, 0 }; + int del[] = { 0, 0, 0 }; + + xml_patch_versions(patchset, add, del); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + + if(op == NULL) { + continue; + + } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { + /* Change to a specific entry */ + + crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); + if(strcmp(op, "move") == 0) { + continue; + + } else if(strcmp(op, "create") == 0) { + add_topology_level(change->children); + + } else if(strcmp(op, "modify") == 0) { + xmlNode *match = first_named_child(change, XML_DIFF_RESULT); + + if(match) { + remove_topology_level(match->children); + add_topology_level(match->children); + } + + } else if(strcmp(op, "delete") == 0) { + /* Nuclear option, all we have is the path and an id... 
not enough to remove a specific entry */ + crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + } + + } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { + /* Change to the topology in general */ + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + + } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { + /* Changes to the whole config section, possibly including the topology as a child */ + if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { + crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", + op, add[0], add[1], add[2], xpath); + + } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + } + + } else { + crm_trace("Nothing for us in %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + } + } + + } else { + crm_warn("Unknown patch format: %d", format); + } +} + +static void +update_cib_cache_cb(const char *event, xmlNode * msg) +{ + long timeout_ms_saved = stonith_watchdog_timeout_ms; + bool need_full_refresh = false; + + if(!have_cib_devices) { + crm_trace("Skipping updates until we get a full dump"); + return; + + } else if(msg == NULL) { + crm_trace("Missing %s update", event); + return; + } + + /* Maintain a local copy of the CIB so that we have full access + * to device definitions, location constraints, and node attributes + */ + if (local_cib != NULL) { + int rc = pcmk_ok; + xmlNode *patchset = NULL; + + crm_element_value_int(msg, F_CIB_RC, &rc); + if (rc != pcmk_ok) { + return; + } + + patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + rc = xml_apply_patchset(local_cib, patchset, TRUE); + switch 
(rc) { + case pcmk_ok: + case -pcmk_err_old_data: + break; + case -pcmk_err_diff_resync: + case -pcmk_err_diff_failed: + crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); + free_xml(local_cib); + local_cib = NULL; + break; + default: + crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); + free_xml(local_cib); + local_cib = NULL; + } + } + + if (local_cib == NULL) { + if (fenced_query_cib() != pcmk_rc_ok) { + return; + } + need_full_refresh = true; + } + + pcmk__refresh_node_caches_from_cib(local_cib); + update_stonith_watchdog_timeout_ms(local_cib); + + if (timeout_ms_saved != stonith_watchdog_timeout_ms) { + need_full_refresh = true; + } + + if (need_full_refresh) { + fencing_topology_init(); + cib_devices_update(); + } else { + // Partial refresh + update_fencing_topology(event, msg); + update_cib_stonith_devices(event, msg); + } + + watchdog_device_update(); +} + +static void +init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + crm_info("Updating device list from CIB"); + have_cib_devices = TRUE; + local_cib = copy_xml(output); + + pcmk__refresh_node_caches_from_cib(local_cib); + update_stonith_watchdog_timeout_ms(local_cib); + + fencing_topology_init(); + cib_devices_update(); + watchdog_device_update(); +} + +static void +cib_connection_destroy(gpointer user_data) +{ + if (stonith_shutdown_flag) { + crm_info("Connection to the CIB manager closed"); + return; + } else { + crm_crit("Lost connection to the CIB manager, shutting down"); + } + if (cib_api) { + cib_api->cmds->signoff(cib_api); + } + stonith_shutdown(0); +} + +/*! 
+ * \internal + * \brief Disconnect from CIB manager + */ +void +fenced_cib_cleanup(void) +{ + if (cib_api != NULL) { + cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, + update_cib_cache_cb); + cib__clean_up_connection(&cib_api); + } + free_xml(local_cib); + local_cib = NULL; +} + +void +setup_cib(void) +{ + int rc, retries = 0; + + cib_api = cib_new(); + if (cib_api == NULL) { + crm_err("No connection to the CIB manager"); + return; + } + + do { + sleep(retries); + rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); + } while (rc == -ENOTCONN && ++retries < 5); + + if (rc != pcmk_ok) { + crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); + + } else if (pcmk_ok != + cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { + crm_err("Could not set CIB notification callback"); + + } else { + rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); + cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", + init_cib_cache_cb); + cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); + crm_info("Watching for fencing topology changes"); + } +} diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index ba63cf8..7a62ed6 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -68,8 +68,6 @@ struct device_search_s { static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); -static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, - pcmk__client_t *client); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); @@ -124,7 +122,7 @@ static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing - && 
pcmk__str_eq(action, "on", pcmk__str_none); + && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int @@ -223,11 +221,11 @@ get_action_timeout(const stonith_device_t *device, const char *action, /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ - if (pcmk__str_eq(action, "reboot", pcmk__str_none) + if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); - action = "off"; + action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ @@ -277,7 +275,7 @@ fenced_device_reboot_action(const char *device_id) action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } - return pcmk__s(action, "reboot"); + return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! @@ -554,7 +552,7 @@ stonith_device_execute(stonith_device_t * device) #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { - if (pcmk__str_eq(cmd->action, "stop", pcmk__str_none)) { + if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); @@ -570,14 +568,14 @@ stonith_device_execute(stonith_device_t * device) #endif action_str = cmd->action; - if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_none) + if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? 
"" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); - action_str = "off"; + action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { @@ -691,7 +689,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) delay_base = delay_max; } if (delay_max > 0) { - // coverity[dont_call] We're not using rand() for security + // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; @@ -948,16 +946,16 @@ read_action_metadata(stonith_device_t *device) action = crm_element_value(match, "name"); - if (pcmk__str_eq(action, "list", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); - } else if (pcmk__str_eq(action, "status", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); - } else if (pcmk__str_eq(action, "reboot", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); - } else if (pcmk__str_eq(action, "on", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* "automatic" means the cluster will unfence node when it joins */ /* "required" is a deprecated synonym for "automatic" */ if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) { @@ -1024,16 +1022,16 @@ xml2device_params(const char *name, const xmlNode *dev) if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); - } else if (strcmp(value, "reboot") == 0) { + } else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster 
property instead)", STONITH_ATTR_ACTION_OP); - } else if (strcmp(value, "off") == 0) { - map_action(params, "reboot", value); + } else if (strcmp(value, PCMK_ACTION_OFF) == 0) { + map_action(params, PCMK_ACTION_REBOOT, value); } else { - map_action(params, "off", value); - map_action(params, "reboot", value); + map_action(params, PCMK_ACTION_OFF, value); + map_action(params, PCMK_ACTION_REBOOT, value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); @@ -1132,7 +1130,7 @@ build_device_from_xml(xmlNode *dev) device->automatic_unfencing = TRUE; } - if (is_action_required("on", device)) { + if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } @@ -1672,8 +1670,7 @@ unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, * search by xpath, because it might give multiple hits if the XML is the * CIB. */ - if ((xml != NULL) - && !pcmk__str_eq(TYPE(xml), XML_TAG_FENCING_LEVEL, pcmk__str_none)) { + if ((xml != NULL) && !pcmk__xe_is(xml, XML_TAG_FENCING_LEVEL)) { xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING); } @@ -1972,7 +1969,7 @@ execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) "Watchdog fence device not configured"); return; - } else if (pcmk__str_eq(action, "list", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, @@ -1980,7 +1977,7 @@ execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) NULL); return; - } else if (pcmk__str_eq(action, "monitor", pcmk__str_none)) { + } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } @@ -1994,7 +1991,8 @@ execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) "'%s' not found", id); return; - } else if (!device->api_registered 
&& !strcmp(action, "monitor")) { + } else if (!device->api_registered + && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); @@ -2104,14 +2102,14 @@ localhost_is_eligible_with_remap(const stonith_device_t *device, // Check potential remaps - if (pcmk__str_eq(action, "reboot", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ - if (localhost_is_eligible(device, "off", target, allow_self) - || localhost_is_eligible(device, "on", target, FALSE)) { + if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) + || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } @@ -2146,7 +2144,7 @@ can_fence_host_with_device(stonith_device_t *dev, /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ - if (pcmk__str_eq(action, "on", pcmk__str_none) + if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; @@ -2175,7 +2173,8 @@ can_fence_host_with_device(stonith_device_t *dev, time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { - int device_timeout = get_action_timeout(dev, "list", search->per_device_timeout); + int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, + search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may 
occur", @@ -2185,7 +2184,7 @@ can_fence_host_with_device(stonith_device_t *dev, crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); - schedule_internal_command(__func__, dev, "list", NULL, + schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ @@ -2207,7 +2206,7 @@ can_fence_host_with_device(stonith_device_t *dev, crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); - schedule_internal_command(__func__, dev, "status", target, + schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; @@ -2384,6 +2383,30 @@ add_action_reply(xmlNode *xml, const char *action, add_disallowed(child, action, device, target, allow_suicide); } +/*! + * \internal + * \brief Send a reply to a CPG peer or IPC client + * + * \param[in] reply XML reply to send + * \param[in] call_options Send synchronously if st_opt_sync_call is set + * \param[in] remote_peer If not NULL, name of peer node to send CPG reply + * \param[in,out] client If not NULL, client to send IPC reply + */ +static void +stonith_send_reply(const xmlNode *reply, int call_options, + const char *remote_peer, pcmk__client_t *client) +{ + CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), + return); + + if (remote_peer == NULL) { + do_local_reply(reply, client, call_options); + } else { + send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, + reply, FALSE); + } +} + static void stonith_query_capable_device_cb(GList * devices, void *user_data) { @@ -2429,15 +2452,16 @@ stonith_query_capable_device_cb(GList * devices, void *user_data) * capable device that doesn't support "reboot", remap to "off" instead. 
*/ if (!pcmk_is_set(device->flags, st_device_supports_reboot) - && pcmk__str_eq(query->action, "reboot", pcmk__str_none)) { + && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, + pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); - action = "off"; + action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); - if (pcmk__str_eq(query->action, "reboot", pcmk__str_none)) { + if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. @@ -2451,9 +2475,9 @@ stonith_query_capable_device_cb(GList * devices, void *user_data) */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); - add_action_reply(dev, "off", device, query->target, + add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); - add_action_reply(dev, "on", device, query->target, FALSE); + add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ @@ -2765,8 +2789,10 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) /* The device is ready to do something else now */ if (device) { - if (!device->verified && pcmk__result_ok(result) && - (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { + if (!device->verified && pcmk__result_ok(result) + && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, + PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, + NULL)) { device->verified = TRUE; } @@ -3052,30 +3078,6 @@ check_alternate_host(const char *target) return NULL; } -/*! 
- * \internal - * \brief Send a reply to a CPG peer or IPC client - * - * \param[in] reply XML reply to send - * \param[in] call_options Send synchronously if st_opt_sync_call is set - * \param[in] remote_peer If not NULL, name of peer node to send CPG reply - * \param[in,out] client If not NULL, client to send IPC reply - */ -static void -stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer, - pcmk__client_t *client) -{ - CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), - return); - - if (remote_peer == NULL) { - do_local_reply(reply, client, call_options); - } else { - send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, - reply, FALSE); - } -} - static void remove_relay_op(xmlNode * request) { diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index dc67947..843b3d4 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -292,7 +292,7 @@ init_stonith_remote_op_hash_table(GHashTable **table) static const char * op_requested_action(const remote_fencing_op_t *op) { - return ((op->phase > st_phase_requested)? "reboot" : op->action); + return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action); } /*! @@ -311,7 +311,7 @@ op_phase_off(remote_fencing_op_t *op) /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ - strcpy(op->action, "off"); + strcpy(op->action, PCMK_ACTION_OFF); } /*! @@ -329,7 +329,7 @@ op_phase_on(remote_fencing_op_t *op) "remapping to 'on' for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; - strcpy(op->action, "on"); + strcpy(op->action, PCMK_ACTION_ON); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. 
@@ -362,7 +362,7 @@ undo_op_remap(remote_fencing_op_t *op) crm_info("Undoing remap of reboot targeting %s for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; - strcpy(op->action, "reboot"); + strcpy(op->action, PCMK_ACTION_REBOOT); } } @@ -673,8 +673,8 @@ remote_op_timeout_one(gpointer userdata) "Peer did not return fence result within timeout"); // The requested delay has been applied for the first device - if (op->delay > 0) { - op->delay = 0; + if (op->client_delay > 0) { + op->client_delay = 0; crm_trace("Try another device for '%s' action targeting %s " "for client %s without delay " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); @@ -961,12 +961,12 @@ advance_topology_level(remote_fencing_op_t *op, bool empty_ok) set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level - if (op->level > 1 && op->delay > 0) { - op->delay = 0; + if ((op->level > 1) && (op->client_delay > 0)) { + op->client_delay = 0; } if ((g_list_next(op->devices_list) != NULL) - && pcmk__str_eq(op->action, "reboot", pcmk__str_none)) { + && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. 
(Think about @@ -1163,7 +1163,7 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays - crm_element_value_int(request, F_STONITH_DELAY, &(op->delay)); + crm_element_value_int(request, F_STONITH_DELAY, &(op->client_delay)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); @@ -1474,8 +1474,8 @@ get_device_timeout(const remote_fencing_op_t *op, return op->base_timeout; } - // op->delay < 0 means disable any static/random fencing delays - if (with_delay && op->delay >= 0) { + // op->client_delay < 0 means disable any static/random fencing delays + if (with_delay && (op->client_delay >= 0)) { // delay_base is eventually limited by delay_max delay = (props->delay_max[op->phase] > 0 ? props->delay_max[op->phase] : props->delay_base[op->phase]); @@ -1541,7 +1541,7 @@ get_op_total_timeout(const remote_fencing_op_t *op, GList *iter = NULL; GList *auto_list = NULL; - if (pcmk__str_eq(op->action, "on", pcmk__str_none) + if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none) && (op->automatic_list != NULL)) { auto_list = g_list_copy(op->automatic_list); } @@ -1620,7 +1620,7 @@ get_op_total_timeout(const remote_fencing_op_t *op, * up the total timeout. */ return ((total_timeout ? total_timeout : op->base_timeout) - + (op->delay > 0 ? op->delay : 0)); + + ((op->client_delay > 0)? 
op->client_delay : 0)); } static void @@ -1695,7 +1695,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) - && pcmk__str_eq(op->action, "on", pcmk__str_none)) { + && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); @@ -1724,8 +1724,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, op->target, op->client_name, op->originator); // The requested delay has been applied for the first device - if (op->delay > 0) { - op->delay = 0; + if (op->client_delay > 0) { + op->client_delay = 0; } request_peer_fencing(op, NULL); @@ -1794,7 +1794,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) * node back on when we should. */ device = op->devices->data; - if (pcmk__str_eq(fenced_device_reboot_action(device), "off", + if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF, pcmk__str_none)) { crm_info("Not turning %s back on using %s because the device is " "configured to stay off (pcmk_reboot_action='off')", @@ -1844,13 +1844,16 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) } if (peer) { - /* Take any requested fencing delay into account to prevent it from eating - * up the timeout. - */ - int timeout_one = (op->delay > 0 ? - TIMEOUT_MULTIPLY_FACTOR * op->delay : 0); + int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); + if (op->client_delay > 0) { + /* Take requested fencing delay into account to prevent it from + * eating up the timeout. 
+ */ + timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay; + } + crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); @@ -1859,7 +1862,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); - crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay); + crm_xml_add_int(remote_op, F_STONITH_DELAY, op->client_delay); if (device) { timeout_one += TIMEOUT_MULTIPLY_FACTOR * @@ -2097,7 +2100,7 @@ parse_action_specific(const xmlNode *xml, const char *peer, const char *device, } /* Handle devices with automatic unfencing */ - if (pcmk__str_eq(action, "on", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { int required = 0; crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); @@ -2160,11 +2163,11 @@ add_device_properties(const xmlNode *xml, remote_fencing_op_t *op, * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped. 
*/ - if (pcmk__str_eq(ID(child), "off", pcmk__str_none)) { - parse_action_specific(child, peer->host, device, "off", + if (pcmk__str_eq(ID(child), PCMK_ACTION_OFF, pcmk__str_none)) { + parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF, op, st_phase_off, props); - } else if (pcmk__str_eq(ID(child), "on", pcmk__str_none)) { - parse_action_specific(child, peer->host, device, "on", + } else if (pcmk__str_eq(ID(child), PCMK_ACTION_ON, pcmk__str_none)) { + parse_action_specific(child, peer->host, device, PCMK_ACTION_ON, op, st_phase_on, props); } } diff --git a/daemons/fenced/fenced_scheduler.c b/daemons/fenced/fenced_scheduler.c new file mode 100644 index 0000000..27d990f --- /dev/null +++ b/daemons/fenced/fenced_scheduler.c @@ -0,0 +1,225 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +*/ + +#include <crm_internal.h> + +#include <stdio.h> +#include <errno.h> +#include <glib.h> + +#include <crm/pengine/status.h> +#include <crm/pengine/internal.h> + +#include <pacemaker-internal.h> +#include <pacemaker-fenced.h> + +static pcmk_scheduler_t *scheduler = NULL; + +/*! + * \internal + * \brief Initialize scheduler data for fencer purposes + * + * \return Standard Pacemaker return code + */ +int +fenced_scheduler_init(void) +{ + pcmk__output_t *logger = NULL; + int rc = pcmk__log_output_new(&logger); + + if (rc != pcmk_rc_ok) { + return rc; + } + + scheduler = pe_new_working_set(); + if (scheduler == NULL) { + pcmk__output_free(logger); + return ENOMEM; + } + + pe__register_messages(logger); + pcmk__register_lib_messages(logger); + pcmk__output_set_log_level(logger, LOG_TRACE); + scheduler->priv = logger; + + return pcmk_rc_ok; +} + +/*! 
+ * \internal + * \brief Free all scheduler-related resources + */ +void +fenced_scheduler_cleanup(void) +{ + if (scheduler != NULL) { + pcmk__output_t *logger = scheduler->priv; + + if (logger != NULL) { + logger->finish(logger, CRM_EX_OK, true, NULL); + pcmk__output_free(logger); + scheduler->priv = NULL; + } + pe_free_working_set(scheduler); + scheduler = NULL; + } +} + +/*! + * \internal + * \brief Check whether the local node is in a resource's allowed node list + * + * \param[in] rsc Resource to check + * + * \return Pointer to node if found, otherwise NULL + */ +static pcmk_node_t * +local_node_allowed_for(const pcmk_resource_t *rsc) +{ + if ((rsc != NULL) && (stonith_our_uname != NULL)) { + GHashTableIter iter; + pcmk_node_t *node = NULL; + + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { + if (pcmk__str_eq(node->details->uname, stonith_our_uname, + pcmk__str_casei)) { + return node; + } + } + } + return NULL; +} + +/*! 
+ * \internal + * \brief If a given resource or any of its children are fencing devices, + * register the devices + * + * \param[in,out] data Resource to check + * \param[in,out] user_data Ignored + */ +static void +register_if_fencing_device(gpointer data, gpointer user_data) +{ + pcmk_resource_t *rsc = data; + + xmlNode *xml = NULL; + GHashTableIter hash_iter; + pcmk_node_t *node = NULL; + const char *name = NULL; + const char *value = NULL; + const char *rclass = NULL; + const char *agent = NULL; + const char *rsc_provides = NULL; + stonith_key_value_t *params = NULL; + + // If this is a collective resource, check children instead + if (rsc->children != NULL) { + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + register_if_fencing_device(iter->data, NULL); + if (pe_rsc_is_clone(rsc)) { + return; // Only one instance needs to be checked for clones + } + } + return; + } + + rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + return; // Not a fencing device + } + + if (pe__resource_is_disabled(rsc)) { + crm_info("Ignoring fencing device %s because it is disabled", rsc->id); + return; + } + + if ((stonith_watchdog_timeout_ms <= 0) && + pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { + crm_info("Ignoring fencing device %s " + "because watchdog fencing is disabled", rsc->id); + return; + } + + // Check whether local node is allowed to run resource + node = local_node_allowed_for(rsc); + if (node == NULL) { + crm_info("Ignoring fencing device %s " + "because local node is not allowed to run it", rsc->id); + return; + } + if (node->weight < 0) { + crm_info("Ignoring fencing device %s " + "because local node has preference %s for it", + rsc->id, pcmk_readable_score(node->weight)); + return; + } + + // If device is in a group, check whether local node is allowed for group + if ((rsc->parent != NULL) + && (rsc->parent->variant == 
pcmk_rsc_variant_group)) { + pcmk_node_t *group_node = local_node_allowed_for(rsc->parent); + + if ((group_node != NULL) && (group_node->weight < 0)) { + crm_info("Ignoring fencing device %s " + "because local node has preference %s for its group", + rsc->id, pcmk_readable_score(group_node->weight)); + return; + } + } + + crm_debug("Reloading configuration of fencing device %s", rsc->id); + + agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); + + get_meta_attributes(rsc->meta, rsc, node, scheduler); + rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); + + g_hash_table_iter_init(&hash_iter, pe_rsc_params(rsc, node, scheduler)); + while (g_hash_table_iter_next(&hash_iter, (gpointer *) &name, + (gpointer *) &value)) { + if ((name == NULL) || (value == NULL)) { + continue; + } + params = stonith_key_value_add(params, name, value); + } + + xml = create_device_registration_xml(pcmk__s(rsc->clone_name, rsc->id), + st_namespace_any, agent, params, + rsc_provides); + stonith_key_value_freeall(params, 1, 1); + CRM_ASSERT(stonith_device_register(xml, TRUE) == pcmk_ok); + free_xml(xml); +} + +/*! 
+ * \internal + * \brief Run the scheduler for fencer purposes + * + * \param[in] cib Cluster's current CIB + */ +void +fenced_scheduler_run(xmlNode *cib) +{ + CRM_CHECK((cib != NULL) && (scheduler != NULL), return); + + if (scheduler->now != NULL) { + crm_time_free(scheduler->now); + scheduler->now = NULL; + } + scheduler->localhost = stonith_our_uname; + pcmk__schedule_actions(cib, pcmk_sched_location_only + |pcmk_sched_no_compat + |pcmk_sched_no_counts, scheduler); + g_list_foreach(scheduler->resources, register_if_fencing_device, NULL); + + scheduler->input = NULL; // Wasn't a copy, so don't let API free it + pe_reset_working_set(scheduler); +} diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 4edda6c..7c69fb8 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -27,7 +27,6 @@ #include <crm/common/ipc.h> #include <crm/common/ipc_internal.h> #include <crm/common/output_internal.h> -#include <crm/cluster/internal.h> #include <crm/stonith-ng.h> #include <crm/fencing/internal.h> @@ -37,8 +36,6 @@ #include <crm/common/mainloop.h> #include <crm/cib/internal.h> -#include <crm/pengine/status.h> -#include <pacemaker-internal.h> #include <pacemaker-fenced.h> @@ -51,18 +48,9 @@ GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; -static gboolean stonith_shutdown_flag = FALSE; +gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; -static xmlNode *local_cib = NULL; -static pe_working_set_t *fenced_data_set = NULL; -static const unsigned long long data_set_flags = pe_flag_quick_location - | pe_flag_no_compat - | pe_flag_no_counts; - -static cib_t *cib_api = NULL; - -static pcmk__output_t *logger_out = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { @@ -77,9 +65,8 @@ static struct { gchar **log_files; } options; -static crm_exit_t exit_code = CRM_EX_OK; +crm_exit_t exit_code = CRM_EX_OK; 
-static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t @@ -241,7 +228,8 @@ stonith_peer_cs_destroy(gpointer user_data) #endif void -do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options) +do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, + int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; @@ -292,7 +280,7 @@ static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { - xmlNode *update_msg = user_data; + const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; @@ -443,589 +431,6 @@ fenced_send_level_notification(const char *op, send_config_notification(op, result, desc, g_hash_table_size(topology)); } -static void -topology_remove_helper(const char *node, int level) -{ - char *desc = NULL; - pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; - xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); - - crm_xml_add(data, F_STONITH_ORIGIN, __func__); - crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); - crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); - - fenced_unregister_level(data, &desc, &result); - fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); - pcmk__reset_result(&result); - free_xml(data); - free(desc); -} - -static void -remove_cib_device(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; - const char *standard = NULL; - xmlNode *match = getXpathResult(xpathObj, lpc); - - CRM_LOG_ASSERT(match != NULL); - if(match != NULL) { - standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); - } - - if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - continue; - } - - rsc_id = crm_element_value(match, XML_ATTR_ID); - - stonith_device_remove(rsc_id, true); - } -} - -static void -remove_topology_level(xmlNode *match) -{ - int index = 0; - 
char *key = NULL; - - CRM_CHECK(match != NULL, return); - - key = stonith_level_key(match, fenced_target_by_unknown); - crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); - topology_remove_helper(key, index); - free(key); -} - -static void -add_topology_level(xmlNode *match) -{ - char *desc = NULL; - pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; - - CRM_CHECK(match != NULL, return); - - fenced_register_level(match, &desc, &result); - fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); - pcmk__reset_result(&result); - free(desc); -} - -static void -remove_fencing_topology(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); - - CRM_LOG_ASSERT(match != NULL); - if (match && crm_element_value(match, XML_DIFF_MARKER)) { - /* Deletion */ - int index = 0; - char *target = stonith_level_key(match, fenced_target_by_unknown); - - crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); - if (target == NULL) { - crm_err("Invalid fencing target in element %s", ID(match)); - - } else if (index <= 0) { - crm_err("Invalid level for %s in element %s", target, ID(match)); - - } else { - topology_remove_helper(target, index); - } - /* } else { Deal with modifications during the 'addition' stage */ - } - } -} - -static void -register_fencing_topology(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); - - remove_topology_level(match); - add_topology_level(match); - } -} - -/* Fencing -<diff crm_feature_set="3.0.6"> - <diff-removed> - <fencing-topology> - <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/> - <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/> - <fencing-level devices="disk,network" 
id="f-p2.1"/> - </fencing-topology> - </diff-removed> - <diff-added> - <fencing-topology> - <fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/> - <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/> - <fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/> - </fencing-topology> - </diff-added> -</diff> -*/ - -static void -fencing_topology_init(void) -{ - xmlXPathObjectPtr xpathObj = NULL; - const char *xpath = "//" XML_TAG_FENCING_LEVEL; - - crm_trace("Full topology refresh"); - free_topology_list(); - init_topology_list(); - - /* Grab everything */ - xpathObj = xpath_search(local_cib, xpath); - register_fencing_topology(xpathObj); - - freeXpathObject(xpathObj); -} - -#define rsc_name(x) x->clone_name?x->clone_name:x->id - -/*! - * \internal - * \brief Check whether our uname is in a resource's allowed node list - * - * \param[in] rsc Resource to check - * - * \return Pointer to node object if found, NULL otherwise - */ -static pe_node_t * -our_node_allowed_for(const pe_resource_t *rsc) -{ - GHashTableIter iter; - pe_node_t *node = NULL; - - if (rsc && stonith_our_uname) { - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { - if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { - break; - } - node = NULL; - } - } - return node; -} - -static void -watchdog_device_update(void) -{ - if (stonith_watchdog_timeout_ms > 0) { - if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && - !stonith_watchdog_targets) { - /* getting here watchdog-fencing enabled, no device there yet - and reason isn't stonith_watchdog_targets preventing that - */ - int rc; - xmlNode *xml; - - xml = create_device_registration_xml( - STONITH_WATCHDOG_ID, - st_namespace_internal, - STONITH_WATCHDOG_AGENT, - NULL, /* stonith_device_register will add our - own name as 
PCMK_STONITH_HOST_LIST param - so we can skip that here - */ - NULL); - rc = stonith_device_register(xml, TRUE); - free_xml(xml); - if (rc != pcmk_ok) { - rc = pcmk_legacy2rc(rc); - exit_code = CRM_EX_FATAL; - crm_crit("Cannot register watchdog pseudo fence agent: %s", - pcmk_rc_str(rc)); - stonith_shutdown(0); - } - } - - } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { - /* be silent if no device - todo parameter to stonith_device_remove */ - stonith_device_remove(STONITH_WATCHDOG_ID, true); - } -} - -static void -update_stonith_watchdog_timeout_ms(xmlNode *cib) -{ - long timeout_ms = 0; - xmlNode *stonith_watchdog_xml = NULL; - const char *value = NULL; - - stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", - cib, LOG_NEVER); - if (stonith_watchdog_xml) { - value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); - } - if (value) { - timeout_ms = crm_get_msec(value); - } - - if (timeout_ms < 0) { - timeout_ms = pcmk__auto_watchdog_timeout(); - } - - stonith_watchdog_timeout_ms = timeout_ms; -} - -/*! - * \internal - * \brief If a resource or any of its children are STONITH devices, update their - * definitions given a cluster working set. - * - * \param[in,out] rsc Resource to check - * \param[in,out] data_set Cluster working set with device information - */ -static void -cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) -{ - pe_node_t *node = NULL; - const char *value = NULL; - const char *rclass = NULL; - pe_node_t *parent = NULL; - - /* If this is a complex resource, check children rather than this resource itself. */ - if(rsc->children) { - GList *gIter = NULL; - for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { - cib_device_update(gIter->data, data_set); - if(pe_rsc_is_clone(rsc)) { - crm_trace("Only processing one copy of the clone %s", rsc->id); - break; - } - } - return; - } - - /* We only care about STONITH resources. 
*/ - rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - return; - } - - /* If this STONITH resource is disabled, remove it. */ - if (pe__resource_is_disabled(rsc)) { - crm_info("Device %s has been disabled", rsc->id); - return; - } - - /* if watchdog-fencing is disabled handle any watchdog-fence - resource as if it was disabled - */ - if ((stonith_watchdog_timeout_ms <= 0) && - pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { - crm_info("Watchdog-fencing disabled thus handling " - "device %s as disabled", rsc->id); - return; - } - - /* Check whether our node is allowed for this resource (and its parent if in a group) */ - node = our_node_allowed_for(rsc); - if (rsc->parent && (rsc->parent->variant == pe_group)) { - parent = our_node_allowed_for(rsc->parent); - } - - if(node == NULL) { - /* Our node is disallowed, so remove the device */ - GHashTableIter iter; - - crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { - crm_trace("Available: %s = %d", pe__node_name(node), node->weight); - } - - return; - - } else if(node->weight < 0 || (parent && parent->weight < 0)) { - /* Our node (or its group) is disallowed by score, so remove the device */ - int score = (node->weight < 0)? 
node->weight : parent->weight; - - crm_info("Device %s has been disabled on %s: score=%s", - rsc->id, stonith_our_uname, pcmk_readable_score(score)); - return; - - } else { - /* Our node is allowed, so update the device information */ - int rc; - xmlNode *data; - GHashTable *rsc_params = NULL; - GHashTableIter gIter; - stonith_key_value_t *params = NULL; - - const char *name = NULL; - const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); - const char *rsc_provides = NULL; - - crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); - rsc_params = pe_rsc_params(rsc, node, data_set); - get_meta_attributes(rsc->meta, rsc, node, data_set); - - rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); - - g_hash_table_iter_init(&gIter, rsc_params); - while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { - if (!name || !value) { - continue; - } - params = stonith_key_value_add(params, name, value); - crm_trace(" %s=%s", name, value); - } - - data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, - agent, params, rsc_provides); - stonith_key_value_freeall(params, 1, 1); - rc = stonith_device_register(data, TRUE); - CRM_ASSERT(rc == pcmk_ok); - free_xml(data); - } -} - -/*! 
- * \internal - * \brief Update all STONITH device definitions based on current CIB - */ -static void -cib_devices_update(void) -{ - GHashTableIter iter; - stonith_device_t *device = NULL; - - crm_info("Updating devices to version %s.%s.%s", - crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), - crm_element_value(local_cib, XML_ATTR_GENERATION), - crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); - - if (fenced_data_set->now != NULL) { - crm_time_free(fenced_data_set->now); - fenced_data_set->now = NULL; - } - fenced_data_set->localhost = stonith_our_uname; - pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set); - - g_hash_table_iter_init(&iter, device_list); - while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { - if (device->cib_registered) { - device->dirty = TRUE; - } - } - - /* have list repopulated if cib has a watchdog-fencing-resource - TODO: keep a cached list for queries happening while we are refreshing - */ - g_list_free_full(stonith_watchdog_targets, free); - stonith_watchdog_targets = NULL; - g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); - - g_hash_table_iter_init(&iter, device_list); - while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { - if (device->dirty) { - g_hash_table_iter_remove(&iter); - } - } - - fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it - pe_reset_working_set(fenced_data_set); -} - -static void -update_cib_stonith_devices_v2(const char *event, xmlNode * msg) -{ - xmlNode *change = NULL; - char *reason = NULL; - bool needs_update = FALSE; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - for (change = pcmk__xml_first_child(patchset); change != NULL; - change = pcmk__xml_next(change)) { - const char *op = crm_element_value(change, XML_DIFF_OP); - const char *xpath = crm_element_value(change, XML_DIFF_PATH); - const char *shortpath = NULL; - - if ((op == NULL) || - (strcmp(op, "move") == 0) || - 
strstr(xpath, "/"XML_CIB_TAG_STATUS)) { - continue; - } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { - const char *rsc_id = NULL; - char *search = NULL; - char *mutable = NULL; - - if (strstr(xpath, XML_TAG_ATTR_SETS) || - strstr(xpath, XML_TAG_META_SETS)) { - needs_update = TRUE; - pcmk__str_update(&reason, - "(meta) attribute deleted from resource"); - break; - } - pcmk__str_update(&mutable, xpath); - rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); - if (rsc_id != NULL) { - rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); - search = strchr(rsc_id, '\''); - } - if (search != NULL) { - *search = 0; - stonith_device_remove(rsc_id, true); - /* watchdog_device_update called afterwards - to fall back to implicit definition if needed */ - } else { - crm_warn("Ignoring malformed CIB update (resource deletion)"); - } - free(mutable); - - } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || - strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || - strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { - shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); - reason = crm_strdup_printf("%s %s", op, shortpath+1); - needs_update = TRUE; - break; - } - } - - if(needs_update) { - crm_info("Updating device list from CIB: %s", reason); - cib_devices_update(); - } else { - crm_trace("No updates for device list found in CIB"); - } - free(reason); -} - - -static void -update_cib_stonith_devices_v1(const char *event, xmlNode * msg) -{ - const char *reason = "none"; - gboolean needs_update = FALSE; - xmlXPathObjectPtr xpath_obj = NULL; - - /* process new constraints */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); - if (numXpathResults(xpath_obj) > 0) { - int max = numXpathResults(xpath_obj), lpc = 0; - - /* Safest and simplest to always recompute */ - needs_update = TRUE; - reason = "new location constraint"; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpath_obj, 
lpc); - - crm_log_xml_trace(match, "new constraint"); - } - } - freeXpathObject(xpath_obj); - - /* process deletions */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); - if (numXpathResults(xpath_obj) > 0) { - remove_cib_device(xpath_obj); - } - freeXpathObject(xpath_obj); - - /* process additions */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); - if (numXpathResults(xpath_obj) > 0) { - int max = numXpathResults(xpath_obj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; - const char *standard = NULL; - xmlNode *match = getXpathResult(xpath_obj, lpc); - - rsc_id = crm_element_value(match, XML_ATTR_ID); - standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); - - if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - continue; - } - - crm_trace("Fencing resource %s was added or modified", rsc_id); - reason = "new resource"; - needs_update = TRUE; - } - } - freeXpathObject(xpath_obj); - - if(needs_update) { - crm_info("Updating device list from CIB: %s", reason); - cib_devices_update(); - } -} - -static void -update_cib_stonith_devices(const char *event, xmlNode * msg) -{ - int format = 1; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - CRM_ASSERT(patchset); - crm_element_value_int(patchset, "format", &format); - switch(format) { - case 1: - update_cib_stonith_devices_v1(event, msg); - break; - case 2: - update_cib_stonith_devices_v2(event, msg); - break; - default: - crm_warn("Unknown patch format: %d", format); - } -} - -/*! 
- * \internal - * \brief Check whether a node has a specific attribute name/value - * - * \param[in] node Name of node to check - * \param[in] name Name of an attribute to look for - * \param[in] value The value the named attribute needs to be set to in order to be considered a match - * - * \return TRUE if the locally cached CIB has the specified node attribute - */ -gboolean -node_has_attr(const char *node, const char *name, const char *value) -{ - GString *xpath = NULL; - xmlNode *match; - - CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) - && (value != NULL), return FALSE); - - /* Search for the node's attributes in the CIB. While the schema allows - * multiple sets of instance attributes, and allows instance attributes to - * use id-ref to reference values elsewhere, that is intended for resources, - * so we ignore that here. - */ - xpath = g_string_sized_new(256); - pcmk__g_strcat(xpath, - "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE - "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS - "/" XML_CIB_TAG_NVPAIR - "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " - "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); - - match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); - - g_string_free(xpath, TRUE); - return (match != NULL); -} - /*! 
* \internal * \brief Check whether a node does watchdog-fencing @@ -1043,201 +448,7 @@ node_does_watchdog_fencing(const char *node) pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } - -static void -update_fencing_topology(const char *event, xmlNode * msg) -{ - int format = 1; - const char *xpath; - xmlXPathObjectPtr xpathObj = NULL; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - CRM_ASSERT(patchset); - crm_element_value_int(patchset, "format", &format); - - if(format == 1) { - /* Process deletions (only) */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - remove_fencing_topology(xpathObj); - freeXpathObject(xpathObj); - - /* Process additions and changes */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - register_fencing_topology(xpathObj); - freeXpathObject(xpathObj); - - } else if(format == 2) { - xmlNode *change = NULL; - int add[] = { 0, 0, 0 }; - int del[] = { 0, 0, 0 }; - - xml_patch_versions(patchset, add, del); - - for (change = pcmk__xml_first_child(patchset); change != NULL; - change = pcmk__xml_next(change)) { - const char *op = crm_element_value(change, XML_DIFF_OP); - const char *xpath = crm_element_value(change, XML_DIFF_PATH); - - if(op == NULL) { - continue; - - } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { - /* Change to a specific entry */ - - crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); - if(strcmp(op, "move") == 0) { - continue; - - } else if(strcmp(op, "create") == 0) { - add_topology_level(change->children); - - } else if(strcmp(op, "modify") == 0) { - xmlNode *match = first_named_child(change, XML_DIFF_RESULT); - - if(match) { - remove_topology_level(match->children); - add_topology_level(match->children); - } - - } else if(strcmp(op, "delete") == 0) { - /* Nuclear option, all we 
have is the path and an id... not enough to remove a specific entry */ - crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - } - - } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { - /* Change to the topology in general */ - crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - - } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { - /* Changes to the whole config section, possibly including the topology as a whild */ - if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { - crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", - op, add[0], add[1], add[2], xpath); - - } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { - crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - } - - } else { - crm_trace("Nothing for us in %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - } - } - - } else { - crm_warn("Unknown patch format: %d", format); - } -} -static bool have_cib_devices = FALSE; - -static void -update_cib_cache_cb(const char *event, xmlNode * msg) -{ - int rc = pcmk_ok; - long timeout_ms_saved = stonith_watchdog_timeout_ms; - bool need_full_refresh = false; - - if(!have_cib_devices) { - crm_trace("Skipping updates until we get a full dump"); - return; - - } else if(msg == NULL) { - crm_trace("Missing %s update", event); - return; - } - - /* Maintain a local copy of the CIB so that we have full access - * to device definitions, location constraints, and node attributes - */ - if (local_cib != NULL) { - int rc = pcmk_ok; - xmlNode *patchset = NULL; - - crm_element_value_int(msg, F_CIB_RC, &rc); - if (rc != pcmk_ok) { - return; - } - - patchset = 
get_message_xml(msg, F_CIB_UPDATE_RESULT); - pcmk__output_set_log_level(logger_out, LOG_TRACE); - out->message(out, "xml-patchset", patchset); - rc = xml_apply_patchset(local_cib, patchset, TRUE); - switch (rc) { - case pcmk_ok: - case -pcmk_err_old_data: - break; - case -pcmk_err_diff_resync: - case -pcmk_err_diff_failed: - crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); - free_xml(local_cib); - local_cib = NULL; - break; - default: - crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); - free_xml(local_cib); - local_cib = NULL; - } - } - - if (local_cib == NULL) { - crm_trace("Re-requesting full CIB"); - rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); - if(rc != pcmk_ok) { - crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); - return; - } - CRM_ASSERT(local_cib != NULL); - need_full_refresh = true; - } - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - - if (timeout_ms_saved != stonith_watchdog_timeout_ms) { - need_full_refresh = true; - } - - if (need_full_refresh) { - fencing_topology_init(); - cib_devices_update(); - } else { - // Partial refresh - update_fencing_topology(event, msg); - update_cib_stonith_devices(event, msg); - } - - watchdog_device_update(); -} - -static void -init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) -{ - crm_info("Updating device list from CIB"); - have_cib_devices = TRUE; - local_cib = copy_xml(output); - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - - fencing_topology_init(); - cib_devices_update(); - watchdog_device_update(); -} - -static void +void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); @@ -1248,28 +459,9 @@ stonith_shutdown(int nsig) } static void -cib_connection_destroy(gpointer user_data) -{ - if (stonith_shutdown_flag) { - 
crm_info("Connection to the CIB manager closed"); - return; - } else { - crm_crit("Lost connection to the CIB manager, shutting down"); - } - if (cib_api) { - cib_api->cmds->signoff(cib_api); - } - stonith_shutdown(0); -} - -static void stonith_cleanup(void) { - if (cib_api) { - cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb); - cib_api->cmds->signoff(cib_api); - } - + fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } @@ -1284,9 +476,6 @@ stonith_cleanup(void) free(stonith_our_uname); stonith_our_uname = NULL; - - free_xml(local_cib); - local_cib = NULL; } static gboolean @@ -1298,38 +487,6 @@ stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, return TRUE; } -static void -setup_cib(void) -{ - int rc, retries = 0; - - cib_api = cib_new(); - if (cib_api == NULL) { - crm_err("No connection to the CIB manager"); - return; - } - - do { - sleep(retries); - rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); - } while (rc == -ENOTCONN && ++retries < 5); - - if (rc != pcmk_ok) { - crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); - - } else if (pcmk_ok != - cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { - crm_err("Could not set CIB notification callback"); - - } else { - rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); - cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", - init_cib_cache_cb); - cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); - crm_info("Watching for fencing topology changes"); - } -} - struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, @@ -1435,10 +592,11 @@ static pcmk__cluster_option_t fencer_options[] = { "Then use this to specify the maximum number of actions can be performed in parallel on this device. 
-1 is unlimited.") }, { - "pcmk_reboot_action",NULL, "string", NULL, "reboot", NULL, - N_("Advanced use only: An alternate command to run instead of 'reboot'"), + "pcmk_reboot_action", NULL, "string", NULL, + PCMK_ACTION_REBOOT, NULL, + N_("Advanced use only: An alternate command to run instead of 'reboot'"), N_("Some devices do not support the standard commands or may provide additional ones.\n" - "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") }, { "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL, @@ -1454,10 +612,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.") }, { - "pcmk_off_action",NULL, "string", NULL, "off", NULL, - N_("Advanced use only: An alternate command to run instead of \'off\'"), + "pcmk_off_action", NULL, "string", NULL, + PCMK_ACTION_OFF, NULL, + N_("Advanced use only: An alternate command to run instead of \'off\'"), N_("Some devices do not support the standard commands or may provide additional ones." - "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") }, { "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL, @@ -1473,10 +632,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.") }, { - "pcmk_on_action",NULL, "string", NULL, "on", NULL, - N_("Advanced use only: An alternate command to run instead of 'on'"), + "pcmk_on_action", NULL, "string", NULL, + PCMK_ACTION_ON, NULL, + N_("Advanced use only: An alternate command to run instead of 'on'"), N_("Some devices do not support the standard commands or may provide additional ones." 
- "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") }, { "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL, @@ -1492,10 +652,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.") }, { - "pcmk_list_action",NULL, "string", NULL, "list", NULL, - N_("Advanced use only: An alternate command to run instead of \'list\'"), + "pcmk_list_action",NULL, "string", NULL, + PCMK_ACTION_LIST, NULL, + N_("Advanced use only: An alternate command to run instead of \'list\'"), N_("Some devices do not support the standard commands or may provide additional ones." - "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") }, { "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL, @@ -1511,7 +672,8 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.") }, { - "pcmk_monitor_action",NULL, "string", NULL, "monitor", NULL, + "pcmk_monitor_action", NULL, "string", NULL, + PCMK_ACTION_MONITOR, NULL, N_("Advanced use only: An alternate command to run instead of \'monitor\'"), N_("Some devices do not support the standard commands or may provide additional ones." 
"Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.") @@ -1530,10 +692,11 @@ static pcmk__cluster_option_t fencer_options[] = { " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.") }, { - "pcmk_status_action",NULL, "string", NULL, "status", NULL, - N_("Advanced use only: An alternate command to run instead of \'status\'"), + "pcmk_status_action", NULL, "string", NULL, + PCMK_ACTION_STATUS, NULL, + N_("Advanced use only: An alternate command to run instead of \'status\'"), N_("Some devices do not support the standard commands or may provide additional ones." - "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") + "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") }, { "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL, @@ -1568,13 +731,13 @@ fencer_metadata(void) static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, - "Deprecated (will be removed in a future release)", NULL }, + N_("Deprecated (will be removed in a future release)"), NULL }, { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, - stand_alone_cpg_cb, "Intended for use in regression testing only", NULL }, + stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, - &options.log_files, "Send logs to the additional named logfile", NULL }, + &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; @@ -1649,7 +812,7 @@ main(int argc, char **argv) goto done; } - if (crm_ipc_connect(old_instance)) { + if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); @@ -1665,26 +828,15 @@ main(int argc, char **argv) crm_peer_init(); - 
fenced_data_set = pe_new_working_set(); - CRM_ASSERT(fenced_data_set != NULL); - - cluster = pcmk_cluster_new(); - - /* Initialize the logger prior to setup_cib(). update_cib_cache_cb() may - * call the "xml-patchset" message function, which needs the logger, after - * setup_cib() has run. - */ - rc = pcmk__log_output_new(&logger_out) != pcmk_rc_ok; + rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, - "Error creating output format log: %s", pcmk_rc_str(rc)); + "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } - pe__register_messages(logger_out); - pcmk__register_lib_messages(logger_out); - pcmk__output_set_log_level(logger_out, LOG_TRACE); - fenced_data_set->priv = logger_out; + + cluster = pcmk_cluster_new(); if (!stand_alone) { #if SUPPORT_COROSYNC @@ -1732,15 +884,10 @@ done: stonith_cleanup(); pcmk_cluster_free(cluster); - pe_free_working_set(fenced_data_set); + fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); - if (logger_out != NULL) { - logger_out->finish(logger_out, exit_code, true, NULL); - pcmk__output_free(logger_out); - } - if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index a3d2e17..220978a 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -6,7 +6,12 @@ */ #include <stdint.h> // uint32_t, uint64_t +#include <libxml/tree.h> // xmlNode + #include <crm/common/mainloop.h> +#include <crm/cluster.h> +#include <crm/stonith-ng.h> +#include <crm/fencing/internal.h> /*! * \internal @@ -104,9 +109,12 @@ typedef struct remote_fencing_op_s { * values associated with the devices this fencing operation may call */ gint total_timeout; - /*! Requested fencing delay. - * Value -1 means disable any static/random fencing delays. */ - int delay; + /*! 
+ * Fencing delay (in seconds) requested by API client (used by controller to + * implement priority-fencing-delay). A value of -1 means disable all + * configured delays. + */ + int client_delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations @@ -205,6 +213,8 @@ typedef struct stonith_topology_s { } stonith_topology_t; +void stonith_shutdown(int nsig); + void init_device_list(void); void free_device_list(void); void init_topology_list(void); @@ -231,7 +241,7 @@ void fenced_unregister_level(xmlNode *msg, char **desc, stonith_topology_t *find_topology_for_host(const char *host); -void do_local_reply(xmlNode *notify_src, pcmk__client_t *client, +void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options); xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, @@ -280,6 +290,14 @@ gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); +void fencing_topology_init(void); +void setup_cib(void); +void fenced_cib_cleanup(void); + +int fenced_scheduler_init(void); +void fenced_scheduler_cleanup(void); +void fenced_scheduler_run(xmlNode *cib); + static inline void fenced_set_protocol_error(pcmk__action_result_t *result) { @@ -299,7 +317,7 @@ fenced_set_protocol_error(pcmk__action_result_t *result) static inline uint32_t fenced_support_flag(const char *action) { - if (pcmk__str_eq(action, "on", pcmk__str_none)) { + if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { return st_device_supports_on; } return st_device_supports_none; @@ -311,5 +329,6 @@ extern GHashTable *device_list; extern GHashTable *topology; extern long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; - extern GHashTable *stonith_remote_op_list; +extern crm_exit_t exit_code; +extern gboolean stonith_shutdown_flag; |