summaryrefslogtreecommitdiffstats
path: root/daemons/fenced/pacemaker-fenced.c
diff options
context:
space:
mode:
Diffstat (limited to 'daemons/fenced/pacemaker-fenced.c')
-rw-r--r--daemons/fenced/pacemaker-fenced.c1751
1 files changed, 1751 insertions, 0 deletions
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
new file mode 100644
index 0000000..4edda6c
--- /dev/null
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -0,0 +1,1751 @@
+/*
+ * Copyright 2009-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/utsname.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h> // PRIu32, PRIx32
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/output_internal.h>
+#include <crm/cluster/internal.h>
+
+#include <crm/stonith-ng.h>
+#include <crm/fencing/internal.h>
+#include <crm/common/xml.h>
+#include <crm/common/xml_internal.h>
+
+#include <crm/common/mainloop.h>
+
+#include <crm/cib/internal.h>
+#include <crm/pengine/status.h>
+#include <pacemaker-internal.h>
+
+#include <pacemaker-fenced.h>
+
+#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
+
+char *stonith_our_uname = NULL;
+long stonith_watchdog_timeout_ms = 0;
+GList *stonith_watchdog_targets = NULL;
+
+static GMainLoop *mainloop = NULL;
+
+gboolean stand_alone = FALSE;
+static gboolean stonith_shutdown_flag = FALSE;
+
+static qb_ipcs_service_t *ipcs = NULL;
+static xmlNode *local_cib = NULL;
+static pe_working_set_t *fenced_data_set = NULL;
+static const unsigned long long data_set_flags = pe_flag_quick_location
+ | pe_flag_no_compat
+ | pe_flag_no_counts;
+
+static cib_t *cib_api = NULL;
+
+static pcmk__output_t *logger_out = NULL;
+static pcmk__output_t *out = NULL;
+
+pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+static struct {
+ bool no_cib_connect;
+ gchar **log_files;
+} options;
+
+static crm_exit_t exit_code = CRM_EX_OK;
+
+static void stonith_shutdown(int nsig);
+static void stonith_cleanup(void);
+
+static int32_t
+st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ if (stonith_shutdown_flag) {
+ crm_info("Ignoring new client [%d] during shutdown",
+ pcmk__client_pid(c));
+ return -EPERM;
+ }
+
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+/* Exit code means? */
+static int32_t
+st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ int call_options = 0;
+ xmlNode *request = NULL;
+ pcmk__client_t *c = pcmk__find_client(qbc);
+ const char *op = NULL;
+
+ if (c == NULL) {
+ crm_info("Invalid client: %p", qbc);
+ return 0;
+ }
+
+ request = pcmk__client_data2xml(c, data, &id, &flags);
+ if (request == NULL) {
+ pcmk__ipc_send_ack(c, id, flags, "nack", NULL, CRM_EX_PROTOCOL);
+ return 0;
+ }
+
+
+ op = crm_element_value(request, F_CRM_TASK);
+ if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
+ crm_xml_add(request, F_TYPE, T_STONITH_NG);
+ crm_xml_add(request, F_STONITH_OPERATION, op);
+ crm_xml_add(request, F_STONITH_CLIENTID, c->id);
+ crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
+ crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
+
+ send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
+ free_xml(request);
+ return 0;
+ }
+
+ if (c->name == NULL) {
+ const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
+
+ if (value == NULL) {
+ value = "unknown";
+ }
+ c->name = crm_strdup_printf("%s.%u", value, c->pid);
+ }
+
+ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
+ crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
+ " from client %s", flags, call_options, id, pcmk__client_name(c));
+
+ if (pcmk_is_set(call_options, st_opt_sync_call)) {
+ CRM_ASSERT(flags & crm_ipc_client_response);
+ CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
+ c->request_id = id; /* Reply only to the last one */
+ }
+
+ crm_xml_add(request, F_STONITH_CLIENTID, c->id);
+ crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
+ crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
+
+ crm_log_xml_trace(request, "ipc-received");
+ stonith_command(c, id, flags, request, NULL);
+
+ free_xml(request);
+ return 0;
+}
+
+/* Error code means? */
+static int32_t
+st_ipc_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ return 0;
+ }
+
+ crm_trace("Connection %p closed", c);
+ pcmk__free_client(client);
+
+ /* 0 means: yes, go ahead and destroy the connection */
+ return 0;
+}
+
+static void
+st_ipc_destroy(qb_ipcs_connection_t * c)
+{
+ crm_trace("Connection %p destroyed", c);
+ st_ipc_closed(c);
+}
+
+static void
+stonith_peer_callback(xmlNode * msg, void *private_data)
+{
+ const char *remote_peer = crm_element_value(msg, F_ORIG);
+ const char *op = crm_element_value(msg, F_STONITH_OPERATION);
+
+ if (pcmk__str_eq(op, "poke", pcmk__str_none)) {
+ return;
+ }
+
+ crm_log_xml_trace(msg, "Peer[inbound]");
+ stonith_command(NULL, 0, 0, msg, remote_peer);
+}
+
+#if SUPPORT_COROSYNC
+static void
+stonith_peer_ais_callback(cpg_handle_t handle,
+ const struct cpg_name *groupName,
+ uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
+{
+ uint32_t kind = 0;
+ xmlNode *xml = NULL;
+ const char *from = NULL;
+ char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
+
+ if(data == NULL) {
+ return;
+ }
+ if (kind == crm_class_cluster) {
+ xml = string2xml(data);
+ if (xml == NULL) {
+ crm_err("Invalid XML: '%.120s'", data);
+ free(data);
+ return;
+ }
+ crm_xml_add(xml, F_ORIG, from);
+ /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
+ stonith_peer_callback(xml, NULL);
+ }
+
+ free_xml(xml);
+ free(data);
+ return;
+}
+
+static void
+stonith_peer_cs_destroy(gpointer user_data)
+{
+ crm_crit("Lost connection to cluster layer, shutting down");
+ stonith_shutdown(0);
+}
+#endif
+
+void
+do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options)
+{
+ /* send callback to originating child */
+ int local_rc = pcmk_rc_ok;
+ int rid = 0;
+ uint32_t ipc_flags = crm_ipc_server_event;
+
+ if (pcmk_is_set(call_options, st_opt_sync_call)) {
+ CRM_LOG_ASSERT(client->request_id);
+ rid = client->request_id;
+ client->request_id = 0;
+ ipc_flags = crm_ipc_flags_none;
+ }
+
+ local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
+ if (local_rc == pcmk_rc_ok) {
+ crm_trace("Sent response %d to client %s",
+ rid, pcmk__client_name(client));
+ } else {
+ crm_warn("%synchronous reply to client %s failed: %s",
+ (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"),
+ pcmk__client_name(client), pcmk_rc_str(local_rc));
+ }
+}
+
+uint64_t
+get_stonith_flag(const char *name)
+{
+ if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
+ return st_callback_notify_fence;
+
+ } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
+ return st_callback_device_add;
+
+ } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
+ return st_callback_device_del;
+
+ } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) {
+ return st_callback_notify_history;
+
+ } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) {
+ return st_callback_notify_history_synced;
+
+ }
+ return st_callback_unknown;
+}
+
+static void
+stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
+{
+
+ xmlNode *update_msg = user_data;
+ pcmk__client_t *client = value;
+ const char *type = NULL;
+
+ CRM_CHECK(client != NULL, return);
+ CRM_CHECK(update_msg != NULL, return);
+
+ type = crm_element_value(update_msg, F_SUBTYPE);
+ CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
+
+ if (client->ipcs == NULL) {
+ crm_trace("Skipping client with NULL channel");
+ return;
+ }
+
+ if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
+ int rc = pcmk__ipc_send_xml(client, 0, update_msg,
+ crm_ipc_server_event);
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("%s notification of client %s failed: %s "
+ CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
+ pcmk_rc_str(rc), client->id, rc);
+ } else {
+ crm_trace("Sent %s notification to client %s",
+ type, pcmk__client_name(client));
+ }
+ }
+}
+
+void
+do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
+{
+ pcmk__client_t *client = NULL;
+ xmlNode *notify_data = NULL;
+
+ if (!timeout || !call_id || !client_id) {
+ return;
+ }
+
+ client = pcmk__find_client_by_id(client_id);
+ if (!client) {
+ return;
+ }
+
+ notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
+ crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
+ crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
+ crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);
+
+ crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
+
+ if (client) {
+ pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
+ }
+
+ free_xml(notify_data);
+}
+
+/*!
+ * \internal
+ * \brief Notify relevant IPC clients of a fencing operation result
+ *
+ * \param[in] type Notification type
+ * \param[in] result Result of fencing operation (assume success if NULL)
+ * \param[in] data If not NULL, add to notification as call data
+ */
+void
+fenced_send_notification(const char *type, const pcmk__action_result_t *result,
+ xmlNode *data)
+{
+ /* TODO: Standardize the contents of data */
+ xmlNode *update_msg = create_xml_node(NULL, "notify");
+
+ CRM_LOG_ASSERT(type != NULL);
+
+ crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
+ crm_xml_add(update_msg, F_SUBTYPE, type);
+ crm_xml_add(update_msg, F_STONITH_OPERATION, type);
+ stonith__xe_set_result(update_msg, result);
+
+ if (data != NULL) {
+ add_message_xml(update_msg, F_STONITH_CALLDATA, data);
+ }
+
+ crm_trace("Notifying clients");
+ pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
+ free_xml(update_msg);
+ crm_trace("Notify complete");
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a configuration change to subscribed clients
+ *
+ * \param[in] op Notification type (STONITH_OP_DEVICE_ADD,
+ * STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or
+ * STONITH_OP_LEVEL_DEL)
+ * \param[in] result Operation result
+ * \param[in] desc Description of what changed
+ * \param[in] active Current number of devices or topologies in use
+ */
+static void
+send_config_notification(const char *op, const pcmk__action_result_t *result,
+ const char *desc, int active)
+{
+ xmlNode *notify_data = create_xml_node(NULL, op);
+
+ CRM_CHECK(notify_data != NULL, return);
+
+ crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
+ crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
+
+ fenced_send_notification(op, result, notify_data);
+ free_xml(notify_data);
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a device change to subscribed clients
+ *
+ * \param[in] op Notification type (STONITH_OP_DEVICE_ADD or
+ * STONITH_OP_DEVICE_DEL)
+ * \param[in] result Operation result
+ * \param[in] desc ID of device that changed
+ */
+void
+fenced_send_device_notification(const char *op,
+ const pcmk__action_result_t *result,
+ const char *desc)
+{
+ send_config_notification(op, result, desc, g_hash_table_size(device_list));
+}
+
+/*!
+ * \internal
+ * \brief Send notifications for a topology level change to subscribed clients
+ *
+ * \param[in] op Notification type (STONITH_OP_LEVEL_ADD or
+ * STONITH_OP_LEVEL_DEL)
+ * \param[in] result Operation result
+ * \param[in] desc String representation of level (<target>[<level_index>])
+ */
+void
+fenced_send_level_notification(const char *op,
+ const pcmk__action_result_t *result,
+ const char *desc)
+{
+ send_config_notification(op, result, desc, g_hash_table_size(topology));
+}
+
+static void
+topology_remove_helper(const char *node, int level)
+{
+ char *desc = NULL;
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
+
+ crm_xml_add(data, F_STONITH_ORIGIN, __func__);
+ crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
+ crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
+
+ fenced_unregister_level(data, &desc, &result);
+ fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc);
+ pcmk__reset_result(&result);
+ free_xml(data);
+ free(desc);
+}
+
+static void
+remove_cib_device(xmlXPathObjectPtr xpathObj)
+{
+ int max = numXpathResults(xpathObj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ const char *rsc_id = NULL;
+ const char *standard = NULL;
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ CRM_LOG_ASSERT(match != NULL);
+ if(match != NULL) {
+ standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
+ }
+
+ if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ continue;
+ }
+
+ rsc_id = crm_element_value(match, XML_ATTR_ID);
+
+ stonith_device_remove(rsc_id, true);
+ }
+}
+
+static void
+remove_topology_level(xmlNode *match)
+{
+ int index = 0;
+ char *key = NULL;
+
+ CRM_CHECK(match != NULL, return);
+
+ key = stonith_level_key(match, fenced_target_by_unknown);
+ crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
+ topology_remove_helper(key, index);
+ free(key);
+}
+
+static void
+add_topology_level(xmlNode *match)
+{
+ char *desc = NULL;
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+ CRM_CHECK(match != NULL, return);
+
+ fenced_register_level(match, &desc, &result);
+ fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc);
+ pcmk__reset_result(&result);
+ free(desc);
+}
+
+static void
+remove_fencing_topology(xmlXPathObjectPtr xpathObj)
+{
+ int max = numXpathResults(xpathObj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ CRM_LOG_ASSERT(match != NULL);
+ if (match && crm_element_value(match, XML_DIFF_MARKER)) {
+ /* Deletion */
+ int index = 0;
+ char *target = stonith_level_key(match, fenced_target_by_unknown);
+
+ crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
+ if (target == NULL) {
+ crm_err("Invalid fencing target in element %s", ID(match));
+
+ } else if (index <= 0) {
+ crm_err("Invalid level for %s in element %s", target, ID(match));
+
+ } else {
+ topology_remove_helper(target, index);
+ }
+ /* } else { Deal with modifications during the 'addition' stage */
+ }
+ }
+}
+
+static void
+register_fencing_topology(xmlXPathObjectPtr xpathObj)
+{
+ int max = numXpathResults(xpathObj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpathObj, lpc);
+
+ remove_topology_level(match);
+ add_topology_level(match);
+ }
+}
+
+/* Fencing
+<diff crm_feature_set="3.0.6">
+ <diff-removed>
+ <fencing-topology>
+ <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
+ <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
+ <fencing-level devices="disk,network" id="f-p2.1"/>
+ </fencing-topology>
+ </diff-removed>
+ <diff-added>
+ <fencing-topology>
+ <fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
+ <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
+ <fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
+ </fencing-topology>
+ </diff-added>
+</diff>
+*/
+
+static void
+fencing_topology_init(void)
+{
+ xmlXPathObjectPtr xpathObj = NULL;
+ const char *xpath = "//" XML_TAG_FENCING_LEVEL;
+
+ crm_trace("Full topology refresh");
+ free_topology_list();
+ init_topology_list();
+
+ /* Grab everything */
+ xpathObj = xpath_search(local_cib, xpath);
+ register_fencing_topology(xpathObj);
+
+ freeXpathObject(xpathObj);
+}
+
+#define rsc_name(x) x->clone_name?x->clone_name:x->id
+
+/*!
+ * \internal
+ * \brief Check whether our uname is in a resource's allowed node list
+ *
+ * \param[in] rsc Resource to check
+ *
+ * \return Pointer to node object if found, NULL otherwise
+ */
+static pe_node_t *
+our_node_allowed_for(const pe_resource_t *rsc)
+{
+ GHashTableIter iter;
+ pe_node_t *node = NULL;
+
+ if (rsc && stonith_our_uname) {
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
+ break;
+ }
+ node = NULL;
+ }
+ }
+ return node;
+}
+
+static void
+watchdog_device_update(void)
+{
+ if (stonith_watchdog_timeout_ms > 0) {
+ if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
+ !stonith_watchdog_targets) {
+ /* getting here watchdog-fencing enabled, no device there yet
+ and reason isn't stonith_watchdog_targets preventing that
+ */
+ int rc;
+ xmlNode *xml;
+
+ xml = create_device_registration_xml(
+ STONITH_WATCHDOG_ID,
+ st_namespace_internal,
+ STONITH_WATCHDOG_AGENT,
+ NULL, /* stonith_device_register will add our
+ own name as PCMK_STONITH_HOST_LIST param
+ so we can skip that here
+ */
+ NULL);
+ rc = stonith_device_register(xml, TRUE);
+ free_xml(xml);
+ if (rc != pcmk_ok) {
+ rc = pcmk_legacy2rc(rc);
+ exit_code = CRM_EX_FATAL;
+ crm_crit("Cannot register watchdog pseudo fence agent: %s",
+ pcmk_rc_str(rc));
+ stonith_shutdown(0);
+ }
+ }
+
+ } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) {
+ /* be silent if no device - todo parameter to stonith_device_remove */
+ stonith_device_remove(STONITH_WATCHDOG_ID, true);
+ }
+}
+
+static void
+update_stonith_watchdog_timeout_ms(xmlNode *cib)
+{
+ long timeout_ms = 0;
+ xmlNode *stonith_watchdog_xml = NULL;
+ const char *value = NULL;
+
+ stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']",
+ cib, LOG_NEVER);
+ if (stonith_watchdog_xml) {
+ value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
+ }
+ if (value) {
+ timeout_ms = crm_get_msec(value);
+ }
+
+ if (timeout_ms < 0) {
+ timeout_ms = pcmk__auto_watchdog_timeout();
+ }
+
+ stonith_watchdog_timeout_ms = timeout_ms;
+}
+
+/*!
+ * \internal
+ * \brief If a resource or any of its children are STONITH devices, update their
+ * definitions given a cluster working set.
+ *
+ * \param[in,out] rsc Resource to check
+ * \param[in,out] data_set Cluster working set with device information
+ */
+static void
+cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
+{
+ pe_node_t *node = NULL;
+ const char *value = NULL;
+ const char *rclass = NULL;
+ pe_node_t *parent = NULL;
+
+ /* If this is a complex resource, check children rather than this resource itself. */
+ if(rsc->children) {
+ GList *gIter = NULL;
+ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
+ cib_device_update(gIter->data, data_set);
+ if(pe_rsc_is_clone(rsc)) {
+ crm_trace("Only processing one copy of the clone %s", rsc->id);
+ break;
+ }
+ }
+ return;
+ }
+
+ /* We only care about STONITH resources. */
+ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+ if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ return;
+ }
+
+ /* If this STONITH resource is disabled, remove it. */
+ if (pe__resource_is_disabled(rsc)) {
+ crm_info("Device %s has been disabled", rsc->id);
+ return;
+ }
+
+ /* if watchdog-fencing is disabled handle any watchdog-fence
+ resource as if it was disabled
+ */
+ if ((stonith_watchdog_timeout_ms <= 0) &&
+ pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
+ crm_info("Watchdog-fencing disabled thus handling "
+ "device %s as disabled", rsc->id);
+ return;
+ }
+
+ /* Check whether our node is allowed for this resource (and its parent if in a group) */
+ node = our_node_allowed_for(rsc);
+ if (rsc->parent && (rsc->parent->variant == pe_group)) {
+ parent = our_node_allowed_for(rsc->parent);
+ }
+
+ if(node == NULL) {
+ /* Our node is disallowed, so remove the device */
+ GHashTableIter iter;
+
+ crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
+ crm_trace("Available: %s = %d", pe__node_name(node), node->weight);
+ }
+
+ return;
+
+ } else if(node->weight < 0 || (parent && parent->weight < 0)) {
+ /* Our node (or its group) is disallowed by score, so remove the device */
+ int score = (node->weight < 0)? node->weight : parent->weight;
+
+ crm_info("Device %s has been disabled on %s: score=%s",
+ rsc->id, stonith_our_uname, pcmk_readable_score(score));
+ return;
+
+ } else {
+ /* Our node is allowed, so update the device information */
+ int rc;
+ xmlNode *data;
+ GHashTable *rsc_params = NULL;
+ GHashTableIter gIter;
+ stonith_key_value_t *params = NULL;
+
+ const char *name = NULL;
+ const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
+ const char *rsc_provides = NULL;
+
+ crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
+ rsc_params = pe_rsc_params(rsc, node, data_set);
+ get_meta_attributes(rsc->meta, rsc, node, data_set);
+
+ rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES);
+
+ g_hash_table_iter_init(&gIter, rsc_params);
+ while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
+ if (!name || !value) {
+ continue;
+ }
+ params = stonith_key_value_add(params, name, value);
+ crm_trace(" %s=%s", name, value);
+ }
+
+ data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
+ agent, params, rsc_provides);
+ stonith_key_value_freeall(params, 1, 1);
+ rc = stonith_device_register(data, TRUE);
+ CRM_ASSERT(rc == pcmk_ok);
+ free_xml(data);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Update all STONITH device definitions based on current CIB
+ */
+static void
+cib_devices_update(void)
+{
+ GHashTableIter iter;
+ stonith_device_t *device = NULL;
+
+ crm_info("Updating devices to version %s.%s.%s",
+ crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
+ crm_element_value(local_cib, XML_ATTR_GENERATION),
+ crm_element_value(local_cib, XML_ATTR_NUMUPDATES));
+
+ if (fenced_data_set->now != NULL) {
+ crm_time_free(fenced_data_set->now);
+ fenced_data_set->now = NULL;
+ }
+ fenced_data_set->localhost = stonith_our_uname;
+ pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set);
+
+ g_hash_table_iter_init(&iter, device_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
+ if (device->cib_registered) {
+ device->dirty = TRUE;
+ }
+ }
+
+ /* have list repopulated if cib has a watchdog-fencing-resource
+ TODO: keep a cached list for queries happening while we are refreshing
+ */
+ g_list_free_full(stonith_watchdog_targets, free);
+ stonith_watchdog_targets = NULL;
+ g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set);
+
+ g_hash_table_iter_init(&iter, device_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
+ if (device->dirty) {
+ g_hash_table_iter_remove(&iter);
+ }
+ }
+
+ fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it
+ pe_reset_working_set(fenced_data_set);
+}
+
+static void
+update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
+{
+ xmlNode *change = NULL;
+ char *reason = NULL;
+ bool needs_update = FALSE;
+ xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ for (change = pcmk__xml_first_child(patchset); change != NULL;
+ change = pcmk__xml_next(change)) {
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+ const char *xpath = crm_element_value(change, XML_DIFF_PATH);
+ const char *shortpath = NULL;
+
+ if ((op == NULL) ||
+ (strcmp(op, "move") == 0) ||
+ strstr(xpath, "/"XML_CIB_TAG_STATUS)) {
+ continue;
+ } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) {
+ const char *rsc_id = NULL;
+ char *search = NULL;
+ char *mutable = NULL;
+
+ if (strstr(xpath, XML_TAG_ATTR_SETS) ||
+ strstr(xpath, XML_TAG_META_SETS)) {
+ needs_update = TRUE;
+ pcmk__str_update(&reason,
+ "(meta) attribute deleted from resource");
+ break;
+ }
+ pcmk__str_update(&mutable, xpath);
+ rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'");
+ if (rsc_id != NULL) {
+ rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'");
+ search = strchr(rsc_id, '\'');
+ }
+ if (search != NULL) {
+ *search = 0;
+ stonith_device_remove(rsc_id, true);
+ /* watchdog_device_update called afterwards
+ to fall back to implicit definition if needed */
+ } else {
+ crm_warn("Ignoring malformed CIB update (resource deletion)");
+ }
+ free(mutable);
+
+ } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) ||
+ strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) ||
+ strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) {
+ shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
+ reason = crm_strdup_printf("%s %s", op, shortpath+1);
+ needs_update = TRUE;
+ break;
+ }
+ }
+
+ if(needs_update) {
+ crm_info("Updating device list from CIB: %s", reason);
+ cib_devices_update();
+ } else {
+ crm_trace("No updates for device list found in CIB");
+ }
+ free(reason);
+}
+
+
+static void
+update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
+{
+ const char *reason = "none";
+ gboolean needs_update = FALSE;
+ xmlXPathObjectPtr xpath_obj = NULL;
+
+ /* process new constraints */
+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
+ if (numXpathResults(xpath_obj) > 0) {
+ int max = numXpathResults(xpath_obj), lpc = 0;
+
+ /* Safest and simplest to always recompute */
+ needs_update = TRUE;
+ reason = "new location constraint";
+
+ for (lpc = 0; lpc < max; lpc++) {
+ xmlNode *match = getXpathResult(xpath_obj, lpc);
+
+ crm_log_xml_trace(match, "new constraint");
+ }
+ }
+ freeXpathObject(xpath_obj);
+
+ /* process deletions */
+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
+ if (numXpathResults(xpath_obj) > 0) {
+ remove_cib_device(xpath_obj);
+ }
+ freeXpathObject(xpath_obj);
+
+ /* process additions */
+ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
+ if (numXpathResults(xpath_obj) > 0) {
+ int max = numXpathResults(xpath_obj), lpc = 0;
+
+ for (lpc = 0; lpc < max; lpc++) {
+ const char *rsc_id = NULL;
+ const char *standard = NULL;
+ xmlNode *match = getXpathResult(xpath_obj, lpc);
+
+ rsc_id = crm_element_value(match, XML_ATTR_ID);
+ standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
+
+ if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ continue;
+ }
+
+ crm_trace("Fencing resource %s was added or modified", rsc_id);
+ reason = "new resource";
+ needs_update = TRUE;
+ }
+ }
+ freeXpathObject(xpath_obj);
+
+ if(needs_update) {
+ crm_info("Updating device list from CIB: %s", reason);
+ cib_devices_update();
+ }
+}
+
+static void
+update_cib_stonith_devices(const char *event, xmlNode * msg)
+{
+ int format = 1;
+ xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ CRM_ASSERT(patchset);
+ crm_element_value_int(patchset, "format", &format);
+ switch(format) {
+ case 1:
+ update_cib_stonith_devices_v1(event, msg);
+ break;
+ case 2:
+ update_cib_stonith_devices_v2(event, msg);
+ break;
+ default:
+ crm_warn("Unknown patch format: %d", format);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node has a specific attribute name/value
+ *
+ * \param[in] node Name of node to check
+ * \param[in] name Name of an attribute to look for
+ * \param[in] value The value the named attribute needs to be set to in order to be considered a match
+ *
+ * \return TRUE if the locally cached CIB has the specified node attribute
+ */
+gboolean
+node_has_attr(const char *node, const char *name, const char *value)
+{
+ GString *xpath = NULL;
+ xmlNode *match;
+
+ CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL)
+ && (value != NULL), return FALSE);
+
+ /* Search for the node's attributes in the CIB. While the schema allows
+ * multiple sets of instance attributes, and allows instance attributes to
+ * use id-ref to reference values elsewhere, that is intended for resources,
+ * so we ignore that here.
+ */
+ xpath = g_string_sized_new(256);
+ pcmk__g_strcat(xpath,
+ "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE
+ "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS
+ "/" XML_CIB_TAG_NVPAIR
+ "[@" XML_NVPAIR_ATTR_NAME "='", name, "' "
+ "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL);
+
+ match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER);
+
+ g_string_free(xpath, TRUE);
+ return (match != NULL);
+}
+
+/*!
+ * \internal
+ * \brief Check whether a node does watchdog-fencing
+ *
+ * \param[in] node Name of node to check
+ *
+ * \return TRUE if node found in stonith_watchdog_targets
+ * or stonith_watchdog_targets is empty indicating
+ * all nodes are doing watchdog-fencing
+ */
+gboolean
+node_does_watchdog_fencing(const char *node)
+{
+ return ((stonith_watchdog_targets == NULL) ||
+ pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
+}
+
+
+static void
+update_fencing_topology(const char *event, xmlNode * msg)
+{
+ int format = 1;
+ const char *xpath;
+ xmlXPathObjectPtr xpathObj = NULL;
+ xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ CRM_ASSERT(patchset);
+ crm_element_value_int(patchset, "format", &format);
+
+ if(format == 1) {
+ /* Process deletions (only) */
+ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
+ xpathObj = xpath_search(msg, xpath);
+
+ remove_fencing_topology(xpathObj);
+ freeXpathObject(xpathObj);
+
+ /* Process additions and changes */
+ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
+ xpathObj = xpath_search(msg, xpath);
+
+ register_fencing_topology(xpathObj);
+ freeXpathObject(xpathObj);
+
+ } else if(format == 2) {
+ xmlNode *change = NULL;
+ int add[] = { 0, 0, 0 };
+ int del[] = { 0, 0, 0 };
+
+ xml_patch_versions(patchset, add, del);
+
+ for (change = pcmk__xml_first_child(patchset); change != NULL;
+ change = pcmk__xml_next(change)) {
+ const char *op = crm_element_value(change, XML_DIFF_OP);
+ const char *xpath = crm_element_value(change, XML_DIFF_PATH);
+
+ if(op == NULL) {
+ continue;
+
+ } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) {
+ /* Change to a specific entry */
+
+ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath);
+ if(strcmp(op, "move") == 0) {
+ continue;
+
+ } else if(strcmp(op, "create") == 0) {
+ add_topology_level(change->children);
+
+ } else if(strcmp(op, "modify") == 0) {
+ xmlNode *match = first_named_child(change, XML_DIFF_RESULT);
+
+ if(match) {
+ remove_topology_level(match->children);
+ add_topology_level(match->children);
+ }
+
+ } else if(strcmp(op, "delete") == 0) {
+ /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
+ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
+ op, add[0], add[1], add[2], xpath);
+ fencing_topology_init();
+ return;
+ }
+
+ } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
+ /* Change to the topology in general */
+ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
+ op, add[0], add[1], add[2], xpath);
+ fencing_topology_init();
+ return;
+
+ } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) {
+ /* Changes to the whole config section, possibly including the topology as a whild */
+ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
+ crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
+ op, add[0], add[1], add[2], xpath);
+
+ } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
+ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
+ op, add[0], add[1], add[2], xpath);
+ fencing_topology_init();
+ return;
+ }
+
+ } else {
+ crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
+ op, add[0], add[1], add[2], xpath);
+ }
+ }
+
+ } else {
+ crm_warn("Unknown patch format: %d", format);
+ }
+}
+static bool have_cib_devices = FALSE;
+
+static void
+update_cib_cache_cb(const char *event, xmlNode * msg)
+{
+ int rc = pcmk_ok;
+ long timeout_ms_saved = stonith_watchdog_timeout_ms;
+ bool need_full_refresh = false;
+
+ if(!have_cib_devices) {
+ crm_trace("Skipping updates until we get a full dump");
+ return;
+
+ } else if(msg == NULL) {
+ crm_trace("Missing %s update", event);
+ return;
+ }
+
+ /* Maintain a local copy of the CIB so that we have full access
+ * to device definitions, location constraints, and node attributes
+ */
+ if (local_cib != NULL) {
+ int rc = pcmk_ok;
+ xmlNode *patchset = NULL;
+
+ crm_element_value_int(msg, F_CIB_RC, &rc);
+ if (rc != pcmk_ok) {
+ return;
+ }
+
+ patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+ pcmk__output_set_log_level(logger_out, LOG_TRACE);
+ out->message(out, "xml-patchset", patchset);
+ rc = xml_apply_patchset(local_cib, patchset, TRUE);
+ switch (rc) {
+ case pcmk_ok:
+ case -pcmk_err_old_data:
+ break;
+ case -pcmk_err_diff_resync:
+ case -pcmk_err_diff_failed:
+ crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
+ free_xml(local_cib);
+ local_cib = NULL;
+ break;
+ default:
+ crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
+ free_xml(local_cib);
+ local_cib = NULL;
+ }
+ }
+
+ if (local_cib == NULL) {
+ crm_trace("Re-requesting full CIB");
+ rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
+ if(rc != pcmk_ok) {
+ crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
+ return;
+ }
+ CRM_ASSERT(local_cib != NULL);
+ need_full_refresh = true;
+ }
+
+ pcmk__refresh_node_caches_from_cib(local_cib);
+ update_stonith_watchdog_timeout_ms(local_cib);
+
+ if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
+ need_full_refresh = true;
+ }
+
+ if (need_full_refresh) {
+ fencing_topology_init();
+ cib_devices_update();
+ } else {
+ // Partial refresh
+ update_fencing_topology(event, msg);
+ update_cib_stonith_devices(event, msg);
+ }
+
+ watchdog_device_update();
+}
+
+static void
+init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
+{
+ crm_info("Updating device list from CIB");
+ have_cib_devices = TRUE;
+ local_cib = copy_xml(output);
+
+ pcmk__refresh_node_caches_from_cib(local_cib);
+ update_stonith_watchdog_timeout_ms(local_cib);
+
+ fencing_topology_init();
+ cib_devices_update();
+ watchdog_device_update();
+}
+
+static void
+stonith_shutdown(int nsig)
+{
+ crm_info("Terminating with %d clients", pcmk__ipc_client_count());
+ stonith_shutdown_flag = TRUE;
+ if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
+ g_main_loop_quit(mainloop);
+ }
+}
+
+static void
+cib_connection_destroy(gpointer user_data)
+{
+ if (stonith_shutdown_flag) {
+ crm_info("Connection to the CIB manager closed");
+ return;
+ } else {
+ crm_crit("Lost connection to the CIB manager, shutting down");
+ }
+ if (cib_api) {
+ cib_api->cmds->signoff(cib_api);
+ }
+ stonith_shutdown(0);
+}
+
+static void
+stonith_cleanup(void)
+{
+ if (cib_api) {
+ cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb);
+ cib_api->cmds->signoff(cib_api);
+ }
+
+ if (ipcs) {
+ qb_ipcs_destroy(ipcs);
+ }
+
+ crm_peer_destroy();
+ pcmk__client_cleanup();
+ free_stonith_remote_op_list();
+ free_topology_list();
+ free_device_list();
+ free_metadata_cache();
+ fenced_unregister_handlers();
+
+ free(stonith_our_uname);
+ stonith_our_uname = NULL;
+
+ free_xml(local_cib);
+ local_cib = NULL;
+}
+
+static gboolean
+stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data,
+ GError **error)
+{
+ stand_alone = FALSE;
+ options.no_cib_connect = true;
+ return TRUE;
+}
+
+static void
+setup_cib(void)
+{
+ int rc, retries = 0;
+
+ cib_api = cib_new();
+ if (cib_api == NULL) {
+ crm_err("No connection to the CIB manager");
+ return;
+ }
+
+ do {
+ sleep(retries);
+ rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
+ } while (rc == -ENOTCONN && ++retries < 5);
+
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
+
+ } else if (pcmk_ok !=
+ cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) {
+ crm_err("Could not set CIB notification callback");
+
+ } else {
+ rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
+ cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
+ init_cib_cache_cb);
+ cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
+ crm_info("Watching for fencing topology changes");
+ }
+}
+
+struct qb_ipcs_service_handlers ipc_callbacks = {
+ .connection_accept = st_ipc_accept,
+ .connection_created = NULL,
+ .msg_process = st_ipc_dispatch,
+ .connection_closed = st_ipc_closed,
+ .connection_destroyed = st_ipc_destroy
+};
+
+/*!
+ * \internal
+ * \brief Callback for peer status changes
+ *
+ * \param[in] type What changed
+ * \param[in] node What peer had the change
+ * \param[in] data Previous value of what changed
+ */
+static void
+st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
+{
+ if ((type != crm_status_processes)
+ && !pcmk_is_set(node->flags, crm_remote_node)) {
+ /*
+ * This is a hack until we can send to a nodeid and/or we fix node name lookups
+ * These messages are ignored in stonith_peer_callback()
+ */
+ xmlNode *query = create_xml_node(NULL, "stonith_command");
+
+ crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
+ crm_xml_add(query, F_TYPE, T_STONITH_NG);
+ crm_xml_add(query, F_STONITH_OPERATION, "poke");
+
+ crm_debug("Broadcasting our uname because of node %u", node->id);
+ send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
+
+ free_xml(query);
+ }
+}
+
+static pcmk__cluster_option_t fencer_options[] = {
+ /* name, old name, type, allowed values,
+ * default value, validator,
+ * short description,
+ * long description
+ */
+ {
+ PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL,
+ N_("Advanced use only: An alternate parameter to supply instead of 'port'"),
+ N_("some devices do not support the "
+ "standard 'port' parameter or may provide additional ones. Use "
+ "this to specify an alternate, device-specific, parameter "
+ "that should indicate the machine to be fenced. A value of "
+ "none can be used to tell the cluster not to supply any "
+ "additional parameters.")
+ },
+ {
+ PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL,
+ N_("A mapping of host names to ports numbers for devices that do not support host names."),
+ N_("Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2")
+ },
+ {
+ PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL,
+ N_("Eg. node1,node2,node3"),
+ N_("A list of machines controlled by "
+ "this device (Optional unless pcmk_host_list=static-list)")
+ },
+ {
+ PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL,
+ N_("How to determine which machines are controlled by the device."),
+ N_("Allowed values: dynamic-list "
+ "(query the device via the 'list' command), static-list "
+ "(check the pcmk_host_list attribute), status "
+ "(query the device via the 'status' command), "
+ "none (assume every device can fence every "
+ "machine)")
+ },
+ {
+ PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL,
+ N_("Enable a base delay for fencing actions and specify base delay value."),
+ N_("Enable a delay of no more than the "
+ "time specified before executing fencing actions. Pacemaker "
+ "derives the overall delay by taking the value of "
+ "pcmk_delay_base and adding a random delay value such "
+ "that the sum is kept below this maximum.")
+ },
+ {
+ PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL,
+ N_("Enable a base delay for "
+ "fencing actions and specify base delay value."),
+ N_("This enables a static delay for "
+ "fencing actions, which can help avoid \"death matches\" where "
+ "two nodes try to fence each other at the same time. If "
+ "pcmk_delay_max is also used, a random delay will be "
+ "added such that the total delay is kept below that value."
+ "This can be set to a single time value to apply to any node "
+ "targeted by this device (useful if a separate device is "
+ "configured for each target), or to a node map (for example, "
+ "\"node1:1s;node2:5\") to set a different value per target.")
+ },
+ {
+ PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL,
+ N_("The maximum number of actions can be performed in parallel on this device"),
+ N_("Cluster property concurrent-fencing=true needs to be configured first."
+ "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.")
+ },
+ {
+ "pcmk_reboot_action",NULL, "string", NULL, "reboot", NULL,
+ N_("Advanced use only: An alternate command to run instead of 'reboot'"),
+ N_("Some devices do not support the standard commands or may provide additional ones.\n"
+ "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.")
+ },
+ {
+ "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"),
+ N_("Some devices need much more/less time to complete than normal."
+ "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.")
+ },
+ {
+ "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.")
+ },
+ {
+ "pcmk_off_action",NULL, "string", NULL, "off", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'off\'"),
+ N_("Some devices do not support the standard commands or may provide additional ones."
+ "Use this to specify an alternate, device-specific, command that implements the \'off\' action.")
+ },
+ {
+ "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"),
+ N_("Some devices need much more/less time to complete than normal."
+ "Use this to specify an alternate, device-specific, timeout for \'off\' actions.")
+ },
+ {
+ "pcmk_off_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.")
+ },
+ {
+ "pcmk_on_action",NULL, "string", NULL, "on", NULL,
+ N_("Advanced use only: An alternate command to run instead of 'on'"),
+ N_("Some devices do not support the standard commands or may provide additional ones."
+ "Use this to specify an alternate, device-specific, command that implements the \'on\' action.")
+ },
+ {
+ "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"),
+ N_("Some devices need much more/less time to complete than normal."
+ "Use this to specify an alternate, device-specific, timeout for \'on\' actions.")
+ },
+ {
+ "pcmk_on_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.")
+ },
+ {
+ "pcmk_list_action",NULL, "string", NULL, "list", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'list\'"),
+ N_("Some devices do not support the standard commands or may provide additional ones."
+ "Use this to specify an alternate, device-specific, command that implements the \'list\' action.")
+ },
+ {
+ "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"),
+ N_("Some devices need much more/less time to complete than normal."
+ "Use this to specify an alternate, device-specific, timeout for \'list\' actions.")
+ },
+ {
+ "pcmk_list_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.")
+ },
+ {
+ "pcmk_monitor_action",NULL, "string", NULL, "monitor", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'monitor\'"),
+ N_("Some devices do not support the standard commands or may provide additional ones."
+ "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.")
+ },
+ {
+ "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"),
+ N_("Some devices need much more/less time to complete than normal.\n"
+ "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.")
+ },
+ {
+ "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.")
+ },
+ {
+ "pcmk_status_action",NULL, "string", NULL, "status", NULL,
+ N_("Advanced use only: An alternate command to run instead of \'status\'"),
+ N_("Some devices do not support the standard commands or may provide additional ones."
+ "Use this to specify an alternate, device-specific, command that implements the \'status\' action.")
+ },
+ {
+ "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL,
+ N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"),
+ N_("Some devices need much more/less time to complete than normal."
+ "Use this to specify an alternate, device-specific, timeout for \'status\' actions.")
+ },
+ {
+ "pcmk_status_retries",NULL, "integer", NULL, "2", NULL,
+ N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"),
+ N_("Some devices do not support multiple connections."
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'status\' actions before giving up.")
+ },
+};
+
+void
+fencer_metadata(void)
+{
+ const char *desc_short = N_("Instance attributes available for all "
+ "\"stonith\"-class resources");
+ const char *desc_long = N_("Instance attributes available for all \"stonith\"-"
+ "class resources and used by Pacemaker's fence "
+ "daemon, formerly known as stonithd");
+
+ gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short,
+ desc_long, fencer_options,
+ PCMK__NELEM(fencer_options));
+ printf("%s", s);
+ g_free(s);
+}
+
+static GOptionEntry entries[] = {
+ { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
+ "Deprecated (will be removed in a future release)", NULL },
+
+ { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
+ stand_alone_cpg_cb, "Intended for use in regression testing only", NULL },
+
+ { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
+ &options.log_files, "Send logs to the additional named logfile", NULL },
+
+ { NULL }
+};
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group,
+ "[metadata]");
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ int rc = pcmk_rc_ok;
+ crm_cluster_t *cluster = NULL;
+ crm_ipc_t *old_instance = NULL;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+ crm_log_preinit(NULL, argc, argv);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ if ((g_strv_length(processed_args) >= 2)
+ && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
+ fencer_metadata();
+ goto done;
+ }
+
+ // Open additional log files
+ pcmk__add_logfiles(options.log_files, out);
+
+ crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
+ (args->verbosity > 0), argc, argv, FALSE);
+
+ crm_notice("Starting Pacemaker fencer");
+
+ old_instance = crm_ipc_new("stonith-ng", 0);
+ if (old_instance == NULL) {
+ /* crm_ipc_new() will have already logged an error message with
+ * crm_err()
+ */
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+ if (crm_ipc_connect(old_instance)) {
+ // IPC endpoint already up
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("pacemaker-fenced is already active, aborting startup");
+ goto done;
+ } else {
+ // Not up or not authentic, we'll proceed either way
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
+ mainloop_add_signal(SIGTERM, stonith_shutdown);
+
+ crm_peer_init();
+
+ fenced_data_set = pe_new_working_set();
+ CRM_ASSERT(fenced_data_set != NULL);
+
+ cluster = pcmk_cluster_new();
+
+ /* Initialize the logger prior to setup_cib(). update_cib_cache_cb() may
+ * call the "xml-patchset" message function, which needs the logger, after
+ * setup_cib() has run.
+ */
+ rc = pcmk__log_output_new(&logger_out) != pcmk_rc_ok;
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_FATAL;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format log: %s", pcmk_rc_str(rc));
+ goto done;
+ }
+ pe__register_messages(logger_out);
+ pcmk__register_lib_messages(logger_out);
+ pcmk__output_set_log_level(logger_out, LOG_TRACE);
+ fenced_data_set->priv = logger_out;
+
+ if (!stand_alone) {
+#if SUPPORT_COROSYNC
+ if (is_corosync_cluster()) {
+ cluster->destroy = stonith_peer_cs_destroy;
+ cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback;
+ cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
+ }
+#endif // SUPPORT_COROSYNC
+
+ crm_set_status_callback(&st_peer_update_callback);
+
+ if (crm_cluster_connect(cluster) == FALSE) {
+ exit_code = CRM_EX_FATAL;
+ crm_crit("Cannot sign in to the cluster... terminating");
+ goto done;
+ }
+ pcmk__str_update(&stonith_our_uname, cluster->uname);
+
+ if (!options.no_cib_connect) {
+ setup_cib();
+ }
+
+ } else {
+ pcmk__str_update(&stonith_our_uname, "localhost");
+ crm_warn("Stand-alone mode is deprecated and will be removed "
+ "in a future release");
+ }
+
+ init_device_list();
+ init_topology_list();
+
+ pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
+
+ // Create the mainloop and run it...
+ mainloop = g_main_loop_new(NULL, FALSE);
+ crm_notice("Pacemaker fencer successfully started and accepting connections");
+ g_main_loop_run(mainloop);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ g_strfreev(options.log_files);
+
+ stonith_cleanup();
+ pcmk_cluster_free(cluster);
+ pe_free_working_set(fenced_data_set);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (logger_out != NULL) {
+ logger_out->finish(logger_out, exit_code, true, NULL);
+ pcmk__output_free(logger_out);
+ }
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+
+ pcmk__unregister_formats();
+ crm_exit(exit_code);
+}