path: root/daemons/execd
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:53:20 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:53:20 +0000
commit    e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch)
tree      a6716c9275b4b413f6c9194798b34b91affb3cc7 /daemons/execd
parent    Initial commit. (diff)
download  pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz
          pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip
Adding upstream version 2.1.6. (tag: upstream/2.1.6)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemons/execd')
-rw-r--r--  daemons/execd/Makefile.am                    76
-rw-r--r--  daemons/execd/cts-exec-helper.c             624
-rw-r--r--  daemons/execd/execd_alerts.c                205
-rw-r--r--  daemons/execd/execd_commands.c             1927
-rw-r--r--  daemons/execd/pacemaker-execd.c             582
-rw-r--r--  daemons/execd/pacemaker-execd.h             110
-rw-r--r--  daemons/execd/pacemaker-remoted.8.inc         5
-rw-r--r--  daemons/execd/pacemaker_remote.in           176
-rw-r--r--  daemons/execd/pacemaker_remote.service.in    52
-rw-r--r--  daemons/execd/remoted_pidone.c              298
-rw-r--r--  daemons/execd/remoted_proxy.c               470
-rw-r--r--  daemons/execd/remoted_tls.c                 428
12 files changed, 4953 insertions, 0 deletions
diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am
new file mode 100644
index 0000000..466f0df
--- /dev/null
+++ b/daemons/execd/Makefile.am
@@ -0,0 +1,76 @@
+#
+# Copyright 2012-2021 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU Lesser General Public License
+# version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+#
+
+include $(top_srcdir)/mk/common.mk
+include $(top_srcdir)/mk/man.mk
+
+halibdir = $(CRM_DAEMON_DIR)
+
+halib_PROGRAMS = pacemaker-execd cts-exec-helper
+
+EXTRA_DIST = pacemaker-remoted.8.inc
+
+pacemaker_execd_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_execd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_execd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/fencing/libstonithd.la
+pacemaker_execd_SOURCES = pacemaker-execd.c execd_commands.c \
+ execd_alerts.c
+
+if BUILD_REMOTE
+sbin_PROGRAMS = pacemaker-remoted
+if BUILD_SYSTEMD
+systemdsystemunit_DATA = pacemaker_remote.service
+else
+initdir = $(INITDIR)
+init_SCRIPTS = pacemaker_remote
+endif
+
+pacemaker_remoted_CPPFLAGS = -DPCMK__COMPILE_REMOTE $(AM_CPPFLAGS)
+
+pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE)
+pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
+
+pacemaker_remoted_LDADD = $(pacemaker_execd_LDADD) \
+ $(top_builddir)/lib/lrmd/liblrmd.la
+pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \
+ remoted_tls.c remoted_pidone.c remoted_proxy.c
+endif
+
+cts_exec_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
+ $(top_builddir)/lib/lrmd/liblrmd.la \
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/pengine/libpe_status.la
+cts_exec_helper_SOURCES = cts-exec-helper.c
+
+noinst_HEADERS = pacemaker-execd.h
+
+CLEANFILES = $(man8_MANS)
+
+# Always create a symlink for the old pacemaker_remoted name, so that bundle
+# container images using a current Pacemaker will run on cluster nodes running
+# Pacemaker 1 (>=1.1.17).
+install-exec-hook:
+if BUILD_LEGACY_LINKS
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd && $(LN_S) pacemaker-execd lrmd
+endif
+if BUILD_REMOTE
+ cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted && $(LN_S) pacemaker-remoted pacemaker_remoted
+endif
+
+uninstall-hook:
+if BUILD_LEGACY_LINKS
+ cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd
+endif
+if BUILD_REMOTE
+ cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted
+endif
diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c
new file mode 100644
index 0000000..2af5e16
--- /dev/null
+++ b/daemons/execd/cts-exec-helper.c
@@ -0,0 +1,624 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <unistd.h>
+
+#include <crm/crm.h>
+#include <crm/services.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/mainloop.h>
+
+#include <crm/pengine/status.h>
+#include <crm/pengine/internal.h>
+#include <crm/cib.h>
+#include <crm/cib/internal.h>
+#include <crm/lrmd.h>
+
+#define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events"
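+
+/* Illustrative invocations (the resource name here is hypothetical; the
+ * options are defined in basic_entries[] and api_call_entries[] below):
+ *
+ *   cts-exec-helper -c register_rsc -r test_rsc -C ocf -P pacemaker -T Dummy
+ *   cts-exec-helper -c exec -r test_rsc -a start -t 6000
+ *   cts-exec-helper -c unregister_rsc -r test_rsc
+ */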
+
+static int exec_call_id = 0;
+static gboolean start_test(gpointer user_data);
+static void try_connect(void);
+
+static char *key = NULL;
+static char *val = NULL;
+
+static struct {
+ int verbose;
+ int quiet;
+ guint interval_ms;
+ int timeout;
+ int start_delay;
+ int cancel_call_id;
+ gboolean no_wait;
+ gboolean is_running;
+ gboolean no_connect;
+ int exec_call_opts;
+ const char *api_call;
+ const char *rsc_id;
+ const char *provider;
+ const char *class;
+ const char *type;
+ const char *action;
+ const char *listen;
+ gboolean use_tls;
+ lrmd_key_value_t *params;
+} options;
+
+static gboolean
+interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
+ options.interval_ms = crm_parse_interval_spec(optarg);
+ return errno == 0;
+}
+
+static gboolean
+notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
+ if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) {
+ options.exec_call_opts = lrmd_opt_notify_orig_only;
+ } else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) {
+ options.exec_call_opts = lrmd_opt_notify_changes_only;
+ }
+
+ return TRUE;
+}
+
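+/* Collect --param-key/--param-val in pairs: once both a key and a value have
+ * been seen, add the pair to options.params and reset for the next pair.
+ */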
+static gboolean
+param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
+ if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) {
+ pcmk__str_update(&key, optarg);
+ } else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) {
+ pcmk__str_update(&val, optarg);
+ }
+
+ if (key != NULL && val != NULL) {
+ options.params = lrmd_key_value_add(options.params, key, val);
+ pcmk__str_update(&key, NULL);
+ pcmk__str_update(&val, NULL);
+ }
+
+ return TRUE;
+}
+
+static GOptionEntry basic_entries[] = {
+ { "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call,
+ "Directly relates to executor API functions",
+ NULL },
+
+ { "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running,
+ "Determine if a resource is registered and running",
+ NULL },
+
+ { "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen,
+ "Listen for a specific event string",
+ NULL },
+
+ { "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait,
+ "Make api call and do not wait for result",
+ NULL },
+
+ { "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
+ "Only notify client changes to recurring operations",
+ NULL },
+
+ { "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
+ "Only notify this client of the results of an API action",
+ NULL },
+
+ { "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls,
+ "Use TLS backend for local connection",
+ NULL },
+
+ { NULL }
+};
+
+static GOptionEntry api_call_entries[] = {
+ { "action", 'a', 0, G_OPTION_ARG_STRING, &options.action,
+ NULL, NULL },
+
+ { "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id,
+ NULL, NULL },
+
+ { "class", 'C', 0, G_OPTION_ARG_STRING, &options.class,
+ NULL, NULL },
+
+ { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb,
+ NULL, NULL },
+
+ { "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
+ NULL, NULL },
+
+ { "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
+ NULL, NULL },
+
+ { "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider,
+ NULL, NULL },
+
+ { "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id,
+ NULL, NULL },
+
+ { "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay,
+ NULL, NULL },
+
+ { "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout,
+ NULL, NULL },
+
+ { "type", 'T', 0, G_OPTION_ARG_STRING, &options.type,
+ NULL, NULL },
+
+ { NULL }
+};
+
+static GMainLoop *mainloop = NULL;
+static lrmd_t *lrmd_conn = NULL;
+
+static char event_buf_v0[1024];
+
+static crm_exit_t
+test_exit(crm_exit_t exit_code)
+{
+ lrmd_api_delete(lrmd_conn);
+ return crm_exit(exit_code);
+}
+
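+/* Note: this expands to a bare "if" statement (no do/while(0) wrapper), so
+ * it is safe only when used as a complete statement, as all call sites
+ * below do.
+ */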
+#define print_result(fmt, args...) \
+ if (!options.quiet) { \
+ printf(fmt "\n" , ##args); \
+ }
+
+#define report_event(event) \
+ snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \
+ lrmd_event_type2str(event->type), \
+ event->rsc_id, \
+ event->op_type ? event->op_type : "none", \
+ services_ocf_exitcode_str(event->rc), \
+ pcmk_exec_status_str(event->op_status)); \
+ crm_info("%s", event_buf_v0);
+
+static void
+test_shutdown(int nsig)
+{
+ lrmd_api_delete(lrmd_conn);
+ lrmd_conn = NULL;
+}
+
+static void
+read_events(lrmd_event_data_t * event)
+{
+ report_event(event);
+ if (options.listen) {
+ if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) {
+ print_result("LISTEN EVENT SUCCESSFUL");
+ test_exit(CRM_EX_OK);
+ }
+ }
+
+ if (exec_call_id && (event->call_id == exec_call_id)) {
+ if (event->op_status == 0 && event->rc == 0) {
+ print_result("API-CALL SUCCESSFUL for 'exec'");
+ } else {
+ print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s",
+ event->rc, pcmk_exec_status_str(event->op_status));
+ test_exit(CRM_EX_ERROR);
+ }
+
+ if (!options.listen) {
+ test_exit(CRM_EX_OK);
+ }
+ }
+}
+
+static gboolean
+timeout_err(gpointer data)
+{
+ print_result("LISTEN EVENT FAILURE - timeout occurred, never found");
+ test_exit(CRM_EX_TIMEOUT);
+ return FALSE;
+}
+
+static void
+connection_events(lrmd_event_data_t * event)
+{
+ int rc = event->connection_rc;
+
+ if (event->type != lrmd_event_connect) {
+ /* ignore */
+ return;
+ }
+
+ if (!rc) {
+ crm_info("Executor client connection established");
+ start_test(NULL);
+ return;
+ } else {
+ sleep(1);
+ try_connect();
+ crm_notice("Executor client connection failed");
+ }
+}
+
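+/* Note: num_tries is static, so the retry budget is shared across calls
+ * (including the retries triggered from connection_events()): synchronous
+ * connect_async() failures count against about ten attempts in total.
+ */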
+static void
+try_connect(void)
+{
+ int tries = 10;
+ static int num_tries = 0;
+ int rc = 0;
+
+ lrmd_conn->cmds->set_callback(lrmd_conn, connection_events);
+ for (; num_tries < tries; num_tries++) {
+ rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000);
+
+ if (!rc) {
+ return; /* we'll hear back in async callback */
+ }
+ sleep(1);
+ }
+
+ print_result("API CONNECTION FAILURE");
+ test_exit(CRM_EX_ERROR);
+}
+
+static gboolean
+start_test(gpointer user_data)
+{
+ int rc = 0;
+
+ if (!options.no_connect) {
+ if (!lrmd_conn->cmds->is_connected(lrmd_conn)) {
+ try_connect();
+            /* async connect in progress; connection_events() will call back into this function */
+ return 0;
+ }
+ }
+ lrmd_conn->cmds->set_callback(lrmd_conn, read_events);
+
+ if (options.timeout) {
+ g_timeout_add(options.timeout, timeout_err, NULL);
+ }
+
+ if (!options.api_call) {
+ return 0;
+ }
+
+ if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->exec(lrmd_conn,
+ options.rsc_id,
+ options.action,
+ NULL,
+ options.interval_ms,
+ options.timeout,
+ options.start_delay,
+ options.exec_call_opts,
+ options.params);
+
+ if (rc > 0) {
+ exec_call_id = rc;
+ print_result("API-CALL 'exec' action pending, waiting on response");
+ }
+
+ } else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->register_rsc(lrmd_conn,
+ options.rsc_id,
+ options.class, options.provider, options.type, 0);
+ } else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) {
+ lrmd_rsc_info_t *rsc_info;
+
+ rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0);
+
+ if (rsc_info) {
+ print_result("RSC_INFO: id:%s class:%s provider:%s type:%s",
+ rsc_info->id, rsc_info->standard,
+ (rsc_info->provider? rsc_info->provider : "<none>"),
+ rsc_info->type);
+ lrmd_free_rsc_info(rsc_info);
+ rc = pcmk_ok;
+ } else {
+ rc = -1;
+ }
+ } else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0);
+ } else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) {
+ rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action,
+ options.interval_ms);
+ } else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) {
+ char *output = NULL;
+
+ rc = lrmd_conn->cmds->get_metadata(lrmd_conn,
+ options.class,
+ options.provider, options.type, &output, 0);
+ if (rc == pcmk_ok) {
+ print_result("%s", output);
+ free(output);
+ }
+ } else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) {
+ lrmd_list_t *list = NULL;
+ lrmd_list_t *iter = NULL;
+
+ rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider);
+
+ if (rc > 0) {
+ print_result("%d agents found", rc);
+ for (iter = list; iter != NULL; iter = iter->next) {
+ print_result("%s", iter->val);
+ }
+ lrmd_list_freeall(list);
+ rc = 0;
+ } else {
+ print_result("API_CALL FAILURE - no agents found");
+ rc = -1;
+ }
+ } else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) {
+ lrmd_list_t *list = NULL;
+ lrmd_list_t *iter = NULL;
+
+ rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list);
+
+ if (rc > 0) {
+ print_result("%d providers found", rc);
+ for (iter = list; iter != NULL; iter = iter->next) {
+ print_result("%s", iter->val);
+ }
+ lrmd_list_freeall(list);
+ rc = 0;
+ } else {
+ print_result("API_CALL FAILURE - no providers found");
+ rc = -1;
+ }
+
+ } else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) {
+ lrmd_list_t *list = NULL;
+ lrmd_list_t *iter = NULL;
+
+ rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list);
+
+ if (rc > 0) {
+ print_result("%d standards found", rc);
+ for (iter = list; iter != NULL; iter = iter->next) {
+ print_result("%s", iter->val);
+ }
+ lrmd_list_freeall(list);
+ rc = 0;
+ } else {
+ print_result("API_CALL FAILURE - no providers found");
+ rc = -1;
+ }
+
+ } else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) {
+ GList *op_list = NULL;
+ GList *op_item = NULL;
+ rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0,
+ &op_list);
+
+ for (op_item = op_list; op_item != NULL; op_item = op_item->next) {
+ lrmd_op_info_t *op_info = op_item->data;
+
+ print_result("RECURRING_OP: %s_%s_%s timeout=%sms",
+ op_info->rsc_id, op_info->action,
+ op_info->interval_ms_s, op_info->timeout_ms_s);
+ lrmd_free_op_info(op_info);
+ }
+ g_list_free(op_list);
+
+ } else if (options.api_call) {
+ print_result("API-CALL FAILURE unknown action '%s'", options.action);
+ test_exit(CRM_EX_ERROR);
+ }
+
+ if (rc < 0) {
+ print_result("API-CALL FAILURE for '%s' api_rc:%d",
+ options.api_call, rc);
+ test_exit(CRM_EX_ERROR);
+ }
+
+ if (options.api_call && rc == pcmk_ok) {
+ print_result("API-CALL SUCCESSFUL for '%s'", options.api_call);
+ if (!options.listen) {
+ test_exit(CRM_EX_OK);
+ }
+ }
+
+ if (options.no_wait) {
+ /* just make the call and exit regardless of anything else. */
+ test_exit(CRM_EX_OK);
+ }
+
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Generate resource parameters from CIB if none explicitly given
+ *
+ * \return Standard Pacemaker return code
+ */
+static int
+generate_params(void)
+{
+ int rc = pcmk_rc_ok;
+ pe_working_set_t *data_set = NULL;
+ xmlNode *cib_xml_copy = NULL;
+ pe_resource_t *rsc = NULL;
+ GHashTable *params = NULL;
+ GHashTable *meta = NULL;
+ GHashTableIter iter;
+ char *key = NULL;
+ char *value = NULL;
+
+ if (options.params != NULL) {
+ return pcmk_rc_ok; // User specified parameters explicitly
+ }
+
+ // Retrieve and update CIB
+ rc = cib__signon_query(NULL, NULL, &cib_xml_copy);
+ if (rc != pcmk_rc_ok) {
+ return rc;
+ }
+ if (!cli_config_update(&cib_xml_copy, NULL, FALSE)) {
+ crm_err("Could not update CIB");
+ return pcmk_rc_cib_corrupt;
+ }
+
+ // Calculate cluster status
+ data_set = pe_new_working_set();
+ if (data_set == NULL) {
+ crm_crit("Could not allocate working set");
+ return ENOMEM;
+ }
+ pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat);
+ data_set->input = cib_xml_copy;
+ data_set->now = crm_time_new(NULL);
+ cluster_status(data_set);
+
+ // Find resource in CIB
+ rsc = pe_find_resource_with_flags(data_set->resources, options.rsc_id,
+ pe_find_renamed|pe_find_any);
+ if (rsc == NULL) {
+ crm_err("Resource does not exist in config");
+ pe_free_working_set(data_set);
+ return EINVAL;
+ }
+
+ // Add resource instance parameters to options.params
+ params = pe_rsc_params(rsc, NULL, data_set);
+ if (params != NULL) {
+ g_hash_table_iter_init(&iter, params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+ options.params = lrmd_key_value_add(options.params, key, value);
+ }
+ }
+
+ // Add resource meta-attributes to options.params
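+    // (crm_meta_name() adds a prefix, e.g. "timeout" becomes "CRM_meta_timeout")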
+ meta = pcmk__strkey_table(free, free);
+ get_meta_attributes(meta, rsc, NULL, data_set);
+ g_hash_table_iter_init(&iter, meta);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+ char *crm_name = crm_meta_name(key);
+
+ options.params = lrmd_key_value_add(options.params, crm_name, value);
+ free(crm_name);
+ }
+ g_hash_table_destroy(meta);
+
+ pe_free_working_set(data_set);
+ return rc;
+}
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, NULL, group, NULL);
+
+ pcmk__add_main_args(context, basic_entries);
+ pcmk__add_arg_group(context, "api-call", "API Call Options:",
+ "Parameters for api-call option", api_call_entries);
+
+ return context;
+}
+
+int
+main(int argc, char **argv)
+{
+ GError *error = NULL;
+ crm_exit_t exit_code = CRM_EX_OK;
+ crm_trigger_t *trig = NULL;
+
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+ /* Typically we'd pass all the single character options that take an argument
+ * as the second parameter here (and there's a bunch of those in this tool).
+ * However, we control how this program is called so we can just not call it
+ * in a way where the preprocessing ever matters.
+ */
+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
+ GOptionContext *context = build_arg_context(args, NULL);
+
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ /* We have to use crm_log_init here to set up the logging because there's
+ * different handling for daemons vs. command line programs, and
+ * pcmk__cli_init_logging is set up to only handle the latter.
+ */
+ crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc,
+ argv, FALSE);
+
+ for (int i = 0; i < args->verbosity; i++) {
+ crm_bump_log_level(argc, argv);
+ }
+
+ if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents",
+ "list_standards", "list_ocf_providers", NULL)) {
+ options.no_connect = TRUE;
+ }
+
+ if (options.is_running) {
+ int rc = pcmk_rc_ok;
+
+ if (options.rsc_id == NULL) {
+ exit_code = CRM_EX_USAGE;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "--is-running requires --rsc-id");
+ goto done;
+ }
+
+ options.interval_ms = 0;
+ if (options.timeout == 0) {
+ options.timeout = 30000;
+ }
+
+ rc = generate_params();
+ if (rc != pcmk_rc_ok) {
+ exit_code = pcmk_rc2exitc(rc);
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Can not determine resource status: "
+ "unable to get parameters from CIB");
+ goto done;
+ }
+ options.api_call = "exec";
+ options.action = "monitor";
+ options.exec_call_opts = lrmd_opt_notify_orig_only;
+ }
+
+ if (!options.api_call && !options.listen) {
+ exit_code = CRM_EX_USAGE;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Must specify at least one of --api-call, --listen, "
+ "or --is-running");
+ goto done;
+ }
+
+ if (options.use_tls) {
+ lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0);
+ } else {
+ lrmd_conn = lrmd_api_new();
+ }
+ trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL);
+ mainloop_set_trigger(trig);
+ mainloop_add_signal(SIGTERM, test_shutdown);
+
+ crm_info("Starting");
+ mainloop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(mainloop);
+
+done:
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ free(key);
+ free(val);
+
+ pcmk__output_and_clear_error(&error, NULL);
+ return test_exit(exit_code);
+}
diff --git a/daemons/execd/execd_alerts.c b/daemons/execd/execd_alerts.c
new file mode 100644
index 0000000..5944d93
--- /dev/null
+++ b/daemons/execd/execd_alerts.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2016-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+
+#include <crm/crm.h>
+#include <crm/services.h>
+#include <crm/services_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/alerts_internal.h>
+#include <crm/msg_xml.h>
+
+#include "pacemaker-execd.h"
+
+/* Track in-flight alerts so we can wait for them at shutdown */
+static GHashTable *inflight_alerts; /* key = call_id, value = timeout */
+static gboolean draining_alerts = FALSE;
+
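+/* Timeouts are stored as plain ints packed with GINT_TO_POINTER(), so the
+ * table needs no value-destroy function (hence pcmk__intkey_table(NULL)).
+ */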
+static inline void
+add_inflight_alert(int call_id, int timeout)
+{
+ if (inflight_alerts == NULL) {
+ inflight_alerts = pcmk__intkey_table(NULL);
+ }
+ pcmk__intkey_table_insert(inflight_alerts, call_id,
+ GINT_TO_POINTER(timeout));
+}
+
+static inline void
+remove_inflight_alert(int call_id)
+{
+ if (inflight_alerts != NULL) {
+ pcmk__intkey_table_remove(inflight_alerts, call_id);
+ }
+}
+
+static int
+max_inflight_timeout(void)
+{
+ GHashTableIter iter;
+ gpointer timeout;
+ int max_timeout = 0;
+
+ if (inflight_alerts) {
+ g_hash_table_iter_init(&iter, inflight_alerts);
+ while (g_hash_table_iter_next(&iter, NULL, &timeout)) {
+ if (GPOINTER_TO_INT(timeout) > max_timeout) {
+ max_timeout = GPOINTER_TO_INT(timeout);
+ }
+ }
+ }
+ return max_timeout;
+}
+
+struct alert_cb_s {
+ char *client_id;
+ int call_id;
+};
+
+static void
+alert_complete(svc_action_t *action)
+{
+ struct alert_cb_s *cb_data = (struct alert_cb_s *) (action->cb_data);
+
+ CRM_CHECK(cb_data != NULL, return);
+
+ remove_inflight_alert(cb_data->call_id);
+
+ if (action->status != PCMK_EXEC_DONE) {
+ const char *reason = services__exit_reason(action);
+
+ crm_notice("Could not send alert: %s%s%s%s " CRM_XS " client=%s",
+ pcmk_exec_status_str(action->status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")",
+ cb_data->client_id);
+
+ } else if (action->rc != 0) {
+ crm_notice("Alert [%d] completed but exited with status %d "
+ CRM_XS " client=%s",
+ action->pid, action->rc, cb_data->client_id);
+
+ } else {
+ crm_debug("Alert [%d] completed " CRM_XS " client=%s",
+ action->pid, cb_data->client_id);
+ }
+
+ free(cb_data->client_id);
+ free(action->cb_data);
+ action->cb_data = NULL;
+}
+
+int
+process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ static int alert_sequence_no = 0;
+
+ xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR);
+ const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID);
+ const char *alert_path = crm_element_value(alert_xml, F_LRMD_ALERT_PATH);
+ svc_action_t *action = NULL;
+ int alert_timeout = 0;
+ int rc = pcmk_ok;
+ GHashTable *params = NULL;
+ struct alert_cb_s *cb_data = NULL;
+
+ if ((alert_id == NULL) || (alert_path == NULL) ||
+        (client == NULL) || (client->id == NULL)) { /* hint for static analyzer */
+ return -EINVAL;
+ }
+ if (draining_alerts) {
+ return pcmk_ok;
+ }
+
+ crm_element_value_int(alert_xml, F_LRMD_TIMEOUT, &alert_timeout);
+
+ crm_info("Executing alert %s for %s", alert_id, client->id);
+
+ params = xml2list(alert_xml);
+ pcmk__add_alert_key_int(params, PCMK__alert_key_node_sequence,
+ ++alert_sequence_no);
+
+ cb_data = calloc(1, sizeof(struct alert_cb_s));
+ if (cb_data == NULL) {
+ rc = -errno;
+ goto err;
+ }
+
+ /* coverity[deref_ptr] False Positive */
+ cb_data->client_id = strdup(client->id);
+ if (cb_data->client_id == NULL) {
+ rc = -errno;
+ goto err;
+ }
+
+ crm_element_value_int(request, F_LRMD_CALLID, &(cb_data->call_id));
+
+ action = services_alert_create(alert_id, alert_path, alert_timeout, params,
+ alert_sequence_no, cb_data);
+ if (action->rc != PCMK_OCF_UNKNOWN) {
+ rc = -E2BIG;
+ goto err;
+ }
+
+ rc = services_action_user(action, CRM_DAEMON_USER);
+ if (rc < 0) {
+ goto err;
+ }
+
+ add_inflight_alert(cb_data->call_id, alert_timeout);
+ if (services_alert_async(action, alert_complete) == FALSE) {
+ services_action_free(action);
+ }
+ return pcmk_ok;
+
+err:
+ if (cb_data) {
+ if (cb_data->client_id) {
+ free(cb_data->client_id);
+ }
+ free(cb_data);
+ }
+ services_action_free(action);
+ return rc;
+}
+
+static bool
+drain_check(guint remaining_timeout_ms)
+{
+ if (inflight_alerts != NULL) {
+ guint count = g_hash_table_size(inflight_alerts);
+
+ if (count > 0) {
+ crm_trace("%d alerts pending (%.3fs timeout remaining)",
+ count, remaining_timeout_ms / 1000.0);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+void
+lrmd_drain_alerts(GMainLoop *mloop)
+{
+ if (inflight_alerts != NULL) {
+ guint timer_ms = max_inflight_timeout() + 5000;
+
+ crm_trace("Draining in-flight alerts (timeout %.3fs)",
+ timer_ms / 1000.0);
+ draining_alerts = TRUE;
+ pcmk_drain_main_loop(mloop, timer_ms, drain_check);
+ g_hash_table_destroy(inflight_alerts);
+ inflight_alerts = NULL;
+ }
+}
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
new file mode 100644
index 0000000..fa2761e
--- /dev/null
+++ b/daemons/execd/execd_commands.c
@@ -0,0 +1,1927 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+#include <crm/fencing/internal.h>
+
+#include <glib.h>
+
+// Check whether we have a high-resolution monotonic clock
+#undef PCMK__TIME_USE_CGT
+#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
+# define PCMK__TIME_USE_CGT
+# include <time.h> /* clock_gettime */
+#endif
+
+#include <unistd.h>
+
+#include <crm/crm.h>
+#include <crm/fencing/internal.h>
+#include <crm/services.h>
+#include <crm/services_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/msg_xml.h>
+
+#include "pacemaker-execd.h"
+
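+// Resources registered with this executor, keyed by resource ID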
+GHashTable *rsc_list = NULL;
+
+typedef struct lrmd_cmd_s {
+ int timeout;
+ guint interval_ms;
+ int start_delay;
+ int timeout_orig;
+
+ int call_id;
+
+ int call_opts;
+ /* Timer ids, must be removed on cmd destruction. */
+ int delay_id;
+ int stonith_recurring_id;
+
+ int rsc_deleted;
+
+ int service_flags;
+
+ char *client_id;
+ char *origin;
+ char *rsc_id;
+ char *action;
+ char *real_action;
+ char *userdata_str;
+
+ pcmk__action_result_t result;
+
+ /* We can track operation queue time and run time, to be saved with the CIB
+ * resource history (and displayed in cluster status). We need
+ * high-resolution monotonic time for this purpose, so we use
+ * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
+ * is disabled).
+ *
+ * However, we also need epoch timestamps for recording the time the command
+ * last ran and the time its return value last changed, for use in time
+ * displays (as opposed to interval calculations). We keep time_t values for
+ * this purpose.
+ *
+ * The last run time is used for both purposes, so we keep redundant
+ * monotonic and epoch values for this. Technically the two could represent
+ * different times, but since time_t has only second resolution and the
+ * values are used for distinct purposes, that is not significant.
+ */
+#ifdef PCMK__TIME_USE_CGT
+ /* Recurring and systemd operations may involve more than one executor
+ * command per operation, so they need info about the original and the most
+ * recent.
+ */
+ struct timespec t_first_run; // When op first ran
+ struct timespec t_run; // When op most recently ran
+ struct timespec t_first_queue; // When op was first queued
+ struct timespec t_queue; // When op was most recently queued
+#endif
+ time_t epoch_last_run; // Epoch timestamp of when op last ran
+ time_t epoch_rcchange; // Epoch timestamp of when rc last changed
+
+ bool first_notify_sent;
+ int last_notify_rc;
+ int last_notify_op_status;
+ int last_pid;
+
+ GHashTable *params;
+} lrmd_cmd_t;
+
+static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
+static gboolean execute_resource_action(gpointer user_data);
+static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
+
+#ifdef PCMK__TIME_USE_CGT
+
+/*!
+ * \internal
+ * \brief Check whether a struct timespec has been set
+ *
+ * \param[in] timespec Time to check
+ *
+ * \return true if timespec has been set (i.e. is nonzero), false otherwise
+ */
+static inline bool
+time_is_set(const struct timespec *timespec)
+{
+ return (timespec != NULL) &&
+ ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
+}
+
+/*!
+ * \internal
+ * \brief Set a timespec (and its original if unset) to the current time
+ *
+ * \param[out] t_current Where to store current time
+ * \param[out] t_orig Where to copy t_current if unset
+ */
+static void
+get_current_time(struct timespec *t_current, struct timespec *t_orig)
+{
+ clock_gettime(CLOCK_MONOTONIC, t_current);
+ if ((t_orig != NULL) && !time_is_set(t_orig)) {
+ *t_orig = *t_current;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Return difference between two times in milliseconds
+ *
+ * \param[in] now More recent time (or NULL to use current time)
+ * \param[in] old Earlier time
+ *
+ * \return milliseconds difference (or 0 if old is NULL or unset)
+ *
+ * \note Can overflow on 32-bit machines when the difference is around
+ *       24 days or more (INT_MAX milliseconds is roughly 24.8 days).
+ */
+static int
+time_diff_ms(const struct timespec *now, const struct timespec *old)
+{
+ int diff_ms = 0;
+
+ if (time_is_set(old)) {
+ struct timespec local_now = { 0, };
+
+ if (now == NULL) {
+ clock_gettime(CLOCK_MONOTONIC, &local_now);
+ now = &local_now;
+ }
+ diff_ms = (now->tv_sec - old->tv_sec) * 1000
+ + (now->tv_nsec - old->tv_nsec) / 1000000;
+ }
+ return diff_ms;
+}
+
+/*!
+ * \internal
+ * \brief Reset a command's operation times to their original values.
+ *
+ * Reset a command's run and queued timestamps to the timestamps of the original
+ * command, so we report the entire time since then and not just the time since
+ * the most recent command (for recurring and systemd operations).
+ *
+ * \param[in,out] cmd Executor command object to reset
+ *
+ * \note It's not obvious what the queued time should be for a systemd
+ * start/stop operation, which might go like this:
+ * initial command queued 5ms, runs 3s
+ * monitor command queued 10ms, runs 10s
+ * monitor command queued 10ms, runs 10s
+ * Is the queued time for that operation 5ms, 10ms or 25ms? The current
+ * implementation will report 5ms. If it's 25ms, then we need to
+ * subtract 20ms from the total exec time so as not to count it twice.
+ * We can implement that later if it matters to anyone ...
+ */
+static void
+cmd_original_times(lrmd_cmd_t * cmd)
+{
+ cmd->t_run = cmd->t_first_run;
+ cmd->t_queue = cmd->t_first_queue;
+}
+#endif
+
+static inline bool
+action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
+{
+ return (cmd->interval_ms == interval_ms)
+ && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
+}
+
+/*!
+ * \internal
+ * \brief Log the result of an asynchronous command
+ *
+ * \param[in] cmd Command to log result for
+ * \param[in] exec_time_ms Execution time in milliseconds, if known
+ * \param[in] queue_time_ms Queue time in milliseconds, if known
+ */
+static void
+log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
+{
+ int log_level = LOG_INFO;
+ GString *str = g_string_sized_new(100); // reasonable starting size
+
+ if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ log_level = LOG_DEBUG;
+ }
+
+ g_string_append_printf(str, "%s %s (call %d",
+ cmd->rsc_id, cmd->action, cmd->call_id);
+ if (cmd->last_pid != 0) {
+ g_string_append_printf(str, ", PID %d", cmd->last_pid);
+ }
+ if (cmd->result.execution_status == PCMK_EXEC_DONE) {
+ g_string_append_printf(str, ") exited with status %d",
+ cmd->result.exit_status);
+ } else {
+ pcmk__g_strcat(str, ") could not be executed: ",
+ pcmk_exec_status_str(cmd->result.execution_status),
+ NULL);
+ }
+ if (cmd->result.exit_reason != NULL) {
+ pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
+ }
+
+#ifdef PCMK__TIME_USE_CGT
+ pcmk__g_strcat(str, " (execution time ",
+ pcmk__readable_interval(exec_time_ms), NULL);
+ if (queue_time_ms > 0) {
+ pcmk__g_strcat(str, " after being queued ",
+ pcmk__readable_interval(queue_time_ms), NULL);
+ }
+ g_string_append_c(str, ')');
+#endif
+
+ do_crm_log(log_level, "%s", str->str);
+ g_string_free(str, TRUE);
+}
+
+static void
+log_execute(lrmd_cmd_t * cmd)
+{
+ int log_level = LOG_INFO;
+
+ if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ log_level = LOG_DEBUG;
+ }
+
+ do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
+ cmd->rsc_id, cmd->action, cmd->call_id);
+}
+
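+/* Resource classes with the "status" capability (e.g. LSB) implement a
+ * "status" action rather than "monitor", so map the action name accordingly.
+ */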
+static const char *
+normalize_action_name(lrmd_rsc_t * rsc, const char *action)
+{
+ if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
+ pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
+ return "status";
+ }
+ return action;
+}
+
+static lrmd_rsc_t *
+build_rsc_from_xml(xmlNode * msg)
+{
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
+ lrmd_rsc_t *rsc = NULL;
+
+ rsc = calloc(1, sizeof(lrmd_rsc_t));
+
+ crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
+
+ rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
+ rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
+ rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
+ rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
+ rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
+ rsc);
+
+ // Initialize fence device probes (to return "not running")
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+ return rsc;
+}
+
+static lrmd_cmd_t *
+create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
+{
+ int call_options = 0;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
+ lrmd_cmd_t *cmd = NULL;
+
+ cmd = calloc(1, sizeof(lrmd_cmd_t));
+
+ crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
+ cmd->call_opts = call_options;
+ cmd->client_id = strdup(client->id);
+
+ crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
+ crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
+ crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
+ crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
+ cmd->timeout_orig = cmd->timeout;
+
+ cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
+ cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
+ cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
+ cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
+
+ cmd->params = xml2list(rsc_xml);
+
+ if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
+ crm_debug("Setting flag to leave pid group on timeout and "
+ "only kill action pid for " PCMK__OP_FMT,
+ cmd->rsc_id, cmd->action, cmd->interval_ms);
+ cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
+ LOG_TRACE, "Action",
+ cmd->action, 0,
+ SVC_ACTION_LEAVE_GROUP,
+ "SVC_ACTION_LEAVE_GROUP");
+ }
+ return cmd;
+}
+
+static void
+stop_recurring_timer(lrmd_cmd_t *cmd)
+{
+ if (cmd) {
+ if (cmd->stonith_recurring_id) {
+ g_source_remove(cmd->stonith_recurring_id);
+ }
+ cmd->stonith_recurring_id = 0;
+ }
+}
+
+static void
+free_lrmd_cmd(lrmd_cmd_t * cmd)
+{
+ stop_recurring_timer(cmd);
+ if (cmd->delay_id) {
+ g_source_remove(cmd->delay_id);
+ }
+ if (cmd->params) {
+ g_hash_table_destroy(cmd->params);
+ }
+ pcmk__reset_result(&(cmd->result));
+ free(cmd->origin);
+ free(cmd->action);
+ free(cmd->real_action);
+ free(cmd->userdata_str);
+ free(cmd->rsc_id);
+ free(cmd->client_id);
+ free(cmd);
+}
+
+static gboolean
+stonith_recurring_op_helper(gpointer data)
+{
+ lrmd_cmd_t *cmd = data;
+ lrmd_rsc_t *rsc;
+
+ cmd->stonith_recurring_id = 0;
+
+ if (!cmd->rsc_id) {
+ return FALSE;
+ }
+
+ rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
+
+ CRM_ASSERT(rsc != NULL);
+ /* take it out of recurring_ops list, and put it in the pending ops
+ * to be executed */
+ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
+ rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
+#ifdef PCMK__TIME_USE_CGT
+ get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
+#endif
+ mainloop_set_trigger(rsc->work);
+
+ return FALSE;
+}
+
+static inline void
+start_recurring_timer(lrmd_cmd_t *cmd)
+{
+ if (cmd && (cmd->interval_ms > 0)) {
+ cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
+ stonith_recurring_op_helper,
+ cmd);
+ }
+}
+
+static gboolean
+start_delay_helper(gpointer data)
+{
+ lrmd_cmd_t *cmd = data;
+ lrmd_rsc_t *rsc = NULL;
+
+ cmd->delay_id = 0;
+ rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
+
+ if (rsc) {
+ mainloop_set_trigger(rsc->work);
+ }
+
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Check whether a list already contains the equivalent of a given action
+ *
+ * \param[in] action_list List to search
+ * \param[in] cmd Action to search for
+ */
+static lrmd_cmd_t *
+find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
+{
+ for (const GList *item = action_list; item != NULL; item = item->next) {
+ lrmd_cmd_t *dup = item->data;
+
+ if (action_matches(cmd, dup->action, dup->interval_ms)) {
+ return dup;
+ }
+ }
+ return NULL;
+}
+
+static bool
+merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+{
+ lrmd_cmd_t * dup = NULL;
+ bool dup_pending = true;
+
+ if (cmd->interval_ms == 0) {
+ return false;
+ }
+
+ // Search for a duplicate of this action (in-flight or not)
+ dup = find_duplicate_action(rsc->pending_ops, cmd);
+ if (dup == NULL) {
+ dup_pending = false;
+ dup = find_duplicate_action(rsc->recurring_ops, cmd);
+ if (dup == NULL) {
+ return false;
+ }
+ }
+
+ /* Do not merge fencing monitors marked for cancellation, so we can reply to
+ * the cancellation separately.
+ */
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_casei)
+ && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
+ return false;
+ }
+
+ /* This should not occur. If it does, we need to investigate how something
+ * like this is possible in the controller.
+ */
+ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
+ "), merging with previous op entry",
+ rsc->rsc_id, normalize_action_name(rsc, dup->action),
+ dup->interval_ms);
+
+ // Merge new action's call ID and user data into existing action
+ dup->first_notify_sent = false;
+ free(dup->userdata_str);
+ dup->userdata_str = cmd->userdata_str;
+ cmd->userdata_str = NULL;
+ dup->call_id = cmd->call_id;
+ free_lrmd_cmd(cmd);
+ cmd = NULL;
+
+ /* If dup is not pending, that means it has already executed at least once
+ * and is waiting in the interval. In that case, stop waiting and initiate
+ * a new instance now.
+ */
+ if (!dup_pending) {
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_casei)) {
+ stop_recurring_timer(dup);
+ stonith_recurring_op_helper(dup);
+ } else {
+ services_action_kick(rsc->rsc_id,
+ normalize_action_name(rsc, dup->action),
+ dup->interval_ms);
+ }
+ }
+ return true;
+}
+
+static void
+schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+{
+ CRM_CHECK(cmd != NULL, return);
+ CRM_CHECK(rsc != NULL, return);
+
+ crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
+
+ if (merge_recurring_duplicate(rsc, cmd)) {
+ // Equivalent of cmd has already been scheduled
+ return;
+ }
+
+ /* The controller expects the executor to automatically cancel
+ * recurring operations before a resource stops.
+ */
+ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ cancel_all_recurring(rsc, NULL);
+ }
+
+ rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
+#ifdef PCMK__TIME_USE_CGT
+ get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
+#endif
+ mainloop_set_trigger(rsc->work);
+
+ if (cmd->start_delay) {
+ cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
+ }
+}
+
+static xmlNode *
+create_lrmd_reply(const char *origin, int rc, int call_id)
+{
+ xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
+
+ crm_xml_add(reply, F_LRMD_ORIGIN, origin);
+ crm_xml_add_int(reply, F_LRMD_RC, rc);
+ crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
+ return reply;
+}
+
+static void
+send_client_notify(gpointer key, gpointer value, gpointer user_data)
+{
+ xmlNode *update_msg = user_data;
+ pcmk__client_t *client = value;
+ int rc;
+ int log_level = LOG_WARNING;
+ const char *msg = NULL;
+
+ CRM_CHECK(client != NULL, return);
+ if (client->name == NULL) {
+ crm_trace("Skipping notification to client without name");
+ return;
+ }
+ if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
+ /* We only want to notify clients of the executor IPC API. If we are
+ * running as Pacemaker Remote, we may have clients proxied to other
+ * IPC services in the cluster, so skip those.
+ */
+ crm_trace("Skipping executor API notification to client %s",
+ pcmk__client_name(client));
+ return;
+ }
+
+ rc = lrmd_server_send_notify(client, update_msg);
+ if (rc == pcmk_rc_ok) {
+ return;
+ }
+
+ switch (rc) {
+ case ENOTCONN:
+ case EPIPE: // Client exited without waiting for notification
+ log_level = LOG_INFO;
+ msg = "Disconnected";
+ break;
+
+ default:
+ msg = pcmk_rc_str(rc);
+ break;
+ }
+ do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
+ pcmk__client_name(client), msg, rc);
+}
+
+static void
+send_cmd_complete_notify(lrmd_cmd_t * cmd)
+{
+ xmlNode *notify = NULL;
+ int exec_time = 0;
+ int queue_time = 0;
+
+#ifdef PCMK__TIME_USE_CGT
+ exec_time = time_diff_ms(NULL, &(cmd->t_run));
+ queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
+#endif
+ log_finished(cmd, exec_time, queue_time);
+
+ /* If the originator requested to be notified only for changes in recurring
+ * operation results, skip the notification if the result hasn't changed.
+ */
+ if (cmd->first_notify_sent
+ && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
+ && (cmd->last_notify_rc == cmd->result.exit_status)
+ && (cmd->last_notify_op_status == cmd->result.execution_status)) {
+ return;
+ }
+
+ cmd->first_notify_sent = true;
+ cmd->last_notify_rc = cmd->result.exit_status;
+ cmd->last_notify_op_status = cmd->result.execution_status;
+
+ notify = create_xml_node(NULL, T_LRMD_NOTIFY);
+
+ crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
+ crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
+ crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
+ crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
+ crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status);
+ crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status);
+ crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
+ crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
+
+ crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
+ (long long) cmd->epoch_last_run);
+ crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
+ (long long) cmd->epoch_rcchange);
+#ifdef PCMK__TIME_USE_CGT
+ crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
+ crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
+#endif
+
+ crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
+ crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
+ if(cmd->real_action) {
+ crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
+ } else {
+ crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
+ }
+ crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
+ crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
+
+ if (cmd->result.action_stderr != NULL) {
+ crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr);
+
+ } else if (cmd->result.action_stdout != NULL) {
+ crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout);
+ }
+
+ if (cmd->params) {
+ char *key = NULL;
+ char *value = NULL;
+ GHashTableIter iter;
+
+ xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
+
+ g_hash_table_iter_init(&iter, cmd->params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ hash2smartfield((gpointer) key, (gpointer) value, args);
+ }
+ }
+ if ((cmd->client_id != NULL)
+ && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
+
+ pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
+
+ if (client != NULL) {
+ send_client_notify(client->id, client, notify);
+ }
+ } else {
+ pcmk__foreach_ipc_client(send_client_notify, notify);
+ }
+
+ free_xml(notify);
+}
+
+static void
+send_generic_notify(int rc, xmlNode * request)
+{
+ if (pcmk__ipc_client_count() != 0) {
+ int call_id = 0;
+ xmlNode *notify = NULL;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ const char *op = crm_element_value(request, F_LRMD_OPERATION);
+
+ crm_element_value_int(request, F_LRMD_CALLID, &call_id);
+
+ notify = create_xml_node(NULL, T_LRMD_NOTIFY);
+ crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
+ crm_xml_add_int(notify, F_LRMD_RC, rc);
+ crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
+ crm_xml_add(notify, F_LRMD_OPERATION, op);
+ crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
+
+ pcmk__foreach_ipc_client(send_client_notify, notify);
+
+ free_xml(notify);
+ }
+}
+
+static void
+cmd_reset(lrmd_cmd_t * cmd)
+{
+ cmd->last_pid = 0;
+#ifdef PCMK__TIME_USE_CGT
+ memset(&cmd->t_run, 0, sizeof(cmd->t_run));
+ memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
+#endif
+ cmd->epoch_last_run = 0;
+
+ pcmk__reset_result(&(cmd->result));
+ cmd->result.execution_status = PCMK_EXEC_DONE;
+}
+
+static void
+cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
+{
+ crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
+ rsc ? rsc->active : NULL, cmd);
+
+ if (rsc && (rsc->active == cmd)) {
+ rsc->active = NULL;
+ mainloop_set_trigger(rsc->work);
+ }
+
+ if (!rsc) {
+ cmd->rsc_deleted = 1;
+ }
+
+ /* reset original timeout so client notification has correct information */
+ cmd->timeout = cmd->timeout_orig;
+
+ send_cmd_complete_notify(cmd);
+
+ if ((cmd->interval_ms != 0)
+ && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
+
+ if (rsc) {
+ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
+ rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
+ }
+ free_lrmd_cmd(cmd);
+ } else if (cmd->interval_ms == 0) {
+ if (rsc) {
+ rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
+ }
+ free_lrmd_cmd(cmd);
+ } else {
+ /* Clear all the values pertaining just to the last iteration of a recurring op. */
+ cmd_reset(cmd);
+ }
+}
+
+struct notify_new_client_data {
+ xmlNode *notify;
+ pcmk__client_t *new_client;
+};
+
+static void
+notify_one_client(gpointer key, gpointer value, gpointer user_data)
+{
+ pcmk__client_t *client = value;
+ struct notify_new_client_data *data = user_data;
+
+ if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
+ send_client_notify(key, (gpointer) client, (gpointer) data->notify);
+ }
+}
+
+void
+notify_of_new_client(pcmk__client_t *new_client)
+{
+ struct notify_new_client_data data;
+
+ data.new_client = new_client;
+ data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
+ crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
+ crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
+ pcmk__foreach_ipc_client(notify_one_client, &data);
+ free_xml(data.notify);
+}
+
+void
+client_disconnect_cleanup(const char *client_id)
+{
+ GHashTableIter iter;
+ lrmd_rsc_t *rsc = NULL;
+ char *key = NULL;
+
+ g_hash_table_iter_init(&iter, rsc_list);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
+ if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
+ /* This client is disconnecting, drop any recurring operations
+ * it may have initiated on the resource */
+ cancel_all_recurring(rsc, client_id);
+ }
+ }
+}
+
+static void
+action_complete(svc_action_t * action)
+{
+ lrmd_rsc_t *rsc;
+ lrmd_cmd_t *cmd = action->cb_data;
+ enum ocf_exitcode code;
+
+#ifdef PCMK__TIME_USE_CGT
+ const char *rclass = NULL;
+ bool goagain = false;
+#endif
+
+ if (!cmd) {
+ crm_err("Completed executor action (%s) does not match any known operations",
+ action->id);
+ return;
+ }
+
+#ifdef PCMK__TIME_USE_CGT
+ if (cmd->result.exit_status != action->rc) {
+ cmd->epoch_rcchange = time(NULL);
+ }
+#endif
+
+ cmd->last_pid = action->pid;
+
+ // Cast variable instead of function return to keep compilers happy
+ code = services_result2ocf(action->standard, cmd->action, action->rc);
+ pcmk__set_result(&(cmd->result), (int) code,
+ action->status, services__exit_reason(action));
+
+ rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
+
+#ifdef PCMK__TIME_USE_CGT
+ if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
+ rclass = resources_find_service_class(rsc->type);
+ } else if(rsc) {
+ rclass = rsc->class;
+ }
+
+ if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
+ if (pcmk__result_ok(&(cmd->result))
+ && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
+ /* systemd returns from start and stop actions after the action
+ * begins, not after it completes. We have to jump through a few
+ * hoops so that we don't report 'complete' to the rest of pacemaker
+ * until it's actually done.
+ */
+ goagain = true;
+ cmd->real_action = cmd->action;
+ cmd->action = strdup("monitor");
+
+ } else if (cmd->real_action != NULL) {
+ // This is follow-up monitor to check whether start/stop completed
+ if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
+ goagain = true;
+
+ } else if (pcmk__result_ok(&(cmd->result))
+ && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
+ goagain = true;
+
+ } else {
+ int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
+ int timeout_left = cmd->timeout_orig - time_sum;
+
+ crm_debug("%s systemd %s is now complete (elapsed=%dms, "
+ "remaining=%dms): %s (%d)",
+ cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
+ services_ocf_exitcode_str(cmd->result.exit_status),
+ cmd->result.exit_status);
+ cmd_original_times(cmd);
+
+ // Monitors may return "not running", but start/stop shouldn't
+ if ((cmd->result.execution_status == PCMK_EXEC_DONE)
+ && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
+
+ if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
+ cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
+ } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
+ cmd->result.exit_status = PCMK_OCF_OK;
+ }
+ }
+ }
+ }
+ }
+#endif
+
+#if SUPPORT_NAGIOS
+ if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
+ if (action_matches(cmd, "monitor", 0)
+ && pcmk__result_ok(&(cmd->result))) {
+ /* Successfully executed --version for the nagios plugin */
+ cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
+
+ } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
+ && !pcmk__result_ok(&(cmd->result))) {
+#ifdef PCMK__TIME_USE_CGT
+ goagain = true;
+#endif
+ }
+ }
+#endif
+
+#ifdef PCMK__TIME_USE_CGT
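+    /* Retry shortly: wait roughly 10% of the original timeout (capped at 2s,
+     * and reduced if little time remains) before re-scheduling the command,
+     * until it completes or the remaining timeout is exhausted.
+     */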
+ if (goagain) {
+ int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
+ int timeout_left = cmd->timeout_orig - time_sum;
+ int delay = cmd->timeout_orig / 10;
+
+ if(delay >= timeout_left && timeout_left > 20) {
+ delay = timeout_left/2;
+ }
+
+ delay = QB_MIN(2000, delay);
+ if (delay < timeout_left) {
+ cmd->start_delay = delay;
+ cmd->timeout = timeout_left;
+
+ if (pcmk__result_ok(&(cmd->result))) {
+ crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
+ cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
+
+ } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
+ crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
+ cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
+
+ } else {
+ crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
+ cmd->rsc_id, cmd->action,
+ services_ocf_exitcode_str(cmd->result.exit_status),
+ cmd->result.exit_status, time_sum, timeout_left,
+ delay);
+ }
+
+ cmd_reset(cmd);
+ if(rsc) {
+ rsc->active = NULL;
+ }
+ schedule_lrmd_cmd(rsc, cmd);
+
+ /* Don't finalize cmd, we're not done with it yet */
+ return;
+
+ } else {
+ crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
+ cmd->rsc_id,
+ (cmd->real_action? cmd->real_action : cmd->action),
+ cmd->result.exit_status, time_sum, timeout_left);
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_TIMEOUT,
+ "Investigate reason for timeout, and adjust "
+ "configured operation timeout if necessary");
+ cmd_original_times(cmd);
+ }
+ }
+#endif
+
+ pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
+ services__grab_stderr(action));
+ cmd_finalize(cmd, rsc);
+}
+
+/*!
+ * \internal
+ * \brief Process the result of a fence device action (start, stop, or monitor)
+ *
+ * \param[in,out] cmd Fence device action that completed
+ * \param[in] exit_status Fencer API exit status for action
+ * \param[in] execution_status Fencer API execution status for action
+ * \param[in] exit_reason Human-friendly detail, if action failed
+ */
+static void
+stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
+ enum pcmk_exec_status execution_status,
+ const char *exit_reason)
+{
+ // This can be NULL if resource was removed before command completed
+ lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
+
+ // Simplify fencer exit status to uniform exit status
+ if (exit_status != CRM_EX_OK) {
+ exit_status = PCMK_OCF_UNKNOWN_ERROR;
+ }
+
+ if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
+ /* An in-flight fence action was cancelled. The execution status is
+ * already correct, so don't overwrite it.
+ */
+ execution_status = PCMK_EXEC_CANCELLED;
+
+ } else {
+ /* Some execution status codes have specific meanings for the fencer
+ * that executor clients may not expect, so map them to a simple error
+ * status.
+ */
+ switch (execution_status) {
+ case PCMK_EXEC_NOT_CONNECTED:
+ case PCMK_EXEC_INVALID:
+ execution_status = PCMK_EXEC_ERROR;
+ break;
+
+ case PCMK_EXEC_NO_FENCE_DEVICE:
+ /* This should be possible only for probes in practice, but
+ * interpret for all actions to be safe.
+ */
+ if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS,
+ pcmk__str_none)) {
+ exit_status = PCMK_OCF_NOT_RUNNING;
+
+ } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP,
+ pcmk__str_none)) {
+ exit_status = PCMK_OCF_OK;
+
+ } else {
+ exit_status = PCMK_OCF_NOT_INSTALLED;
+ }
+ execution_status = PCMK_EXEC_ERROR;
+ break;
+
+ case PCMK_EXEC_NOT_SUPPORTED:
+ exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
+
+ // Certain successful actions change the known state of the resource
+ if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
+
+ if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
+ PCMK_EXEC_DONE, NULL); // "running"
+
+ } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
+ }
+ }
+
+ /* The recurring timer should not be running at this point in any case, but
+ * as a failsafe, stop it if it is.
+ */
+ stop_recurring_timer(cmd);
+
+ /* Reschedule this command if appropriate. If a recurring command is *not*
+ * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
+ * not be removed from recurring_ops by cmd_finalize().
+ */
+ if (rsc && (cmd->interval_ms > 0)
+ && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
+ start_recurring_timer(cmd);
+ }
+
+ cmd_finalize(cmd, rsc);
+}
+
+static void
+lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
+{
+ if ((data == NULL) || (data->userdata == NULL)) {
+ crm_err("Ignoring fence action result: "
+ "Invalid callback arguments (bug?)");
+ } else {
+ stonith_action_complete((lrmd_cmd_t *) data->userdata,
+ stonith__exit_status(data),
+ stonith__execution_status(data),
+ stonith__exit_reason(data));
+ }
+}
+
+void
+stonith_connection_failed(void)
+{
+ GHashTableIter iter;
+ lrmd_rsc_t *rsc = NULL;
+
+ crm_warn("Connection to fencer lost (any pending operations for "
+ "fence devices will be considered failed)");
+
+ g_hash_table_iter_init(&iter, rsc_list);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
+ if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_none)) {
+ continue;
+ }
+
+ /* If we registered this fence device, we don't know whether the
+ * fencer still has the registration or not. Cause future probes to
+ * return an error until the resource is stopped or started
+ * successfully. This is especially important if the controller also
+ * went away (possibly due to a cluster layer restart) and won't
+ * receive our client notification of any monitors finalized below.
+ */
+ if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
+ pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
+ PCMK_EXEC_NOT_CONNECTED,
+ "Lost connection to fencer");
+ }
+
+ // Consider any active, pending, or recurring operations as failed
+
+ for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
+ lrmd_cmd_t *cmd = op->data;
+
+ /* This won't free a recurring op, but will instead restart its timer.
+ * If cmd is rsc->active, this will set rsc->active to NULL, so we
+ * don't have to worry about finalizing it a second time below.
+ */
+ stonith_action_complete(cmd,
+ CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
+ "Lost connection to fencer");
+ }
+
+ if (rsc->active != NULL) {
+ rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
+ }
+ while (rsc->pending_ops != NULL) {
+ // This will free the op and remove it from rsc->pending_ops
+ stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
+ CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
+ "Lost connection to fencer");
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Execute a stonith resource "start" action
+ *
+ * Start a stonith resource by registering it with the fencer.
+ * (Stonith agents don't have a start command.)
+ *
+ * \param[in,out] stonith_api Connection to fencer
+ * \param[in] rsc Stonith resource to start
+ * \param[in] cmd Start command to execute
+ *
+ * \return pcmk_ok on success, -errno otherwise
+ */
+static int
+execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc,
+ const lrmd_cmd_t *cmd)
+{
+ char *key = NULL;
+ char *value = NULL;
+ stonith_key_value_t *device_params = NULL;
+ int rc = pcmk_ok;
+
+ // Convert command parameters to stonith API key/values
+ if (cmd->params) {
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, cmd->params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &value)) {
+ device_params = stonith_key_value_add(device_params, key, value);
+ }
+ }
+
+ /* The fencer will automatically register devices via CIB notifications
+ * when the CIB changes, but to avoid a possible race condition between
+ * the fencer receiving the notification and the executor requesting that
+ * resource, the executor registers the device as well. The fencer knows how
+ * to handle duplicate registrations.
+ */
+ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
+ cmd->rsc_id, rsc->provider,
+ rsc->type, device_params);
+
+ stonith_key_value_freeall(device_params, 1, 1);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Execute a stonith resource "stop" action
+ *
+ * Stop a stonith resource by unregistering it from the fencer.
+ * (Stonith agents don't have a stop command.)
+ *
+ * \param[in,out] stonith_api Connection to fencer
+ * \param[in] rsc Stonith resource to stop
+ *
+ * \return pcmk_ok on success, -errno otherwise
+ */
+static inline int
+execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
+{
+ /* @TODO Failure would indicate a problem communicating with fencer;
+ * perhaps we should try reconnecting and retrying a few times?
+ */
+ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
+ rsc->rsc_id);
+}
+
+/*!
+ * \internal
+ * \brief Initiate a stonith resource agent recurring "monitor" action
+ *
+ * \param[in,out] stonith_api Connection to fencer
+ * \param[in,out] rsc Stonith resource to monitor
+ * \param[in] cmd Monitor command being executed
+ *
+ * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
+ */
+static inline int
+execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+{
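+ /* The fencer API's monitor() returns a call ID (negative on error),
+ * and register_callback() returns TRUE only if the callback could be
+ * registered for that call ID
+ */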
+ int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
+ cmd->timeout / 1000);
+
+ rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
+ "lrmd_stonith_callback",
+ lrmd_stonith_callback);
+ if (rc == TRUE) {
+ rsc->active = cmd;
+ rc = pcmk_ok;
+ } else {
+ rc = -pcmk_err_generic;
+ }
+ return rc;
+}
+
+static void
+execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+{
+ int rc = 0;
+ bool do_monitor = FALSE;
+
+ stonith_t *stonith_api = get_stonith_connection();
+
+ if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)
+ && (cmd->interval_ms == 0)) {
+ // Probes don't require a fencer connection
+ stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
+ rsc->fence_probe_result.execution_status,
+ rsc->fence_probe_result.exit_reason);
+ return;
+
+ } else if (stonith_api == NULL) {
+ stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_NOT_CONNECTED,
+ "No connection to fencer");
+ return;
+
+ } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+ rc = execd_stonith_start(stonith_api, rsc, cmd);
+ if (rc == pcmk_ok) {
+ do_monitor = TRUE;
+ }
+
+ } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+ rc = execd_stonith_stop(stonith_api, rsc);
+
+ } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+ do_monitor = TRUE;
+
+ } else {
+ stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
+ PCMK_EXEC_ERROR,
+ "Invalid fence device action (bug?)");
+ return;
+ }
+
+ if (do_monitor) {
+ rc = execd_stonith_monitor(stonith_api, rsc, cmd);
+ if (rc == pcmk_ok) {
+ // Don't clean up yet; we will find out the result of the monitor later
+ return;
+ }
+ }
+
+ stonith_action_complete(cmd,
+ ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+ stonith__legacy2status(rc),
+ ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
+}
+
+static void
+execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+{
+ svc_action_t *action = NULL;
+ GHashTable *params_copy = NULL;
+
+ CRM_ASSERT(rsc);
+ CRM_ASSERT(cmd);
+
+ crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
+ rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
+
+#if SUPPORT_NAGIOS
+ /* Recurring operations are cancelled anyway for a stop operation */
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
+ && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+
+ cmd->result.exit_status = PCMK_OCF_OK;
+ cmd_finalize(cmd, rsc);
+ return;
+ }
+#endif
+
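+ /* The services library takes ownership of the parameter table given to
+ * services__create_resource_action() (freeing it with the action), so
+ * hand it a copy rather than cmd->params itself
+ */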
+ params_copy = pcmk__str_table_dup(cmd->params);
+
+ action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
+ rsc->type,
+ normalize_action_name(rsc, cmd->action),
+ cmd->interval_ms, cmd->timeout,
+ params_copy, cmd->service_flags);
+
+ if (action == NULL) {
+ pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR, strerror(ENOMEM));
+ cmd_finalize(cmd, rsc);
+ return;
+ }
+
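+ /* An exit status other than PCMK_OCF_UNKNOWN at this point means that
+ * services__create_resource_action() could not set the action up
+ * properly, so finalize the command without executing anything
+ */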
+ if (action->rc != PCMK_OCF_UNKNOWN) {
+ pcmk__set_result(&(cmd->result), action->rc, action->status,
+ services__exit_reason(action));
+ services_action_free(action);
+ cmd_finalize(cmd, rsc);
+ return;
+ }
+
+ action->cb_data = cmd;
+
+ if (services_action_async(action, action_complete)) {
+ /* The services library has taken responsibility for the action. It
+ * could be pending, blocked, or merged into a duplicate recurring
+ * action, in which case the action callback (action_complete())
+ * will be called when the action completes, otherwise the callback has
+ * already been called.
+ *
+ * action_complete() calls cmd_finalize() which can free cmd, so cmd
+ * cannot be used here.
+ */
+ } else {
+ /* This is a recurring action that is not being cancelled and could not
+ * be initiated. It has been rescheduled, and the action callback
+ * (action_complete()) has already been called, which in turn called
+ * cmd_finalize(), which in this case should only reset (not free) cmd.
+ */
+
+ pcmk__set_result(&(cmd->result), action->rc, action->status,
+ services__exit_reason(action));
+ services_action_free(action);
+ }
+}
+
+static gboolean
+execute_resource_action(gpointer user_data)
+{
+ lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
+ lrmd_cmd_t *cmd = NULL;
+
+ CRM_CHECK(rsc != NULL, return FALSE);
+
+ if (rsc->active) {
+ crm_trace("%s is still active", rsc->rsc_id);
+ return TRUE;
+ }
+
+ if (rsc->pending_ops) {
+ GList *first = rsc->pending_ops;
+
+ cmd = first->data;
+ if (cmd->delay_id) {
+ crm_trace("Command %s %s was asked to run too early, waiting for "
+ "start_delay timeout of %dms",
+ cmd->rsc_id, cmd->action, cmd->start_delay);
+ return TRUE;
+ }
+ rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
+ g_list_free_1(first);
+
+#ifdef PCMK__TIME_USE_CGT
+ get_current_time(&(cmd->t_run), &(cmd->t_first_run));
+#endif
+ cmd->epoch_last_run = time(NULL);
+ }
+
+ if (!cmd) {
+ crm_trace("Nothing further to do for %s", rsc->rsc_id);
+ return TRUE;
+ }
+
+ rsc->active = cmd; /* only one op at a time for a rsc */
+ if (cmd->interval_ms) {
+ rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
+ }
+
+ log_execute(cmd);
+
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ execute_stonith_action(rsc, cmd);
+ } else {
+ execute_nonstonith_action(rsc, cmd);
+ }
+
+ return TRUE;
+}
+
+void
+free_rsc(gpointer data)
+{
+ GList *gIter = NULL;
+ lrmd_rsc_t *rsc = data;
+ int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_casei);
+
+ gIter = rsc->pending_ops;
+ while (gIter != NULL) {
+ GList *next = gIter->next;
+ lrmd_cmd_t *cmd = gIter->data;
+
+ /* command was never executed */
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ cmd_finalize(cmd, NULL);
+
+ gIter = next;
+ }
+ /* frees list, but not list elements. */
+ g_list_free(rsc->pending_ops);
+
+ gIter = rsc->recurring_ops;
+ while (gIter != NULL) {
+ GList *next = gIter->next;
+ lrmd_cmd_t *cmd = gIter->data;
+
+ if (is_stonith) {
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ /* If a stonith command is in-flight, just mark it as cancelled;
+ * it is not safe to finalize/free the cmd until the stonith api
+ * says it has either completed or timed out.
+ */
+ if (rsc->active != cmd) {
+ cmd_finalize(cmd, NULL);
+ }
+ /* This command has already been handed off to the service library;
+ * let the service library cancel it and tell us via the callback
+ * when it is cancelled. The rsc can be safely destroyed
+ * even while we are waiting for the cancel result */
+ services_action_cancel(rsc->rsc_id,
+ normalize_action_name(rsc, cmd->action),
+ cmd->interval_ms);
+ }
+
+ gIter = next;
+ }
+ /* frees list, but not list elements. */
+ g_list_free(rsc->recurring_ops);
+
+ free(rsc->rsc_id);
+ free(rsc->class);
+ free(rsc->provider);
+ free(rsc->type);
+ mainloop_destroy_trigger(rsc->work);
+
+ free(rsc);
+}
+
+static int
+process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
+ xmlNode **reply)
+{
+ int rc = pcmk_ok;
+ time_t now = time(NULL);
+ const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
+
+ if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
+ crm_err("Cluster API version must be greater than or equal to %s, not %s",
+ LRMD_MIN_PROTOCOL_VERSION, protocol_version);
+ rc = -EPROTO;
+ }
+
+ if (pcmk__xe_attr_is_true(request, F_LRMD_IS_IPC_PROVIDER)) {
+#ifdef PCMK__COMPILE_REMOTE
+ if ((client->remote != NULL)
+ && pcmk_is_set(client->flags,
+ pcmk__client_tls_handshake_complete)) {
+
+ // This is a remote connection from a cluster node's controller
+ ipc_proxy_add_provider(client);
+ } else {
+ rc = -EACCES;
+ }
+#else
+ rc = -EPROTONOSUPPORT;
+#endif
+ }
+
+ *reply = create_lrmd_reply(__func__, rc, call_id);
+ crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
+ crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
+ crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
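+ /* Include this executor's uptime (in seconds) in the reply, so that
+ * clients such as the controller can tell whether the executor has
+ * restarted */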
+ crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time);
+
+ return rc;
+}
+
+static int
+process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ int rc = pcmk_ok;
+ lrmd_rsc_t *rsc = build_rsc_from_xml(request);
+ lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
+
+ if (dup &&
+ pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
+ pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) &&
+ pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
+
+ crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
+ free_rsc(rsc);
+ return rc;
+ }
+
+ g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
+ crm_info("Cached agent information for '%s'", rsc->rsc_id);
+ return rc;
+}
+
+static xmlNode *
+process_lrmd_get_rsc_info(xmlNode *request, int call_id)
+{
+ int rc = pcmk_ok;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ xmlNode *reply = NULL;
+ lrmd_rsc_t *rsc = NULL;
+
+ if (rsc_id == NULL) {
+ rc = -ENODEV;
+ } else {
+ rsc = g_hash_table_lookup(rsc_list, rsc_id);
+ if (rsc == NULL) {
+ crm_info("Agent information for '%s' not in cache", rsc_id);
+ rc = -ENODEV;
+ }
+ }
+
+ reply = create_lrmd_reply(__func__, rc, call_id);
+ if (rsc) {
+ crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
+ crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
+ crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
+ crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
+ }
+ return reply;
+}
+
+static int
+process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
+ xmlNode *request)
+{
+ int rc = pcmk_ok;
+ lrmd_rsc_t *rsc = NULL;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+
+ if (!rsc_id) {
+ return -ENODEV;
+ }
+
+ rsc = g_hash_table_lookup(rsc_list, rsc_id);
+ if (rsc == NULL) {
+ crm_info("Ignoring unregistration of resource '%s', which is not registered",
+ rsc_id);
+ return pcmk_ok;
+ }
+
+ if (rsc->active) {
+ /* let the caller know there are still active ops on this rsc to watch for */
+ crm_trace("Operation (%p) still in progress for unregistered resource %s",
+ rsc->active, rsc_id);
+ rc = -EINPROGRESS;
+ }
+
+ g_hash_table_remove(rsc_list, rsc_id);
+
+ return rc;
+}
+
+static int
+process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ lrmd_rsc_t *rsc = NULL;
+ lrmd_cmd_t *cmd = NULL;
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ int call_id;
+
+ if (!rsc_id) {
+ return -EINVAL;
+ }
+ if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
+ crm_info("Resource '%s' not found (%d active resources)",
+ rsc_id, g_hash_table_size(rsc_list));
+ return -ENODEV;
+ }
+
+ cmd = create_lrmd_cmd(request, client);
+ call_id = cmd->call_id;
+
+ /* Don't reference cmd after handing it off to be scheduled.
+ * The cmd could get merged and freed. */
+ schedule_lrmd_cmd(rsc, cmd);
+
+ return call_id;
+}
+
+static int
+cancel_op(const char *rsc_id, const char *action, guint interval_ms)
+{
+ GList *gIter = NULL;
+ lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
+
+ /* How to cancel an action:
+ * 1. Check the pending ops list. If the op hasn't yet been handed off
+ * to the service library or the stonith recurring list, removing it
+ * there stops it.
+ * 2. If it isn't in the pending ops list, then it's a recurring op in
+ * either the stonith recurring list or the service library's
+ * recurring list. Stop it there.
+ * 3. If it isn't found in any list, then the operation either has
+ * already been executed (and is not recurring) or never existed.
+ */
+ if (!rsc) {
+ return -ENODEV;
+ }
+
+ for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
+ lrmd_cmd_t *cmd = gIter->data;
+
+ if (action_matches(cmd, action, interval_ms)) {
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ cmd_finalize(cmd, rsc);
+ return pcmk_ok;
+ }
+ }
+
+ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+ /* The service library does not handle stonith operations.
+ * We have to handle recurring stonith operations ourselves. */
+ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
+ lrmd_cmd_t *cmd = gIter->data;
+
+ if (action_matches(cmd, action, interval_ms)) {
+ cmd->result.execution_status = PCMK_EXEC_CANCELLED;
+ if (rsc->active != cmd) {
+ cmd_finalize(cmd, rsc);
+ }
+ return pcmk_ok;
+ }
+ }
+ } else if (services_action_cancel(rsc_id,
+ normalize_action_name(rsc, action),
+ interval_ms) == TRUE) {
+ /* The service library will tell the action_complete callback function
+ * this action was cancelled, which will destroy the cmd and remove
+ * it from the recurring_op list. Do not do that in this function
+ * if the service library says it cancelled it. */
+ return pcmk_ok;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static void
+cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
+{
+ GList *cmd_list = NULL;
+ GList *cmd_iter = NULL;
+
+ /* Create a copy of each list before concatenating them. cancel_op()
+ * may end up modifying the recurring_ops and pending_ops lists, so
+ * iterating over the originals while cancelling could corrupt our
+ * cmd_list iteration. */
+ if (rsc->recurring_ops) {
+ cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
+ }
+ if (rsc->pending_ops) {
+ cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
+ }
+ if (!cmd_list) {
+ return;
+ }
+
+ for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
+ lrmd_cmd_t *cmd = cmd_iter->data;
+
+ if (cmd->interval_ms == 0) {
+ continue;
+ }
+
+ if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
+ continue;
+ }
+
+ cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
+ }
+ /* frees only the copied list data, not the cmds */
+ g_list_free(cmd_list);
+}
+
+static int
+process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+ const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
+ guint interval_ms = 0;
+
+ crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
+
+ if (!rsc_id || !action) {
+ return -EINVAL;
+ }
+
+ return cancel_op(rsc_id, action, interval_ms);
+}
+
+static void
+add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
+{
+ xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
+
+ crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
+ for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
+ lrmd_cmd_t *cmd = item->data;
+ xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
+
+ crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
+ (cmd->real_action? cmd->real_action : cmd->action));
+ crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
+ crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
+ }
+}
+
+static xmlNode *
+process_lrmd_get_recurring(xmlNode *request, int call_id)
+{
+ int rc = pcmk_ok;
+ const char *rsc_id = NULL;
+ lrmd_rsc_t *rsc = NULL;
+ xmlNode *reply = NULL;
+ xmlNode *rsc_xml = NULL;
+
+ // Resource ID is optional
+ rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
+ if (rsc_xml) {
+ rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
+ }
+ if (rsc_xml) {
+ rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
+ }
+
+ // If resource ID is specified, resource must exist
+ if (rsc_id != NULL) {
+ rsc = g_hash_table_lookup(rsc_list, rsc_id);
+ if (rsc == NULL) {
+ crm_info("Resource '%s' not found (%d active resources)",
+ rsc_id, g_hash_table_size(rsc_list));
+ rc = -ENODEV;
+ }
+ }
+
+ reply = create_lrmd_reply(__func__, rc, call_id);
+
+ // If resource ID is not specified, check all resources
+ if (rsc_id == NULL) {
+ GHashTableIter iter;
+ char *key = NULL;
+
+ g_hash_table_iter_init(&iter, rsc_list);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
+ (gpointer *) &rsc)) {
+ add_recurring_op_xml(reply, rsc);
+ }
+ } else if (rsc) {
+ add_recurring_op_xml(reply, rsc);
+ }
+ return reply;
+}
+
+void
+process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
+{
+ int rc = pcmk_ok;
+ int call_id = 0;
+ const char *op = crm_element_value(request, F_LRMD_OPERATION);
+ int do_reply = 0;
+ int do_notify = 0;
+ xmlNode *reply = NULL;
+
+ /* Certain IPC commands may be done only by privileged users (i.e. root or
+ * hacluster), because they would otherwise provide a means of bypassing
+ * ACLs.
+ */
+ bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
+
+ crm_trace("Processing %s operation from %s", op, client->id);
+ crm_element_value_int(request, F_LRMD_CALLID, &call_id);
+
+ if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
+#ifdef PCMK__COMPILE_REMOTE
+ if (allowed) {
+ ipc_proxy_forward_client(client, request);
+ } else {
+ rc = -EACCES;
+ }
+#else
+ rc = -EPROTONOSUPPORT;
+#endif
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
+ rc = process_lrmd_signon(client, request, call_id, &reply);
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_register(client, id, request);
+ do_notify = 1;
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
+ if (allowed) {
+ reply = process_lrmd_get_rsc_info(request, call_id);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_unregister(client, id, request);
+ /* don't notify anyone about failed un-registers */
+ if (rc == pcmk_ok || rc == -EINPROGRESS) {
+ do_notify = 1;
+ }
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_exec(client, id, request);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_rsc_cancel(client, id, request);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
+ do_notify = 1;
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
+ if (allowed) {
+ xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
+
+ CRM_LOG_ASSERT(data != NULL);
+ pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
+ } else {
+ rc = -EACCES;
+ }
+ } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
+ if (allowed) {
+ rc = process_lrmd_alert_exec(client, id, request);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
+ if (allowed) {
+ reply = process_lrmd_get_recurring(request, call_id);
+ } else {
+ rc = -EACCES;
+ }
+ do_reply = 1;
+ } else {
+ rc = -EOPNOTSUPP;
+ do_reply = 1;
+ crm_err("Unknown IPC request '%s' from client %s",
+ op, pcmk__client_name(client));
+ }
+
+ if (rc == -EACCES) {
+ crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
+ op, pcmk__client_name(client));
+ }
+
+ crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
+ op, client->id, rc, do_reply, do_notify);
+
+ if (do_reply) {
+ int send_rc = pcmk_rc_ok;
+
+ if (reply == NULL) {
+ reply = create_lrmd_reply(__func__, rc, call_id);
+ }
+ send_rc = lrmd_server_send_reply(client, id, reply);
+ free_xml(reply);
+ if (send_rc != pcmk_rc_ok) {
+ crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
+ pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
+ }
+ }
+
+ if (do_notify) {
+ send_generic_notify(rc, request);
+ }
+}
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
new file mode 100644
index 0000000..83a8cd7
--- /dev/null
+++ b/daemons/execd/pacemaker-execd.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <signal.h>
+#include <sys/types.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/services.h>
+#include <crm/common/cmdline_internal.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/output_internal.h>
+#include <crm/common/remote_internal.h>
+#include <crm/lrmd_internal.h>
+
+#include "pacemaker-execd.h"
+
+#ifdef PCMK__COMPILE_REMOTE
+# define EXECD_TYPE "remote"
+# define EXECD_NAME "pacemaker-remoted"
+# define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes"
+#else
+# define EXECD_TYPE "local"
+# define EXECD_NAME "pacemaker-execd"
+# define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes"
+#endif
+
+static GMainLoop *mainloop = NULL;
+static qb_ipcs_service_t *ipcs = NULL;
+static stonith_t *stonith_api = NULL;
+int lrmd_call_id = 0;
+time_t start_time;
+
+static struct {
+ gchar **log_files;
+#ifdef PCMK__COMPILE_REMOTE
+ gchar *port;
+#endif // PCMK__COMPILE_REMOTE
+} options;
+
+#ifdef PCMK__COMPILE_REMOTE
+/* whether shutdown request has been sent */
+static gboolean shutting_down = FALSE;
+
+/* timer for waiting for acknowledgment of shutdown request */
+static guint shutdown_ack_timer = 0;
+
+static gboolean lrmd_exit(gpointer data);
+#endif
+
+static void
+stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e)
+{
+ stonith_api->state = stonith_disconnected;
+ stonith_connection_failed();
+}
+
+stonith_t *
+get_stonith_connection(void)
+{
+ if (stonith_api && stonith_api->state == stonith_disconnected) {
+ stonith_api_delete(stonith_api);
+ stonith_api = NULL;
+ }
+
+ if (stonith_api == NULL) {
+ int rc = pcmk_ok;
+
+ stonith_api = stonith_api_new();
+ if (stonith_api == NULL) {
+ crm_err("Could not connect to fencer: API memory allocation failed");
+ return NULL;
+ }
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 10 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
+ stonith_api_delete(stonith_api);
+ stonith_api = NULL;
+ } else {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ stonith_connection_destroy_cb);
+ }
+ }
+ return stonith_api;
+}
+
+static int32_t
+lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ crm_trace("Connection %p", c);
+ if (pcmk__new_client(c, uid, gid) == NULL) {
+ return -EIO;
+ }
+ return 0;
+}
+
+static void
+lrmd_ipc_created(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *new_client = pcmk__find_client(c);
+
+ crm_trace("Connection %p", c);
+ CRM_ASSERT(new_client != NULL);
+ /* Now that the connection is officially established, alert
+ * the other clients that a new connection exists. */
+
+ notify_of_new_client(new_client);
+}
+
+static int32_t
+lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+ pcmk__client_t *client = pcmk__find_client(c);
+ xmlNode *request = NULL;
+
+ CRM_CHECK(client != NULL, crm_err("Invalid client");
+ return FALSE);
+ CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
+ return FALSE);
+
+ /* Validate the client before parsing the message, because
+ * pcmk__client_data2xml() dereferences the client */
+ request = pcmk__client_data2xml(client, data, &id, &flags);
+
+ CRM_CHECK(flags & crm_ipc_client_response,
+ crm_err("Invalid client request: %p", client);
+ return FALSE);
+
+ if (!request) {
+ return 0;
+ }
+
+ if (!client->name) {
+ const char *value = crm_element_value(request, F_LRMD_CLIENTNAME);
+
+ if (value == NULL) {
+ client->name = pcmk__itoa(pcmk__client_pid(c));
+ } else {
+ client->name = strdup(value);
+ }
+ }
+
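+ /* Assign the next call ID, wrapping back to 1 if the signed counter
+ * overflows (zero and negative call IDs are not valid) */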
+ lrmd_call_id++;
+ if (lrmd_call_id < 1) {
+ lrmd_call_id = 1;
+ }
+
+ crm_xml_add(request, F_LRMD_CLIENTID, client->id);
+ crm_xml_add(request, F_LRMD_CLIENTNAME, client->name);
+ crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id);
+
+ process_lrmd_message(client, id, request);
+
+ free_xml(request);
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Free a client connection, and exit if appropriate
+ *
+ * \param[in,out] client Client connection to free
+ */
+void
+lrmd_client_destroy(pcmk__client_t *client)
+{
+ pcmk__free_client(client);
+
+#ifdef PCMK__COMPILE_REMOTE
+ /* If we were waiting to shut down, we can now safely do so
+ * if there are no more proxied IPC providers
+ */
+ if (shutting_down && (ipc_proxy_get_provider() == NULL)) {
+ lrmd_exit(NULL);
+ }
+#endif
+}
+
+static int32_t
+lrmd_ipc_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+
+ if (client == NULL) {
+ return 0;
+ }
+
+ crm_trace("Connection %p", c);
+ client_disconnect_cleanup(client->id);
+#ifdef PCMK__COMPILE_REMOTE
+ ipc_proxy_remove_provider(client);
+#endif
+ lrmd_client_destroy(client);
+ return 0;
+}
+
+static void
+lrmd_ipc_destroy(qb_ipcs_connection_t * c)
+{
+ lrmd_ipc_closed(c);
+ crm_trace("Connection %p", c);
+}
+
+static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = {
+ .connection_accept = lrmd_ipc_accept,
+ .connection_created = lrmd_ipc_created,
+ .msg_process = lrmd_ipc_dispatch,
+ .connection_closed = lrmd_ipc_closed,
+ .connection_destroyed = lrmd_ipc_destroy
+};
+
+// \return Standard Pacemaker return code
+int
+lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply)
+{
+ crm_trace("Sending reply (%d) to client (%s)", id, client->id);
+ switch (PCMK__CLIENT_TYPE(client)) {
+ case pcmk__client_ipc:
+ return pcmk__ipc_send_xml(client, id, reply, FALSE);
+#ifdef PCMK__COMPILE_REMOTE
+ case pcmk__client_tls:
+ return lrmd__remote_send_xml(client->remote, reply, id, "reply");
+#endif
+ default:
+ crm_err("Could not send reply: unknown type for client %s "
+ CRM_XS " flags=%#llx",
+ pcmk__client_name(client), client->flags);
+ }
+ return ENOTCONN;
+}
+
+// \return Standard Pacemaker return code
+int
+lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg)
+{
+ crm_trace("Sending notification to client (%s)", client->id);
+ switch (PCMK__CLIENT_TYPE(client)) {
+ case pcmk__client_ipc:
+ if (client->ipcs == NULL) {
+ crm_trace("Could not notify local client: disconnected");
+ return ENOTCONN;
+ }
+ return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event);
+#ifdef PCMK__COMPILE_REMOTE
+ case pcmk__client_tls:
+ if (client->remote == NULL) {
+ crm_trace("Could not notify remote client: disconnected");
+ return ENOTCONN;
+ } else {
+ return lrmd__remote_send_xml(client->remote, msg, 0, "notify");
+ }
+#endif
+ default:
+ crm_err("Could not notify client %s with unknown transport "
+ CRM_XS " flags=%#llx",
+ pcmk__client_name(client), client->flags);
+ }
+ return ENOTCONN;
+}
+
+/*!
+ * \internal
+ * \brief Clean up and exit immediately
+ *
+ * \param[in] data Ignored
+ *
+ * \return Doesn't return
+ * \note This can be used as a timer callback.
+ */
+static gboolean
+lrmd_exit(gpointer data)
+{
+ crm_info("Terminating with %d clients", pcmk__ipc_client_count());
+ if (stonith_api) {
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
+ stonith_api->cmds->disconnect(stonith_api);
+ stonith_api_delete(stonith_api);
+ }
+ if (ipcs) {
+ mainloop_del_ipc_server(ipcs);
+ }
+
+#ifdef PCMK__COMPILE_REMOTE
+ execd_stop_tls_server();
+ ipc_proxy_cleanup();
+#endif
+
+ pcmk__client_cleanup();
+ g_hash_table_destroy(rsc_list);
+
+ if (mainloop) {
+ lrmd_drain_alerts(mainloop);
+ }
+
+ crm_exit(CRM_EX_OK);
+ return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Request cluster shutdown if appropriate, otherwise exit immediately
+ *
+ * \param[in] nsig Signal that caused invocation (ignored)
+ */
+static void
+lrmd_shutdown(int nsig)
+{
+#ifdef PCMK__COMPILE_REMOTE
+ pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();
+
+ /* If there are active proxied IPC providers, then we may be running
+ * resources, so notify the cluster that we wish to shut down.
+ */
+ if (ipc_proxy) {
+ if (shutting_down) {
+ crm_notice("Waiting for cluster to stop resources before exiting");
+ return;
+ }
+
+ crm_info("Sending shutdown request to cluster");
+ if (ipc_proxy_shutdown_req(ipc_proxy) < 0) {
+ crm_crit("Shutdown request failed, exiting immediately");
+
+ } else {
+ /* We requested a shutdown. Now, we need to wait for an
+ * acknowledgement from the proxy host (which ensures the proxy host
+ * supports shutdown requests), then wait for all proxy hosts to
+ * disconnect (which ensures that all resources have been stopped).
+ */
+ shutting_down = TRUE;
+
+ /* Stop accepting new proxy connections */
+ execd_stop_tls_server();
+
+ /* Older controller versions will never acknowledge our request, so
+ * set a fairly short timeout to exit quickly in that case. If we
+ * get the ack, we'll defuse this timer.
+ */
+ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL);
+
+ /* Currently, we let the OS kill us if the clients don't disconnect
+ * in a reasonable time. We could instead set a long timer here
+ * (shorter than what the OS is likely to use) and exit immediately
+ * if it pops.
+ */
+ return;
+ }
+ }
+#endif
+ lrmd_exit(NULL);
+}
+
+/*!
+ * \internal
+ * \brief Defuse short exit timer if shutting down
+ */
+void
+handle_shutdown_ack(void)
+{
+#ifdef PCMK__COMPILE_REMOTE
+ if (shutting_down) {
+ crm_info("Received shutdown ack");
+ if (shutdown_ack_timer > 0) {
+ g_source_remove(shutdown_ack_timer);
+ shutdown_ack_timer = 0;
+ }
+ return;
+ }
+#endif
+ crm_debug("Ignoring unexpected shutdown ack");
+}
+
+/*!
+ * \internal
+ * \brief Make short exit timer fire immediately
+ */
+void
+handle_shutdown_nack(void)
+{
+#ifdef PCMK__COMPILE_REMOTE
+ if (shutting_down) {
+ crm_info("Received shutdown nack");
+ if (shutdown_ack_timer > 0) {
+ g_source_remove(shutdown_ack_timer);
+ shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL);
+ }
+ return;
+ }
+#endif
+ crm_debug("Ignoring unexpected shutdown nack");
+}
+
+static GOptionEntry entries[] = {
+ { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
+ &options.log_files, "Send logs to the additional named logfile", NULL },
+
+#ifdef PCMK__COMPILE_REMOTE
+
+ { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port,
+ "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL },
+#endif // PCMK__COMPILE_REMOTE
+
+ { NULL }
+};
+
+static pcmk__supported_format_t formats[] = {
+ PCMK__SUPPORTED_FORMAT_NONE,
+ PCMK__SUPPORTED_FORMAT_TEXT,
+ PCMK__SUPPORTED_FORMAT_XML,
+ { NULL, NULL, NULL }
+};
+
+static GOptionContext *
+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
+{
+ GOptionContext *context = NULL;
+
+ context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
+ pcmk__add_main_args(context, entries);
+ return context;
+}
+
+int
+main(int argc, char **argv, char **envp)
+{
+ int rc = pcmk_rc_ok;
+ crm_exit_t exit_code = CRM_EX_OK;
+
+ const char *option = NULL;
+
+ pcmk__output_t *out = NULL;
+
+ GError *error = NULL;
+
+ GOptionGroup *output_group = NULL;
+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
+#ifdef PCMK__COMPILE_REMOTE
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "lp");
+#else
+ gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
+#endif // PCMK__COMPILE_REMOTE
+ GOptionContext *context = build_arg_context(args, &output_group);
+
+#ifdef PCMK__COMPILE_REMOTE
+ // If necessary, create PID 1 now before any file descriptors are opened
+ remoted_spawn_pidone(argc, argv, envp);
+#endif
+
+ crm_log_preinit(EXECD_NAME, argc, argv);
+
+ pcmk__register_formats(output_group, formats);
+ if (!g_option_context_parse_strv(context, &processed_args, &error)) {
+ exit_code = CRM_EX_USAGE;
+ goto done;
+ }
+
+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+ if (rc != pcmk_rc_ok) {
+ exit_code = CRM_EX_ERROR;
+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+ "Error creating output format %s: %s",
+ args->output_ty, pcmk_rc_str(rc));
+ goto done;
+ }
+
+ if (args->version) {
+ out->version(out, false);
+ goto done;
+ }
+
+ // Open additional log files
+ if (options.log_files != NULL) {
+ for (gchar **fname = options.log_files; *fname != NULL; fname++) {
+ rc = pcmk__add_logfile(*fname);
+
+ if (rc != pcmk_rc_ok) {
+ out->err(out, "Logging to %s is disabled: %s",
+ *fname, pcmk_rc_str(rc));
+ }
+ }
+ }
+
+ pcmk__cli_init_logging(EXECD_NAME, args->verbosity);
+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+
+ option = pcmk__env_option(PCMK__ENV_LOGFACILITY);
+ if (!pcmk__str_eq(option, PCMK__VALUE_NONE,
+ pcmk__str_casei|pcmk__str_null_matches)
+ && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) {
+ setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */
+ }
+
+ option = pcmk__env_option(PCMK__ENV_LOGFILE);
+ if (!pcmk__str_eq(option, PCMK__VALUE_NONE,
+ pcmk__str_casei|pcmk__str_null_matches)) {
+ setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */
+
+ if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) {
+ setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */
+ }
+ }
+
+#ifdef PCMK__COMPILE_REMOTE
+ if (options.port != NULL) {
+ setenv("PCMK_remote_port", options.port, 1);
+ }
+#endif // PCMK__COMPILE_REMOTE
+
+ start_time = time(NULL);
+
+ crm_notice("Starting Pacemaker " EXECD_TYPE " executor");
+
+ /* The presence of this variable allegedly controls whether child
+ * processes like httpd will try to use systemd's sd_notify
+ * API
+ */
+ unsetenv("NOTIFY_SOCKET");
+
+ {
+ // Temporary directory for resource agent use (leave owned by root)
+ int rc = pcmk__build_path(CRM_RSCTMP_DIR, 0755);
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Could not create resource agent temporary directory "
+ CRM_RSCTMP_DIR ": %s", pcmk_rc_str(rc));
+ }
+ }
+
+ rsc_list = pcmk__strkey_table(NULL, free_rsc);
+ ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks);
+ if (ipcs == NULL) {
+ crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+
+#ifdef PCMK__COMPILE_REMOTE
+ if (lrmd_init_remote_tls_server() < 0) {
+ crm_err("Failed to create TLS listener: shutting down and staying down");
+ exit_code = CRM_EX_FATAL;
+ goto done;
+ }
+ ipc_proxy_init();
+#endif
+
+ mainloop_add_signal(SIGTERM, lrmd_shutdown);
+ mainloop = g_main_loop_new(NULL, FALSE);
+ crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections");
+ crm_notice("OCF resource agent search path is %s", OCF_RA_PATH);
+ g_main_loop_run(mainloop);
+
+ /* should never get here */
+ lrmd_exit(NULL);
+
+done:
+ g_strfreev(options.log_files);
+#ifdef PCMK__COMPILE_REMOTE
+ g_free(options.port);
+#endif // PCMK__COMPILE_REMOTE
+
+ g_strfreev(processed_args);
+ pcmk__free_arg_context(context);
+
+ pcmk__output_and_clear_error(&error, out);
+
+ if (out != NULL) {
+ out->finish(out, exit_code, true, NULL);
+ pcmk__output_free(out);
+ }
+ pcmk__unregister_formats();
+ crm_exit(exit_code);
+}
diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h
new file mode 100644
index 0000000..9c1d173
--- /dev/null
+++ b/daemons/execd/pacemaker-execd.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PACEMAKER_EXECD__H
+# define PACEMAKER_EXECD__H
+
+# include <glib.h>
+# include <crm/common/ipc_internal.h>
+# include <crm/lrmd.h>
+# include <crm/stonith-ng.h>
+
+# ifdef HAVE_GNUTLS_GNUTLS_H
+# include <gnutls/gnutls.h>
+# endif
+
+extern GHashTable *rsc_list;
+extern time_t start_time;
+
+typedef struct lrmd_rsc_s {
+ char *rsc_id;
+ char *class;
+ char *provider;
+ char *type;
+
+ int call_opts;
+
+ /* NEVER dereference this pointer; it exists only as a flag
+ * to let us know when the currently active operation has
+ * completed */
+ void *active;
+
+ /* Operations in this list
+ * have not been executed yet. */
+ GList *pending_ops;
+ /* Operations in this list are recurring operations
+ * that have been handed off from the pending ops list. */
+ GList *recurring_ops;
+
+ /* If this resource is a fence device, probes are handled internally by the
+ * executor, and this value indicates the result that should currently be
+ * returned for probes. It should be one of:
+ * PCMK_EXEC_DONE (to indicate "running"),
+ * PCMK_EXEC_NO_FENCE_DEVICE ("not running"), or
+ * PCMK_EXEC_NOT_CONNECTED ("unknown because fencer connection was lost").
+ */
+ pcmk__action_result_t fence_probe_result;
+
+ crm_trigger_t *work;
+} lrmd_rsc_t;
+
+# ifdef HAVE_GNUTLS_GNUTLS_H
+// in remoted_tls.c
+int lrmd_init_remote_tls_server(void);
+void execd_stop_tls_server(void);
+# endif
+
+int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply);
+
+int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg);
+
+void notify_of_new_client(pcmk__client_t *new_client);
+
+void process_lrmd_message(pcmk__client_t *client, uint32_t id,
+ xmlNode *request);
+
+void free_rsc(gpointer data);
+
+void handle_shutdown_ack(void);
+
+void handle_shutdown_nack(void);
+
+void lrmd_client_destroy(pcmk__client_t *client);
+
+void client_disconnect_cleanup(const char *client_id);
+
+/*!
+ * \brief Get the connection to the fencer
+ *
+ * \note Do not free the returned connection; it is taken care of
+ * after the main loop exits.
+ */
+stonith_t *get_stonith_connection(void);
+
+/*!
+ * \brief Callback to tell the executor that the fencer connection has
+ * gone away, allowing us to time out any pending fencing
+ * commands
+ */
+void stonith_connection_failed(void);
+
+#ifdef PCMK__COMPILE_REMOTE
+void ipc_proxy_init(void);
+void ipc_proxy_cleanup(void);
+void ipc_proxy_add_provider(pcmk__client_t *client);
+void ipc_proxy_remove_provider(pcmk__client_t *client);
+void ipc_proxy_forward_client(pcmk__client_t *client, xmlNode *xml);
+pcmk__client_t *ipc_proxy_get_provider(void);
+int ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy);
+void remoted_spawn_pidone(int argc, char **argv, char **envp);
+#endif
+
+int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id,
+ xmlNode *request);
+void lrmd_drain_alerts(GMainLoop *mloop);
+
+#endif // PACEMAKER_EXECD__H
diff --git a/daemons/execd/pacemaker-remoted.8.inc b/daemons/execd/pacemaker-remoted.8.inc
new file mode 100644
index 0000000..bc86acc
--- /dev/null
+++ b/daemons/execd/pacemaker-remoted.8.inc
@@ -0,0 +1,5 @@
+[synopsis]
+pacemaker-remoted [options]
+
+/for Pacemaker Remote nodes/
+.SH OPTIONS
diff --git a/daemons/execd/pacemaker_remote.in b/daemons/execd/pacemaker_remote.in
new file mode 100644
index 0000000..2096c5f
--- /dev/null
+++ b/daemons/execd/pacemaker_remote.in
@@ -0,0 +1,176 @@
+#!@BASH_PATH@
+
+# Authors:
+# Andrew Beekhof <abeekhof@redhat.com>
+#
+# License: Revised BSD
+
+# chkconfig: - 99 01
+# description: Pacemaker Cluster Manager
+# processname: pacemaker-remoted
+#
+### BEGIN INIT INFO
+# Provides: pacemaker_remote
+# Required-Start: $network $remote_fs
+# Should-Start: $syslog
+# Required-Stop: $network $remote_fs
+# Default-Start:
+# Default-Stop:
+# Short-Description: Manage the executor for Pacemaker Remote nodes
+# Description: Manage the executor for Pacemaker Remote nodes
+### END INIT INFO
+
+desc="Pacemaker Remote Executor"
+prog="pacemaker-remoted"
+
+# set secure PATH
+PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@"
+
+checkrc() {
+ if [ $? = 0 ]; then
+ success
+ else
+ failure
+ fi
+}
+
+success()
+{
+ echo -ne "[ OK ]\r"
+}
+
+failure()
+{
+ echo -ne "[FAILED]\r"
+}
+
+status()
+{
+ pid=$(pidof $1 2>/dev/null)
+ local rtrn=$?
+ if [ $rtrn -ne 0 ]; then
+ echo "$1 is stopped"
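+ # Use LSB status exit codes: 1 = dead but PID file exists, 3 = stopped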
+ if [ -f "@localstatedir@/run/$prog.pid" ]; then
+ rtrn=1
+ else
+ rtrn=3
+ fi
+ else
+ echo "$1 (pid $pid) is running..."
+ fi
+ return $rtrn
+}
+
+if [ -d @CONFIGDIR@ ]; then
+ [ -f @INITDIR@/functions ] && . @INITDIR@/functions
+set -a
+ [ -f @CONFIGDIR@/pacemaker ] && . @CONFIGDIR@/pacemaker
+ [ -f @CONFIGDIR@/sbd ] && . @CONFIGDIR@/sbd
+set +a
+fi
+
+LOCK_DIR="."
+if [ -d "@localstatedir@/lock/subsys" ]; then
+ LOCK_DIR="@localstatedir@/lock/subsys"
+elif [ -d "@localstatedir@/lock" ]; then
+ LOCK_DIR="@localstatedir@/lock"
+fi
+[ -z "$LOCK_FILE" ] && LOCK_FILE="$LOCK_DIR/pacemaker_remote"
+
+# Check if there is a valid watchdog-device configured in sbd config
+if [ x != "x$SBD_WATCHDOG_DEV" -a "/dev/null" != "$SBD_WATCHDOG_DEV" -a -c "$SBD_WATCHDOG_DEV" ]; then
+ # enhance for unavailable chkconfig - don't touch sbd for now
+ if chkconfig --list sbd_remote_helper 2>/dev/null | grep -q ":on"; then
+ SBD_SERVICE=sbd_remote_helper
+ fi
+fi
+
+start()
+{
+ echo -n "Starting $desc: "
+
+ # Most recent distributions use tmpfs for @localstatedir@/run,
+ # so that it does not need to be cleaned up on every boot.
+ # They also assume that init scripts will create any
+ # subdirectories required for proper operation.
+ mkdir -p "@localstatedir@/run"
+
+ if status $prog > /dev/null 2>&1; then
+ success
+ else
+ $prog > /dev/null 2>&1 &
+
+ # Allow time for the daemon to start up (or fail trying)
+ sleep 5
+
+ if status $prog > /dev/null 2>&1; then
+ touch "$LOCK_FILE"
+ pidof $prog > "@localstatedir@/run/$prog.pid"
+ success
+ else
+ failure
+ rtrn=1
+ fi
+ fi
+ echo
+
+ [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE start
+}
+
+stop()
+{
+ if status $prog > /dev/null 2>&1; then
+ echo -n "Signaling $desc to terminate: "
+ kill -TERM $(pidof $prog) > /dev/null 2>&1
+ success
+ echo
+
+ echo -n "Waiting for $desc to unload:"
+ while status $prog > /dev/null 2>&1; do
+ sleep 1
+ echo -n "."
+ done
+ else
+ echo -n "$desc is already stopped"
+ fi
+
+ rm -f "$LOCK_FILE"
+ rm -f "@localstatedir@/run/$prog.pid"
+ success
+ echo
+
+ [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE stop
+}
+
+rtrn=0
+
+case "$1" in
+start)
+ start
+;;
+restart|reload|force-reload)
+ stop
+ start
+;;
+condrestart|try-restart)
+ if status $prog > /dev/null 2>&1; then
+ stop
+ start
+ rtrn=$?
+ fi
+;;
+status)
+ status $prog
+ rtrn=$?
+;;
+stop)
+ stop
+ rtrn=$?
+;;
+*)
+ echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}"
+ rtrn=2
+;;
+esac
+
+exit $rtrn
diff --git a/daemons/execd/pacemaker_remote.service.in b/daemons/execd/pacemaker_remote.service.in
new file mode 100644
index 0000000..1e48d14
--- /dev/null
+++ b/daemons/execd/pacemaker_remote.service.in
@@ -0,0 +1,52 @@
+[Unit]
+Description=Pacemaker Remote executor daemon
+Documentation=man:pacemaker-remoted
+Documentation=https://clusterlabs.org/pacemaker/doc/
+
+# See main pacemaker unit file for descriptions of why these are needed
+After=network.target
+After=time-sync.target
+After=dbus.service
+Wants=dbus.service
+After=resource-agents-deps.target
+Wants=resource-agents-deps.target
+After=syslog.service
+After=rsyslog.service
+
+[Install]
+Alias=pacemaker-remote.service
+WantedBy=multi-user.target
+
+[Service]
+Type=simple
+KillMode=process
+NotifyAccess=none
+EnvironmentFile=-@CONFIGDIR@/pacemaker
+EnvironmentFile=-@CONFIGDIR@/sbd
+
+# Not actually success, but fatal failure -- this ensures no respawn
+SuccessExitStatus=100
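+# (100 corresponds to the daemon's CRM_EX_FATAL exit status, used when
+# respawning would not help)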
+
+ExecStart=@sbindir@/pacemaker-remoted
+
+# Systemd v227 and above can limit the number of processes spawned by a
+# service. That is a bad idea for an HA cluster resource manager, so disable it
+# by default. The administrator can create a local override if they really want
+# a limit. If your systemd version does not support TasksMax, and you want to
+# get rid of the resulting log warnings, comment out this option.
+TasksMax=infinity
+
+# When connected to the cluster and functioning properly, the service will
+# wait to exit until the cluster notifies it that all resources on the remote
+# node have been stopped. The default of 30min should cover most typical
+# cluster configurations, but it may need an increase to adapt to local
+# conditions (e.g. a large, clustered database could conceivably take longer
+# to stop).
+TimeoutStopSec=30min
+TimeoutStartSec=30s
+
+# Restart options include: no, on-success, on-failure, on-abort or always
+Restart=on-failure
+
+# crm_perror() writes directly to stderr, so ignore it here
+# to avoid double-logging with the wrong format
+StandardError=null
diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c
new file mode 100644
index 0000000..4f914eb
--- /dev/null
+++ b/daemons/execd/remoted_pidone.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2017-2020 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <crm/crm.h>
+#include "pacemaker-execd.h"
+
+static pid_t main_pid = 0;
+
+static void
+sigdone(void)
+{
+ exit(CRM_EX_OK);
+}
+
+static void
+sigreap(void)
+{
+ pid_t pid = 0;
+ int status;
+
+ do {
+ /*
+ * Opinions seem to differ as to what to put here:
+ * -1, any child process
+ * 0, any child process whose process group ID is equal to that of the calling process
+ */
+ pid = waitpid(-1, &status, WNOHANG);
+ if (pid == main_pid) {
+ /* Exit when pacemaker-remote exits and use the same return code */
+ if (WIFEXITED(status)) {
+ exit(WEXITSTATUS(status));
+ }
+ exit(CRM_EX_ERROR);
+ }
+ } while (pid > 0);
+}
+
+static struct {
+ int sig;
+ void (*handler)(void);
+} sigmap[] = {
+ { SIGCHLD, sigreap },
+ { SIGINT, sigdone },
+};
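+
+/* Any other signal received while acting as PID 1 is consumed by sigwait()
+ * in remoted_spawn_pidone() but matches no entry above, so it is ignored
+ */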
+
+/*!
+ * \internal
+ * \brief Check a line of text for a valid environment variable name
+ *
+ * \param[in] line Text to check
+ * \param[out] first First character of valid name if found, NULL otherwise
+ * \param[out] last Last character of valid name if found, NULL otherwise
+ *
+ * \return TRUE if valid name found, FALSE otherwise
+ * \note It's reasonable to impose limitations on environment variable names
+ * beyond what C or setenv() does: We only allow names that contain only
+ * [a-zA-Z0-9_] characters and do not start with a digit.
+ */
+static bool
+find_env_var_name(char *line, char **first, char **last)
+{
+ // Skip leading whitespace
+ *first = line;
+ while (isspace(**first)) {
+ ++*first;
+ }
+
+ if (isalpha(**first) || (**first == '_')) { // Valid first character
+ *last = *first;
+ while (isalnum(*(*last + 1)) || (*(*last + 1) == '_')) {
+ ++*last;
+ }
+ return TRUE;
+ }
+
+ *first = *last = NULL;
+ return FALSE;
+}
+
+static void
+load_env_vars(const char *filename)
+{
+ /* We haven't forked or initialized logging yet, so don't leave any file
+ * descriptors open, and don't log -- silently ignore errors.
+ */
+ FILE *fp = fopen(filename, "r");
+
+ if (fp != NULL) {
+ char line[LINE_MAX] = { '\0', };
+
+ while (fgets(line, LINE_MAX, fp) != NULL) {
+ char *name = NULL;
+ char *end = NULL;
+ char *value = NULL;
+ char *quote = NULL;
+
+ // Look for valid name immediately followed by equals sign
+ if (find_env_var_name(line, &name, &end) && (*++end == '=')) {
+
+ // Null-terminate name, and advance beyond equals sign
+ *end++ = '\0';
+
+ // Check whether value is quoted
+ if ((*end == '\'') || (*end == '"')) {
+ quote = end++;
+ }
+ value = end;
+
+ if (quote) {
+ /* Value is remaining characters up to next non-backslashed
+ * matching quote character.
+ */
+ while (((*end != *quote) || (*(end - 1) == '\\'))
+ && (*end != '\0')) {
+ end++;
+ }
+ if (*end == *quote) {
+ // Null-terminate value, and advance beyond close quote
+ *end++ = '\0';
+ } else {
+ // Matching closing quote wasn't found
+ value = NULL;
+ }
+
+ } else {
+ /* Value is remaining characters up to next non-backslashed
+ * whitespace.
+ */
+ while ((!isspace(*end) || (*(end - 1) == '\\'))
+ && (*end != '\0')) {
+ ++end;
+ }
+
+ if (end == (line + LINE_MAX - 1)) {
+ // Line was too long
+ value = NULL;
+ }
+ // Do NOT null-terminate value (yet)
+ }
+
+ /* We have a valid name and value, and end is now the character
+ * after the closing quote or the first whitespace after the
+ * unquoted value. Make sure the rest of the line is just
+ * whitespace or a comment.
+ */
+ if (value) {
+ char *value_end = end;
+
+ while (isspace(*end) && (*end != '\n')) {
+ ++end;
+ }
+ if ((*end == '\n') || (*end == '#')) {
+ if (quote == NULL) {
+ // Now we can null-terminate an unquoted value
+ *value_end = '\0';
+ }
+
+ // Don't overwrite (bundle options take precedence)
+ setenv(name, value, 0);
+
+ } else {
+ value = NULL;
+ }
+ }
+ }
+
+ if ((value == NULL) && (strchr(line, '\n') == NULL)) {
+ // Eat remainder of line beyond LINE_MAX
+ if (fscanf(fp, "%*[^\n]\n") == EOF) {
+ value = NULL; // Don't care, make compiler happy
+ }
+ }
+ }
+ fclose(fp);
+ }
+}
+
+void
+remoted_spawn_pidone(int argc, char **argv, char **envp)
+{
+ sigset_t set;
+
+ /* This environment variable exists for two purposes:
+ * - For testing, setting it to "full" enables full PID 1 behavior even
+ * when PID is not 1
+ * - Setting to "vars" enables just the loading of environment variables
+ * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or
+ * containers with a custom PID 1 script that launches pacemaker-remoted.
+ */
+ const char *pid1 = (getpid() == 1)? "full" : getenv("PCMK_remote_pid1");
+
+ if (pid1 == NULL) {
+ return;
+ }
+
+ /* When a container is launched, it may be given specific environment
+ * variables, which for Pacemaker bundles are given in the bundle
+ * configuration. However, that does not allow for host-specific values.
+ * To allow for that, look for a special file containing a shell-like syntax
+ * of name/value pairs, and export those into the environment.
+ */
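+ /* For example, the file might contain (hypothetical values):
+ *
+ * PCMK_debug=yes
+ * PCMK_logfile="/var/log/pcmk-init.log"
+ */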
+ load_env_vars("/etc/pacemaker/pcmk-init.env");
+
+ if (strcmp(pid1, "full")) {
+ return;
+ }
+
+ /* Containers can be expected to have /var/log, but they may not have
+ * /var/log/pacemaker, so use a different default if no value has been
+ * explicitly configured in the container's environment.
+ */
+ if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) {
+ pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log");
+ }
+
+ sigfillset(&set);
+ sigprocmask(SIG_BLOCK, &set, 0);
+
+ main_pid = fork();
+ switch (main_pid) {
+ case 0:
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+ setsid();
+ setpgid(0, 0);
+
+ // Child remains as pacemaker-remoted
+ return;
+ case -1:
+ perror("fork");
+ }
+
+ /* Parent becomes the reaper of zombie processes */
+ /* Safe to initialize logging now if needed */
+
+# ifdef HAVE_PROGNAME
+ /* Differentiate ourselves in the 'ps' output */
+ {
+ char *p;
+ int i, maxlen;
+ char *LastArgv = NULL;
+ const char *name = "pcmk-init";
+
+ for (i = 0; i < argc; i++) {
+ if (!i || (LastArgv + 1 == argv[i]))
+ LastArgv = argv[i] + strlen(argv[i]);
+ }
+
+ for (i = 0; envp[i] != NULL; i++) {
+ if ((LastArgv + 1) == envp[i]) {
+ LastArgv = envp[i] + strlen(envp[i]);
+ }
+ }
+
+ maxlen = (LastArgv - argv[0]) - 2;
+
+ i = strlen(name);
+
+ /* We can overwrite individual argv[] arguments */
+ snprintf(argv[0], maxlen, "%s", name);
+
+ /* Now zero out everything else */
+ p = &argv[0][i];
+ while (p < LastArgv) {
+ *p++ = '\0';
+ }
+ argv[1] = NULL;
+ }
+# endif // HAVE_PROGNAME
+
+ while (1) {
+ int sig;
+ size_t i;
+
+ sigwait(&set, &sig);
+ for (i = 0; i < PCMK__NELEM(sigmap); i++) {
+ if (sigmap[i].sig == sig) {
+ sigmap[i].handler();
+ break;
+ }
+ }
+ }
+}
diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c
new file mode 100644
index 0000000..62c8c3a
--- /dev/null
+++ b/daemons/execd/remoted_proxy.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2012-2022 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <unistd.h>
+
+#include "pacemaker-execd.h"
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/services.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/cib/internal.h>
+#include <crm/fencing/internal.h>
+
+static qb_ipcs_service_t *cib_ro = NULL;
+static qb_ipcs_service_t *cib_rw = NULL;
+static qb_ipcs_service_t *cib_shm = NULL;
+
+static qb_ipcs_service_t *attrd_ipcs = NULL;
+static qb_ipcs_service_t *crmd_ipcs = NULL;
+static qb_ipcs_service_t *stonith_ipcs = NULL;
+static qb_ipcs_service_t *pacemakerd_ipcs = NULL;
+
+// An IPC provider is a cluster node controller connecting as a client
+static GList *ipc_providers = NULL;
+// An IPC client is a local command (such as cibadmin or crm_resource) connecting through this proxy
+static GHashTable *ipc_clients = NULL;
+
+/*!
+ * \internal
+ * \brief Get an IPC proxy provider
+ *
+ * \return Pointer to a provider if one exists, NULL otherwise
+ *
+ * \note Grab the first provider, which is the most recent connection. That way,
+ * if we haven't yet timed out an old, failed connection, we don't try to
+ * use it.
+ */
+pcmk__client_t *
+ipc_proxy_get_provider(void)
+{
+ return ipc_providers? (pcmk__client_t *) (ipc_providers->data) : NULL;
+}
+
+/*!
+ * \internal
+ * \brief Accept a client connection on a proxy IPC server
+ *
+ * \param[in] c Client's IPC connection
+ * \param[in] uid Client's user ID
+ * \param[in] gid Client's group ID
+ * \param[in] ipc_channel Name of IPC server to proxy
+ *
+ * \return pcmk_ok on success, -errno on error
+ */
+static int32_t
+ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel)
+{
+ pcmk__client_t *client;
+ pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();
+ xmlNode *msg;
+
+ if (ipc_proxy == NULL) {
+ crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s "
+ "because not connected to cluster", uid, gid, ipc_channel);
+ return -EREMOTEIO;
+ }
+
+ /* This new client is a local IPC client on a Pacemaker Remote controlled
+ * node, needing to access cluster node IPC services.
+ */
+ client = pcmk__new_client(c, uid, gid);
+ if (client == NULL) {
+ return -EREMOTEIO;
+ }
+
+    /* This IPC client is bound to a single IPC provider. If the provider
+     * goes away, this client is disconnected.
+     */
+ client->userdata = strdup(ipc_proxy->id);
+ client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id);
+
+ /* Allow remote executor to distinguish between proxied local clients and
+ * actual executor API clients
+ */
+ pcmk__set_client_flags(client, pcmk__client_to_proxy);
+
+ g_hash_table_insert(ipc_clients, client->id, client);
+
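+    /* Notify the provider of the new session. Given the T_LRMD_IPC_PROXY and
+     * F_LRMD_* constant values in lrmd.h, the notification should look
+     * roughly like this (illustrative; shown for the "stonith-ng" channel):
+     *
+     *   <lrmd_ipc_proxy lrmd_ipc_op="new" lrmd_ipc_server="stonith-ng"
+     *                   lrmd_ipc_session="..."/>
+     */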
+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_NEW);
+ crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(msg);
+ crm_debug("Accepted IPC proxy connection (session ID %s) "
+ "from uid %d gid %d on channel %s",
+ client->id, uid, gid, ipc_channel);
+ return 0;
+}
+
+static int32_t
+crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD);
+}
+
+static int32_t
+attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, T_ATTRD);
+}
+
+static int32_t
+stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, "stonith-ng");
+}
+
+static int32_t
+pacemakerd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return -EREMOTEIO;
+}
+
+static int32_t
+cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RW);
+}
+
+static int32_t
+cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
+{
+ return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RO);
+}
+
+void
+ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml)
+{
+ const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION);
+ const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP);
+ xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG);
+ pcmk__client_t *ipc_client;
+ int rc = pcmk_rc_ok;
+
+ /* If the IPC provider is acknowledging our shutdown request,
+ * defuse the short exit timer to give the cluster time to
+ * stop any resources we're running.
+ */
+ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK, pcmk__str_casei)) {
+ handle_shutdown_ack();
+ return;
+ }
+
+ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK, pcmk__str_casei)) {
+ handle_shutdown_nack();
+ return;
+ }
+
+ ipc_client = pcmk__find_client_by_id(session);
+ if (ipc_client == NULL) {
+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, session);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(msg);
+ return;
+ }
+
+    /* This is an event or response from the IPC provider, going to the local
+     * IPC client.
+     *
+     * The chain of events looks like this:
+     *
+     * -----remote node----------------|---- cluster node ------
+     * ipc_client <--1--> this code
+     *     <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event()
+     *         <--3--> ipc server
+     *
+     * This function receives a message from connection 2 and forwards it to
+     * connection 1.
+     */
+
+ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_EVENT, pcmk__str_casei)) {
+ crm_trace("Sending event to %s", ipc_client->id);
+ rc = pcmk__ipc_send_xml(ipc_client, 0, msg, crm_ipc_server_event);
+
+ } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_RESPONSE, pcmk__str_casei)) {
+ int msg_id = 0;
+
+ crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id);
+ crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id);
+ rc = pcmk__ipc_send_xml(ipc_client, msg_id, msg, FALSE);
+
+ CRM_LOG_ASSERT(msg_id == ipc_client->request_id);
+ ipc_client->request_id = 0;
+
+ } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_DESTROY, pcmk__str_casei)) {
+ qb_ipcs_disconnect(ipc_client->ipcs);
+
+ } else {
+ crm_err("Unknown ipc proxy msg type %s" , msg_type);
+ }
+
+ if (rc != pcmk_rc_ok) {
+ crm_warn("Could not proxy IPC to client %s: %s " CRM_XS " rc=%d",
+ ipc_client->id, pcmk_rc_str(rc), rc);
+ }
+}
+
+static int32_t
+ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
+{
+ uint32_t id = 0;
+ uint32_t flags = 0;
+    pcmk__client_t *client = pcmk__find_client(c);
+    pcmk__client_t *ipc_proxy = NULL;
+    xmlNode *request = NULL;
+    xmlNode *msg = NULL;
+
+    // Validate the client before dereferencing it
+    CRM_CHECK(client != NULL, crm_err("Invalid client"); return 0);
+    CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
+              return 0);
+
+    ipc_proxy = pcmk__find_client_by_id(client->userdata);
+    if (ipc_proxy == NULL) {
+        qb_ipcs_disconnect(client->ipcs);
+        return 0;
+    }
+
+    /* This is a request from the local IPC client, going to the IPC
+     * provider.
+     *
+     * The chain of events looks like this:
+     *
+     * -----remote node----------------|---- cluster node ------
+     * ipc_client <--1--> this code
+     *     <--2--> pacemaker-controld:remote_proxy_dispatch_internal()
+     *         <--3--> ipc server
+     *
+     * This function receives a request from connection 1 and forwards it to
+     * connection 2.
+     */
+ request = pcmk__client_data2xml(client, data, &id, &flags);
+
+ if (!request) {
+ return 0;
+ }
+
+    /* This ensures that synchronous request/response pairs are handled over
+     * the event channel in the controller, allowing the controller to
+     * process the messages asynchronously.
+     */
+ pcmk__set_ipc_flags(flags, pcmk__client_name(client), crm_ipc_proxied);
+ client->request_id = id;
+
+ msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
+ crm_xml_add(msg, F_LRMD_IPC_CLIENT, pcmk__client_name(client));
+ crm_xml_add(msg, F_LRMD_IPC_USER, client->user);
+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id);
+ crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags);
+ add_message_xml(msg, F_LRMD_IPC_MSG, request);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(request);
+ free_xml(msg);
+
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Notify a proxy provider that we wish to shut down
+ *
+ * \param[in,out] ipc_proxy IPC client connection to proxy provider
+ *
+ * \return 0 on success, -1 on error
+ */
+int
+ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy)
+{
+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ int rc;
+
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ);
+
+ /* We don't really have a session, but the controller needs this attribute
+ * to recognize this as proxy communication.
+ */
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, "0");
+
+ rc = (lrmd_server_send_notify(ipc_proxy, msg) != pcmk_rc_ok)? -1 : 0;
+ free_xml(msg);
+ return rc;
+}
+
+static int32_t
+ipc_proxy_closed(qb_ipcs_connection_t * c)
+{
+ pcmk__client_t *client = pcmk__find_client(c);
+ pcmk__client_t *ipc_proxy;
+
+ if (client == NULL) {
+ return 0;
+ }
+
+ ipc_proxy = pcmk__find_client_by_id(client->userdata);
+
+ crm_trace("Connection %p", c);
+
+ if (ipc_proxy) {
+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
+ crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
+ lrmd_server_send_notify(ipc_proxy, msg);
+ free_xml(msg);
+ }
+
+ g_hash_table_remove(ipc_clients, client->id);
+
+ free(client->userdata);
+ client->userdata = NULL;
+ pcmk__free_client(client);
+ return 0;
+}
+
+static void
+ipc_proxy_destroy(qb_ipcs_connection_t * c)
+{
+ crm_trace("Connection %p", c);
+ ipc_proxy_closed(c);
+}
+
+static struct qb_ipcs_service_handlers crmd_proxy_callbacks = {
+ .connection_accept = crmd_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers attrd_proxy_callbacks = {
+ .connection_accept = attrd_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers stonith_proxy_callbacks = {
+ .connection_accept = stonith_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers pacemakerd_proxy_callbacks = {
+ .connection_accept = pacemakerd_proxy_accept,
+ .connection_created = NULL,
+ .msg_process = NULL,
+ .connection_closed = NULL,
+ .connection_destroyed = NULL
+};
+
+static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = {
+ .connection_accept = cib_proxy_accept_ro,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = {
+ .connection_accept = cib_proxy_accept_rw,
+ .connection_created = NULL,
+ .msg_process = ipc_proxy_dispatch,
+ .connection_closed = ipc_proxy_closed,
+ .connection_destroyed = ipc_proxy_destroy
+};
+
+void
+ipc_proxy_add_provider(pcmk__client_t *ipc_proxy)
+{
+ // Prepending ensures the most recent connection is always first
+ ipc_providers = g_list_prepend(ipc_providers, ipc_proxy);
+}
+
+void
+ipc_proxy_remove_provider(pcmk__client_t *ipc_proxy)
+{
+ GHashTableIter iter;
+ pcmk__client_t *ipc_client = NULL;
+ char *key = NULL;
+ GList *remove_these = NULL;
+ GList *gIter = NULL;
+
+ ipc_providers = g_list_remove(ipc_providers, ipc_proxy);
+
+ g_hash_table_iter_init(&iter, ipc_clients);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) {
+        const char *proxy_id = ipc_client->userdata;
+
+        if (pcmk__str_eq(proxy_id, ipc_proxy->id, pcmk__str_casei)) {
+            crm_info("IPC proxy connection for client %s (PID %d) destroyed "
+                     "because cluster node disconnected",
+                     ipc_client->id, ipc_client->pid);
+            /* We can't remove hash table entries while iterating, so copy
+             * them to a separate list to destroy afterward.
+             */
+            remove_these = g_list_append(remove_these, ipc_client);
+        }
+ }
+
+ for (gIter = remove_these; gIter != NULL; gIter = gIter->next) {
+ ipc_client = gIter->data;
+
+ // Disconnection callback will free the client here
+ qb_ipcs_disconnect(ipc_client->ipcs);
+ }
+
+    // Free only the list itself; the disconnect callbacks free the clients
+ g_list_free(remove_these);
+}
+
+void
+ipc_proxy_init(void)
+{
+ ipc_clients = pcmk__strkey_table(NULL, NULL);
+
+ pcmk__serve_based_ipc(&cib_ro, &cib_rw, &cib_shm, &cib_proxy_callbacks_ro,
+ &cib_proxy_callbacks_rw);
+ pcmk__serve_attrd_ipc(&attrd_ipcs, &attrd_proxy_callbacks);
+ pcmk__serve_fenced_ipc(&stonith_ipcs, &stonith_proxy_callbacks);
+ pcmk__serve_pacemakerd_ipc(&pacemakerd_ipcs, &pacemakerd_proxy_callbacks);
+ crmd_ipcs = pcmk__serve_controld_ipc(&crmd_proxy_callbacks);
+ if (crmd_ipcs == NULL) {
+ crm_err("Failed to create controller: exiting and inhibiting respawn");
+ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled");
+ crm_exit(CRM_EX_FATAL);
+ }
+}
+
+void
+ipc_proxy_cleanup(void)
+{
+ if (ipc_providers) {
+ g_list_free(ipc_providers);
+ ipc_providers = NULL;
+ }
+ if (ipc_clients) {
+ g_hash_table_destroy(ipc_clients);
+ ipc_clients = NULL;
+ }
+ pcmk__stop_based_ipc(cib_ro, cib_rw, cib_shm);
+ qb_ipcs_destroy(attrd_ipcs);
+ qb_ipcs_destroy(stonith_ipcs);
+ qb_ipcs_destroy(pacemakerd_ipcs);
+ qb_ipcs_destroy(crmd_ipcs);
+ cib_ro = NULL;
+ cib_rw = NULL;
+ cib_shm = NULL;
+}
diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c
new file mode 100644
index 0000000..c65e3f3
--- /dev/null
+++ b/daemons/execd/remoted_tls.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright 2012-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <glib.h>
+#include <unistd.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/mainloop.h>
+#include <crm/common/remote_internal.h>
+#include <crm/lrmd_internal.h>
+
+#include <netdb.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+
+#include "pacemaker-execd.h"
+
+#ifdef HAVE_GNUTLS_GNUTLS_H
+
+# include <gnutls/gnutls.h>
+
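+// Timeout (in milliseconds) for a remote client to complete the TLS handshake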
+# define LRMD_REMOTE_AUTH_TIMEOUT 10000
+gnutls_psk_server_credentials_t psk_cred_s;
+gnutls_dh_params_t dh_params;
+static int ssock = -1;
+extern int lrmd_call_id;
+
+static void
+debug_log(int level, const char *str)
+{
+ fputs(str, stderr);
+}
+
+/*!
+ * \internal
+ * \brief Read (more) TLS handshake data from client
+ *
+ * \param[in,out] client IPC client doing handshake
+ *
+ * \return 0 on success or more data needed, -1 on error
+ */
+static int
+remoted__read_handshake_data(pcmk__client_t *client)
+{
+ int rc = pcmk__read_handshake_data(client);
+
+ if (rc == EAGAIN) {
+ /* No more data is available at the moment. Just return for now;
+ * we'll get invoked again once the client sends more.
+ */
+ return 0;
+ } else if (rc != pcmk_rc_ok) {
+ return -1;
+ }
+
+ if (client->remote->auth_timeout) {
+ g_source_remove(client->remote->auth_timeout);
+ }
+ client->remote->auth_timeout = 0;
+
+ pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete);
+ crm_notice("Remote client connection accepted");
+
+ /* Only a client with access to the TLS key can connect, so we can treat
+ * it as privileged.
+ */
+ pcmk__set_client_flags(client, pcmk__client_privileged);
+
+ // Alert other clients of the new connection
+ notify_of_new_client(client);
+ return 0;
+}
+
+static int
+lrmd_remote_client_msg(gpointer data)
+{
+ int id = 0;
+ int rc;
+ xmlNode *request = NULL;
+ pcmk__client_t *client = data;
+
+ if (!pcmk_is_set(client->flags,
+ pcmk__client_tls_handshake_complete)) {
+ return remoted__read_handshake_data(client);
+ }
+
+ switch (pcmk__remote_ready(client->remote, 0)) {
+ case pcmk_rc_ok:
+ break;
+ case ETIME: // No message available to read
+ return 0;
+ default: // Error
+ crm_info("Remote client disconnected while polling it");
+ return -1;
+ }
+
+ rc = pcmk__read_remote_message(client->remote, -1);
+
+ request = pcmk__remote_message_xml(client->remote);
+ while (request) {
+ crm_element_value_int(request, F_LRMD_REMOTE_MSG_ID, &id);
+ crm_trace("Processing remote client request %d", id);
+ if (!client->name) {
+ const char *value = crm_element_value(request, F_LRMD_CLIENTNAME);
+
+ if (value) {
+ client->name = strdup(value);
+ }
+ }
+
+        lrmd_call_id++;
+        if (lrmd_call_id < 1) {
+            // The call ID wrapped around; call IDs must remain positive
+            lrmd_call_id = 1;
+        }
+
+ crm_xml_add(request, F_LRMD_CLIENTID, client->id);
+ crm_xml_add(request, F_LRMD_CLIENTNAME, client->name);
+ crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id);
+
+ process_lrmd_message(client, id, request);
+ free_xml(request);
+
+ /* process all the messages in the current buffer */
+ request = pcmk__remote_message_xml(client->remote);
+ }
+
+ if (rc == ENOTCONN) {
+ crm_info("Remote client disconnected while reading from it");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+lrmd_remote_client_destroy(gpointer user_data)
+{
+ pcmk__client_t *client = user_data;
+
+ if (client == NULL) {
+ return;
+ }
+
+ crm_notice("Cleaning up after remote client %s disconnected",
+ pcmk__client_name(client));
+
+ ipc_proxy_remove_provider(client);
+
+    /* If this is the last remote connection, stop all recurring operations */
+ if (pcmk__ipc_client_count() == 1) {
+ client_disconnect_cleanup(NULL);
+ }
+
+ if (client->remote->tls_session) {
+ void *sock_ptr;
+ int csock;
+
+ sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session);
+ csock = GPOINTER_TO_INT(sock_ptr);
+
+ gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_RDWR);
+ gnutls_deinit(*client->remote->tls_session);
+ gnutls_free(client->remote->tls_session);
+ close(csock);
+ }
+
+ lrmd_client_destroy(client);
+ return;
+}
+
+static gboolean
+lrmd_auth_timeout_cb(gpointer data)
+{
+ pcmk__client_t *client = data;
+
+ client->remote->auth_timeout = 0;
+
+ if (pcmk_is_set(client->flags,
+ pcmk__client_tls_handshake_complete)) {
+ return FALSE;
+ }
+
+ mainloop_del_fd(client->remote->source);
+ client->remote->source = NULL;
+ crm_err("Remote client authentication timed out");
+
+ return FALSE;
+}
+
+// Dispatch callback for remote server socket
+static int
+lrmd_remote_listen(gpointer data)
+{
+ int csock = -1;
+ gnutls_session_t *session = NULL;
+ pcmk__client_t *new_client = NULL;
+
+ // For client socket
+ static struct mainloop_fd_callbacks lrmd_remote_fd_cb = {
+ .dispatch = lrmd_remote_client_msg,
+ .destroy = lrmd_remote_client_destroy,
+ };
+
+ CRM_CHECK(ssock >= 0, return TRUE);
+
+ if (pcmk__accept_remote_connection(ssock, &csock) != pcmk_rc_ok) {
+ return TRUE;
+ }
+
+ session = pcmk__new_tls_session(csock, GNUTLS_SERVER, GNUTLS_CRD_PSK,
+ psk_cred_s);
+ if (session == NULL) {
+ close(csock);
+ return TRUE;
+ }
+
+ new_client = pcmk__new_unauth_client(NULL);
+ new_client->remote = calloc(1, sizeof(pcmk__remote_t));
+ pcmk__set_client_flags(new_client, pcmk__client_tls);
+ new_client->remote->tls_session = session;
+
+ // Require the client to authenticate within this time
+ new_client->remote->auth_timeout = g_timeout_add(LRMD_REMOTE_AUTH_TIMEOUT,
+ lrmd_auth_timeout_cb,
+ new_client);
+ crm_info("Remote client pending authentication "
+ CRM_XS " %p id: %s", new_client, new_client->id);
+
+ new_client->remote->source =
+ mainloop_add_fd("pacemaker-remote-client", G_PRIORITY_DEFAULT, csock,
+ new_client, &lrmd_remote_fd_cb);
+ return TRUE;
+}
+
+static void
+tls_server_dropped(gpointer user_data)
+{
+ crm_notice("TLS server session ended");
+ return;
+}
+
+// \return 0 on success, -1 on error (gnutls_psk_server_credentials_function)
+static int
+lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key)
+{
+ return (lrmd__init_remote_key(key) == pcmk_rc_ok)? 0 : -1;
+}
+
+static int
+bind_and_listen(struct addrinfo *addr)
+{
+ int optval;
+ int fd;
+ int rc;
+ char buffer[INET6_ADDRSTRLEN] = { 0, };
+
+ pcmk__sockaddr2str(addr->ai_addr, buffer);
+ crm_trace("Attempting to bind to address %s", buffer);
+
+ fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+ if (fd < 0) {
+ crm_perror(LOG_ERR, "Listener socket creation failed");
+ return -1;
+ }
+
+ /* reuse address */
+ optval = 1;
+ rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
+ if (rc < 0) {
+ crm_perror(LOG_ERR, "Local address reuse not allowed on %s", buffer);
+ close(fd);
+ return -1;
+ }
+
+ if (addr->ai_family == AF_INET6) {
+ optval = 0;
+ rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval));
+ if (rc < 0) {
+ crm_perror(LOG_INFO, "Couldn't disable IPV6-only on %s", buffer);
+ close(fd);
+ return -1;
+ }
+ }
+
+ if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) {
+ crm_perror(LOG_ERR, "Cannot bind to %s", buffer);
+ close(fd);
+ return -1;
+ }
+
+ if (listen(fd, 10) == -1) {
+ crm_perror(LOG_ERR, "Cannot listen on %s", buffer);
+ close(fd);
+ return -1;
+ }
+ return fd;
+}
+
+static int
+get_address_info(const char *bind_name, int port, struct addrinfo **res)
+{
+ int rc;
+ char port_str[6]; // at most "65535"
+ struct addrinfo hints;
+
+ memset(&hints, 0, sizeof(struct addrinfo));
+ hints.ai_flags = AI_PASSIVE;
+ hints.ai_family = AF_UNSPEC; // IPv6 or IPv4
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+
+ snprintf(port_str, sizeof(port_str), "%d", port);
+ rc = getaddrinfo(bind_name, port_str, &hints, res);
+ if (rc) {
+ crm_err("Unable to get IP address(es) for %s: %s",
+ (bind_name? bind_name : "local node"), gai_strerror(rc));
+ return -EADDRNOTAVAIL;
+ }
+ return pcmk_ok;
+}
+
+int
+lrmd_init_remote_tls_server(void)
+{
+ int filter;
+ int port = crm_default_remote_port();
+ struct addrinfo *res = NULL, *iter;
+ gnutls_datum_t psk_key = { NULL, 0 };
+ const char *bind_name = getenv("PCMK_remote_address");
+
+ static struct mainloop_fd_callbacks remote_listen_fd_callbacks = {
+ .dispatch = lrmd_remote_listen,
+ .destroy = tls_server_dropped,
+ };
+
+ CRM_CHECK(ssock == -1, return ssock);
+
+ crm_debug("Starting TLS listener on %s port %d",
+ (bind_name? bind_name : "all addresses on"), port);
+ crm_gnutls_global_init();
+ gnutls_global_set_log_function(debug_log);
+
+ if (pcmk__init_tls_dh(&dh_params) != pcmk_rc_ok) {
+ return -1;
+ }
+ gnutls_psk_allocate_server_credentials(&psk_cred_s);
+ gnutls_psk_set_server_credentials_function(psk_cred_s, lrmd_tls_server_key_cb);
+ gnutls_psk_set_server_dh_params(psk_cred_s, dh_params);
+
+ /* The key callback won't get called until the first client connection
+ * attempt. Do it once here, so we can warn the user at start-up if we can't
+ * read the key. We don't error out, though, because it's fine if the key is
+ * going to be added later.
+ */
+ if (lrmd__init_remote_key(&psk_key) != pcmk_rc_ok) {
+ crm_warn("A cluster connection will not be possible until the key is available");
+ }
+ gnutls_free(psk_key.data);
+
+ if (get_address_info(bind_name, port, &res) != pcmk_ok) {
+ return -1;
+ }
+
+ /* Currently we listen on only one address from the resulting list (the
+ * first IPv6 address we can bind to if possible, otherwise the first IPv4
+ * address we can bind to). When bind_name is NULL, this should be the
+ * respective wildcard address.
+ *
+ * @TODO If there is demand for specifying more than one address, allow
+ * bind_name to be a space-separated list, call getaddrinfo() for each,
+ * and create a socket for each result (set IPV6_V6ONLY on IPv6 sockets
+ * since IPv4 listeners will have their own sockets).
+ */
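+    // Two passes: try IPv6 addresses first, then fall back to IPv4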
+ iter = res;
+ filter = AF_INET6;
+ while (iter) {
+ if (iter->ai_family == filter) {
+ ssock = bind_and_listen(iter);
+ }
+ if (ssock != -1) {
+ break;
+ }
+
+ iter = iter->ai_next;
+ if (iter == NULL && filter == AF_INET6) {
+ iter = res;
+ filter = AF_INET;
+ }
+ }
+
+ if (ssock >= 0) {
+ mainloop_add_fd("pacemaker-remote-server", G_PRIORITY_DEFAULT, ssock,
+ NULL, &remote_listen_fd_callbacks);
+ crm_debug("Started TLS listener on %s port %d",
+ (bind_name? bind_name : "all addresses on"), port);
+ }
+ freeaddrinfo(res);
+ return ssock;
+}
+
+void
+execd_stop_tls_server(void)
+{
+ if (psk_cred_s) {
+ gnutls_psk_free_server_credentials(psk_cred_s);
+ psk_cred_s = 0;
+ }
+
+ if (ssock >= 0) {
+ close(ssock);
+ ssock = -1;
+ }
+}
+#endif