summaryrefslogtreecommitdiffstats
path: root/exec/main.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-03 17:01:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-03 17:01:24 +0000
commit6dd3dfb79125cd02d02efbce435a6c82e5af92ef (patch)
tree45084fc83278586f6bbafcb935f92d53f71a6b03 /exec/main.c
parentInitial commit. (diff)
downloadcorosync-6dd3dfb79125cd02d02efbce435a6c82e5af92ef.tar.xz
corosync-6dd3dfb79125cd02d02efbce435a6c82e5af92ef.zip
Adding upstream version 3.1.8.upstream/3.1.8upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'exec/main.c')
-rw-r--r--exec/main.c1666
1 files changed, 1666 insertions, 0 deletions
diff --git a/exec/main.c b/exec/main.c
new file mode 100644
index 0000000..977aaf5
--- /dev/null
+++ b/exec/main.c
@@ -0,0 +1,1666 @@
+/*
+ * Copyright (c) 2002-2006 MontaVista Software, Inc.
+ * Copyright (c) 2006-2021 Red Hat, Inc.
+ *
+ * All rights reserved.
+ *
+ * Author: Steven Dake (sdake@redhat.com)
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \mainpage Corosync
+ *
+ * This is the doxygen generated developer documentation for the Corosync
+ * project. For more information about Corosync, please see the project
+ * web site, <a href="http://www.corosync.org">corosync.org</a>.
+ *
+ * \section license License
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <pthread.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/poll.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <signal.h>
+#include <sched.h>
+#include <time.h>
+#include <semaphore.h>
+#include <string.h>
+
+#ifdef HAVE_LIBSYSTEMD
+#include <systemd/sd-daemon.h>
+#endif
+
+#include <qb/qbdefs.h>
+#include <qb/qblog.h>
+#include <qb/qbloop.h>
+#include <qb/qbutil.h>
+#include <qb/qbipcs.h>
+
+#include <corosync/swab.h>
+#include <corosync/corotypes.h>
+#include <corosync/corodefs.h>
+#include <corosync/totem/totempg.h>
+#include <corosync/logsys.h>
+#include <corosync/icmap.h>
+
+#include "quorum.h"
+#include "totemsrp.h"
+#include "logconfig.h"
+#include "totemconfig.h"
+#include "main.h"
+#include "sync.h"
+#include "timer.h"
+#include "util.h"
+#include "apidef.h"
+#include "service.h"
+#include "schedwrk.h"
+#include "ipcs_stats.h"
+#include "stats.h"
+
+#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
+#define IPC_LOGSYS_SIZE 1024*64
+#else
+#define IPC_LOGSYS_SIZE 8192*128
+#endif
+
+/*
+ * LibQB adds default "*" syslog filter so we have to set syslog_priority as low
+ * as possible so filters applied later in _logsys_config_apply_per_file takes
+ * effect.
+ */
+LOGSYS_DECLARE_SYSTEM ("corosync",
+ LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG,
+ LOG_DAEMON,
+ LOG_EMERG);
+
+LOGSYS_DECLARE_SUBSYS ("MAIN");
+
+#define SERVER_BACKLOG 5
+
+static int sched_priority = 0;
+
+static unsigned int service_count = 32;
+
+static struct totem_logging_configuration totem_logging_configuration;
+
+static struct corosync_api_v1 *api = NULL;
+
+static int sync_in_process = 1;
+
+static qb_loop_t *corosync_poll_handle;
+
+struct sched_param global_sched_param;
+
+static corosync_timer_handle_t corosync_stats_timer_handle;
+
+static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
+
+static char corosync_config_file[PATH_MAX + 1] = COROSYSCONFDIR "/corosync.conf";
+
+static int lockfile_fd = -1;
+
+enum move_to_root_cgroup_mode {
+ MOVE_TO_ROOT_CGROUP_MODE_OFF = 0,
+ MOVE_TO_ROOT_CGROUP_MODE_ON = 1,
+ MOVE_TO_ROOT_CGROUP_MODE_AUTO = 2,
+};
+
+qb_loop_t *cs_poll_handle_get (void)
+{
+ return (corosync_poll_handle);
+}
+
+int cs_poll_dispatch_add (qb_loop_t * handle,
+ int fd,
+ int events,
+ void *data,
+
+ int (*dispatch_fn) (int fd,
+ int revents,
+ void *data))
+{
+ return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data,
+ dispatch_fn);
+}
+
+int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
+{
+ return qb_loop_poll_del(handle, fd);
+}
+
+void corosync_state_dump (void)
+{
+ int i;
+
+ for (i = 0; i < SERVICES_COUNT_MAX; i++) {
+ if (corosync_service[i] && corosync_service[i]->exec_dump_fn) {
+ corosync_service[i]->exec_dump_fn ();
+ }
+ }
+}
+
+const char *corosync_get_config_file(void)
+{
+
+ return (corosync_config_file);
+}
+
+static void corosync_blackbox_write_to_file (void)
+{
+ char fname[PATH_MAX];
+ char fdata_fname[PATH_MAX];
+ char time_str[PATH_MAX];
+ struct tm cur_time_tm;
+ time_t cur_time_t;
+ ssize_t res;
+
+ cur_time_t = time(NULL);
+ localtime_r(&cur_time_t, &cur_time_tm);
+
+ strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm);
+ if (snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld",
+ get_state_dir(),
+ time_str,
+ (long long int)getpid()) >= PATH_MAX) {
+ log_printf(LOGSYS_LEVEL_ERROR, "Can't snprintf blackbox file name");
+ return ;
+ }
+
+ if ((res = qb_log_blackbox_write_to_file(fname)) < 0) {
+ LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file");
+ return ;
+ }
+ snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_state_dir());
+ unlink(fdata_fname);
+ if (symlink(fname, fdata_fname) == -1) {
+ log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'",
+ fname, fdata_fname);
+ }
+}
+
+static void unlink_all_completed (void)
+{
+ api->timer_delete (corosync_stats_timer_handle);
+ qb_loop_stop (corosync_poll_handle);
+ icmap_fini();
+}
+
+void corosync_shutdown_request (void)
+{
+ corosync_service_unlink_all (api, unlink_all_completed);
+}
+
+static int32_t sig_diag_handler (int num, void *data)
+{
+ corosync_state_dump ();
+ return 0;
+}
+
+static int32_t sig_exit_handler (int num, void *data)
+{
+ log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal");
+ corosync_service_unlink_all (api, unlink_all_completed);
+ return 0;
+}
+
+static void sigsegv_handler (int num)
+{
+ (void)signal (num, SIG_DFL);
+ corosync_blackbox_write_to_file ();
+ qb_log_fini();
+ raise (num);
+}
+
+#define LOCALHOST_IP inet_addr("127.0.0.1")
+
+static void *corosync_group_handle;
+
+static struct totempg_group corosync_group = {
+ .group = "a",
+ .group_len = 1
+};
+
+static void serialize_lock (void)
+{
+}
+
+static void serialize_unlock (void)
+{
+}
+
+static void corosync_sync_completed (void)
+{
+ log_printf (LOGSYS_LEVEL_NOTICE,
+ "Completed service synchronization, ready to provide service.");
+ sync_in_process = 0;
+
+ cs_ipcs_sync_state_changed(sync_in_process);
+ cs_ipc_allow_connections(1);
+ /*
+ * Inform totem to start using new message queue again
+ */
+ totempg_trans_ack();
+
+#ifdef HAVE_LIBSYSTEMD
+ sd_notify (0, "READY=1");
+#endif
+}
+
+static int corosync_sync_callbacks_retrieve (
+ int service_id,
+ struct sync_callbacks *callbacks)
+{
+ if (corosync_service[service_id] == NULL) {
+ return (-1);
+ }
+
+ if (callbacks == NULL) {
+ return (0);
+ }
+
+ callbacks->name = corosync_service[service_id]->name;
+
+ callbacks->sync_init = corosync_service[service_id]->sync_init;
+ callbacks->sync_process = corosync_service[service_id]->sync_process;
+ callbacks->sync_activate = corosync_service[service_id]->sync_activate;
+ callbacks->sync_abort = corosync_service[service_id]->sync_abort;
+ return (0);
+}
+
+static struct memb_ring_id corosync_ring_id;
+
+static void member_object_joined (unsigned int nodeid)
+{
+ char member_ip[ICMAP_KEYNAME_MAXLEN];
+ char member_join_count[ICMAP_KEYNAME_MAXLEN];
+ char member_status[ICMAP_KEYNAME_MAXLEN];
+
+ snprintf(member_ip, ICMAP_KEYNAME_MAXLEN,
+ "runtime.members.%u.ip", nodeid);
+ snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN,
+ "runtime.members.%u.join_count", nodeid);
+ snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
+ "runtime.members.%u.status", nodeid);
+
+ if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) {
+ icmap_inc(member_join_count);
+ icmap_set_string(member_status, "joined");
+ } else {
+ icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid));
+ icmap_set_uint32(member_join_count, 1);
+ icmap_set_string(member_status, "joined");
+ }
+
+ log_printf (LOGSYS_LEVEL_DEBUG,
+ "Member joined: %s", api->totem_ifaces_print (nodeid));
+}
+
+static void member_object_left (unsigned int nodeid)
+{
+ char member_status[ICMAP_KEYNAME_MAXLEN];
+
+ snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
+ "runtime.members.%u.status", nodeid);
+ icmap_set_string(member_status, "left");
+
+ log_printf (LOGSYS_LEVEL_DEBUG,
+ "Member left: %s", api->totem_ifaces_print (nodeid));
+}
+
+static void confchg_fn (
+ enum totem_configuration_type configuration_type,
+ const unsigned int *member_list, size_t member_list_entries,
+ const unsigned int *left_list, size_t left_list_entries,
+ const unsigned int *joined_list, size_t joined_list_entries,
+ const struct memb_ring_id *ring_id)
+{
+ int i;
+ int abort_activate = 0;
+
+ if (sync_in_process == 1) {
+ abort_activate = 1;
+ }
+ sync_in_process = 1;
+ cs_ipcs_sync_state_changed(sync_in_process);
+ memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));
+
+ for (i = 0; i < left_list_entries; i++) {
+ member_object_left (left_list[i]);
+ }
+ for (i = 0; i < joined_list_entries; i++) {
+ member_object_joined (joined_list[i]);
+ }
+ /*
+ * Call configuration change for all services
+ */
+ for (i = 0; i < service_count; i++) {
+ if (corosync_service[i] && corosync_service[i]->confchg_fn) {
+ corosync_service[i]->confchg_fn (configuration_type,
+ member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries, ring_id);
+ }
+ }
+
+ if (abort_activate) {
+ sync_abort ();
+ }
+ if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
+ sync_save_transitional (member_list, member_list_entries, ring_id);
+ }
+ if (configuration_type == TOTEM_CONFIGURATION_REGULAR) {
+ sync_start (member_list, member_list_entries, ring_id);
+ }
+}
+
+static void priv_drop (void)
+{
+ return; /* TODO: we are still not dropping privs */
+}
+
+static void corosync_tty_detach (void)
+{
+ int devnull;
+
+ /*
+ * Disconnect from TTY if this is not a debug run
+ */
+
+ switch (fork ()) {
+ case -1:
+ corosync_exit_error (COROSYNC_DONE_FORK);
+ break;
+ case 0:
+ /*
+ * child which is disconnected, run this process
+ */
+ break;
+ default:
+ exit (0);
+ break;
+ }
+
+ /* Create new session */
+ (void)setsid();
+
+ /*
+ * Map stdin/out/err to /dev/null.
+ */
+ devnull = open("/dev/null", O_RDWR);
+ if (devnull == -1) {
+ corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
+ }
+
+ if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0
+ || dup2(devnull, 2) < 0) {
+ close(devnull);
+ corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
+ }
+ close(devnull);
+}
+
+static void corosync_mlockall (void)
+{
+ int res;
+ struct rlimit rlimit;
+
+ rlimit.rlim_cur = RLIM_INFINITY;
+ rlimit.rlim_max = RLIM_INFINITY;
+
+#ifndef RLIMIT_MEMLOCK
+#define RLIMIT_MEMLOCK RLIMIT_VMEM
+#endif
+
+ res = setrlimit (RLIMIT_MEMLOCK, &rlimit);
+ if (res == -1) {
+ LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
+ "Could not increase RLIMIT_MEMLOCK, not locking memory");
+ return;
+ }
+
+ res = mlockall (MCL_CURRENT | MCL_FUTURE);
+ if (res == -1) {
+ LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
+ "Could not lock memory of service to avoid page faults");
+ };
+}
+
+
+static void corosync_totem_stats_updater (void *data)
+{
+ totempg_stats_t * stats;
+ uint32_t total_mtt_rx_token;
+ uint32_t total_backlog_calc;
+ uint32_t total_token_holdtime;
+ int t, prev;
+ int32_t token_count;
+ const char *cstr;
+
+ stats = api->totem_get_stats();
+
+
+ stats->srp->firewall_enabled_or_nic_failure = stats->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0;
+
+ if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER ||
+ stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
+ cstr = "";
+
+ if (stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
+ cstr = "number of multicast sendmsg failures is above threshold";
+ }
+
+ if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER) {
+ cstr = "totem is continuously in gather state";
+ }
+
+ log_printf (LOGSYS_LEVEL_WARNING,
+ "Totem is unable to form a cluster because of an "
+ "operating system or network fault (reason: %s). The most common "
+ "cause of this message is that the local firewall is "
+ "configured improperly.", cstr);
+ stats->srp->firewall_enabled_or_nic_failure = 1;
+ } else {
+ stats->srp->firewall_enabled_or_nic_failure = 0;
+ }
+
+ total_mtt_rx_token = 0;
+ total_token_holdtime = 0;
+ total_backlog_calc = 0;
+ token_count = 0;
+ t = stats->srp->latest_token;
+ while (1) {
+ if (t == 0)
+ prev = TOTEM_TOKEN_STATS_MAX - 1;
+ else
+ prev = t - 1;
+ if (prev == stats->srp->earliest_token)
+ break;
+ /* if tx == 0, then dropped token (not ours) */
+ if (stats->srp->token[t].tx != 0 ||
+ (stats->srp->token[t].rx - stats->srp->token[prev].rx) > 0 ) {
+ total_mtt_rx_token += (stats->srp->token[t].rx - stats->srp->token[prev].rx);
+ total_token_holdtime += (stats->srp->token[t].tx - stats->srp->token[t].rx);
+ total_backlog_calc += stats->srp->token[t].backlog_calc;
+ token_count++;
+ }
+ t = prev;
+ }
+ if (token_count) {
+ stats->srp->mtt_rx_token = (total_mtt_rx_token / token_count);
+ stats->srp->avg_token_workload = (total_token_holdtime / token_count);
+ stats->srp->avg_backlog_calc = (total_backlog_calc / token_count);
+ }
+
+ stats->srp->time_since_token_last_received = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC -
+ stats->srp->token[stats->srp->latest_token].rx;
+
+ stats_trigger_trackers();
+
+ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
+ corosync_totem_stats_updater,
+ &corosync_stats_timer_handle);
+}
+
+static void corosync_totem_stats_init (void)
+{
+ /* start stats timer */
+ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
+ corosync_totem_stats_updater,
+ &corosync_stats_timer_handle);
+}
+
+static void deliver_fn (
+ unsigned int nodeid,
+ const void *msg,
+ unsigned int msg_len,
+ int endian_conversion_required)
+{
+ const struct qb_ipc_request_header *header;
+ int32_t service;
+ int32_t fn_id;
+ uint32_t id;
+
+ header = msg;
+ if (endian_conversion_required) {
+ id = swab32 (header->id);
+ } else {
+ id = header->id;
+ }
+
+ /*
+ * Call the proper executive handler
+ */
+ service = id >> 16;
+ fn_id = id & 0xffff;
+
+ if (!corosync_service[service]) {
+ return;
+ }
+ if (fn_id >= corosync_service[service]->exec_engine_count) {
+ log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
+ fn_id, service, corosync_service[service]->exec_engine_count);
+ return;
+ }
+
+ icmap_fast_inc(service_stats_rx[service][fn_id]);
+
+ if (endian_conversion_required) {
+ assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
+ corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn
+ ((void *)msg);
+ }
+
+ corosync_service[service]->exec_engine[fn_id].exec_handler_fn
+ (msg, nodeid);
+}
+
+int main_mcast (
+ const struct iovec *iovec,
+ unsigned int iov_len,
+ unsigned int guarantee)
+{
+ const struct qb_ipc_request_header *req = iovec->iov_base;
+ int32_t service;
+ int32_t fn_id;
+
+ service = req->id >> 16;
+ fn_id = req->id & 0xffff;
+
+ if (corosync_service[service]) {
+ icmap_fast_inc(service_stats_tx[service][fn_id]);
+ }
+
+ return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee));
+}
+
+static void corosync_ring_id_create_or_load (
+ struct memb_ring_id *memb_ring_id,
+ unsigned int nodeid)
+{
+ int fd;
+ int res = 0;
+ char filename[PATH_MAX];
+
+ snprintf (filename, sizeof(filename), "%s/ringid_%u",
+ get_state_dir(), nodeid);
+ fd = open (filename, O_RDONLY);
+ /*
+ * If file can be opened and read, read the ring id
+ */
+ if (fd != -1) {
+ res = read (fd, &memb_ring_id->seq, sizeof (uint64_t));
+ close (fd);
+ }
+ /*
+ * If file could not be opened or read, create a new ring id
+ */
+ if ((fd == -1) || (res != sizeof (uint64_t))) {
+ memb_ring_id->seq = 0;
+ fd = creat (filename, 0600);
+ if (fd != -1) {
+ res = write (fd, &memb_ring_id->seq, sizeof (uint64_t));
+ close (fd);
+ if (res == -1) {
+ LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR,
+ "Couldn't write ringid file '%s'", filename);
+
+ corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
+ }
+ } else {
+ LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR,
+ "Couldn't create ringid file '%s'", filename);
+
+ corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
+ }
+ }
+
+ memb_ring_id->rep = nodeid;
+}
+
+static void corosync_ring_id_store (
+ const struct memb_ring_id *memb_ring_id,
+ unsigned int nodeid)
+{
+ char filename[PATH_MAX];
+ int fd;
+ int res;
+
+ snprintf (filename, sizeof(filename), "%s/ringid_%u",
+ get_state_dir(), nodeid);
+
+ fd = creat (filename, 0600);
+ if (fd == -1) {
+ LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
+ "Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage",
+ memb_ring_id->seq);
+
+ corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
+ }
+ log_printf (LOGSYS_LEVEL_DEBUG,
+ "Storing new sequence id for ring " CS_PRI_RING_ID_SEQ, memb_ring_id->seq);
+ res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq));
+ close (fd);
+ if (res != sizeof(memb_ring_id->seq)) {
+ LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
+ "Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage",
+ memb_ring_id->seq);
+
+ corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
+ }
+}
+
+static qb_loop_timer_handle recheck_the_q_level_timer;
+void corosync_recheck_the_q_level(void *data)
+{
+ totempg_check_q_level(corosync_group_handle);
+ if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) {
+ qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
+ NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer);
+ }
+}
+
+struct sending_allowed_private_data_struct {
+ int reserved_msgs;
+};
+
+
+int corosync_sending_allowed (
+ unsigned int service,
+ unsigned int id,
+ const void *msg,
+ void *sending_allowed_private_data)
+{
+ struct sending_allowed_private_data_struct *pd =
+ (struct sending_allowed_private_data_struct *)sending_allowed_private_data;
+ struct iovec reserve_iovec;
+ struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg;
+ int sending_allowed;
+
+ reserve_iovec.iov_base = (char *)header;
+ reserve_iovec.iov_len = header->size;
+
+ pd->reserved_msgs = totempg_groups_joined_reserve (
+ corosync_group_handle,
+ &reserve_iovec, 1);
+ if (pd->reserved_msgs == -1) {
+ return -EINVAL;
+ }
+
+ /* Message ID out of range */
+ if (id >= corosync_service[service]->lib_engine_count) {
+ return -EINVAL;
+ }
+
+ sending_allowed = QB_FALSE;
+ if (corosync_quorum_is_quorate() == 1 ||
+ corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) {
+ // we are quorate
+ // now check flow control
+ if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
+ sending_allowed = QB_TRUE;
+ } else if (pd->reserved_msgs && sync_in_process == 0) {
+ sending_allowed = QB_TRUE;
+ } else if (pd->reserved_msgs == 0) {
+ return -ENOBUFS;
+ } else /* (sync_in_process) */ {
+ return -EINPROGRESS;
+ }
+ } else {
+ return -EHOSTUNREACH;
+ }
+
+ return (sending_allowed);
+}
+
+void corosync_sending_allowed_release (void *sending_allowed_private_data)
+{
+ struct sending_allowed_private_data_struct *pd =
+ (struct sending_allowed_private_data_struct *)sending_allowed_private_data;
+
+ if (pd->reserved_msgs == -1) {
+ return;
+ }
+ totempg_groups_joined_release (pd->reserved_msgs);
+}
+
+int message_source_is_local (const mar_message_source_t *source)
+{
+ int ret = 0;
+
+ assert (source != NULL);
+ if (source->nodeid == totempg_my_nodeid_get ()) {
+ ret = 1;
+ }
+ return ret;
+}
+
+void message_source_set (
+ mar_message_source_t *source,
+ void *conn)
+{
+ assert ((source != NULL) && (conn != NULL));
+ memset (source, 0, sizeof (mar_message_source_t));
+ source->nodeid = totempg_my_nodeid_get ();
+ source->conn = conn;
+}
+
+struct scheduler_pause_timeout_data {
+ struct totem_config *totem_config;
+ qb_loop_timer_handle handle;
+ unsigned long long tv_prev;
+ unsigned long long max_tv_diff;
+};
+
+static void timer_function_scheduler_timeout (void *data)
+{
+ struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
+ unsigned long long tv_current;
+ unsigned long long tv_diff;
+ uint64_t schedmiss_event_tstamp;
+
+ tv_current = qb_util_nano_current_get ();
+
+ if (timeout_data->tv_prev == 0) {
+ /*
+ * Initial call -> just pretent everything is ok
+ */
+ timeout_data->tv_prev = tv_current;
+ timeout_data->max_tv_diff = 0;
+ }
+
+ tv_diff = tv_current - timeout_data->tv_prev;
+ timeout_data->tv_prev = tv_current;
+
+ if (tv_diff > timeout_data->max_tv_diff) {
+ schedmiss_event_tstamp = qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC;
+
+ log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled (@%" PRIu64 ") for %0.4f ms "
+ "(threshold is %0.4f ms). Consider token timeout increase.",
+ schedmiss_event_tstamp,
+ (float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
+
+ stats_add_schedmiss_event(schedmiss_event_tstamp, (float)tv_diff / QB_TIME_NS_IN_MSEC);
+ }
+
+ /*
+ * Set next threshold, because token_timeout can change
+ */
+ timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8;
+ qb_loop_timer_add (corosync_poll_handle,
+ QB_LOOP_MED,
+ timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3,
+ timeout_data,
+ timer_function_scheduler_timeout,
+ &timeout_data->handle);
+}
+
+
+/*
+ * Set main pid RR scheduler.
+ * silent: don't log sched_get_priority_max and sched_setscheduler errors
+ * Returns: 0 - success, -1 failure, -2 platform doesn't support SCHED_RR
+ */
+static int corosync_set_rr_scheduler (int silent)
+{
+ int ret_val = 0;
+
+#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
+ int res;
+
+ sched_priority = sched_get_priority_max (SCHED_RR);
+ if (sched_priority != -1) {
+ global_sched_param.sched_priority = sched_priority;
+ res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
+ if (res == -1) {
+ if (!silent) {
+ LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
+ "Could not set SCHED_RR at priority %d",
+ global_sched_param.sched_priority);
+ }
+
+ global_sched_param.sched_priority = 0;
+#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
+ qb_log_thread_priority_set (SCHED_OTHER, 0);
+#endif
+ ret_val = -1;
+ } else {
+
+ /*
+ * Turn on SCHED_RR in logsys system
+ */
+#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
+ res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
+#else
+ res = -1;
+#endif
+ if (res == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR,
+ "Could not set logsys thread priority."
+ " Can't continue because of priority inversions.");
+ corosync_exit_error (COROSYNC_DONE_LOGSETUP);
+ }
+ }
+ } else {
+ if (!silent) {
+ LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
+ "Could not get maximum scheduler priority");
+ }
+ sched_priority = 0;
+ ret_val = -1;
+ }
+#else
+ log_printf(LOGSYS_LEVEL_WARNING,
+ "The Platform is missing process priority setting features. Leaving at default.");
+ ret_val = -2;
+#endif
+
+ return (ret_val);
+}
+
+
+/* The basename man page contains scary warnings about
+ thread-safety and portability, hence this */
+static const char *corosync_basename(const char *file_name)
+{
+ char *base;
+ base = strrchr (file_name, '/');
+ if (base) {
+ return base + 1;
+ }
+
+ return file_name;
+}
+
+static void
+_logsys_log_printf(int level, int subsys,
+ const char *function_name,
+ const char *file_name,
+ int file_line,
+ const char *format,
+ ...) __attribute__((format(printf, 6, 7)));
+
+static void
+_logsys_log_printf(int level, int subsys,
+ const char *function_name,
+ const char *file_name,
+ int file_line,
+ const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ qb_log_from_external_source_va(function_name, corosync_basename(file_name),
+ format, level, file_line,
+ subsys, ap);
+ va_end(ap);
+}
+
+static void fplay_key_change_notify_fn (
+ int32_t event,
+ const char *key_name,
+ struct icmap_notify_value new_val,
+ struct icmap_notify_value old_val,
+ void *user_data)
+{
+ if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) {
+ fprintf(stderr,"Writetofile\n");
+ corosync_blackbox_write_to_file ();
+ }
+ if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) {
+ fprintf(stderr,"statefump\n");
+ corosync_state_dump ();
+ }
+}
+
+static void corosync_fplay_control_init (void)
+{
+ icmap_track_t track = NULL;
+
+ icmap_set_string("runtime.blackbox.dump_flight_data", "no");
+ icmap_set_string("runtime.blackbox.dump_state", "no");
+
+ icmap_track_add("runtime.blackbox.dump_flight_data",
+ ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
+ fplay_key_change_notify_fn,
+ NULL, &track);
+ icmap_track_add("runtime.blackbox.dump_state",
+ ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
+ fplay_key_change_notify_fn,
+ NULL, &track);
+}
+
+static void force_gather_notify_fn(
+ int32_t event,
+ const char *key_name,
+ struct icmap_notify_value new_val,
+ struct icmap_notify_value old_val,
+ void *user_data)
+{
+ char *key_val;
+
+ if (icmap_get_string(key_name, &key_val) == CS_OK && strcmp(key_val, "no") == 0)
+ goto out;
+
+ icmap_set_string("runtime.force_gather", "no");
+
+ if (strcmp(key_name, "runtime.force_gather") == 0) {
+ log_printf(LOGSYS_LEVEL_ERROR, "Forcing into GATHER state\n");
+ totempg_force_gather();
+ }
+
+out:
+ free(key_val);
+}
+
+static void corosync_force_gather_init (void)
+{
+ icmap_track_t track = NULL;
+
+ icmap_set_string("runtime.force_gather", "no");
+
+ icmap_track_add("runtime.force_gather",
+ ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
+ force_gather_notify_fn,
+ NULL, &track);
+}
+
+/*
+ * Set RO flag for keys, which ether doesn't make sense to change by user (statistic)
+ * or which when changed are not reflected by runtime (totem.crypto_cipher, ...).
+ *
+ * Also some RO keys cannot be determined in this stage, so they are set later in
+ * other functions (like nodelist.local_node_pos, ...)
+ */
+static void set_icmap_ro_keys_flag (void)
+{
+ /*
+ * Set RO flag for all keys of internal configuration and runtime statistics
+ */
+ icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE);
+ icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE);
+ icmap_set_ro_access("runtime.config.", CS_TRUE, CS_TRUE);
+ icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE);
+ icmap_set_ro_access("uidgid.config.", CS_TRUE, CS_TRUE);
+ icmap_set_ro_access("system.", CS_TRUE, CS_TRUE);
+ icmap_set_ro_access("nodelist.", CS_TRUE, CS_TRUE);
+
+ /*
+ * Set RO flag for constrete keys of configuration which can't be changed
+ * during runtime
+ */
+ icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.crypto_model", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.keyfile", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.key", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.ip_version", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.transport", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.cluster_name", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.threads", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.version", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.nodeid", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("totem.clear_node_high_bit", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("config.reload_in_progress", CS_FALSE, CS_TRUE);
+ icmap_set_ro_access("config.totemconfig_reload_in_progress", CS_FALSE, CS_TRUE);
+}
+
+static void main_service_ready (void)
+{
+ int res;
+
+ /*
+ * This must occur after totempg is initialized because "this_ip" must be set
+ */
+ res = corosync_service_defaults_link_and_init (api);
+ if (res == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services");
+ corosync_exit_error (COROSYNC_DONE_INIT_SERVICES);
+ }
+ cs_ipcs_init();
+ corosync_totem_stats_init ();
+ corosync_fplay_control_init ();
+ corosync_force_gather_init ();
+
+ sync_init (
+ corosync_sync_callbacks_retrieve,
+ corosync_sync_completed);
+}
+
+static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
+{
+ struct flock lock;
+ enum e_corosync_done err;
+ char pid_s[17];
+ int fd_flag;
+
+ err = COROSYNC_DONE_EXIT;
+
+ lockfile_fd = open (lockfile, O_WRONLY | O_CREAT, 0640);
+ if (lockfile_fd == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
+ return (COROSYNC_DONE_ACQUIRE_LOCK);
+ }
+
+retry_fcntl:
+ lock.l_type = F_WRLCK;
+ lock.l_start = 0;
+ lock.l_whence = SEEK_SET;
+ lock.l_len = 0;
+ if (fcntl (lockfile_fd, F_SETLK, &lock) == -1) {
+ switch (errno) {
+ case EINTR:
+ goto retry_fcntl;
+ break;
+ case EAGAIN:
+ case EACCES:
+ log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.");
+ err = COROSYNC_DONE_ALREADY_RUNNING;
+ goto error_close;
+ break;
+ default:
+ log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s",
+ strerror(errno));
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
+ goto error_close;
+ break;
+ }
+ }
+
+ if (ftruncate (lockfile_fd, 0) == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s",
+ strerror (errno));
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
+ goto error_close_unlink;
+ }
+
+ memset (pid_s, 0, sizeof (pid_s));
+ snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid);
+
+retry_write:
+ if (write (lockfile_fd, pid_s, strlen (pid_s)) != strlen (pid_s)) {
+ if (errno == EINTR) {
+ goto retry_write;
+ } else {
+ log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
+ "Error was %s", strerror (errno));
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
+ goto error_close_unlink;
+ }
+ }
+
+ if ((fd_flag = fcntl (lockfile_fd, F_GETFD, 0)) == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
+ "Error was %s", strerror (errno));
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
+ goto error_close_unlink;
+ }
+ fd_flag |= FD_CLOEXEC;
+ if (fcntl (lockfile_fd, F_SETFD, fd_flag) == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
+ "Error was %s", strerror (errno));
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
+ goto error_close_unlink;
+ }
+
+ return (err);
+
+error_close_unlink:
+ unlink (lockfile);
+error_close:
+ close (lockfile_fd);
+
+ return (err);
+}
+
+static int corosync_move_to_root_cgroup(void) {
+ FILE *f;
+ int res = -1;
+ const char *cgroup_task_fname = NULL;
+
+ /*
+ * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
+ * using systemd and systemd uses hardcoded path of cgroup mount point.
+ *
+ * This feature is expected to be removed as soon as systemd gets support
+ * for managing RT configuration.
+ */
+ f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
+ if (f == NULL) {
+ /*
+ * Try cgroup v2
+ */
+ f = fopen("/sys/fs/cgroup/cgroup.procs", "rt");
+ if (f == NULL) {
+ log_printf(LOG_DEBUG, "cpu.rt_runtime_us or cgroup.procs doesn't exist -> "
+ "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
+
+ res = 0;
+ goto exit_res;
+ } else {
+ log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v2 root cgroup");
+
+ cgroup_task_fname = "/sys/fs/cgroup/cgroup.procs";
+ }
+ } else {
+ log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v1 root cgroup");
+
+ cgroup_task_fname = "/sys/fs/cgroup/cpu/tasks";
+ }
+ (void)fclose(f);
+
+ f = fopen(cgroup_task_fname, "w");
+ if (f == NULL) {
+ log_printf(LOGSYS_LEVEL_WARNING, "Can't open cgroups tasks file for writing");
+
+ goto exit_res;
+ }
+
+ if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
+ log_printf(LOGSYS_LEVEL_WARNING, "Can't write corosync pid into cgroups tasks file");
+
+ goto close_and_exit_res;
+ }
+
+close_and_exit_res:
+ if (fclose(f) != 0) {
+ log_printf(LOGSYS_LEVEL_WARNING, "Can't close cgroups tasks file");
+
+ goto exit_res;
+ }
+
+exit_res:
+ return (res);
+}
+
+static void show_version_info_crypto(void)
+{
+ const char *error_string;
+ const char *list_str;
+
+ if (util_is_valid_knet_crypto_model(NULL, &list_str, 1, "", &error_string) != -1) {
+ printf("Available crypto models: %s\n", list_str);
+ } else {
+ perror(error_string);
+ }
+}
+
+static void show_version_info_compress(void)
+{
+ const char *error_string;
+ const char *list_str;
+
+ if (util_is_valid_knet_compress_model(NULL, &list_str, 1, "", &error_string) != -1) {
+ printf("Available compression models: %s\n", list_str);
+ } else {
+ perror(error_string);
+ }
+}
+
+static void show_version_info(void)
+{
+
+ printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
+ printf ("Copyright (c) 2006-2021 Red Hat, Inc.\n");
+
+ printf ("\nBuilt-in features:" PACKAGE_FEATURES "\n");
+
+ show_version_info_crypto();
+ show_version_info_compress();
+}
+
+int main (int argc, char **argv, char **envp)
+{
+ const char *error_string;
+ struct totem_config totem_config;
+ int res, ch;
+ int background, sched_rr, prio, testonly;
+ enum move_to_root_cgroup_mode move_to_root_cgroup;
+ enum e_corosync_done flock_err;
+ uint64_t totem_config_warnings;
+ struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
+ long int tmpli;
+ char *ep;
+ char *tmp_str;
+ int log_subsys_id_totem;
+ int silent;
+
+ /* default configuration
+ */
+ background = 1;
+ testonly = 0;
+
+ while ((ch = getopt (argc, argv, "c:ftv")) != EOF) {
+
+ switch (ch) {
+ case 'c':
+ res = snprintf(corosync_config_file, sizeof(corosync_config_file), "%s", optarg);
+ if (res >= sizeof(corosync_config_file)) {
+ fprintf (stderr, "Config file path too long.\n");
+ syslog (LOGSYS_LEVEL_ERROR, "Config file path too long.");
+
+ logsys_system_fini();
+ return EXIT_FAILURE;
+ }
+ break;
+ case 'f':
+ background = 0;
+ break;
+ case 't':
+ testonly = 1;
+ break;
+ case 'v':
+ show_version_info();
+ logsys_system_fini();
+ return EXIT_SUCCESS;
+
+ break;
+ default:
+ fprintf(stderr, \
+ "usage:\n"\
+ " -c : Corosync config file path.\n"\
+ " -f : Start application in foreground.\n"\
+ " -t : Test configuration and exit.\n"\
+ " -v : Display version, git revision and some useful information about Corosync and exit.\n");
+ logsys_system_fini();
+ return EXIT_FAILURE;
+ }
+ }
+
+
+ /*
+ * Other signals are registered later via qb_loop_signal_add
+ */
+ (void)signal (SIGSEGV, sigsegv_handler);
+ (void)signal (SIGABRT, sigsegv_handler);
+#if MSG_NOSIGNAL != 0
+ (void)signal (SIGPIPE, SIG_IGN);
+#endif
+
+ if (icmap_init() != CS_OK) {
+ fprintf (stderr, "Corosync Executive couldn't initialize configuration component.\n");
+ syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component.");
+ corosync_exit_error (COROSYNC_DONE_ICMAP);
+ }
+ set_icmap_ro_keys_flag();
+
+ /*
+ * Initialize the corosync_api_v1 definition
+ */
+ api = apidef_get ();
+
+ res = coroparse_configparse(icmap_get_global_map(), &error_string);
+ if (res == -1) {
+ /*
+ * Logsys can't log properly at this early stage, and we need to get this message out
+ *
+ */
+ fprintf (stderr, "%s\n", error_string);
+ syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
+ corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
+ }
+
+ if (stats_map_init(api) != CS_OK) {
+ fprintf (stderr, "Corosync Executive couldn't initialize statistics component.\n");
+ syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize statistics component.");
+ corosync_exit_error (COROSYNC_DONE_STATS);
+ }
+
+ res = corosync_log_config_read (&error_string);
+ if (res == -1) {
+ /*
+ * if we are here, we _must_ flush the logsys queue
+ * and try to inform that we couldn't read the config.
+ * this is a desperate attempt before certain death
+ * and there is no guarantee that we can print to stderr
+ * nor that logsys is sending the messages where we expect.
+ */
+ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
+ fprintf(stderr, "%s", error_string);
+ syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
+ corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
+ }
+
+ if (!testonly) {
+ log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine %s starting up", VERSION);
+ log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "");
+ }
+
+ /*
+ * Create totem logsys subsys before totem_config_read so log functions can be used
+ */
+ log_subsys_id_totem = _logsys_subsys_create("TOTEM", "totem,"
+ "totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
+ "totempg.c,totemudp.c,totemudpu.c,totemnet.c,totemknet.c");
+
+ res = chdir(get_state_dir());
+ if (res == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to state directory %s. %s", get_state_dir(), strerror(errno));
+ corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
+ }
+
+ res = totem_config_read (&totem_config, &error_string, &totem_config_warnings);
+ if (res == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
+ corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
+ }
+
+ if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) {
+ log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored.");
+ }
+
+ if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) {
+ log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated.");
+ }
+
+ if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_SET) {
+ log_printf (LOGSYS_LEVEL_WARNING, "nodeid in totem section is deprecated and ignored. "
+ "Nodelist (or autogenerated) nodeid is going to be used.");
+ }
+
+ if (totem_config_warnings & TOTEM_CONFIG_BINDNETADDR_NODELIST_SET) {
+ log_printf (LOGSYS_LEVEL_WARNING, "interface section bindnetaddr is used together with nodelist. "
+ "Nodelist one is going to be used.");
+ }
+
+ if (totem_config_warnings != 0) {
+ log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist.");
+ }
+
+ res = totem_config_validate (&totem_config, &error_string);
+ if (res == -1) {
+ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
+ corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
+ }
+
+ if (testonly) {
+ corosync_exit_error (COROSYNC_DONE_EXIT);
+ }
+
+
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_AUTO;
+ if (icmap_get_string("system.move_to_root_cgroup", &tmp_str) == CS_OK) {
+ /*
+ * Validity of move_to_root_cgroup values checked in coroparse.c
+ */
+ if (strcmp(tmp_str, "yes") == 0) {
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_ON;
+ } else if (strcmp(tmp_str, "no") == 0) {
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_OFF;
+ }
+ free(tmp_str);
+ }
+
+
+ sched_rr = 1;
+ if (icmap_get_string("system.sched_rr", &tmp_str) == CS_OK) {
+ if (strcmp(tmp_str, "yes") != 0) {
+ sched_rr = 0;
+ }
+ free(tmp_str);
+ }
+
+ prio = 0;
+ if (icmap_get_string("system.priority", &tmp_str) == CS_OK) {
+ if (strcmp(tmp_str, "max") == 0) {
+ prio = INT_MIN;
+ } else if (strcmp(tmp_str, "min") == 0) {
+ prio = INT_MAX;
+ } else {
+ errno = 0;
+
+ tmpli = strtol(tmp_str, &ep, 10);
+ if (errno != 0 || *ep != '\0' || tmpli > INT_MAX || tmpli < INT_MIN) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Priority value %s is invalid", tmp_str);
+ corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
+ }
+
+ prio = tmpli;
+ }
+
+ free(tmp_str);
+ }
+
+ if (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_ON) {
+ /*
+ * Try to move corosync into root cpu cgroup. Failure is not fatal and
+ * error is deliberately ignored.
+ */
+ (void)corosync_move_to_root_cgroup();
+ }
+
+ /*
+ * Set round robin realtime scheduling with priority 99
+ */
+ if (sched_rr) {
+ silent = (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO);
+ res = corosync_set_rr_scheduler (silent);
+
+ if (res == -1 && move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO) {
+ /*
+ * Try to move process to root cgroup and try set priority again
+ */
+ (void)corosync_move_to_root_cgroup();
+
+ res = corosync_set_rr_scheduler (0);
+ }
+
+ if (res != 0) {
+ prio = INT_MIN;
+ } else {
+ prio = 0;
+ }
+ }
+
+ if (prio != 0) {
+ if (setpriority(PRIO_PGRP, 0, prio) != 0) {
+ LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
+ "Could not set priority %d", prio);
+ }
+ }
+
+ totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load;
+ totem_config.totem_memb_ring_id_store = corosync_ring_id_store;
+
+ totem_config.totem_logging_configuration = totem_logging_configuration;
+ totem_config.totem_logging_configuration.log_subsys_id = log_subsys_id_totem;
+
+ totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING;
+ totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR;
+ totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING;
+ totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE;
+ totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG;
+ totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE;
+ totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
+
+ logsys_config_apply();
+
+ /*
+ * Now we are fully initialized.
+ */
+ if (background) {
+ logsys_blackbox_prefork();
+
+ corosync_tty_detach ();
+
+ logsys_blackbox_postfork();
+
+ log_printf (LOGSYS_LEVEL_DEBUG, "Corosync TTY detached");
+ }
+
+ /*
+ * Lock all memory to avoid page faults which may interrupt
+ * application healthchecking
+ */
+ corosync_mlockall ();
+
+ corosync_poll_handle = qb_loop_create ();
+
+ memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
+ scheduler_pause_timeout_data.totem_config = &totem_config;
+ timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
+
+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
+ SIGUSR2, NULL, sig_diag_handler, NULL);
+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
+ SIGINT, NULL, sig_exit_handler, NULL);
+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
+ SIGQUIT, NULL, sig_exit_handler, NULL);
+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
+ SIGTERM, NULL, sig_exit_handler, NULL);
+
+ if (logsys_thread_start() != 0) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread");
+ corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
+ }
+
+ if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
+ corosync_exit_error (flock_err);
+ }
+
+ /*
+ * if totempg_initialize doesn't have root priveleges, it cannot
+ * bind to a specific interface. This only matters if
+ * there is more then one interface in a system, so
+ * in this case, only a warning is printed
+ */
+ /*
+ * Join multicast group and setup delivery
+ * and configuration change functions
+ */
+ if (totempg_initialize (
+ corosync_poll_handle,
+ &totem_config) != 0) {
+
+ log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize TOTEM layer");
+ corosync_exit_error (COROSYNC_DONE_FATAL_ERR);
+ }
+
+ totempg_service_ready_register (
+ main_service_ready);
+
+ totempg_groups_initialize (
+ &corosync_group_handle,
+ deliver_fn,
+ confchg_fn);
+
+ totempg_groups_join (
+ corosync_group_handle,
+ &corosync_group,
+ 1);
+
+ /*
+ * Drop root privleges to user 'corosync'
+ * TODO: Don't really need full root capabilities;
+ * needed capabilities are:
+ * CAP_NET_RAW (bindtodevice)
+ * CAP_SYS_NICE (setscheduler)
+ * CAP_IPC_LOCK (mlockall)
+ */
+ priv_drop ();
+
+ schedwrk_init (
+ serialize_lock,
+ serialize_unlock);
+
+ /*
+ * Start main processing loop
+ */
+ qb_loop_run (corosync_poll_handle);
+
+ /*
+ * Exit was requested
+ */
+ totempg_finalize ();
+
+ /*
+ * free the loop resources
+ */
+ qb_loop_destroy (corosync_poll_handle);
+
+ /*
+ * free up the icmap
+ */
+
+ /*
+ * Remove pid lock file
+ */
+ close (lockfile_fd);
+ unlink (corosync_lock_file);
+
+ corosync_exit_error (COROSYNC_DONE_EXIT);
+
+ return EXIT_SUCCESS;
+}